{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#!pip3 install undetected-chromedriver\n", "#!pip3 install undetected-chromedriver selenium pandas\n", "#!pip3 install openpyxl\n", "#!pip3 install beautifulsoup4" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import time\n", "import datetime\n", "import pandas as pd\n", "import undetected_chromedriver as uc\n", "from selenium import webdriver\n", "from selenium.webdriver.common.by import By\n", "from selenium.webdriver.support.ui import WebDriverWait\n", "from selenium.webdriver.support import expected_conditions as EC" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "šŸ” Scraping Careers Page...\n", "\n", "\n", "āŒ No jobs found.\n" ] } ], "source": [ "\n", "\n", "# ---- Function to Scrape LinkedIn Jobs Using Selenium ----\n", "def scrape_linkedin_jobs(keyword, location):\n", " print(\"\\nšŸ” Scraping LinkedIn Jobs...\\n\")\n", "\n", " # Configure Selenium WebDriver (Headless Mode)\n", " options = webdriver.ChromeOptions()\n", " options.add_argument(\"--headless\") # Run without opening a browser\n", " options.add_argument(\"--no-sandbox\")\n", " options.add_argument(\"--disable-dev-shm-usage\")\n", "\n", " driver = uc.Chrome(options=options)\n", " \n", " # Generate LinkedIn job search URL\n", " search_url = f\"https://www.linkedin.com/jobs/search?keywords={keyword.replace(' ', '%20')}&location={location.replace(' ', '%20')}\"\n", " driver.get(search_url)\n", " \n", " # āœ… Scroll to load more jobs\n", " for _ in range(3): \n", " driver.execute_script(\"window.scrollBy(0, 800);\")\n", " time.sleep(2)\n", "\n", " # āœ… Wait for job listings to appear\n", " wait = WebDriverWait(driver, 15) # Increased wait time\n", " wait.until(EC.presence_of_element_located((By.CLASS_NAME, \"base-card\")))\n", "\n", " jobs = []\n", "\n", " # āœ… Find all job listings\n", " job_elements = driver.find_elements(By.CLASS_NAME, \"base-card\") \n", "\n", " for job in job_elements[:10]: # Limit to top 10 jobs\n", " try:\n", " title_element = job.find_element(By.CSS_SELECTOR, \"h3\") # Updated selector for job title\n", " title = title_element.text.strip()\n", "\n", " company_element = job.find_element(By.CSS_SELECTOR, \"h4\") # Updated selector for company name\n", " company = company_element.text.strip()\n", "\n", " link = job.find_element(By.TAG_NAME, \"a\").get_attribute(\"href\")\n", "\n", " jobs.append({\"title\": title, \"company\": company, \"link\": link, \"source\": \"LinkedIn\"})\n", " except Exception as e:\n", " print(f\"āš ļø Skipping a job entry due to error: {e}\")\n", " continue\n", "\n", " driver.quit()\n", " return jobs\n", "\n", "# ---- Run the Script and Save to Excel ----\n", "if __name__ == \"__main__\":\n", " keyword = input(\"Enter job title (e.g., Software Engineer): \")\n", " location = input(\"Enter location (e.g., Remote, New York, Berlin): \")\n", "\n", " linkedin_jobs = scrape_linkedin_jobs(keyword, location)\n", "\n", " if linkedin_jobs:\n", " df = pd.DataFrame(linkedin_jobs)\n", "\n", " # āœ… Save to Excel\n", " today_date = datetime.date.today().strftime(\"%Y-%m-%d\")\n", " filename = f\"linkedin_jobs_{today_date}.xlsx\"\n", " df.to_excel(filename, index=False)\n", " \n", " print(f\"\\nāœ… Jobs saved to {filename}\")\n", " else:\n", " print(\"\\nāŒ No LinkedIn jobs found.\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/marketing-internship-at-amazon-4111457309?position=1&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=sEZAo0lK6xh1n0%2BuUSWCmA%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/internship-pr-social-media-at-msm-digital-4121988576?position=2&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=EyLr1MNgmwM7eTzife4mRg%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/internship-marketing-at-pulse-advertising-4147607855?position=3&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=2XMBVlHfaCif61ir1QQXzw%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/internship-new-business-development-m-f-x-at-tietalent-4143635138?position=4&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=FPz6HjsyhYfcCZ6P1pzrsg%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/corporate-communications-and-events-intern-m-f-d-at-bat-4159358648?position=5&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=IdYmKG048vZobOumaZEyCg%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/communications-intern-northvolt-germany-at-northvolt-4121020101?position=6&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=dwLc50Bn25QiAH6PFbNWVQ%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/program-intern-northvolt-germany-at-northvolt-4121017542?position=7&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=a%2B4sry3aOnQDRfHKWy8lbQ%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/internship-finance-accounting-f-m-d-at-mutabor-4125908278?position=8&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=zFA7%2FT0SRn4ZgH3fuQvmdg%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/communications-intern-northvolt-germany-at-northvolt-poland-4092459625?position=9&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=W%2B7a6CHDCL1eZJjx0dooNw%3D%3D', 'source': 'LinkedIn'}, {'title': '', 'company': '', 'link': 'https://de.linkedin.com/jobs/view/program-intern-northvolt-germany-at-northvolt-poland-4090630146?position=10&pageNum=0&refId=3rcmXj%2F5XTlGX9JqFduCTw%3D%3D&trackingId=Z9%2BvQdDbSyWM8qzzWxsD5Q%3D%3D', 'source': 'LinkedIn'}]\n" ] } ], "source": [ "print(linkedin_jobs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "CV_R", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 2 }