Skip to content

Instantly share code, notes, and snippets.

Avatar

ChrisLovejoy chris-lovejoy

View GitHub Profile
View job_scraper.py
import urllib
import requests
from bs4 import BeautifulSoup
import selenium
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
import os
View job_scraper.py
def load_indeed_jobs_div(job_title, location, timeout=10):
    """Fetch the Indeed UK search results page and return the results column.

    Args:
        job_title: search keywords (sent as the 'q' query parameter).
        location: search location (sent as the 'l' query parameter).
        timeout: seconds to wait for the HTTP response before raising
            requests.exceptions.Timeout. requests has NO default timeout,
            so without this the call could hang indefinitely.

    Returns:
        The BeautifulSoup element with id "resultsCol", or None when the
        page markup does not contain that element.
    """
    # 'fromage': 'last' limits results to recent postings; 'sort': 'date'
    # returns newest first.
    query_params = {'q': job_title, 'l': location, 'fromage': 'last', 'sort': 'date'}
    url = 'https://www.indeed.co.uk/jobs?' + urllib.parse.urlencode(query_params)
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, "html.parser")
    job_soup = soup.find(id="resultsCol")
    return job_soup
@chris-lovejoy
chris-lovejoy / job_scraper.py
Last active Oct 5, 2020
job detail extraction
View job_scraper.py
def extract_job_title_indeed(job_elem):
    """Return the job title text from one Indeed job card.

    Args:
        job_elem: a parsed job-card element exposing BeautifulSoup's
            ``find(tag, class_=...)`` interface.

    Returns:
        The whitespace-stripped title text, or None when the card has no
        ``<h2 class="title">`` element. (Previously a missing element
        raised AttributeError on ``.text``.)
    """
    title_elem = job_elem.find('h2', class_='title')
    if title_elem is None:
        # Card markup varies between listings; skip rather than crash.
        return None
    return title_elem.text.strip()
def extract_company_indeed(job_elem):
    """Return the company name text from one Indeed job card.

    Args:
        job_elem: a parsed job-card element exposing BeautifulSoup's
            ``find(tag, class_=...)`` interface.

    Returns:
        The whitespace-stripped company name, or None when the card has
        no ``<span class="company">`` element. (Previously a missing
        element raised AttributeError on ``.text``.)
    """
    company_elem = job_elem.find('span', class_='company')
    if company_elem is None:
        # Consistent with extract_job_title_indeed: tolerate absent fields.
        return None
    return company_elem.text.strip()
@chris-lovejoy
chris-lovejoy / job_scraper.py
Created May 1, 2020
Finding each job card
View job_scraper.py
# Each Indeed search result is rendered as a <div class="jobsearch-SerpJobCard">;
# collect every job card from the results column for per-field extraction.
# NOTE(review): assumes `job_soup` is the element returned by
# load_indeed_jobs_div — confirm against the calling code.
job_elems = job_soup.find_all('div', class_='jobsearch-SerpJobCard')
@chris-lovejoy
chris-lovejoy / job_scraper.py
Created May 1, 2020
Creating lists for each job characteristic
View job_scraper.py
# Build two parallel lists: `cols` holds the names of the characteristics
# requested in `desired_characs`, and `extracted_info` holds one list of
# extracted values (one entry per job card) for each name in `cols`.
# NOTE(review): only the 'titles' branch is visible in this excerpt; the
# full script presumably has matching branches for the other characteristics.
cols = []
extracted_info = []
if 'titles' in desired_characs:
    titles = []
    cols.append('titles')
    # Pull the title text out of every job card found on the results page.
    for job_elem in job_elems:
        titles.append(extract_job_title_indeed(job_elem))
    extracted_info.append(titles)
@chris-lovejoy
chris-lovejoy / job_scraper.py
Created May 1, 2020
Creating the final jobs list
View job_scraper.py
# Pair each column name with its list of extracted values, producing a
# dict of {characteristic name: [value per job]} ready for DataFrame
# construction in save_jobs_to_excel.
jobs_list = {}
for j in range(len(cols)):
    jobs_list[cols[j]] = extracted_info[j]
# Every extracted list has one entry per job card, so the length of the
# first list is the number of listings found.
num_listings = len(extracted_info[0])
@chris-lovejoy
chris-lovejoy / job_scraper.py
Created May 1, 2020
Exporting information to file
View job_scraper.py
def save_jobs_to_excel(jobs_list, filename):
    """Persist the scraped job data to an Excel workbook.

    Args:
        jobs_list: dict mapping characteristic names to equal-length
            lists of values (one entry per job listing).
        filename: path of the Excel file to write.
    """
    pd.DataFrame(jobs_list).to_excel(filename)
@chris-lovejoy
chris-lovejoy / job_scraper.py
Last active Oct 5, 2020
single function to call
View job_scraper.py
def find_jobs_from(website, job_title, location, desired_characs, filename="results.xls"):
"""
This function extracts all the desired characteristics of all new job postings
of the title and location specified and returns them in single file.
The arguments it takes are:
- Website: to specify which website to search (options: 'Indeed' or 'CWjobs')
- Job_title
- Location
- Desired_characs: this is a list of the job characteristics of interest,
from titles, companies, links and date_listed.
View job_scraper.py
def initiate_driver(location_of_driver, browser):
    """Create a Selenium WebDriver for the requested browser.

    Args:
        location_of_driver: directory containing the driver executable
            (e.g. chromedriver) for the chosen browser.
        browser: one of 'chrome', 'firefox', 'safari' or 'edge'.

    Returns:
        The initialised webdriver instance.

    Raises:
        ValueError: if `browser` is not a supported name. (Previously an
            unrecognised name left `driver` unbound and crashed with
            UnboundLocalError at the return statement.)
    """
    # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour
    # of Service objects — confirm the installed selenium version.
    if browser == 'chrome':
        driver = webdriver.Chrome(executable_path=(location_of_driver + "/chromedriver"))
    elif browser == 'firefox':
        driver = webdriver.Firefox(executable_path=(location_of_driver + "/firefoxdriver"))
    elif browser == 'safari':
        driver = webdriver.Safari(executable_path=(location_of_driver + "/safaridriver"))
    elif browser == 'edge':
        driver = webdriver.Edge(executable_path=(location_of_driver + "/edgedriver"))
    else:
        raise ValueError("Unsupported browser: " + repr(browser))
    return driver
@chris-lovejoy
chris-lovejoy / job_scraper.py
Created May 1, 2020
use driver to extract HTML soup
View job_scraper.py
def make_job_search(job_title, location, driver):
driver.get('https://www.cwjobs.co.uk/')
# Select the job box
job_title_box = driver.find_element_by_name('Keywords')
# Send job information
job_title_box.send_keys(job_title)
# Selection location box