This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib | |
import requests | |
from bs4 import BeautifulSoup | |
import selenium | |
from selenium import webdriver | |
from selenium.webdriver.support.ui import WebDriverWait | |
import pandas as pd | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def initiate_driver(location_of_driver, browser):
    """Start a Selenium WebDriver session for the requested browser.

    Args:
        location_of_driver: Directory that holds the driver executable; the
            browser-specific file name is appended after a "/".
        browser: One of 'chrome', 'firefox', 'safari' or 'edge'.

    Returns:
        The WebDriver instance created for that browser.

    Raises:
        ValueError: If ``browser`` is not one of the supported names.
    """
    # browser name -> (attribute on ``webdriver``, driver file name).
    # NOTE(review): file names are kept exactly as originally written. The
    # stock Firefox driver is usually named "geckodriver", and newer Selenium
    # releases no longer accept ``executable_path`` -- confirm both against
    # the installed Selenium version and the files on disk.
    supported = {
        'chrome': ('Chrome', 'chromedriver'),
        'firefox': ('Firefox', 'firefoxdriver'),
        'safari': ('Safari', 'safaridriver'),
        'edge': ('Edge', 'edgedriver'),
    }

    # Reject unknown names up front; previously they fell through every
    # branch and failed on an unbound ``driver`` at the return statement.
    if browser not in supported:
        raise ValueError(
            "Unsupported browser {!r}; expected one of: {}".format(
                browser, ", ".join(sorted(supported))
            )
        )

    class_name, driver_file = supported[browser]
    driver_class = getattr(webdriver, class_name)
    return driver_class(executable_path=(location_of_driver + "/" + driver_file))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def find_title(item):
    """Return the title stored under item['snippet']['title']."""
    return item['snippet']['title']
def find_viewcount(item, youtube_api):
    """Look up the view count of the video referenced by a search result.

    Args:
        item: Search-result entry; the video id is read from
            item['id']['videoId'].
        youtube_api: Client exposing ``videos().list(...).execute()``
            (presumably a YouTube Data API client -- confirm with caller).

    Returns:
        The 'viewCount' statistic of the first returned item, as an int.
    """
    video_id = item['id']['videoId']
    response = youtube_api.videos().list(id=video_id, part='statistics').execute()
    # The value is converted with int() and returned so callers can use it;
    # the visible original computed it but never handed it back.
    return int(response['items'][0]['statistics']['viewCount'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, timedelta | |
# Build a timestamp string for midnight seven days before now.
# NOTE(review): datetime.today() is local time, yet the format below appends
# "Z" -- confirm whether UTC was intended.
today_date = datetime.today()
one_week_ago_date = today_date - timedelta(days=7)
# Zero the time of day so only the calendar date contributes to the string.
one_week_ago_string = one_week_ago_date.replace(
    hour=0, minute=0, second=0, microsecond=0
).strftime('%Y-%m-%dT%H:%M:%SZ')

# updating the search by adding 'publishedAfter'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_indeed_jobs_div(job_title, location, timeout=30):
    """Fetch an Indeed UK search page and return its results container.

    Args:
        job_title: Search text sent as the ``q`` query parameter.
        location: Location sent as the ``l`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout the request could block indefinitely.

    Returns:
        The element with id "resultsCol" from the parsed page, or None when
        the page contains no such element.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not by
    # itself guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import urlencode

    getVars = {'q': job_title, 'l': location, 'fromage': 'last', 'sort': 'date'}
    url = 'https://www.indeed.co.uk/jobs?' + urlencode(getVars)
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, "html.parser")
    return soup.find(id="resultsCol")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_job_title_indeed(job_elem):
    """Return the stripped text of the first <h2 class="title"> in job_elem."""
    return job_elem.find('h2', class_='title').text.strip()
def extract_company_indeed(job_elem):
    """Return the stripped text of the first <span class="company"> in job_elem."""
    return job_elem.find('span', class_='company').text.strip()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect every <div> in job_soup that carries the job-card CSS class.
job_elems = job_soup.find_all(
    'div',
    class_='jobsearch-SerpJobCard',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parallel lists: cols[i] names the column whose values are extracted_info[i].
cols = []
extracted_info = []

if 'titles' in desired_characs:
    cols.append('titles')
    # One title per element of job_elems, in the same order.
    titles = [extract_job_title_indeed(job_elem) for job_elem in job_elems]
    extracted_info.append(titles)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pair each column name with its list of extracted values.
jobs_list = dict(zip(cols, extracted_info))

# The listing count is taken from the first column (presumably every column
# holds one entry per listing -- confirm). With no columns collected there is
# nothing to index, so report zero instead of raising IndexError.
num_listings = len(extracted_info[0]) if extracted_info else 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def save_jobs_to_excel(jobs_list, filename):
    """Write jobs_list to an Excel file at filename via a pandas DataFrame."""
    pd.DataFrame(jobs_list).to_excel(filename)
OlderNewer