Skip to content

Instantly share code, notes, and snippets.

Avatar

ChrisLovejoy chris-lovejoy

View GitHub Profile
View job_scraper.py
import urllib
import requests
from bs4 import BeautifulSoup
import selenium
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
import os
@chris-lovejoy
chris-lovejoy / job_scraper.py
Created May 1, 2020
use driver to extract HTML soup
View job_scraper.py
def make_job_search(job_title, location, driver):
driver.get('https://www.cwjobs.co.uk/')
# Select the job box
job_title_box = driver.find_element_by_name('Keywords')
# Send job information
job_title_box.send_keys(job_title)
# Select the location box
View job_scraper.py
def initiate_driver(location_of_driver, browser):
    """Start a Selenium WebDriver for the requested browser.

    Args:
        location_of_driver: Directory that holds the driver executable. The
            executable's filename is appended to it after a "/".
        browser: One of 'chrome', 'firefox', 'safari' or 'edge'.

    Returns:
        The webdriver instance created for ``browser``.

    Raises:
        ValueError: If ``browser`` is not one of the supported names
            (previously this fell through to an unbound ``driver``).
    """
    # browser name -> (webdriver class name, driver executable filename)
    # NOTE(review): Firefox's driver is normally shipped as "geckodriver" and
    # Edge's as "msedgedriver" -- confirm these filenames match the binaries
    # actually present in location_of_driver.
    supported = {
        'chrome': ('Chrome', 'chromedriver'),
        'firefox': ('Firefox', 'firefoxdriver'),
        'safari': ('Safari', 'safaridriver'),
        'edge': ('Edge', 'edgedriver'),
    }
    if browser not in supported:
        raise ValueError(
            "Unsupported browser {!r}; expected one of: {}".format(
                browser, ", ".join(sorted(supported))
            )
        )

    class_name, driver_file = supported[browser]
    # Resolve the class only after validation so an unsupported name never
    # touches selenium at all.
    driver_class = getattr(webdriver, class_name)
    return driver_class(executable_path=(location_of_driver + "/" + driver_file))
View video_finder.py
from apiclient.discovery import build
# Client for version 'v3' of the 'youtube' API, authenticated with a developer key.
# NOTE(review): the key below is a placeholder; a real key would be better
# loaded from configuration than committed to source.
api_key = 'AIzaKyAq3L9BiVO0PXrGBhhY0cNN9fkPmm_BsPg'  # (not a real API key)
youtube_api = build('youtube', 'v3', developerKey=api_key)

# Search videos: query 'productivity', video results only, ordered by view
# count, requesting at most 50 results.
video_search_results = youtube_api.search().list(
    **{
        'q': 'productivity',
        'part': 'snippet',
        'type': 'video',
        'order': 'viewCount',
        'maxResults': 50,
    }
).execute()
# Search channels
@chris-lovejoy
chris-lovejoy / video_finder.py
Last active Jun 17, 2020
ranking by view-to-subscriber ratio
View video_finder.py
import pandas as pd
def find_title(item):
    """Return the title stored in a search-result item.

    Args:
        item: Mapping with a nested ``item['snippet']['title']`` entry
            (presumably one element of a YouTube search response -- confirm
            against the caller).

    Returns:
        The value found at ``item['snippet']['title']``.

    Raises:
        KeyError: If the 'snippet' or 'title' key is missing.
    """
    return item['snippet']['title']
def find_viewcount(item, youtube_api):
video_id = item['id']['videoId']
video_statistics = youtube_api.videos().list(id=video_id, part='statistics').execute()
viewcount = int(video_statistics['items'][0]['statistics']['viewCount'])
@chris-lovejoy
chris-lovejoy / video_finder.py
Created Jun 17, 2020
including date published in model
View video_finder.py
from datetime import datetime, timedelta
# Timestamp string for the start of the day (00:00:00) seven days before today.
today_date = datetime.today()
one_week_ago_date = today_date - timedelta(days=7)
# Zero the time-of-day before formatting so only the calendar date matters.
# NOTE(review): datetime.today() is naive local time, yet the format appends a
# literal 'Z' -- confirm the consumer is fine with that.
one_week_ago_string = one_week_ago_date.replace(
    hour=0, minute=0, second=0, microsecond=0
).strftime('%Y-%m-%dT%H:%M:%SZ')
# updating the search by adding 'publishedAfter'
View job_scraper.py
def load_indeed_jobs_div(job_title, location, timeout=30):
    """Fetch an Indeed UK job search page and return its results container.

    Args:
        job_title: Search text, sent as the 'q' query parameter.
        location: Location text, sent as the 'l' query parameter.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot block forever.

    Returns:
        The element with id "resultsCol" from the parsed page, or None when
        the page contains no such element.
    """
    # Imported here because a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule has been loaded.
    from urllib.parse import urlencode

    query_params = {'q': job_title, 'l': location, 'fromage': 'last', 'sort': 'date'}
    url = 'https://www.indeed.co.uk/jobs?' + urlencode(query_params)
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, "html.parser")
    return soup.find(id="resultsCol")
@chris-lovejoy
chris-lovejoy / job_scraper.py
Last active Oct 5, 2020
job detail extraction
View job_scraper.py
def extract_job_title_indeed(job_elem):
    """Return the stripped text of a job card's title element.

    Args:
        job_elem: Object exposing ``find(name, class_=...)``; the code looks
            up an 'h2' with class 'title' inside it.

    Returns:
        The title text with surrounding whitespace removed, or None when the
        card has no matching element (instead of failing on ``None.text``).
    """
    title_elem = job_elem.find('h2', class_='title')
    if title_elem is None:
        return None
    return title_elem.text.strip()
def extract_company_indeed(job_elem):
    """Return the stripped text of a job card's company element.

    Args:
        job_elem: Object exposing ``find(name, class_=...)``; the code looks
            up a 'span' with class 'company' inside it.

    Returns:
        The company text with surrounding whitespace removed, or None when
        the card has no matching element (instead of failing on ``None.text``).
    """
    company_elem = job_elem.find('span', class_='company')
    if company_elem is None:
        return None
    return company_elem.text.strip()
@chris-lovejoy
chris-lovejoy / job_scraper.py
Created May 1, 2020
Finding each job card
View job_scraper.py
# Collect every job card ('div' with class 'jobsearch-SerpJobCard') from the
# results container. The container is None when the page had no matching
# element, so fall back to an empty list rather than failing on None.find_all.
job_elems = (
    job_soup.find_all('div', class_='jobsearch-SerpJobCard')
    if job_soup is not None
    else []
)
@chris-lovejoy
chris-lovejoy / job_scraper.py
Created May 1, 2020
Creating lists for each job characteristic
View job_scraper.py
# Parallel lists: cols[i] names the characteristic held in extracted_info[i].
cols = []
extracted_info = []

# Everything that touches `titles` stays inside the `if`: the list only exists
# when 'titles' was requested.
if 'titles' in desired_characs:
    titles = [extract_job_title_indeed(job_elem) for job_elem in job_elems]
    cols.append('titles')
    extracted_info.append(titles)
You can’t perform that action at this time.