Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@chris-lovejoy
Last active October 5, 2020 16:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save chris-lovejoy/572bdef5d2cc83986d566e1697241800 to your computer and use it in GitHub Desktop.
Save chris-lovejoy/572bdef5d2cc83986d566e1697241800 to your computer and use it in GitHub Desktop.
single function to call
def find_jobs_from(website, job_title, location, desired_characs, filename="results.xls"):
    """Extract the requested characteristics of job postings and save them to a file.

    Args:
        website: Which website to search. Only 'Indeed' is implemented;
            'CWjobs' is recognised but not supported yet.
        job_title: Job title to search for (forwarded to load_indeed_jobs_div).
        location: Location to search in (forwarded to load_indeed_jobs_div).
        desired_characs: List of the job characteristics of interest, chosen
            from 'titles', 'companies', 'links' and 'date_listed'.
        filename: Name (and therefore format) of the output file handed to
            save_jobs_to_excel. Defaults to 'results.xls'.

    Raises:
        NotImplementedError: If website is 'CWjobs'.
        ValueError: If website is any other unrecognised value.
    """
    if website == 'Indeed':
        job_soup = load_indeed_jobs_div(job_title, location)
        # num_listings is the number of postings found on the page.
        jobs_list, num_listings = extract_job_information_indeed(job_soup, desired_characs)
    elif website == 'CWjobs':
        # TO DO LATER
        # Fail loudly instead of falling through to the save step with no data.
        raise NotImplementedError("Searching 'CWjobs' is not implemented yet.")
    else:
        raise ValueError(
            "Unsupported website {!r}; expected 'Indeed' or 'CWjobs'.".format(website)
        )

    save_jobs_to_excel(jobs_list, filename)
def extract_job_information_indeed(job_soup, desired_characs):
    """Collect the requested columns from every Indeed job card in job_soup.

    Args:
        job_soup: Parsed page fragment; it is searched with
            find_all('div', class_='jobsearch-SerpJobCard').
        desired_characs: Collection of column names to extract, chosen from
            'titles', 'companies', 'links' and 'date_listed'. Unknown names
            are ignored.

    Returns:
        A (jobs_list, num_listings) tuple. jobs_list maps each requested
        column name to a list with one entry per job card, always in the
        order titles, companies, links, date_listed. num_listings is the
        number of job cards found.
    """
    job_elems = job_soup.find_all('div', class_='jobsearch-SerpJobCard')

    # Column name -> extractor, in output order. The lambdas defer the helper
    # lookup until a column is really extracted, so an unrequested helper is
    # never touched.
    extractors = (
        ('titles', lambda elem: extract_job_title_indeed(elem)),
        ('companies', lambda elem: extract_company_indeed(elem)),
        ('links', lambda elem: extract_link_indeed(elem)),
        ('date_listed', lambda elem: extract_date_indeed(elem)),
    )

    jobs_list = {
        column: [extract(elem) for elem in job_elems]
        for column, extract in extractors
        if column in desired_characs
    }

    # Count the cards directly: every column has exactly one entry per card,
    # and this also works when no known column was requested.
    num_listings = len(job_elems)

    return jobs_list, num_listings
# NOTE(review): this line follows the `return` of extract_job_information_indeed
# and uses `filename`, which only exists as a parameter of find_jobs_from. It
# looks like the closing status message of find_jobs_from, displaced when the
# listing was flattened — confirm and relocate before running.
print('{} new job postings retrieved. Stored in {}.'.format(num_listings, filename))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment