Last active
October 5, 2020 16:20
-
-
Save chris-lovejoy/572bdef5d2cc83986d566e1697241800 to your computer and use it in GitHub Desktop.
single function to call
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find_jobs_from(website, job_title, location, desired_characs, filename="results.xls"):
    """
    Extract the desired characteristics of all new job postings for the
    given title and location, and save them to a single file.

    Arguments:
    - website: which site to search (options: 'Indeed' or 'CWjobs')
    - job_title: job title to search for
    - location: location to search in
    - desired_characs: list of the job characteristics of interest,
      from 'titles', 'companies', 'links' and 'date_listed'
    - filename: name and format of the output file.
      Default is an .xls file called 'results.xls'

    Raises:
    - NotImplementedError: for 'CWjobs' (scraper not written yet)
    - ValueError: for any unrecognised website
    """
    if website == 'Indeed':
        job_soup = load_indeed_jobs_div(job_title, location)
        jobs_list, num_listings = extract_job_information_indeed(job_soup, desired_characs)
    elif website == 'CWjobs':
        # TODO: CWjobs scraping not implemented yet. Fail loudly here rather
        # than falling through with 'jobs_list' undefined (NameError at save).
        raise NotImplementedError("CWjobs scraping is not implemented yet")
    else:
        raise ValueError("Unknown website: {!r} (expected 'Indeed' or 'CWjobs')".format(website))

    save_jobs_to_excel(jobs_list, filename)

    print('{} new job postings retrieved. Stored in {}.'.format(num_listings, filename))
def extract_job_information_indeed(job_soup, desired_characs):
    """
    Pull the requested characteristics out of an Indeed results page.

    Arguments:
    - job_soup: parsed results page (anything exposing BeautifulSoup's
      find_all interface) containing 'jobsearch-SerpJobCard' divs
    - desired_characs: which characteristics to extract, any of
      'titles', 'companies', 'links' and 'date_listed'

    Returns:
    - jobs_list: dict mapping each requested characteristic to a list of
      values, one per job card (keys appear in the fixed order above)
    - num_listings: number of job cards found on the page
    """
    job_elems = job_soup.find_all('div', class_='jobsearch-SerpJobCard')

    jobs_list = {}
    if 'titles' in desired_characs:
        jobs_list['titles'] = [extract_job_title_indeed(elem) for elem in job_elems]
    if 'companies' in desired_characs:
        jobs_list['companies'] = [extract_company_indeed(elem) for elem in job_elems]
    if 'links' in desired_characs:
        jobs_list['links'] = [extract_link_indeed(elem) for elem in job_elems]
    if 'date_listed' in desired_characs:
        jobs_list['date_listed'] = [extract_date_indeed(elem) for elem in job_elems]

    # Count the cards directly. The original took len() of the first
    # extracted column, which raised IndexError whenever desired_characs
    # matched nothing (e.g. an empty list).
    num_listings = len(job_elems)
    return jobs_list, num_listings
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment