Last active
October 5, 2020 16:20
-
-
Save chris-lovejoy/572bdef5d2cc83986d566e1697241800 to your computer and use it in GitHub Desktop.
single function to call
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find_jobs_from(website, job_title, location, desired_characs, filename="results.xls"):
    """
    Extract the desired characteristics of all new job postings for the
    given title and location, and save them to a single file.

    Arguments:
    - website: which site to search (options: 'Indeed' or 'CWjobs')
    - job_title: job title to search for
    - location: location to search in
    - desired_characs: list of the job characteristics of interest,
      from 'titles', 'companies', 'links' and 'date_listed'
    - filename: name and format of the output file.
      Default is an .xls file called 'results.xls'

    Raises:
    - NotImplementedError: for 'CWjobs' (scraper not written yet)
    - ValueError: for any unrecognised website
    """
    if website == 'Indeed':
        job_soup = load_indeed_jobs_div(job_title, location)
        jobs_list, num_listings = extract_job_information_indeed(job_soup, desired_characs)
    elif website == 'CWjobs':
        # TODO: CWjobs scraping not implemented yet. Fail loudly here rather
        # than falling through with 'jobs_list' undefined (NameError at save).
        raise NotImplementedError("CWjobs scraping is not implemented yet")
    else:
        raise ValueError("Unknown website: {!r} (expected 'Indeed' or 'CWjobs')".format(website))

    save_jobs_to_excel(jobs_list, filename)

    print('{} new job postings retrieved. Stored in {}.'.format(num_listings, filename))
def extract_job_information_indeed(job_soup, desired_characs):
    """
    Pull the requested characteristics out of an Indeed results page.

    Arguments:
    - job_soup: parsed results page (anything exposing BeautifulSoup's
      find_all interface) containing 'jobsearch-SerpJobCard' divs
    - desired_characs: which characteristics to extract, any of
      'titles', 'companies', 'links' and 'date_listed'

    Returns:
    - jobs_list: dict mapping each requested characteristic to a list of
      values, one per job card (keys appear in the fixed order above)
    - num_listings: number of job cards found on the page
    """
    job_elems = job_soup.find_all('div', class_='jobsearch-SerpJobCard')

    jobs_list = {}
    if 'titles' in desired_characs:
        jobs_list['titles'] = [extract_job_title_indeed(elem) for elem in job_elems]
    if 'companies' in desired_characs:
        jobs_list['companies'] = [extract_company_indeed(elem) for elem in job_elems]
    if 'links' in desired_characs:
        jobs_list['links'] = [extract_link_indeed(elem) for elem in job_elems]
    if 'date_listed' in desired_characs:
        jobs_list['date_listed'] = [extract_date_indeed(elem) for elem in job_elems]

    # Count the cards directly. The original took len() of the first
    # extracted column, which raised IndexError whenever desired_characs
    # matched nothing (e.g. an empty list).
    num_listings = len(job_elems)
    return jobs_list, num_listings
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment