Created
February 3, 2019 16:30
-
-
Save nijave/69e7e7b163144f7bf8ae26a8459bdef0 to your computer and use it in GitHub Desktop.
Dump jobs from Columbus Techlife
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import logging
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Scrape every page of the job listing, following the "next" pagination link
# until there is none, then dump one (company, job title) row per job to
# job_list.csv in the current working directory.

# Seconds to wait on the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Maps company name -> list of job titles, in the order they were scraped.
job_list = {}
url = "http://www.techlifecolumbus.com/jobs/"
# Pages already fetched, so a "next" link that points backwards cannot make
# the loop run forever.
visited = set()
while url is not None:
    visited.add(url)
    logger.info("Getting %s", url)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    logger.info("Parsing results")
    # Name the parser explicitly: otherwise bs4 picks whichever parser happens
    # to be installed (and warns), so results can differ between machines.
    soup = BeautifulSoup(response.content, "html.parser")

    logger.info("Adding job results")
    for row in soup.find_all('div', {'class': 'wpjb-grid-row'}):
        company_tag = row.find('span', {'class': 'wpjb-company_name'})
        title_tag = row.find('a', {'class': 'wpjb-job_title'})
        # find() returns None when the tag is absent; skip such rows rather
        # than crash with AttributeError on `.text`.
        if company_tag is None or title_tag is None:
            logger.warning("Skipping row without a company or job title")
            continue
        # Strip surrounding whitespace so the same company is not split
        # across several dict keys that differ only in padding.
        company = company_tag.get_text(strip=True)
        title = title_tag.get_text(strip=True)
        job_list.setdefault(company, []).append(title)

    # Work out the next page. The href may be relative, so resolve it against
    # the URL that was actually served (after any redirects).
    link_tag = soup.find('a', {'class': 'next'})
    next_href = link_tag.get('href') if link_tag is not None else None
    url = urljoin(response.url, next_href) if next_href else None
    if url in visited:
        logger.warning("Next link %s was already scraped; stopping", url)
        url = None

# newline='' is what the csv module asks for so it controls line endings
# itself; utf-8 keeps non-ASCII names from failing on platforms whose default
# encoding cannot represent them.
with open('job_list.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, lineterminator="\n")
    for company, titles in job_list.items():
        writer.writerows([company, title] for title in titles)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment