@nijave
Created February 3, 2019 16:30
Dump jobs from Columbus Techlife: walk the paginated listing at techlifecolumbus.com/jobs and write each company/title pair to job_list.csv.
import csv
import logging

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Job titles grouped by company name
job_list = {}
url = "http://www.techlifecolumbus.com/jobs/"

while url is not None:
    logger.info("Getting %s", url)
    response = requests.get(url)

    logger.info("Parsing results")
    # Explicit parser avoids bs4's "no parser specified" warning
    soup = BeautifulSoup(response.content, "html.parser")

    # Follow the "next page" link; stop when there isn't one
    link_tag = soup.find('a', {'class': 'next'})
    url = link_tag['href'] if link_tag is not None else None

    logger.info("Adding job results")
    for row in soup.find_all('div', {'class': 'wpjb-grid-row'}):
        company = row.find('span', {'class': 'wpjb-company_name'}).text
        title = row.find('a', {'class': 'wpjb-job_title'}).text
        job_list.setdefault(company, []).append(title)

# Write one company,title row per posting
with open('job_list.csv', 'w') as f:
    writer = csv.writer(f, lineterminator="\n")
    for company, titles in job_list.items():
        for title in titles:
            writer.writerow([company, title])
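
A minimal sketch of consuming the resulting job_list.csv, assuming the two-column company,title layout written above; counting postings per company is just an illustrative follow-up, not part of the original gist.

import csv
from collections import Counter

# Tally how many postings each company has in the dumped CSV
with open('job_list.csv', newline='') as f:
    counts = Counter(company for company, _title in csv.reader(f))

# Print the ten companies with the most postings
for company, n in counts.most_common(10):
    print(f"{n:3d}  {company}")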