Skip to content

Instantly share code, notes, and snippets.

@alairock
Created December 12, 2019 20:09
Show Gist options
  • Save alairock/35ee57222d3fb77e828dbe2bd3955125 to your computer and use it in GitHub Desktop.
Save alairock/35ee57222d3fb77e828dbe2bd3955125 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import requests
from pprint import pprint
from urllib.parse import urljoin

# Scrape the Arizona Job Connection search results for "Python" postings,
# open every posting found, and keep the ones whose page text mentions a
# wanted keyword and none of the unwanted ones.

BASE_URL = "https://www.azjobconnection.gov"
# Search page: up to 50 active postings matching the keyword "Python".
url = "https://www.azjobconnection.gov/ada/r/search/jobs?utf8=%E2%9C%93&per_page=50&keywords=Python&refiners=%7B%7D&is_subsequent_search=false&status=Active"
# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

r = requests.get(url, timeout=REQUEST_TIMEOUT)
soup = BeautifulSoup(r.content, features="html.parser")

# Collect the absolute URL of every anchor whose href points at a posting.
# Only anchors that actually carry an href are considered, and each posting
# is recorded once so it is not downloaded repeatedly.
jobs_to_look_at = []
for link in soup.find_all('a', href=True):
    href = link['href']
    if "/jobs/" not in href:
        continue
    job_url = urljoin(BASE_URL, href)
    if job_url not in jobs_to_look_at:
        jobs_to_look_at.append(job_url)

# Keywords are matched as plain substrings of the lower-cased page text; the
# surrounding spaces restrict most of them to whole-word matches.
good_kw = [' python ', ' sql ', ' qa ', ' quality assurance ']
bad_kw = [' nurse ', ' military ', ' loud ', 'data scientist']

keepers = []
for job in jobs_to_look_at:
    # Fetch the posting itself (not the search page) and work on the decoded
    # text rather than the repr of the raw bytes.
    page = requests.get(job, timeout=REQUEST_TIMEOUT).text.lower()
    if any(kw in page for kw in bad_kw):
        print('bad')
        continue
    hits = sum(kw in page for kw in good_kw)
    if hits:
        print('good', job, hits)
        keepers.append(job)

pprint(keepers)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment