Skip to content

Instantly share code, notes, and snippets.

@vrootic
Created July 29, 2017 00:09
Show Gist options
  • Save vrootic/627dddeab82ae6b4617cff301c4638ea to your computer and use it in GitHub Desktop.
Save vrootic/627dddeab82ae6b4617cff301c4638ea to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
def get_soup(url, headers, payload, timeout=30):
    """Fetch *url* with an HTTP GET and return the parsed response body.

    Args:
        url: Address to request.
        headers: Mapping of HTTP request headers to send.
        payload: Mapping of query-string parameters appended to *url*.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed, so a stalled server
            would otherwise block the script indefinitely.

    Returns:
        A ``BeautifulSoup`` document built from the response text with the
        ``html5lib`` parser.
    """
    response = requests.get(
        url, params=payload, headers=headers, timeout=timeout
    )
    return BeautifulSoup(response.text, 'html5lib')
def list_jobs(soup):
    """Print the title, absolute URL and summary of each job in *soup*.

    Job posts are the ``div`` elements with class ``s-res`` inside the
    ``div`` with class ``leftColumn``. For every post, three lines are
    printed (the text of its first ``<a>``, the career base URL joined with
    that link's ``href``, and the text of its first ``<span>``) followed by
    a blank line.

    If the results container is absent nothing is printed, and posts that
    have no link are skipped.

    Args:
        soup: Parsed results page, as returned by ``get_soup``.
    """
    career_url = 'http://salesforce.careermount.com/career/'

    column = soup.find('div', 'leftColumn')
    if column is None:
        # No results container on the page (presumably an error page or an
        # expired session) -- there is nothing to list.
        return

    # attrs must be a dict; the original passed a set literal
    # ({'class', 's-res'}) here by mistake.
    for post in column.find_all('div', {'class': 's-res'}):
        link = post.find('a')  # look the link up once, reuse it below
        if link is None:
            continue
        summary = post.find('span')

        print(link.text)
        print(career_url + link.get('href', ''))
        print(summary.text if summary is not None else '')
        print()
if __name__ == '__main__':
    # NOTE: these header values (in particular the session cookie) expire
    # after a while and must then be replaced with fresh ones.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        'Cookie': '__utmt=1; __utma=10312119.1119980742.1501264404.1501264404.1501264404.1; __utmb=10312119.1.10.1501264404; __utmc=10312119; __utmz=10312119.1501264404.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __atuvc=1%7C30; __atuvs=597b7a13cbbf5ab1000; JSESSIONID=D236CEC4A43668618F47703D2899E77F.node01; logged=""',
    }

    # Search filters: software jobs in California, most recently posted first.
    payload = {
        'location': 'California',
        'keyword': 'software',
        'sort_dir': 'desc',
        'sort_field': 'post_date',
        'relevance': 'false',
    }

    url = 'http://salesforce.careermount.com/candidate/job_search/quick/results'

    # Fetch the results page, then print every job found on it.
    soup = get_soup(url, headers, payload)
    list_jobs(soup)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment