Skip to content

Instantly share code, notes, and snippets.

@miglen
Created August 26, 2015 07:25
Show Gist options
  • Save miglen/0e9d50437788b9a93cc0 to your computer and use it in GitHub Desktop.
Save miglen/0e9d50437788b9a93cc0 to your computer and use it in GitHub Desktop.
Jobsbg viewcount
from BeautifulSoup import BeautifulSoup
import urllib2
import math
import re
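# coding: utf-8
# NOTE: Python only honours an encoding declaration on the first or second
# line of a file (PEP 263); placed here, below the imports, it has no effect.
"""
Print the view count and title of every job posting a company has on jobs.bg.
"""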
def soup_url(url):
    """Download *url* and return it parsed as a BeautifulSoup document.

    The response body is decoded as UTF-8 and every non-ASCII character is
    then dropped before parsing, so the returned soup holds ASCII text only.

    Raises whatever ``urllib2.urlopen`` raises when the request fails, and
    ``UnicodeDecodeError`` when the body is not valid UTF-8.
    """
    response = urllib2.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Release the connection even when reading fails part-way through.
        response.close()
    text = raw.decode('utf-8')
    # Re-encode, then decode as ASCII ignoring errors: all bytes of any
    # non-ASCII character are >= 0x80, so those characters vanish entirely.
    ascii_text = text.encode('utf-8').decode('ascii', 'ignore')
    return BeautifulSoup(ascii_text)
def jobviews(jobid):
    """Return the view count of the jobs.bg page at path *jobid*.

    *jobid* is appended to ``http://www.jobs.bg/``; the page is fetched with
    ``soup_url`` and the number is extracted with ``get_views``, the same two
    steps ``print_jobs`` performs for each job link.

    NOTE(review): the fetched page used to be assigned to a local and thrown
    away, so the function always returned ``None``. Returning the count is
    presumed to be the intent -- confirm.
    """
    job_page = soup_url("http://www.jobs.bg/" + jobid)
    return get_views(job_page)
def get_pagenum(url):
    """Return the number of listing pages for the company profile at *url*.

    The page is fetched with ``soup_url``. The last space-separated token of
    the ``<td class="pagingtotal">`` cell is read as an integer (presumably
    the total number of postings -- confirm against the site), divided by 20
    and rounded up.

    Returns the value of ``math.ceil`` (a float on Python 2, which this file
    targets), so callers convert with ``int()``. When the page has no
    ``pagingtotal`` cell, ``1.0`` is returned so the caller still visits the
    first listing page; this assumes an unpaged listing fits on one page.

    Raises ``ValueError`` when the last token of the cell is not an integer.
    """
    total_cell = soup_url(url).find("td", {"class": "pagingtotal"})
    if total_cell is None:
        # find() returns None when nothing matches; without this guard the
        # attribute access below fails with AttributeError.
        return 1.0
    tokens = total_cell.text.split(" ")
    # 20.0 forces float division so a partial last page is rounded up.
    return math.ceil(int(tokens[-1]) / 20.0)
def get_views(soup):
    """Return the view count found in a parsed job page, as an int.

    The number is the text of the first ``<b>`` inside the first ``<td>`` of
    the second ``<table width="980">`` in *soup*.

    Raises ``IndexError`` when the page has fewer than two such tables, or
    the table has no ``<td>``, or that cell has no ``<b>``. Raises
    ``ValueError`` when the ``<b>`` text is not an integer.
    """
    wide_tables = soup.findAll("table", {"width": "980"})
    # Index 1: the counter is read from the second matching table.
    first_cell = wide_tables[1].findAll("td")[0]
    counter = first_cell.findAll("b")[0]
    return int(counter.text)
def print_jobs(soup):
    """Print ``"<views> <title>"`` for every job linked from a listing page.

    For each ``<a class="joblink">`` in *soup*, its ``href`` is appended to
    ``http://www.jobs.bg/``, that page is fetched with ``soup_url`` and its
    view count is read with ``get_views``. This costs one HTTP request per
    link. Returns ``None``.
    """
    for link in soup.findAll("a", {"class": "joblink"}):
        # A separate name keeps the listing passed in as *soup* from being
        # rebound while its links are walked.
        job_page = soup_url(str("http://www.jobs.bg/%s" % link['href']))
        views = get_views(job_page)
        # One parenthesised argument prints identically under Python 2's
        # print statement and Python 3's print function.
        print("%s %s" % (views, link.text))
# Company ID from jobs.bg
cid = 14495

# Listing URL; "frompage" is the offset of the first posting shown and
# "c_sid" is the company ID.
LISTING_URL = "http://www.jobs.bg/company_profile_view.php?frompage=%s&c_sid=%s"

# Number of listing pages, taken from the first page of the company profile.
pages = get_pagenum(str(LISTING_URL % (0, cid)))

# Walk the listing in steps of 20 (the same page size get_pagenum divides
# by) and print the view count of every job on each page.
for i in range(0, int(pages) * 20, 20):
    soup = soup_url(str(LISTING_URL % (i, cid)))
    print_jobs(soup)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment