Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Get the hourly wages of machine learning contractors on oDesk
# John Horton
# Description: Answer to Quora question about machine learning hourly rates
# ""
from BeautifulSoup import BeautifulSoup
import urllib2
def contractors(skill, offset, base_url=""):
    """Build the search-results URL for contractors with a given skill.

    skill    -- skill slug to search for (e.g. "machine-learning")
    offset   -- index of the first result; should be a multiple of 10
    base_url -- %-style template with two placeholders, filled with
                (skill, offset) in that order

    Returns the formatted URL string.
    Raises ValueError when no URL template is configured.
    """
    # NOTE(review): the search URL literal is missing from this copy of the
    # script (it is an empty string), so the template must be supplied here or
    # restored as the default. Formatting an empty template would otherwise
    # fail with an opaque "not all arguments converted" TypeError.
    if not base_url:
        raise ValueError(
            "no search URL template configured; expected a string with two "
            "%s placeholders for (skill, offset)"
        )
    return base_url % (skill, offset)
def get_wage(x):
    """Extract the hourly wage from an HTML fragment and return it as a float.

    x -- string holding one tag whose text is the rate, e.g.
         '<span name="rate_1">$25.00/hr</span>'

    Raises IndexError if x contains no ">" and ValueError if the tag text
    is not a number once the decorations are removed.
    """
    # The text between the first ">" and the next "<" is the tag's content.
    text = x.split(">")[1].split("<")[0]
    # Drop the currency sign, the "/hr" suffix and any thousands separator;
    # float() itself tolerates surrounding whitespace.
    for decoration in ("$", "/hr", ","):
        text = text.replace(decoration, "")
    return float(text)
def wages(skill, n):
    """Collect hourly wages of contractors who list the given skill.

    Fetches n // 10 + 1 result pages (offsets 0, 10, 20, ...), so at least
    n contractors are covered when that many are available.

    skill -- skill slug passed through to contractors()
    n     -- minimum number of contractors to try to cover

    Returns a list of floats, one per rate element found.
    """
    # Floor division keeps the page count an int on Python 2 and 3 alike.
    pages = n // 10 + 1
    found = []
    for i in range(pages):
        url = contractors(skill, 10 * i)
        f = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(f)
        finally:
            # Release the connection even if parsing fails.
            f.close()
        # NOTE(review): only rate_1 .. rate_9 are probed; if a page carries
        # ten results, one is skipped -- confirm the page's numbering.
        for r in range(1, 10):
            for tag in soup.findAll(attrs={"name": "rate_%s" % r}):
                # Hand get_wage() the tag's HTML text; the matches were
                # previously computed but never stored, so the result was
                # always an empty list.
                found.append(get_wage(str(tag)))
    return found
# There were a couple of false positives (we're working on this),
# so I excluded everyone listing $15/hour or less.
cleaned_wages = [w for w in wages("machine-learning", 30) if w > 15]
print """
Min: %s
Max: %s
Mean: %s""" % (min(cleaned_wages),
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment