Get the hourly wages of machine learning contractors on oDesk

  • Download Gist
get_machine_learning_wages_on_odesk.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
# John Horton
# www.john-joseph-horton.com
 
# Description: Answer to Quora question about machine learning hourly rates
# "http://www.quora.com/Machine-Learning/What-do-contractors-in-machine-learning-charge-by-the-hour"
 
from BeautifulSoup import BeautifulSoup
import urllib2
 
def contractors(skill, offset):
    """Build the oDesk contractor-search URL for a skill.

    skill  -- search term; it is interpolated into the query string
              verbatim, so it must already be URL-safe
              (e.g. "machine-learning").
    offset -- number of results to skip; should be a multiple of 10.

    Returns the URL as a string.
    """
    base_url = "https://www.odesk.com/contractors?nbs=1&q=%s&skip=%s"
    return base_url % (skill, offset)
 
def get_wage(x):
    """Extract the hourly wage from an HTML element given as a string.

    x -- markup of a single element, e.g.
         '<span name="rate_1">$25.00/hr</span>'.

    Returns the wage as a float. Raises IndexError if x contains no ">"
    and ValueError if the extracted text is not a number.
    """
    # The wage is the text between the first ">" and the "<" that follows it.
    text = x.split(">")[1].split("<")[0]
    # Drop the currency sign, the "/hr" suffix and thousands separators
    # ("$1,250.00/hr") so that float() accepts what is left.
    # NOTE(review): assumes "," is only ever a thousands separator - confirm.
    for token in ("$", "/hr", ","):
        text = text.replace(token, "")
    return float(text)
def wages(skill, n):
    """Collect hourly wages of contractors listed for a skill.

    Requests n // 10 + 1 search-result pages (offsets 0, 10, 20, ...) and,
    on each page, reads the first element whose "name" attribute is
    rate_1 ... rate_9. Entries that are missing from a page are skipped.

    skill -- search term, passed through to contractors().
    n     -- approximate number of contractors wanted.

    Returns a list of floats (one wage per entry found).
    """
    pages = n // 10 + 1
    collected = []
    for page in range(pages):
        url = contractors(skill, 10 * page)
        response = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(response)
        finally:
            # Release the connection even if parsing fails.
            response.close()
        # NOTE(review): only nine rate slots are read per page although the
        # offset advances by 10 - confirm whether rate_10 (or rate_0) exists.
        for slot in range(1, 10):
            matches = soup.findAll(attrs={"name": "rate_%s" % slot})
            if not matches:
                # Fewer results than slots on this page; nothing to parse.
                continue
            collected.append(get_wage(str(matches[0])))
    return collected
 
# there were a couple of false positives (we're working on this)
# so I excluded everyone listing $15/hour or less
cleaned_wages = [w for w in wages("machine-learning", 30) if w > 15]

if cleaned_wages:
    # round() takes the precision; float() only takes the count.
    mean_wage = round(sum(cleaned_wages) / float(len(cleaned_wages)), 2)
    print("""
Min: %s
Max: %s
Mean: %s""" % (min(cleaned_wages),
               max(cleaned_wages),
               mean_wage))
else:
    # min()/max() and the mean are undefined for an empty list.
    print("No wages above $15/hour were found.")

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.