Skip to content

Instantly share code, notes, and snippets.

@jcrubino
Last active October 13, 2015 00:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcrubino/4107933 to your computer and use it in GitHub Desktop.
Save jcrubino/4107933 to your computer and use it in GitHub Desktop.
MarketNeeds for Programming Languages
Rough estimate of programming job posts per GitHub user claiming primary proficiency in each language, listed in descending order.
Data Sources: Indeed.com & Github
Scroll Down For Code
LANGUAGE Ave Jobs / User
assembly 2.01309674694
visual basic 1.60952380952
matlab 0.645004061738
r 0.524292965917
perl 0.460672614962
coldfusion 0.404006677796
c++ 0.235418596937
c# 0.198516879086
shell 0.160797944688
puppet 0.160337552743
c 0.155728587319
java 0.146827937321
groovy 0.0797171327547
actionscript 0.0555555555556
python 0.0466836990282
javascript 0.0451142342788
scala 0.0434624477178
php 0.0399441118884
erlang 0.0284882511957
clojure 0.026557711951
lua 0.0253580344239
objective-c 0.0220096842611
coffeescript 0.0204039142203
ruby 0.0160951101244
haskell 0.0116519174041
arduino 0.0116033755274
common lisp 0.002920317063
emacs lisp 0.00100502512563
go 0.000727802037846
viml 0.000128501670522
Method Explained:
search indeed for primary languages listed on Github
search 1: "{language} developer"
search 2: "{language} programmer"
search 3: {language}+programming
Retrieve number of job posts per search
calculate the mean number of job posts per language across the three searches
divide that mean by the number of GitHub users claiming proficiency in the language
Last Run Today
with Code Corrections
import urllib2
# todo
# stabilize workflow
# SQLite DB integration
# salary estimation
# language market share
# local search
# reporting
# (language, user count) pairs: the number of GitHub users listing each
# language as their primary one.
users = [
    ('JavaScript', 129427),
    ('Ruby', 102702),
    ('Java', 87472),
    ('PHP', 71333),
    ('Python', 66540),
    ('C', 44051),
    ('C++', 29141),
    ('C#', 26206),
    ('Objective-C', 24989),
    ('Shell', 13234),
    ('VimL', 7782),
    ('Perl', 7285),
    ('ActionScript', 4548),
    ('Scala', 3666),
    ('Lua', 2537),
    ('Haskell', 2260),
    ('Groovy', 2074),
    ('Emacs Lisp', 1990),
    ('Clojure', 1958),
    ('Erlang', 1603),
    ('CoffeeScript', 1601),
    ('R', 1379),
    ('Go', 1374),
    ('Matlab', 1231),
    ('Visual Basic', 980),
    ('Common Lisp', 799),
    ('Puppet', 790),
    ('Assembly', 789),
    ('Arduino', 632),
    ('ColdFusion', 599),
]

# Lower-cased language name -> user count as a float.
n_user = dict((name.lower(), float(count)) for name, count in users)

# Lower-cased language names, in the same order as `users`.
jobs = [name.lower() for name, _count in users]
def query1(Term):
    """Build the Indeed search URL for the quoted phrase "<Term> developer".

    Term is lower-cased. A literal '+' in the language name (as in 'c++')
    is percent-encoded as '%2B': a bare '+' inside a URL query string is
    decoded as a space, so 'c++' would otherwise be searched as 'c'.
    Spaces are then rewritten to '+'. Text the caller has already
    percent-encoded (e.g. 'c%23' for 'c#') passes through unchanged.

    Returns the URL string, with the trailing 'l' parameter left empty.
    """
    head = 'http://www.indeed.com/jobs?q='
    # Escape real plus signs first, before '+' starts meaning "space".
    Term = Term.lower().replace('+', '%2B').replace(' ', '+')
    q = '"' + Term + '+developer"&l='
    return head + q
def query2(Term):
    """Build the Indeed search URL for the quoted phrase "<Term> programmer".

    Term is lower-cased. A literal '+' in the language name (as in 'c++')
    is percent-encoded as '%2B': a bare '+' inside a URL query string is
    decoded as a space, so 'c++' would otherwise be searched as 'c'.
    Spaces are then rewritten to '+'. Text the caller has already
    percent-encoded (e.g. 'c%23' for 'c#') passes through unchanged.

    Returns the URL string, with the trailing 'l' parameter left empty.
    """
    head = 'http://www.indeed.com/jobs?q='
    # Escape real plus signs first, before '+' starts meaning "space".
    Term = Term.lower().replace('+', '%2B').replace(' ', '+')
    q = '"' + Term + '+programmer"&l='
    return head + q
def query3(Term):
    """Build the Indeed search URL for the unquoted terms "<Term> programming".

    Term is always lower-cased, matching query1 and query2 (previously
    this happened only when Term contained a space). A literal '+' in
    the language name (as in 'c++') is percent-encoded as '%2B': a bare
    '+' inside a URL query string is decoded as a space, so 'c++' would
    otherwise be searched as 'c'. Spaces are then rewritten to '+'.
    Text the caller has already percent-encoded (e.g. 'c%23' for 'c#')
    passes through unchanged.

    Returns the URL string, with the trailing 'l' parameter left empty.
    """
    head = 'http://www.indeed.com/jobs?q='
    # Escape real plus signs first, before '+' starts meaning "space".
    Term = Term.lower().replace('+', '%2B').replace(' ', '+')
    q = Term + '+programming&l='
    return head + q
def search(Term):
    """Fetch the three Indeed result pages for Term.

    The URLs come from query1, query2 and query3, in that order.

    Returns a 3-tuple (r1, r2, r3); each element is the list of lines
    read from the corresponding HTTP response. Any error raised by
    urllib2.urlopen propagates to the caller.
    """
    def fetch_lines(url):
        # Close the response explicitly instead of leaving the socket
        # open until the object happens to be garbage-collected.
        response = urllib2.urlopen(url)
        try:
            return response.readlines()
        finally:
            response.close()

    q1 = query1(Term)
    q2 = query2(Term)
    q3 = query3(Term)
    r1 = fetch_lines(q1)
    r2 = fetch_lines(q2)
    r3 = fetch_lines(q3)
    return r1, r2, r3
def jobs_exist(ResultsList):
    """Turn the lines of one result page into a job-post figure.

    Uses countjobs to find the line holding the search-count <div>.
    If there is no such line, returns 1. Otherwise takes the last
    space-separated token before '</div', strips thousands separators,
    and returns that integer plus one.

    NOTE(review): the "+1" looks like an offset so the two cases line up
    (a missing count yields 1) -- confirm the intent.
    """
    status, count_line = countjobs(ResultsList)
    if status == 1:
        # No search-count element on the page.
        return 1
    if status == 0:
        before_close = count_line.split('</div')[0]
        last_token = before_close.split(' ')[-1]
        return int(last_token.replace(',', '')) + 1
def countjobs(LineList):
    """Locate the search-count line in a page's lines.

    Returns (0, line) for the first line containing
    '<div id="searchCount">', or (1, None) when no line contains it.
    """
    marker = '<div id="searchCount">'
    # Lazily scan so iteration stops at the first matching line.
    first_hit = next((text for text in LineList if marker in text), None)
    if first_hit is None:
        return (1, None)
    return (0, first_hit)
def job_tally(Lang):
edge_case = {'go':'golang'}
if Lang not in edge_case.keys():
r = search(Lang.lower())
if Lang in edge_case.keys():
r = search(edge_case[Lang.lower()])
jr = map(jobs_exist, r)
jn = sum(jr)/float(len(jr))
if '%23' in Lang:
Lang = Lang.replace('%23','#')
print Lang, jr, jn,'\n'
return (Lang,jn)
if __name__ == "__main__":
book = {}
for job in jobs:
if '#' in job:
job = job.replace('#','%23')
r = job_tally(job)
book[job] = r[1]
raw_jobs = list(sorted(book, key=book.__getitem__, reverse=True))
rbook = {}
ut= [(x[0].lower(),x[1]) for x in users]
xuser= dict(ut)
for job in book.keys():
try:
rbook[job] = book[job]/xuser[job]
except KeyError:
oj = job
nj = job.replace('%23','#')
rbook[nj] = book[job]/xuser[nj.lower()]
final_results = list(sorted(rbook, key=rbook.__getitem__, reverse=True))
for job in final_results:
print job, rbook[job]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment