Last active
October 13, 2015 00:07
-
-
Save jcrubino/4107933 to your computer and use it in GitHub Desktop.
MarketNeeds for Programming Languages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Rough estimate of programming jobs per GitHub user claiming primary proficiency in each language, listed in descending order.
Data Sources: Indeed.com & GitHub
Scroll Down For Code
LANGUAGE Avg Jobs / User
assembly 2.01309674694 | |
visual basic 1.60952380952 | |
matlab 0.645004061738 | |
r 0.524292965917 | |
perl 0.460672614962 | |
coldfusion 0.404006677796 | |
c++ 0.235418596937 | |
c# 0.198516879086 | |
shell 0.160797944688 | |
puppet 0.160337552743 | |
c 0.155728587319 | |
java 0.146827937321 | |
groovy 0.0797171327547 | |
actionscript 0.0555555555556 | |
python 0.0466836990282 | |
javascript 0.0451142342788 | |
scala 0.0434624477178 | |
php 0.0399441118884 | |
erlang 0.0284882511957 | |
clojure 0.026557711951 | |
lua 0.0253580344239 | |
objective-c 0.0220096842611 | |
coffeescript 0.0204039142203 | |
ruby 0.0160951101244 | |
haskell 0.0116519174041 | |
arduino 0.0116033755274 | |
common lisp 0.002920317063 | |
emacs lisp 0.00100502512563 | |
go 0.000727802037846 | |
viml 0.000128501670522 | |
Method Explained:
Search Indeed for each primary language listed on GitHub, using three queries:
search 1: "{language} programmer"
search 2: "{language} developer"
search 3: {language}+programming
Retrieve the number of job posts returned by each search.
Calculate the mean number of job posts per language across the three searches.
Divide that mean by the number of GitHub users claiming proficiency in the language.
Last Run Today | |
with Code Corrections |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
# TODO:
#   stabilize workflow
#   SQLite DB integration
#   salary estimation
#   language market share
#   local search
#   reporting
# (language, number of GitHub users listing it as their primary language)
users = [
    ('JavaScript', 129427),
    ('Ruby', 102702),
    ('Java', 87472),
    ('PHP', 71333),
    ('Python', 66540),
    ('C', 44051),
    ('C++', 29141),
    ('C#', 26206),
    ('Objective-C', 24989),
    ('Shell', 13234),
    ('VimL', 7782),
    ('Perl', 7285),
    ('ActionScript', 4548),
    ('Scala', 3666),
    ('Lua', 2537),
    ('Haskell', 2260),
    ('Groovy', 2074),
    ('Emacs Lisp', 1990),
    ('Clojure', 1958),
    ('Erlang', 1603),
    ('CoffeeScript', 1601),
    ('R', 1379),
    ('Go', 1374),
    ('Matlab', 1231),
    ('Visual Basic', 980),
    ('Common Lisp', 799),
    ('Puppet', 790),
    ('Assembly', 789),
    ('Arduino', 632),
    ('ColdFusion', 599),
]

# Lowercased language name -> user count as a float.
n_user = dict((name.lower(), float(count)) for name, count in users)

# Lowercased language names, in the same order as `users`.
jobs = [name.lower() for name, _count in users]
def query1(Term):
    """Return the Indeed search URL for the quoted phrase "<term> developer".

    Term is lowercased and each space is replaced by '+'. No other escaping
    is applied, so reserved URL characters must already be encoded by the
    caller.
    """
    slug = '+'.join(Term.lower().split(' '))
    return 'http://www.indeed.com/jobs?q=' + '"%s+developer"&l=' % slug
def query2(Term):
    """Return the Indeed search URL for the quoted phrase "<term> programmer".

    Term is lowercased and each space is replaced by '+'. No other escaping
    is applied, so reserved URL characters must already be encoded by the
    caller.
    """
    normalized = Term.lower().replace(' ', '+')
    pieces = ['http://www.indeed.com/jobs?q=', '"', normalized, '+programmer"&l=']
    return ''.join(pieces)
def query3(Term):
    """Return the Indeed search URL for the unquoted terms "<term> programming".

    Term is always lowercased and each space is replaced by '+', matching
    query1 and query2. (Previously the lowercasing only happened when Term
    contained a space, so single-word terms kept their original case.)
    No other escaping is applied, so reserved URL characters must already be
    encoded by the caller.
    """
    head = 'http://www.indeed.com/jobs?q='
    term = Term.lower().replace(' ', '+')
    return head + term + '+programming&l='
def search(Term):
    """Fetch the three Indeed result pages for Term.

    The URLs come from query1, query2 and query3, in that order.

    Returns a 3-tuple; each element is the list of lines read from the
    corresponding response. Errors raised by urllib2.urlopen propagate to
    the caller.
    """
    from contextlib import closing

    pages = []
    for build_url in (query1, query2, query3):
        # closing() guarantees the response is closed even if reading fails.
        with closing(urllib2.urlopen(build_url(Term))) as response:
            pages.append(response.readlines())
    return tuple(pages)
def jobs_exist(ResultsList):
    """Return a job tally for one page of search results.

    ResultsList is the list of lines of a results page. If countjobs finds
    the searchCount line, the last space-separated token before '</div' is
    parsed as an integer (commas removed) and that number plus one is
    returned. If no such line exists, 1 is returned.
    NOTE(review): the +1 presumably keeps the tally non-zero for languages
    with no postings -- confirm.
    """
    status, line = countjobs(ResultsList)
    if status == 1:
        return 1
    if status == 0:
        before_close = line.split('</div')[0]
        last_token = before_close.split(' ')[-1]
        return int(last_token.replace(',', '')) + 1
def countjobs(LineList):
    """Locate the first line containing the '<div id="searchCount">' marker.

    Returns (0, line) for the first matching line, or (1, None) when no line
    in LineList contains the marker.
    """
    marker = '<div id="searchCount">'
    hit = next((text for text in LineList if marker in text), None)
    if hit is None:
        return (1, None)
    return (0, hit)
def job_tally(Lang):
    """Return (language, mean tally) over the three Indeed searches for Lang.

    Lang is lowercased before searching. Languages listed in edge_case are
    searched under a substitute term ('go' is searched as 'golang'). The
    substitution is now looked up with the lowercased name, so it applies
    regardless of the case Lang was passed in; previously the membership
    test used Lang as given while the lookup used Lang.lower().

    A '%23' in Lang is turned back into '#' for the printed line and for the
    returned name. The per-search tallies and their mean are printed as a
    side effect.
    """
    edge_case = {'go': 'golang'}
    key = Lang.lower()
    pages = search(edge_case.get(key, key))

    jr = [jobs_exist(page) for page in pages]
    jn = sum(jr) / float(len(jr))

    Lang = Lang.replace('%23', '#')
    # Single pre-formatted string: same output as `print Lang, jr, jn, '\n'`.
    print("%s %s %s \n" % (Lang, jr, jn))
    return (Lang, jn)
if __name__ == "__main__":
    # Mean job tally per language, keyed by the plain lowercase name.
    book = {}
    for job in jobs:
        # The query builders do not escape reserved characters: '#' would
        # start a URL fragment and '+' is decoded as a space in a query
        # string (so 'c++' would be searched as 'c'). Percent-encode both
        # before building the URLs.
        encoded = job.replace('#', '%23').replace('+', '%2B')
        book[job] = job_tally(encoded)[1]

    # Lowercase language name -> number of GitHub users.
    xuser = dict((name.lower(), count) for name, count in users)

    # Jobs per user for every language that was tallied.
    rbook = dict((job, book[job] / xuser[job]) for job in book)

    # Report in descending order of jobs per user.
    for job in sorted(rbook, key=rbook.__getitem__, reverse=True):
        print("%s %s" % (job, rbook[job]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment