Skip to content

Instantly share code, notes, and snippets.

@andyljones
Created August 1, 2015 10:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andyljones/57595f0a13113da881a5 to your computer and use it in GitHub Desktop.
Save andyljones/57595f0a13113da881a5 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
To use this, drop the file
'Full Results - Stack Overflow Developer Survey - 2015.csv'
from
https://drive.google.com/file/d/0Bzd_CzYvUxE5U1NSWnA2SFVKX00/view
into the same directory, then run the script.
"""
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Conversion factor applied to the survey's dollar figures to express them in pounds.
POUNDS_PER_DOLLAR = 0.64

data_path = 'Full Results - Stack Overflow Developer Survey - 2015.csv'

# skiprows=1 drops the first line of the file, so the header is read from the
# second line.
data = pd.read_csv(data_path, skiprows=1)

# Restrict the survey to UK respondents in full-time employment.
data = data[
    (data['Country'] == 'United Kingdom')
    & (data['Employment Status'] == 'Employed full-time')
]

# Keep only the two columns of interest. The explicit copy guarantees that the
# column assignment further down writes to an independent frame rather than to
# a possible view of `data`.
occupation_and_compensation = data[['Compensation', 'Occupation']].dropna().copy()

# Map each survey answer to a representative dollar amount. Closed bands use
# their midpoint; 'Rather not say' becomes NaN so that those rows are dropped
# below. float('nan') is used instead of the `scipy.nan` alias, which SciPy
# has deprecated.
salary_map = {
    'Rather not say': float('nan'),
    'Unemployed': 0,
    'Less than $20,000': 10000,
    '$20,000 - $40,000': 30000,
    '$40,000 - $60,000': 50000,
    '$60,000 - $80,000': 70000,
    '$80,000 - $100,000': 90000,
    '$100,000 - $120,000': 110000,
    '$120,000 - $140,000': 130000,
    '$140,000 - $160,000': 150000,
    'More than $160,000': 170000,
}

# Series.map with a dict yields NaN for any answer missing from salary_map,
# so unexpected answers are discarded by the dropna() that follows.
occupation_and_compensation['Compensation'] = (
    POUNDS_PER_DOLLAR
    * occupation_and_compensation['Compensation'].map(salary_map)
)
occupation_and_compensation = occupation_and_compensation.dropna()

# Group the cleaned rows by occupation; the plotting functions take this object.
compensations = occupation_and_compensation.groupby('Occupation')
def compare_compensations(compensations):
    """Plot a horizontal bar chart of mean compensation per occupation.

    Occupations with 25 or fewer rows are left out of the chart. The title
    reports the total number of rows that remain after that filter.

    Args:
        compensations: A pandas GroupBy over a frame that has 'Occupation'
            and 'Compensation' columns.

    Returns:
        The matplotlib Axes the chart was drawn on.
    """
    # Local import: the file only imports matplotlib.pyplot at the top.
    from matplotlib.ticker import FuncFormatter

    compensations_of_interest = compensations.filter(lambda group: len(group) > 25)

    mean_compensation = (
        compensations_of_interest
        .groupby('Occupation')[['Compensation']]
        .mean()
    )

    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    # legend=False avoids creating a legend only to remove it through the
    # private `legend_` attribute afterwards.
    ax = mean_compensation.sort_values('Compensation').plot(kind='barh', legend=False)

    ax.set_title('Mean compensation by occupation \nn = {:,}'.format(len(compensations_of_interest)))

    # A formatter labels whatever ticks exist at draw time. Fixed label strings
    # captured here would go stale when the figure is resized below and the
    # tick locations are recomputed.
    ax.xaxis.set_major_formatter(
        FuncFormatter(lambda value, _position: '{:,}'.format(int(value)))
    )
    ax.set_xlabel('Mean compensation (GBP)')
    ax.figure.set_size_inches(10, 10)

    return ax
def job_compensation(compensations, name):
    """Plot the cumulative compensation distribution for one occupation.

    For every distinct compensation value in the group, the curve shows the
    percentage of rows whose compensation is less than or equal to it. The
    line is drawn on the current matplotlib axes and labelled with `name`.

    Args:
        compensations: A pandas GroupBy keyed by occupation whose groups have
            a 'Compensation' column.
        name: The occupation (group key) to plot.

    Returns:
        The matplotlib Axes the curve was drawn on.

    Raises:
        KeyError: If `name` is not one of the groups.
    """
    # Local import: the file only imports matplotlib.pyplot at the top.
    from matplotlib.ticker import FuncFormatter

    comps_for_job = compensations.get_group(name)['Compensation']

    # Count rows per distinct value, order by value and accumulate: this gives
    # the number of rows <= each value in one pass instead of rescanning the
    # whole array for every distinct value.
    cumulative_counts = comps_for_job.value_counts().sort_index().cumsum()
    comps = list(cumulative_counts.index)
    counts = [100 * count / float(len(comps_for_job)) for count in cumulative_counts]

    ax = plt.gca()
    ax.plot(comps, counts, label=name)
    ax.set_xlabel('Compensation (GBP)')
    ax.set_ylabel('Cumulative percentage')
    ax.set_title('Cumulative compensation distribution for {}s\nn = {:,}'.format(name.lower(), len(comps_for_job)))

    # A formatter labels whatever ticks exist at draw time. Fixed label strings
    # captured here would go stale when the figure is resized below and the
    # tick locations are recomputed.
    ax.xaxis.set_major_formatter(
        FuncFormatter(lambda value, _position: '{:,}'.format(int(value)))
    )
    ax.figure.set_size_inches(10, 10)

    return ax
# Bar chart of mean compensation across occupations.
compare_compensations(compensations)

# The bar chart is still the current figure at this point, and
# job_compensation draws on the current axes. Open a fresh figure so the
# cumulative curve does not overwrite the bar chart's title and labels.
plt.figure()
job_compensation(compensations, 'Full-stack web developer')

# Display both figures when the file is run as a plain script.
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment