Skip to content

Instantly share code, notes, and snippets.

@abkosar
Created May 28, 2016 19:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abkosar/c583ae4c59a0da7b3e82b65fc9678417 to your computer and use it in GitHub Desktop.
Save abkosar/c583ae4c59a0da7b3e82b65fc9678417 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
%pylab inline
import matplotlib.pyplot as plt
import seaborn as sns
# Inspect the raw distribution of titles before any normalization
# (only displayed when this is the last expression of a notebook cell).
jobs_data_science['job_title'].value_counts()

# Preview the postings whose title mentions "Big Data"; na=False makes rows
# with a missing title count as non-matches instead of propagating NaN.
jobs_data_science[jobs_data_science.job_title.str.contains("Big Data", na=False)]

# Collapse the many free-form title variants into a handful of canonical
# titles.  The rules are applied in order, so earlier rules win:
#   * 'Big Data.*' is intentionally NOT prefixed with '.*' -- it rewrites from
#     "Big Data" to the end of the string and keeps any leading words.
#   * every other rule replaces the whole title with the canonical name.
# The original script applied both 'X.*' and '.*X.*' for each family; the
# second pattern subsumes the first, so only the '.*X.*' form is kept.
_TITLE_RULES = (
    (r'Big Data.*', 'Big Data Engineer'),
    (r'.*Data Sci.*', 'Data Scientist'),
    (r'.*Data An.*', 'Data Analyst'),
    (r'.*Data Arc.*', 'Data Architect'),
    (r'.*Data Mod.*', 'Data Modeler'),
    (r'.*Business An.*', 'Business Analyst'),
)

for _pattern, _canonical in _TITLE_RULES:
    # regex=True is required: since pandas 2.0 Series.str.replace treats the
    # pattern as a literal string by default, which would silently leave
    # every title unchanged.
    jobs_data_science['job_title'] = jobs_data_science['job_title'].str.replace(
        _pattern, _canonical, regex=True
    )
# Build a per-title frequency table for titles that occur more than 3 times.
#
# Resulting frame (indexed by the raw title):
#   job_title       -- number of postings with that title
#   job_title_name  -- the title with surrounding whitespace stripped
#   job_title_freq  -- share of ALL postings (not just the kept titles)
#
# The column names are set explicitly instead of relying on whatever name
# value_counts() gives its result ('job_title' before pandas 2.0, 'count'
# from 2.0 on), and the frame is built once rather than built, filtered and
# rebuilt.
_title_counts = jobs_data_science['job_title'].value_counts()

# Denominator is the full number of rows, matching the original computation.
_total_postings = len(jobs_data_science['job_title'])

# Keep only titles that appear more than 3 times.
_title_counts = _title_counts[_title_counts > 3]

# rename_axis(None) drops the index name so it cannot clash with the
# 'job_title' column on newer pandas versions.
job_name_freq = _title_counts.rename_axis(None).to_frame(name='job_title')
job_name_freq['job_title_name'] = job_name_freq.index.str.strip()
job_name_freq['job_title_freq'] = job_name_freq['job_title'] / _total_postings
job_name_freq
# Bar chart of the relative frequency of each retained job title, saved to
# 'freq_of_unique_titles.png'.

# The default figure size must be set BEFORE the figure is created;
# assigning it after plotting only affects figures created later.
plt.rcParams['figure.figsize'] = 12, 6

g = sns.barplot(x="job_title_name", y="job_title_freq", data=job_name_freq, color="salmon")
g.set_title("Number of Unique Job Titles", fontsize=25)
g.set_xlabel('Job Title', fontsize=15)
g.set_ylabel('Frequency', fontsize=15)

# Style the ticks on both axes.  `axis` accepts 'x', 'y' or 'both' -- not a
# list -- and this has to run before savefig to show up in the saved image.
plt.tick_params(axis='both', labelsize=5, width=2)

# Rotate the x tick labels so long job titles do not overlap.
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

# Save last, once all styling is applied.  bbox_inches='tight' keeps the
# rotated tick labels from being cut off at the bottom of the image.
plt.savefig('freq_of_unique_titles.png', orientation='landscape', bbox_inches='tight')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment