Skip to content

Instantly share code, notes, and snippets.

@Ze1598
Last active January 24, 2021 21:19
Show Gist options
  • Save Ze1598/af6f667405f323f19ec32692bcc05caf to your computer and use it in GitHub Desktop.
Save Ze1598/af6f667405f323f19ec32692bcc05caf to your computer and use it in GitHub Desktop.
Python Data Analysis Part 4: Programming Languages
import pandas as pd
from os import getcwd, path
import plotly.express as px
import plotly.offline as pyo
# Initialise Plotly's offline notebook mode so figures can be displayed
# inline when this script is run cell-by-cell in a notebook.
pyo.init_notebook_mode()

# The survey export is expected at ./data/survey_results_public.csv,
# resolved against the current working directory.
path_to_data = path.join(getcwd(), "data", "survey_results_public.csv")

# Only the languages column is analysed, so ask the parser to load just that
# column instead of reading every survey column and discarding the rest.
data = pd.read_csv(path_to_data, usecols=["LanguageWorkedWith"])

# Respondents who left the question unanswered contribute nothing here.
data = data.dropna()
# Each answer is a ";"-separated list of languages. Split it with the
# vectorised string accessor (one output column per list position) instead
# of constructing a pd.Series per row inside apply(), which is much slower
# on a large frame. Shorter answers are padded with missing values.
split_languages = data["LanguageWorkedWith"].str.split(";", expand=True)

# Reshape to long form: one row per (respondent row, list position) pair.
# The explicit dropna() guarantees the padding cells are removed regardless
# of whether stack() itself discards them.
languages = split_languages.stack().dropna()

# Turn the two index levels into ordinary columns and give all three
# columns descriptive names.
languages = languages.reset_index()
languages.columns = ["Respondent ID", "Language Reported Order", "Languages"]
# Count, for every language, how many long-form rows mention it. The count
# is taken over the "Respondent ID" column, so the resulting table is
# indexed by language and has a single "Respondent ID" column of counts.
mentions_per_language = (
    languages.groupby("Languages")[["Respondent ID"]].count()
)

# Rank from most to least mentioned.
ranked_languages = mentions_per_language.sort_values(
    by="Respondent ID",
    ascending=False,
)

# Keep only the 15 most mentioned languages for plotting.
languages_counts = ranked_languages.iloc[:15]
# Layout tweaks applied on top of the default bar chart.
layout_options = {
    "xaxis_title": "Programming Language",
    "yaxis_title": "Frequency",
    "title_x": 0.5,  # horizontal title position; 0.5 centres it
    "showlegend": False,
}

# Plot the counts table directly: the frame is passed as-is, so Plotly
# Express picks the axes from its index and its single counts column.
fig = px.bar(languages_counts, title="Top 15 Most Used Programming Languages")
fig.update_layout(**layout_options)
fig.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment