Skip to content

Instantly share code, notes, and snippets.

@Ze1598
Last active January 24, 2021 17:39
Show Gist options
  • Save Ze1598/1eaabe8c91ed1bcbdb46f0e9b9ca1c4a to your computer and use it in GitHub Desktop.
Save Ze1598/1eaabe8c91ed1bcbdb46f0e9b9ca1c4a to your computer and use it in GitHub Desktop.
Python Data Analysis Part 2: Annual Compensation
import pandas as pd
from os import getcwd, path
import plotly.express as px
import plotly.offline as pyo
# Put plotly's offline module into notebook mode before any figure is shown.
pyo.init_notebook_mode()

# The survey export is expected under ./data relative to the working directory.
path_to_data = path.join(getcwd(), "data", "survey_results_public.csv")

# Only the compensation column is analysed, so avoid parsing every other
# column of the CSV.
data = pd.read_csv(path_to_data, usecols=["ConvertedComp"])

# Keep compensations in the closed range [0, 200,000]. Missing values fail
# the range check and are dropped along with the out-of-range rows.
data = data[data["ConvertedComp"].between(0, 200_000)]
print(f"Rows left: {data.shape[0]:,}")
# Bin edges are defined once so the intervals and their display labels are
# always generated from the same values.
BIN_WIDTH = 15_000
bin_starts = range(0, 200_001, BIN_WIDTH)

# Human-readable labels in thousands, one per bin, e.g. "[0K, 15K)".
bin_labels = [
    f"[{start // 1_000:,}K, {(start + BIN_WIDTH) // 1_000:,}K)"
    for start in bin_starts
]

# Left-closed intervals [start, start + BIN_WIDTH) covering the filtered range.
compensation_bins = pd.IntervalIndex.from_tuples(
    [(start, start + BIN_WIDTH) for start in bin_starts],
    closed="left",
)

# The IntervalIndex already fixes the exact bins and which side is closed, so
# `precision` / `include_lowest` would have no effect here and are not passed.
data = pd.cut(data["ConvertedComp"], compensation_bins)

# Sort while the values are still interval categories (numeric bin order);
# once converted to strings the ordering would be lexical instead.
data = data.sort_values()
data = data.astype("str")
# Histogram of the binned compensation values (one bar per bin string).
fig = px.histogram(
    data,
    title="Annual Compensation (USD)",
)

# Map each bin's string form (e.g. "[0, 15000)") to its display label.
# Looking labels up per bin keeps tick text aligned with tick values even
# when some bin has no rows; pairing `data.unique()` positionally with the
# full label list would shift every label after an empty bin.
label_by_bin = dict(zip(map(str, compensation_bins), bin_labels))
present_bins = data.unique()

fig.update_layout(
    xaxis={
        "tickmode": "array",
        "tickvals": present_bins,
        # Fall back to the raw bin string if a value has no known label.
        "ticktext": [label_by_bin.get(bin_name, bin_name) for bin_name in present_bins],
    },
    xaxis_title="Annual Compensation",
    yaxis_title="Frequency",
    title_x=0.5,  # centre the title horizontally
    bargap=0,  # adjacent bars touch, as in a conventional histogram
    showlegend=False,
)

# Outline each bar so neighbouring bins stay distinguishable with no gap.
fig.update_traces(
    marker={
        "line": {
            "width": 2,
            "color": "DarkSlateGrey",
        },
    },
)
fig.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment