Skip to content

Instantly share code, notes, and snippets.

@Ze1598
Created January 24, 2021 15:26
Show Gist options
  • Save Ze1598/88678ed16829f0951450ab563df13d32 to your computer and use it in GitHub Desktop.
Save Ze1598/88678ed16829f0951450ab563df13d32 to your computer and use it in GitHub Desktop.
Python Data Analysis Part 1: Age of Respondents
import pandas as pd
from os import getcwd, path
import plotly.express as px
import plotly.offline as pyo
# Age-of-respondents analysis: load the survey CSV, clean the "Age" column
# step by step (printing how many rows survive each step), and plot the
# resulting age distribution.

# Set up Plotly's offline notebook mode before any figure is shown.
pyo.init_notebook_mode()

# The CSV is expected under ./data relative to the current working directory.
path_to_data = path.join(getcwd(), "data", "survey_results_public.csv")
data = pd.read_csv(path_to_data)

# Only the "Age" column is needed; the double brackets keep a DataFrame.
data = data[["Age"]]

# First look at the raw values (row index on x, age on y) to spot outliers.
fig = px.scatter(data, y="Age")
fig.show()

# Keep ages within [10, 75]. Comparisons against NaN evaluate to False, so
# this step also discards rows with a missing age.
data = data.query("(Age >= 10) and (Age <= 75)")
print(f"Rows left: {data.shape[0]:,}")
fig = px.scatter(data, y="Age")
fig.show()

# Keep only whole-number ages. The vectorized modulo test replaces a per-row
# `int(age) == age` lambda: it selects the same rows, avoids a Python-level
# call per row, and does not raise on NaN (NaN % 1 is NaN, which compares
# unequal to 0 and is therefore filtered out) or on an empty frame.
is_integer = data["Age"] % 1 == 0
data = data[is_integer]
print(f"Rows left: {data.shape[0]:,}")

# Explicit safeguard against missing ages. With the filters above this is
# expected to be a no-op, but it keeps the step (and its printed row count)
# independent of how the earlier filters are written.
data = data.dropna(axis="rows", how="any", subset=["Age"])
print(f"Rows left: {data.shape[0]:,}")

# Frequency of each distinct age, shown as a bar chart.
age_counts = data["Age"].value_counts()
fig = px.bar(age_counts, title="Age of respondents")
fig.update_layout(
    xaxis_title="Age",
    yaxis_title="Frequency",
    title_x=0.5,  # centre the title horizontally
    showlegend=False,
)
fig.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment