Last active
March 13, 2023 19:02
-
-
Save Ze1598/b8c5091d62ec9545ef277c03d811feab to your computer and use it in GitHub Desktop.
freeCodeCamp grouped bar chart visualization: data pre-processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pre-process the page-views dataset for the grouped bar chart:
# load the CSV, index it by date, drop outliers and derive `year` and
# `month` columns (the latter as a calendar-ordered categorical).
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV and convert the date strings to proper datetime values
df = pd.read_csv("page_views.csv")
df["date"] = pd.to_datetime(df["date"])

# Sort from the oldest to the most recent recording, then use the dates as
# the index (reassigning instead of mutating with `inplace=True`)
df = df.sort_values(by="date").set_index("date")

# Remove possible outliers, i.e. keep only the rows strictly between the
# 2.5th and the 97.5th percentiles of `page_views`
page_views = df["page_views"]
lower_bound = page_views.quantile(0.025)
upper_bound = page_views.quantile(0.975)
# `.copy()` makes the filtered frame independent of the original, so adding
# columns below does not raise pandas' SettingWithCopyWarning
df = df[(page_views > lower_bound) & (page_views < upper_bound)].copy()

# Month names in calendar order; used as the categories of `df["month"]`
months = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]

# Year of each date recording
df["year"] = df.index.year

# Month of each date recording, as its English name (`month_name()` returns
# English names when no locale is given). Stored as a categorical whose
# categories follow `months`, so sorting by this column yields the proper
# calendar order rather than alphabetical order.
df["month"] = pd.Categorical(df.index.month_name(), categories=months)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment