Created
August 15, 2020 20:38
-
-
Save jlumbroso/50afaa12d8af8dac615331d515f0f0ff to your computer and use it in GitHub Desktop.
Minimum reproducible code for a Pandas question on how to plot time series with missing data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import random

import pandas as pd
# Generate a random time series
def gapped_time_series(start=None, span=datetime.timedelta(days=30)):
    """Build a random, irregularly spaced series of events.

    The first event happens at ``start``; each following event is offset
    from the previous one by ``random.gammavariate(0.7, 2.0)`` days, so
    some calendar days receive several events and others receive none.

    Args:
        start: ``datetime.datetime`` of the first event. When ``None``,
            ``datetime.datetime.now()`` is used.
        span: ``datetime.timedelta`` giving the length of the window;
            every generated timestamp lies in ``[start, start + span)``.

    Returns:
        A ``pandas.DataFrame`` with one row per event and the columns
        ``"timestamp"`` and ``"inbound"`` (a randomly chosen boolean).
        The two columns are present even when ``span`` is zero or
        negative and no event is generated.
    """
    if start is None:
        start = datetime.datetime.now()

    # The end of the window never changes, so compute it once.
    end = start + span

    series = []
    now = start
    while now < end:
        series.append({
            "timestamp": now,
            "inbound": random.choice([True, False]),
        })
        now += datetime.timedelta(days=random.gammavariate(0.7, 2.0))

    # Naming the columns explicitly keeps the schema stable for an empty
    # result, which would otherwise be a frame without any columns.
    return pd.DataFrame(series, columns=["timestamp", "inbound"])
# Create a sample data frame
df = gapped_time_series()


# Plot the number of events by day
def _count_by_day(events):
    """Return the number of rows of *events* per (year, month, day)."""
    stamps = events["timestamp"].dt
    day_keys = [
        stamps.year.rename("year"),
        stamps.month.rename("month"),
        stamps.day.rename("day"),
    ]
    return events.groupby(day_keys)["inbound"].count()


# First group and split by the value of inbound
is_inbound = df["inbound"]
c_sub_df1 = _count_by_day(df[is_inbound])
c_sub_df2 = _count_by_day(df[~is_inbound])

# Build an index holding every calendar day between the first and the last
# event. Days on which nothing happened are absent from both counts above;
# without this step they would vanish from the x axis and the bar chart
# would misrepresent the timeline.
all_days = pd.date_range(
    df["timestamp"].min().normalize(),
    df["timestamp"].max().normalize(),
    freq="D",
)
day_index = pd.MultiIndex.from_arrays(
    [all_days.year, all_days.month, all_days.day],
    names=["year", "month", "day"],
)

# Combine on a stack graph; a day missing from one direction counts as 0
# (instead of NaN), which also keeps the counts integer-valued.
c_sub_df = pd.DataFrame({
    "received": c_sub_df1.reindex(day_index, fill_value=0),
    "sent": c_sub_df2.reindex(day_index, fill_value=0),
})

ax = c_sub_df.plot(kind='bar', stacked=True,
                   figsize=[12, 5], title="Number of events (by direction)")
_ = ax.set_xlabel("(year, month, day)")
_ = ax.get_figure().savefig("events_aggregate_count.pdf")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment