Skip to content

Instantly share code, notes, and snippets.

@jlumbroso
Created August 15, 2020 20:38
Show Gist options
  • Save jlumbroso/50afaa12d8af8dac615331d515f0f0ff to your computer and use it in GitHub Desktop.
Save jlumbroso/50afaa12d8af8dac615331d515f0f0ff to your computer and use it in GitHub Desktop.
Minimum reproducible code for a Pandas question on how to plot time series with missing data
import datetime
import random

import pandas as pd
# Generate a random time series
def gapped_time_series(start=None, span=datetime.timedelta(days=30)):
    """Return a DataFrame of irregularly spaced random events.

    The first event is placed exactly at ``start``. Each following event is
    offset from the previous one by ``random.gammavariate(0.7, 2.0)`` days,
    so the gaps between consecutive events vary.

    Args:
        start: Timestamp of the first event. When ``None``, the current
            local time (``datetime.datetime.now()``) is used.
        span: Length of the time window. Events are generated while their
            timestamp is strictly earlier than ``start + span``.

    Returns:
        A ``pandas.DataFrame`` with one row per event and two columns:
        ``"timestamp"`` (when the event happened) and ``"inbound"`` (a
        randomly chosen boolean). Both columns are present even when no
        event fits in the window (zero or negative ``span``).
    """
    if start is None:
        start = datetime.datetime.now()
    end = start + span  # upper bound is loop-invariant, compute it once

    series = []
    now = start
    while now < end:
        series.append({
            "timestamp": now,
            "inbound": random.choice([True, False]),
        })
        now += datetime.timedelta(days=random.gammavariate(0.7, 2.0))

    # Passing the columns explicitly keeps the schema stable for an empty
    # result, so callers can always index "timestamp" and "inbound".
    return pd.DataFrame(series, columns=["timestamp", "inbound"])
# Create a sample data frame
df = gapped_time_series()

# Plot the number of events by day.
# Bucket every event into its calendar day (timestamp truncated to midnight).
event_day = df["timestamp"].dt.normalize()

# Count events per day for each direction in a single pass: summing a boolean
# column counts its True values, so no per-direction filtering/merging (and
# no NaN from an outer join) is needed.
c_sub_df = pd.DataFrame({
    "received": df["inbound"].groupby(event_day).sum(),
    "sent": (~df["inbound"]).groupby(event_day).sum(),
})

# Days without any event are absent from the grouped index; reindex over the
# full range of days so those gaps show up as empty (zero-height) bars.
all_days = pd.date_range(event_day.min(), event_day.max(), freq="D")
c_sub_df = c_sub_df.reindex(all_days, fill_value=0)

# Label the bars with (year, month, day) tuples to match the x-axis label.
c_sub_df.index = pd.MultiIndex.from_arrays(
    [all_days.year, all_days.month, all_days.day],
    names=["year", "month", "day"],
)

# Combine on a stacked bar graph and save it next to the script.
ax = c_sub_df.plot(
    kind="bar",
    stacked=True,
    figsize=(12, 5),
    title="Number of events (by direction)",
)
ax.set_xlabel("(year, month, day)")
ax.get_figure().savefig("events_aggregate_count.pdf")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment