Skip to content

Instantly share code, notes, and snippets.

@rosslazer
Created March 24, 2023 18:19
Show Gist options
  • Save rosslazer/c2e6252afcddadc18a9a10f56f5bf8f2 to your computer and use it in GitHub Desktop.
Save rosslazer/c2e6252afcddadc18a9a10f56f5bf8f2 to your computer and use it in GitHub Desktop.
Caltrain on-time performance analysis
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme so the matplotlib figure below picks up its styling.
sns.set()
# Load the raw on-time-performance export. The code below relies on three
# columns being present: 'Month/Year', 'Measure Names' and 'Measure Values'.
file = "otp.csv"
data = pd.read_csv(file)

# Parse labels such as "Jan-23" into timestamps so the rows sort
# chronologically and can be resampled by calendar period.
data['Month/Year'] = pd.to_datetime(data['Month/Year'], format='%b-%y')

# Reshape from long to wide: one row per month, one column per measure.
monthly_data = data.pivot_table(
    index='Month/Year',
    columns='Measure Names',
    values='Measure Values',
    aggfunc='sum',
)

# On-time percentage = on-time trains / all trains run, expressed in percent.
# The previous formula divided the (already percentage-valued, and summed)
# 'Train OTP%' measure by the train count, which is not a percentage of
# anything. NOTE(review): assumes 'On Time Train Count' and 'Total Late Train'
# together account for every train in the month -- confirm against the export.
on_time_trains = monthly_data['On Time Train Count']
late_trains = monthly_data['Total Late Train']
monthly_data['Average OTP%'] = on_time_trains / (on_time_trains + late_trains) * 100

# Yearly view: mean of each monthly measure per calendar year.
# NOTE(review): newer pandas deprecates the 'Y' alias in favour of 'YE'; it is
# kept here because 'YE' is not accepted by older pandas releases.
yearly_data = monthly_data.resample('Y').mean()
# Draw one figure with two y-axes sharing the month axis:
# on-time percentage on the left (blue), late-train count on the right (red).
fig, ax1 = plt.subplots(figsize=(10, 6))
months = monthly_data.index

# Left axis: average OTP% per month.
ax1.plot(
    months,
    monthly_data['Average OTP%'],
    label='Average OTP%',
    color='blue',
)
ax1.set_xlabel('Month/Year')
ax1.set_ylabel('Average OTP%', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Right axis: total late trains per month, on a twin of the left axis.
ax2 = ax1.twinx()
ax2.plot(
    months,
    monthly_data['Total Late Train'],
    label='Total Late Train',
    color='red',
)
ax2.set_ylabel('Total Late Train', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Title and dashed grid.
plt.title('Caltrain On-Time Performance Analysis')
ax1.grid(axis='both', linestyle='--', alpha=0.7)

# A twin axis keeps its own legend entries, so gather the handles from both
# axes and show them in a single legend.
lines, labels = [], []
for axis in (ax1, ax2):
    axis_handles, axis_labels = axis.get_legend_handles_labels()
    lines += axis_handles
    labels += axis_labels
ax1.legend(lines, labels, loc='best')

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment