Created
January 4, 2024 02:47
-
-
Save smzn/1068a34bfd9e40f588afa47ea588cc49 to your computer and use it in GitHub Desktop.
total rides for each station (both start and end)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count how many rides start and how many end at each station. Each
# value_counts() is computed once and reused for both ranking and plotting.
start_counts = df['start_station_name'].value_counts()
end_counts = df['end_station_name'].value_counts()

# Total rides per station (starts + ends). A plain `+` aligns on station name
# and produces NaN for any station that appears in only one of the two
# columns, which would silently drop it from the ranking below; fill_value=0
# treats the missing side as zero rides instead.
total_rides_per_station = start_counts.add(end_counts, fill_value=0).astype(int)

# Rank stations by total rides and keep the busiest ones whose cumulative
# share of all rides is at most 20%.
# NOTE(review): this selects by cumulative share of rides, not "the top 20% of
# stations by count"; if the single busiest station alone exceeds 20% the
# selection is empty -- confirm this is the intended definition.
sorted_total_rides = total_rides_per_station.sort_values(ascending=False)
cumulative_percentage_total = sorted_total_rides.cumsum() / sorted_total_rides.sum()
top_20_total_stations = sorted_total_rides[cumulative_percentage_total <= 0.20]

# Split the totals back into start and end counts for the selected stations.
# reindex (rather than .loc) keeps the ranking order and yields 0 instead of
# raising KeyError when a station never appears on one side.
top_20_start_counts = start_counts.reindex(top_20_total_stations.index, fill_value=0)
top_20_end_counts = end_counts.reindex(top_20_total_stations.index, fill_value=0)

# Stacked bar chart: end-station counts are drawn on top of start-station
# counts by using the start counts as the bars' baseline (`bottom`).
plt.figure(figsize=(15, 10))
top_20_start_counts.plot(kind='bar', color='skyblue', label='Start Station')
top_20_end_counts.plot(kind='bar', color='green', label='End Station', bottom=top_20_start_counts)
plt.title('Top 20% Stations by Total Rides (Start and End)')
plt.xlabel('Station Name')
plt.ylabel('Total Rides')
plt.xticks(rotation=90)
plt.legend()
plt.grid(axis='y')
plt.tight_layout()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment