Created
January 6, 2024 08:29
-
-
Save smzn/d941aecf83cdfdbd8108aa7a084e84c3 to your computer and use it in GitHub Desktop.
aggregation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-station aggregation of the trip table: ride counts split by bike type
# and rider type, total departures/arrivals, and a representative coordinate.
# Every count is indexed by station name; a station is counted both when it
# is a ride's start station and when it is a ride's end station.
df = divvy_tripdata

# Row subsets reused for both the start-station and end-station groupings.
electric_rides = df[df['rideable_type'] == 'electric_bike']
classic_rides = df[df['rideable_type'] == 'classic_bike']
member_rides = df[df['member_casual'] == 'member']
casual_rides = df[df['member_casual'] == 'casual']

# Electric and classic bike ride counts (absolute counts, not rates) per start station
electric_bike_start_counts = electric_rides.groupby('start_station_name')['ride_id'].count()
classic_bike_start_counts = classic_rides.groupby('start_station_name')['ride_id'].count()

# Electric and classic bike ride counts per end station
electric_bike_end_counts = electric_rides.groupby('end_station_name')['ride_id'].count()
classic_bike_end_counts = classic_rides.groupby('end_station_name')['ride_id'].count()

# Combining start and end counts; fill_value=0 keeps stations that appear on
# only one side instead of turning their total into NaN.
electric_bike_counts = electric_bike_start_counts.add(electric_bike_end_counts, fill_value=0)
classic_bike_counts = classic_bike_start_counts.add(classic_bike_end_counts, fill_value=0)

# Member and casual ride counts (absolute counts, not rates) per start station
member_counts = member_rides.groupby('start_station_name')['ride_id'].count()
casual_counts = casual_rides.groupby('start_station_name')['ride_id'].count()

# Member and casual ride counts per end station
member_end_counts = member_rides.groupby('end_station_name')['ride_id'].count()
casual_end_counts = casual_rides.groupby('end_station_name')['ride_id'].count()

# Combining start and end counts for members and casual users
member_counts = member_counts.add(member_end_counts, fill_value=0)
casual_counts = casual_counts.add(casual_end_counts, fill_value=0)

# Total start and end counts for each station
start_counts = df.groupby('start_station_name')['ride_id'].count()
end_counts = df.groupby('end_station_name')['ride_id'].count()

# Average latitude and longitude for each start station
average_lat = df.groupby('start_station_name')['start_lat'].mean()
average_lng = df.groupby('start_station_name')['start_lng'].mean()

# Average latitude and longitude for each end station
average_lat_end = df.groupby('end_station_name')['end_lat'].mean()
average_lng_end = df.groupby('end_station_name')['end_lng'].mean()

# Combining start and end station coordinates.
# The two per-station means are averaged with NaN skipped, so a station seen
# only as a start (or only as an end) keeps its real coordinate. Adding with
# fill_value=0 and dividing by 2 would halve the coordinate of such stations.
average_lat = pd.concat([average_lat, average_lat_end], axis=1).mean(axis=1)
average_lng = pd.concat([average_lng, average_lng_end], axis=1).mean(axis=1)

# Combining the recalculated counts into a single DataFrame
# (the Series are aligned on station name; missing entries become NaN).
aggregated_data = pd.DataFrame({
    'Electric Bike Count': electric_bike_counts,
    'Classic Bike Count': classic_bike_counts,
    'Start Count': start_counts,
    'End Count': end_counts,
    'Member Count': member_counts,
    'Casual Count': casual_counts,
    'Average Latitude': average_lat,
    'Average Longitude': average_lng,
})

# Filling NaN values with 0
# NOTE(review): this also maps a station with no coordinate data at all to
# (0, 0); confirm downstream code treats that as "unknown".
aggregated_data = aggregated_data.fillna(0)

# Columns to convert to integer values
columns_to_convert = ['Electric Bike Count', 'Classic Bike Count',
                      'Start Count', 'End Count',
                      'Member Count', 'Casual Count']

# Convert the listed columns to integers; the alignment and fill_value
# arithmetic above leaves them as floats.
aggregated_data[columns_to_convert] = aggregated_data[columns_to_convert].astype(int)

# Displaying the new aggregated data
aggregated_data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment