This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Row-normalize the numeric part of filtered_matrix while leaving the
# trailing row-sum column out of both the numerator and the denominator.
# Column 0 (per the original note, a string column) and the last column
# (per the original note, a precomputed row sum) are dropped by position.
numerical_part_excluding_sum = filtered_matrix.iloc[:, 1:-1]
row_sums_excluding_last_column = numerical_part_excluding_sum.sum(axis=1)

# Divide every cell by the total of its own row; axis=0 aligns the divisor
# with the row index.
normalized_matrix_excluding_sum = numerical_part_excluding_sum.div(
    row_sums_excluding_last_column, axis=0
)

# A row whose total is 0 yields 0/0 = NaN; report those cells as 0.
normalized_matrix_excluding_sum = normalized_matrix_excluding_sum.fillna(0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the transition probability matrix by dividing every entry by the
# total of its row (axis=0 aligns the divisor with the row index).
# NOTE(review): a row whose total is 0 comes out as NaN here -- confirm that
# such rows have already been removed from filtered_matrix.
transition_probability_matrix = filtered_matrix.div(
    filtered_matrix.sum(axis=1), axis=0
)

# Show the result (bare expression: rendered as the cell output in a notebook).
transition_probability_matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Station names that appear both as a row label and as a column label.
common_stations = set(transition_matrix.index) & set(transition_matrix.columns)

# Restrict the matrix to those stations on both axes.
# .loc does not accept a set as an indexer (pandas >= 2.0 raises TypeError),
# so a list is passed instead; sorting also makes the row/column order
# reproducible. Assumes the labels are mutually comparable (e.g. all strings).
station_order = sorted(common_stations)
filtered_matrix = transition_matrix.loc[station_order, station_order]

# Total of each row of filtered_matrix.
row_sums = filtered_matrix.sum(axis=1)

# Get the list of stations whose row sum is non-zero.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count the trips for every (start station, end station) pair.
transition_counts = (
    df.groupby(['start_station_name', 'end_station_name'])
    .size()
    .reset_index(name='transition_count')
)

# Pivot the pair counts into a start-by-end transition count matrix;
# pairs without any recorded trip are filled with 0.
transition_matrix = transition_counts.pivot_table(
    index='start_station_name',
    columns='end_station_name',
    values='transition_count',
    fill_value=0,
)

# Show the result. The original note says only the top rows are shown --
# presumably relying on the notebook's truncated display of large frames.
transition_matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import folium | |
import matplotlib.pyplot as plt | |
import matplotlib.colors as mcolors | |
# Choose a colour according to the usage count.
def get_color(usage, max_usage, min_usage=None):
    """Return the hex colour for ``usage`` on matplotlib's ``Reds`` colormap.

    Args:
        usage: Value to convert to a colour.
        max_usage: Value mapped to the upper end of the colour scale.
        min_usage: Value mapped to the lower end of the colour scale. When
            omitted, the minimum of ``aggregated_data['Total Usage']`` is
            looked up, which is what this function always did before the
            parameter existed.

    Returns:
        The colour as a hex string, as produced by ``mcolors.rgb2hex``.
    """
    if min_usage is None:
        # Backward-compatible default: read the lower bound from the
        # module-level aggregated_data frame.
        min_usage = aggregated_data['Total Usage'].min()
    norm = plt.Normalize(min_usage, max_usage)
    cmap = plt.cm.Reds
    # The colormap returns RGBA; keep only the RGB components for the hex code.
    rgb = cmap(norm(usage))[:3]
    return mcolors.rgb2hex(rgb)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
# Names of the count columns to compare.
columns = [
    'Electric Bike Count',
    'Classic Bike Count',
    'Start Count',
    'End Count',
    'Member Count',
    'Casual Count',
]

# Pairwise correlation coefficients between the selected columns
# (DataFrame.corr with its default method).
correlation_matrix = aggregated_data[columns].corr()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
from itertools import combinations | |
# Names of the count columns to plot against each other.
columns = [
    'Electric Bike Count',
    'Classic Bike Count',
    'Start Count',
    'End Count',
    'Member Count',
    'Casual Count',
]

# Draw a scatter plot for every unordered pair of columns: one new figure
# per pair.
# NOTE(review): only the figure creation is visible in this excerpt; the
# actual plotting calls presumably follow -- confirm against the notebook.
for col1, col2 in combinations(columns, 2):
    plt.figure(figsize=(8, 5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df = divvy_tripdata

# Select the rides of each bike type once, so the same boolean filter is not
# re-evaluated for the start-station and the end-station counts.
electric_rides = df[df['rideable_type'] == 'electric_bike']
classic_rides = df[df['rideable_type'] == 'classic_bike']

# Electric and classic bike rides per start station, as actual counts
# (non-null ride_id values) rather than rates.
electric_bike_start_counts = electric_rides.groupby('start_station_name')['ride_id'].count()
classic_bike_start_counts = classic_rides.groupby('start_station_name')['ride_id'].count()

# The same counts keyed by end station.
electric_bike_end_counts = electric_rides.groupby('end_station_name')['ride_id'].count()
classic_bike_end_counts = classic_rides.groupby('end_station_name')['ride_id'].count()

# Combining start and end counts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Electric and classic bike rides per start station, as actual counts
# (non-null ride_id values) rather than rates.
is_electric = df['rideable_type'] == 'electric_bike'
is_classic = df['rideable_type'] == 'classic_bike'
electric_bike_start_counts = df[is_electric].groupby('start_station_name')['ride_id'].count()
classic_bike_start_counts = df[is_classic].groupby('start_station_name')['ride_id'].count()

# Member and casual rides per start station, again as actual counts.
is_member = df['member_casual'] == 'member'
is_casual = df['member_casual'] == 'casual'
member_counts = df[is_member].groupby('start_station_name')['ride_id'].count()
casual_counts = df[is_casual].groupby('start_station_name')['ride_id'].count()

# Total start and end counts for each station
start_counts = df.groupby('start_station_name')['ride_id'].count()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rides per start station for each rider type (count of non-null ride_id).
member_counts = (
    df[df['member_casual'] == 'member']
    .groupby('start_station_name')['ride_id']
    .count()
)
casual_counts = (
    df[df['member_casual'] == 'casual']
    .groupby('start_station_name')['ride_id']
    .count()
)

# Member + casual rides per station; a station missing from one of the two
# series contributes 0 for that rider type instead of producing NaN.
combined_member_counts = member_counts.add(casual_counts, fill_value=0)

# Order the stations from most to fewest rides, then compute the running
# share of all rides. Despite the name, the value is cumsum / sum, i.e. a
# fraction of the grand total, not a number scaled to 100.
sorted_combined_member_counts = combined_member_counts.sort_values(ascending=False)
cumulative_percentage_members = (
    sorted_combined_member_counts.cumsum() / sorted_combined_member_counts.sum()
)