Skip to content

Instantly share code, notes, and snippets.

@smzn
smzn / gist:0e6cf3771b5c407db4d244ed632eb885
Created January 8, 2024 02:01
推移確率行列の作成
# Row-normalise the matrix while ignoring its first and last columns.
# The author's note describes the first column as a string label column and
# the last one as a row-sum column; `iloc[:, 1:-1]` drops both.
numerical_part_excluding_sum = filtered_matrix.iloc[:, 1:-1]
# Per-row total of the remaining columns, used as the divisor below.
row_sums_excluding_last_column = numerical_part_excluding_sum.sum(axis="columns")
# Divide every cell by its row total, then turn NaN results
# (e.g. 0/0 for rows whose total is zero) into 0.
normalized_matrix_excluding_sum = (
    numerical_part_excluding_sum
    .div(row_sums_excluding_last_column, axis="index")
    .fillna(0)
)
@smzn
smzn / gist:0e446b57c012b5214c47ff41ec25fedb
Created January 7, 2024 01:28
各行の合計で各要素を割って推移確率行列を作成
# Build the transition probability matrix: every element is divided by the
# total of the row it belongs to (division is aligned on the row index).
transition_probability_matrix = filtered_matrix.divide(
    filtered_matrix.sum(axis="columns"),
    axis="index",
)
# Display the result
transition_probability_matrix
@smzn
smzn / gist:3d17c9562b22dc7dcc9758544f58b4f3
Last active January 8, 2024 01:44
行と列の両方に含まれるステーションのみに行列を制限
# Station names that appear both as a row label and as a column label
common_stations = set(transition_matrix.index) & set(transition_matrix.columns)
# Restrict the matrix to those stations on both axes.
# `.loc` does not accept a set as an indexer (TypeError in pandas >= 2.0), and
# a set has no defined order anyway, so index with a sorted list: rows and
# columns then share one deterministic ordering.
common_station_order = sorted(common_stations)
filtered_matrix = transition_matrix.loc[common_station_order, common_station_order]
# Row totals of filtered_matrix
row_sums = filtered_matrix.sum(axis=1)
# Get the list of stations whose row sum is not zero
@smzn
smzn / gist:1d81c76f02b43e49bf6e99e5becfa7b2
Created January 7, 2024 01:20
ステーション間のトリップの回数をカウント
# Count the trips for every (start station, end station) pair
trips_per_station_pair = df.groupby(['start_station_name', 'end_station_name']).size()
transition_counts = trips_per_station_pair.reset_index(name='transition_count')
# Build the transition-count matrix: start stations as rows, end stations as
# columns, with 0 for pairs that never occur
transition_matrix = transition_counts.pivot_table(
    index='start_station_name',
    columns='end_station_name',
    values='transition_count',
    fill_value=0,
)
# Display the result
transition_matrix
@smzn
smzn / gist:9257edcb0a77ff5884b124dfa7fd78b3
Created January 7, 2024 01:00
ステーションの地図表示
import folium
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
# Choose a colour according to the usage count
def get_color(usage, max_usage):
    """Return the hex colour string for ``usage`` on the ``Reds`` colormap.

    The colour scale runs from the smallest ``'Total Usage'`` value found in
    the module-level ``aggregated_data`` up to ``max_usage``.

    Args:
        usage: Usage count to convert into a colour.
        max_usage: Usage count mapped to the top of the colour scale.

    Returns:
        The colour as a hex string produced by ``mcolors.rgb2hex``.
    """
    norm = plt.Normalize(aggregated_data['Total Usage'].min(), max_usage)
    cmap = plt.cm.Reds
    # Keep only the first three components (R, G, B) of the colormap output.
    rgb = cmap(norm(usage))[:3]
    return mcolors.rgb2hex(rgb)
@smzn
smzn / gist:fdeacccb3f92582e37dfe4326d188e62
Created January 6, 2024 08:42
相関係数を計算
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Columns to include in the correlation analysis
columns = [
    'Electric Bike Count',
    'Classic Bike Count',
    'Start Count',
    'End Count',
    'Member Count',
    'Casual Count',
]
# Pairwise correlation coefficients between the selected columns
correlation_matrix = aggregated_data.loc[:, columns].corr()
@smzn
smzn / gist:5b0182ff7c313c69a71aca2401ce76ab
Created January 6, 2024 08:37
全ての組み合わせの散布図を描画する
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations
# List of column names
columns = ['Electric Bike Count', 'Classic Bike Count', 'Start Count', 'End Count', 'Member Count', 'Casual Count']
# Draw a scatter plot for every pairwise combination of the columns
# NOTE(review): the loop body appears to be cut off after the figure is
# created, and its indentation is missing here - confirm against the full gist.
for col1, col2 in combinations(columns, 2):
plt.figure(figsize=(8, 5))
df = divvy_tripdata
# Select the rides of each bike type once and reuse the selections below
electric_bike_rides = df[df['rideable_type'] == 'electric_bike']
classic_bike_rides = df[df['rideable_type'] == 'classic_bike']
# Ride counts (actual counts, not rates) per start station
electric_bike_start_counts = electric_bike_rides.groupby('start_station_name')['ride_id'].count()
classic_bike_start_counts = classic_bike_rides.groupby('start_station_name')['ride_id'].count()
# The same counts per end station
electric_bike_end_counts = electric_bike_rides.groupby('end_station_name')['ride_id'].count()
classic_bike_end_counts = classic_bike_rides.groupby('end_station_name')['ride_id'].count()
# Combining start and end counts
@smzn
smzn / gist:ea89412680313acbdb5779a0e9f4923f
Last active January 4, 2024 03:46
Calculating required statistics for each station
# Boolean masks for each bike type and rider type
is_electric_bike = df['rideable_type'] == 'electric_bike'
is_classic_bike = df['rideable_type'] == 'classic_bike'
is_member = df['member_casual'] == 'member'
is_casual = df['member_casual'] == 'casual'
# Per-start-station ride counts (actual counts, not rates) by bike type
electric_bike_start_counts = df[is_electric_bike].groupby('start_station_name')['ride_id'].count()
classic_bike_start_counts = df[is_classic_bike].groupby('start_station_name')['ride_id'].count()
# Per-start-station ride counts (actual counts, not rates) by rider type
member_counts = df[is_member].groupby('start_station_name')['ride_id'].count()
casual_counts = df[is_casual].groupby('start_station_name')['ride_id'].count()
# Total number of rides starting at each station
start_counts = df.groupby('start_station_name')['ride_id'].count()
@smzn
smzn / gist:65facb5b539df9ba165ca0cd1c4cc1b8
Created January 4, 2024 03:16
the total number of rides for each station and member type combination
# Rides per start station, counted separately for members and casual riders
member_rides = df[df['member_casual'] == 'member']
casual_rides = df[df['member_casual'] == 'casual']
member_counts = member_rides.groupby('start_station_name')['ride_id'].count()
casual_counts = casual_rides.groupby('start_station_name')['ride_id'].count()
# Sum both rider types per station; a station missing from one side counts as 0
combined_member_counts = member_counts.add(casual_counts, fill_value=0)
# Order the stations from most to fewest rides
sorted_combined_member_counts = combined_member_counts.sort_values(ascending=False)
# Running share of all rides covered by the stations seen so far
cumulative_percentage_members = sorted_combined_member_counts.cumsum().div(
    sorted_combined_member_counts.sum()
)