This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.decomposition import PCA | |
# Build the PCA input: every column of `data` except the target
# 'Diabetes_binary'.
features = data.drop('Diabetes_binary', axis=1)

# Fit a StandardScaler on the feature columns and transform them in one step;
# `scaler` is kept so the same fitted scaling can be reused later.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Re-importing necessary libraries and reloading the data | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import itertools | |
def is_binary(column):
    """Return True when the distinct values of *column* are exactly 0 and 1.

    *column* must expose a ``unique()`` method (e.g. a pandas Series).
    A column that contains only 0s, only 1s, any third value, or a missing
    value is not considered binary.

    The comparison is done on sets rather than on a sorted list so that
    columns mixing unorderable types (e.g. ints and strings) yield False
    instead of raising TypeError.
    """
    return set(column.unique()) == {0, 1}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Columns of `data` for which is_binary() returns True.
binary_columns = [col for col in data.columns if is_binary(data[col])]

# Cartesian product: every binary column paired with every non-binary numeric
# column. Each (binary, non-binary) pair is intended to become one box plot.
binary_non_binary_combinations = list(
    itertools.product(binary_columns, non_binary_numeric_columns)
)

# Plotting grid sized to hold one subplot per combination.
n_plots = len(binary_non_binary_combinations)
n_cols = 3  # Number of columns per row
# Ceiling division, so a partially filled last row still gets allocated.
n_rows = (n_plots + n_cols - 1) // n_cols
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Re-importing necessary libraries and reloading the data | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import itertools | |
def is_binary(column):
    """Return True when the distinct values of *column* are exactly 0 and 1.

    *column* must expose a ``unique()`` method (e.g. a pandas Series).
    A column that contains only 0s, only 1s, any third value, or a missing
    value is not considered binary.

    The comparison is done on sets rather than on a sorted list so that
    columns mixing unorderable types (e.g. ints and strings) yield False
    instead of raising TypeError.
    """
    return set(column.unique()) == {0, 1}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Apply seaborn's "whitegrid" style to the plots created after this call.
sns.set_style("whitegrid")
# Selecting a subset of columns for plotting histograms | |
# Excluding binary columns for more meaningful histograms | |
hist_columns = [ | |
"Diabetes_binary", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Apply seaborn's "whitegrid" style to the plots created after this call.
sns.set_style("whitegrid")

# Columns to draw histograms for. Binary (0/1) columns are deliberately left
# out because a two-bar histogram carries little information.
hist_columns = [
    'BMI',
    'Age',
    'GenHlth',
    'MentHlth',
    'PhysHlth',
    'Education',
    'Income',
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Load the dataset from the mounted Google Drive path into a DataFrame.
file_path = '/content/drive/MyDrive/研究/糖尿病/Diabetes Health Indicators Dataset/diabetes_binary_health_indicators_BRFSS2015.csv'
data = pd.read_csv(file_path)

# Bare expression: evaluates the whole frame (not just the first rows). A
# notebook renders it only when it is the last expression of the cell.
data
# Checking data types and unique values for each column |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Station names covered by the transition probability matrix: its column
# labels minus the 'start_station_name' label, which is not a destination
# station. NOTE(review): presumably that column holds the row's origin
# station name; confirm against where the matrix is built.
aligned_stations = set(transition_probability_matrix.columns) - {'start_station_name'}

# Keep only the rows whose start AND end stations both appear in the matrix,
# so the statistics line up with the matrix.
filtered_station_stats = station_stats[
    (station_stats['start_station_name'].isin(aligned_stations)) &
    (station_stats['end_station_name'].isin(aligned_stations))
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# `df` is another name for the same object as `divvy_tripdata` (no copy is
# made), so the column assignments below modify `divvy_tripdata` too.
df = divvy_tripdata

# 2. Convert 'started_at' and 'ended_at' to datetime
df['started_at'] = pd.to_datetime(df['started_at'])
df['ended_at'] = pd.to_datetime(df['ended_at'])

# 3. Calculate the travel time for each trip (end minus start)
df['travel_time'] = df['ended_at'] - df['started_at']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Create a single 20x15-inch figure for the heatmap. (The original called
# plt.figure twice, which left a stray empty figure behind.)
plt.figure(figsize=(20, 15))

# Draw every column except the first one, using the 'Reds' colormap so that
# high values are red and low values are near white.
sns.heatmap(transition_probability_matrix.iloc[:, 1:], cmap='Reds')