smzn

## gist:82104cac24976ac9f794d4c8ad820168
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# PCA input: every column of `data` except the 'Diabetes_binary' target
features = data.drop(columns='Diabetes_binary')

# Fit a StandardScaler (default settings) on the features, then transform them
scaler = StandardScaler()
scaled_features = scaler.fit(features).transform(features)

## gist:ce6c855970e9fb2079fa3f6f4f350453
# Re-importing the necessary libraries (no data is reloaded in this snippet)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

# Function to determine if a column is binary
def is_binary(column):
    """Return True when the distinct values of ``column`` are exactly 0 and 1.

    Args:
        column: An object whose ``unique()`` method returns an iterable of
            hashable values (used in this file with DataFrame columns).

    Returns:
        bool: True only if both 0 and 1 occur and no other value does.
        A column holding just one of the two values, or any additional
        value (including NaN), is not considered binary.
    """
    # Compare as sets instead of sorting: sorting raises TypeError when a
    # column mixes unorderable types (e.g. numbers and strings), while the
    # set comparison simply reports such a column as non-binary.
    return set(column.unique()) == {0, 1}

## gist:8d28fb32dfc37e96b79724642ae3a807
# Names of all columns in `data` for which is_binary() returns True
binary_columns = [name for name in data.columns if is_binary(data[name])]

# Every (binary column, non-binary numeric column) pair, one box plot per pair
binary_non_binary_combinations = [*itertools.product(binary_columns, non_binary_numeric_columns)]

# Grid layout: a fixed number of columns and as many rows as the plots require
n_cols = 3  # plots per row
n_plots = len(binary_non_binary_combinations)
n_rows = -(-n_plots // n_cols)  # ceiling division: a partial last row still counts

## gist:1c731107952d07683549822e2fc255cf
# Re-importing the necessary libraries (no data is reloaded in this snippet)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

# Function to determine if a column is binary
def is_binary(column):
    """Return True when the distinct values of ``column`` are exactly 0 and 1.

    Args:
        column: An object whose ``unique()`` method returns an iterable of
            hashable values (used in this file with DataFrame columns).

    Returns:
        bool: True only if both 0 and 1 occur and no other value does.
        A column holding just one of the two values, or any additional
        value (including NaN), is not considered binary.
    """
    # Compare as sets instead of sorting: sorting raises TypeError when a
    # column mixes unorderable types (e.g. numbers and strings), while the
    # set comparison simply reports such a column as non-binary.
    return set(column.unique()) == {0, 1}

## gist:bf80863b48d56fbfa9edad8bd17ef79c
import matplotlib.pyplot as plt
import seaborn as sns

# Set the aesthetic style of the plots
sns.set_style("whitegrid")

# Selecting a subset of columns for plotting histograms
# Excluding binary columns for more meaningful histograms
# NOTE(review): this list is cut off after its first entry (there is no
# closing bracket), and that entry, "Diabetes_binary", looks like a binary
# column, which contradicts the comment above. Confirm the intended list.
hist_columns = [
    "Diabetes_binary",

## gist:8d22018b21aed8272e38623d90314631
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" style to the plots
sns.set_style(style="whitegrid")

# Columns to draw histograms for; binary columns are deliberately left out
# because their histograms are less meaningful
hist_columns = [
    'BMI',
    'Age',
    'GenHlth',
    'MentHlth',
    'PhysHlth',
    'Education',
    'Income',
]

## gist:b76c874372560060f1df656de905a1f2
import pandas as pd

# Load the dataset from a CSV file into a DataFrame
# NOTE(review): the path is absolute and machine-specific (looks like a
# mounted Google Drive folder) - confirm it exists where this runs.
file_path = '/content/drive/MyDrive/研究/糖尿病/Diabetes Health Indicators Dataset/diabetes_binary_health_indicators_BRFSS2015.csv'
data = pd.read_csv(file_path)

# Evaluate the DataFrame so that a notebook can render it as cell output.
# NOTE(review): this evaluates the whole frame rather than selecting the
# first few rows, and has no visible effect in a plain script; use
# data.head() if only the first rows are wanted.
data

# Checking data types and unique values for each column

## gist:595463c6a37c92209cf516e2e9590219
# Station names to keep: the column labels of the transition probability
# matrix, with the 'start_station_name' label removed
aligned_stations = set(transition_probability_matrix.columns).difference({'start_station_name'})

# Keep only the station_stats rows whose start station AND end station are
# both members of aligned_stations
filtered_station_stats = station_stats.loc[
    station_stats['start_station_name'].isin(aligned_stations)
    & station_stats['end_station_name'].isin(aligned_stations)
]

## gist:1682054dbb07d007ad1d38d2e7abd261
import pandas as pd

# Shorter name for the trip table (same object as divvy_tripdata, not a copy)
df = divvy_tripdata

# 2. Parse the end and start timestamps into datetime values
df['ended_at'] = pd.to_datetime(df['ended_at'])
df['started_at'] = pd.to_datetime(df['started_at'])

# 3. Trip duration: end time minus start time
df['travel_time'] = df['ended_at'].sub(df['started_at'])

## gist:b79f015a55e4324e45c92be46b873d9f
import matplotlib.pyplot as plt
import seaborn as sns

# Create the figure for the heatmap once, at 20x15 inches. Only a single
# plt.figure() call is made so that no extra, empty figure is left behind.
plt.figure(figsize=(20, 15))

# Draw the heatmap with the 'Reds' colormap (light for low values, red for
# high values), skipping the first column of the matrix
sns.heatmap(transition_probability_matrix.iloc[:, 1:], cmap='Reds')
	import numpy as np
	from sklearn.preprocessing import StandardScaler
	from sklearn.decomposition import PCA

	# PCA input: every column of `data` except the 'Diabetes_binary' target
	features = data.drop(columns='Diabetes_binary')

	# Fit a StandardScaler (default settings) on the features, then transform them
	scaler = StandardScaler()
	scaled_features = scaler.fit(features).transform(features)
	# Re-importing the necessary libraries (no data is reloaded in this snippet)
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import itertools

	# Function to determine if a column is binary
	def is_binary(column):
		"""Return True when the distinct values of ``column`` are exactly 0 and 1.

		Args:
			column: An object whose ``unique()`` method returns an iterable of
				hashable values (used in this file with DataFrame columns).

		Returns:
			bool: True only if both 0 and 1 occur and no other value does.
			A column holding just one of the two values, or any additional
			value (including NaN), is not considered binary.
		"""
		# Compare as sets instead of sorting: sorting raises TypeError when a
		# column mixes unorderable types (e.g. numbers and strings), while the
		# set comparison simply reports such a column as non-binary.
		return set(column.unique()) == {0, 1}
	# Names of all columns in `data` for which is_binary() returns True
	binary_columns = [name for name in data.columns if is_binary(data[name])]

	# Every (binary column, non-binary numeric column) pair, one box plot per pair
	binary_non_binary_combinations = [*itertools.product(binary_columns, non_binary_numeric_columns)]

	# Grid layout: a fixed number of columns and as many rows as the plots require
	n_cols = 3  # plots per row
	n_plots = len(binary_non_binary_combinations)
	n_rows = -(-n_plots // n_cols)  # ceiling division: a partial last row still counts
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Set the aesthetic style of the plots
	sns.set_style("whitegrid")

	# Selecting a subset of columns for plotting histograms
	# Excluding binary columns for more meaningful histograms
	# NOTE(review): this list is cut off after its first entry (there is no
	# closing bracket), and that entry, "Diabetes_binary", looks like a binary
	# column, which contradicts the comment above. Confirm the intended list.
	hist_columns = [
	"Diabetes_binary",
	import pandas as pd

	# Load the dataset from a CSV file into a DataFrame
	# NOTE(review): the path is absolute and machine-specific (looks like a
	# mounted Google Drive folder) - confirm it exists where this runs.
	file_path = '/content/drive/MyDrive/研究/糖尿病/Diabetes Health Indicators Dataset/diabetes_binary_health_indicators_BRFSS2015.csv'
	data = pd.read_csv(file_path)

	# Evaluate the DataFrame so that a notebook can render it as cell output.
	# NOTE(review): this evaluates the whole frame rather than selecting the
	# first few rows, and has no visible effect in a plain script; use
	# data.head() if only the first rows are wanted.
	data

	# Checking data types and unique values for each column
	# Station names to keep: the column labels of the transition probability
	# matrix, with the 'start_station_name' label removed
	aligned_stations = set(transition_probability_matrix.columns).difference({'start_station_name'})

	# Keep only the station_stats rows whose start station AND end station are
	# both members of aligned_stations
	filtered_station_stats = station_stats.loc[
		station_stats['start_station_name'].isin(aligned_stations)
		& station_stats['end_station_name'].isin(aligned_stations)
	]
	import pandas as pd

	# Shorter name for the trip table (same object as divvy_tripdata, not a copy)
	df = divvy_tripdata

	# 2. Parse the end and start timestamps into datetime values
	df['ended_at'] = pd.to_datetime(df['ended_at'])
	df['started_at'] = pd.to_datetime(df['started_at'])

	# 3. Trip duration: end time minus start time
	df['travel_time'] = df['ended_at'].sub(df['started_at'])
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Create the figure for the heatmap once, at 20x15 inches. Only a single
	# plt.figure() call is made so that no extra, empty figure is left behind.
	plt.figure(figsize=(20, 15))

	# Draw the heatmap with the 'Reds' colormap (light for low values, red for
	# high values), skipping the first column of the matrix
	sns.heatmap(transition_probability_matrix.iloc[:, 1:], cmap='Reds')