Skip to content

Instantly share code, notes, and snippets.

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# PCA input: every column of `data` except the 'Diabetes_binary' target
features = data.drop(columns='Diabetes_binary')
# Standardize the feature columns with a freshly fitted StandardScaler
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Re-importing necessary libraries and reloading the data
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
# Function to determine if a column is binary
def is_binary(column):
    """Return True when *column* holds exactly the two distinct values 0 and 1.

    Args:
        column: Any object exposing a ``unique()`` method that returns its
            distinct values (for example a pandas Series).

    Returns:
        bool: True only if the distinct values are exactly 0 and 1. Values
        that compare equal to 0 or 1 (such as 0.0 and 1.0) also qualify.
    """
    # Compare as sets instead of sorting: sorted() raises TypeError when the
    # column mixes values that cannot be ordered (e.g. str and int), whereas
    # such a column is simply "not binary".
    return set(column.unique()) == {0, 1}
# Names of the columns of `data` that is_binary() accepts
binary_columns = [name for name in data.columns if is_binary(data[name])]
# Every (binary column, non-binary numeric column) pairing, one per box plot
binary_non_binary_combinations = [
    *itertools.product(binary_columns, non_binary_numeric_columns)
]
# Plot grid geometry: a fixed number of plots per row and as many rows as
# are needed to hold every pairing
n_plots = len(binary_non_binary_combinations)
n_cols = 3
n_rows = -(-n_plots // n_cols)  # ceiling division using integers only
@smzn
smzn / gist:1c731107952d07683549822e2fc255cf
Created January 22, 2024 06:34
2値データ以外での散布図
# Re-importing necessary libraries and reloading the data
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
# Function to determine if a column is binary
def is_binary(column):
    """Return True when *column* holds exactly the two distinct values 0 and 1.

    Args:
        column: Any object exposing a ``unique()`` method that returns its
            distinct values (for example a pandas Series).

    Returns:
        bool: True only if the distinct values are exactly 0 and 1. Values
        that compare equal to 0 or 1 (such as 0.0 and 1.0) also qualify.
    """
    # Compare as sets instead of sorting: sorted() raises TypeError when the
    # column mixes values that cannot be ordered (e.g. str and int), whereas
    # such a column is simply "not binary".
    return set(column.unique()) == {0, 1}
@smzn
smzn / gist:bf80863b48d56fbfa9edad8bd17ef79c
Created January 22, 2024 06:07
2値データヒストグラム
import matplotlib.pyplot as plt
import seaborn as sns
# Set the aesthetic style of the plots
sns.set_style("whitegrid")
# Selecting a subset of columns for plotting histograms
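# NOTE(review): the list below starts with the binary target 'Diabetes_binary', so the earlier "excluding binary columns" note does not match this snippet - confirm which columns are intended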
hist_columns = [
"Diabetes_binary",
@smzn
smzn / gist:8d22018b21aed8272e38623d90314631
Last active January 22, 2024 06:06
数値データヒストグラム
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "whitegrid" style to the plots
sns.set_style("whitegrid")
# Columns to draw histograms for; binary columns are left out because a
# two-bar histogram carries little information
hist_columns = [
    'BMI',
    'Age',
    'GenHlth',
    'MentHlth',
    'PhysHlth',
    'Education',
    'Income',
]
@smzn
smzn / gist:b76c874372560060f1df656de905a1f2
Created January 22, 2024 05:29
diabetes_binary_health_indicators_BRFSS2015.csv
import pandas as pd
# Load the dataset from CSV into a DataFrame named `data`
# NOTE(review): absolute path under /content/drive - presumably a mounted
# Google Drive in Colab; confirm or adjust before running elsewhere
file_path = '/content/drive/MyDrive/研究/糖尿病/Diabetes Health Indicators Dataset/diabetes_binary_health_indicators_BRFSS2015.csv'
data = pd.read_csv(file_path)
# Bare expression: shows the DataFrame when it is the last line of a notebook
# cell; it has no effect when run as a plain script
data
# Checking data types and unique values for each column
@smzn
smzn / gist:595463c6a37c92209cf516e2e9590219
Created January 8, 2024 06:00
平均利用時間行列
# Station names are taken from the columns of the transition probability
# matrix; the 'start_station_name' entry is removed because it is a column
# label rather than a station (presumably the row-label column - confirm)
aligned_stations = set(transition_probability_matrix.columns).difference(
    ['start_station_name']
)
# Keep only the rows of station_stats whose start AND end station both
# belong to aligned_stations
filtered_station_stats = station_stats[
    station_stats['start_station_name'].isin(aligned_stations)
    & station_stats['end_station_name'].isin(aligned_stations)
]
import pandas as pd
# `df` is a second name for divvy_tripdata, not a copy: the column
# assignments below also modify divvy_tripdata
df = divvy_tripdata
# 2. Convert 'started_at' and 'ended_at' to datetime
df['started_at'] = pd.to_datetime(df['started_at'])
df['ended_at'] = pd.to_datetime(df['ended_at'])
# 3. Calculate the travel time for each trip as end minus start
# (must run after the datetime conversion above)
df['travel_time'] = df['ended_at'] - df['started_at']
@smzn
smzn / gist:b79f015a55e4324e45c92be46b873d9f
Created January 8, 2024 02:11
ヒートマップ表示
import matplotlib.pyplot as plt
import seaborn as sns
# Create ONE figure sized for the heatmap. Calling plt.figure() a second time
# would open another figure and leave the first one empty.
plt.figure(figsize=(20, 15))
# The 'Reds' colormap runs from light for low values to dark red for high values.
# iloc[:, 1:] skips the first column of the matrix (presumably the
# station-name label column - confirm).
sns.heatmap(transition_probability_matrix.iloc[:, 1:], cmap='Reds')