Skip to content

Instantly share code, notes, and snippets.

@smzn
smzn / gist:10296bda3bc2f2906652fe592a9f4255
Created January 30, 2024 15:01
パラメタのベイズ推定
from scipy.stats import gamma, poisson
import numpy as np
# Gamma prior on the Poisson rate: shape (alpha) and rate (beta), both set to 1.
alpha_prior, beta_prior = 1, 1
# Conjugate gamma-Poisson update rule for the posterior parameters:
#   posterior alpha = prior alpha + sum of the observed counts
#   posterior beta  = prior beta  + number of observations
# NOTE(review): beta is used as a rate in the rule above, while scipy.stats.gamma
# is parameterised by `scale`; presumably scale = 1 / beta is needed when the
# posterior distribution is built - confirm in the code that follows this excerpt.
@smzn
smzn / gist:f9a76201fb3375740f08f75befdda6d8
Created January 30, 2024 14:52
日毎の販売回数の集計
# Count the number of sales transactions per day.
# Parse the 'DateTime' column into pandas datetime values; the column is
# overwritten in place on `bakery_data`.
bakery_data['DateTime'] = pd.to_datetime(bakery_data['DateTime'])
# Keep only the calendar-date part in a new 'Date' column so that every row
# from the same day shares one grouping key.
bakery_data['Date'] = bakery_data['DateTime'].dt.date
# Group by day and count the distinct 'TransactionNo' values. nunique() counts a
# transaction number once even if it appears on several rows (presumably one
# row per purchased item - verify against the dataset).
daily_sales = bakery_data.groupby('Date')['TransactionNo'].nunique()
from sklearn.preprocessing import MinMaxScaler
# Min-Max scale the per-cluster values.
# The 'Cluster' column is removed first so that it is not rescaled with the
# other columns.
cluster_data_for_scaling = cluster_means_with_diabetes_rate.drop(columns='Cluster')
# Fit the scaler on the remaining columns and transform them in one step.
scaler = MinMaxScaler()
cluster_scaled = scaler.fit_transform(cluster_data_for_scaling)
# Next step (not included in this excerpt): create a DataFrame from the scaled data.
# Full Python code to perform clustering and calculate statistics
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
# Assuming 'data' is the original DataFrame and 'features' are the feature columns
from mpl_toolkits.mplot3d import Axes3D
def biplot_3d_with_arrows(score, coeff, diabetes_data, ax):
    """Scatter the diabetes cases of ``score`` on a 3-D axes.

    Only the point-plotting part of the biplot is present in this excerpt;
    no arrows are drawn by the code shown here.

    Args:
        score: 2-D array-like supporting ``score[:, k]`` indexing; columns
            0, 1 and 2 are used as the x, y and z coordinates.
        coeff: Object exposing ``.shape``; only ``coeff.shape[0]`` is read.
        diabetes_data: Array-like compared element-wise with ``1`` to select
            the rows to plot (assumed to align with the rows of ``score``).
        ax: Axes-like object whose ``scatter`` method receives the points.

    Returns:
        None.
    """
    xs = score[:, 0]
    ys = score[:, 1]
    zs = score[:, 2]
    # NOTE(review): `n` is not used in the visible part of the function;
    # presumably it is needed by arrow-drawing code that this excerpt cuts
    # off. Kept so that a `coeff` without `.shape` still fails here.
    n = coeff.shape[0]

    # Build the row mask once instead of re-evaluating it per coordinate.
    is_diabetes = diabetes_data == 1

    # Plotting points for the diabetes cases
    ax.scatter(
        xs[is_diabetes],
        ys[is_diabetes],
        zs[is_diabetes],
        color='red',
        s=5,
        label='Diabetes',
    )
def biplot_non_diabetes_only(score, coeff, diabetes_data):
    """Scatter-plot the non-diabetes rows of ``score`` on the current figure.

    The first two columns of ``score`` are used as coordinates, each
    multiplied by the reciprocal of that column's range.

    Args:
        score: 2-D array-like supporting ``score[:, k]`` and boolean row
            indexing; columns 0 and 1 are plotted.
        coeff: Object exposing ``.shape``; only ``coeff.shape[0]`` is read
            in the part of the function shown here.
        diabetes_data: Array-like compared element-wise with ``0`` to select
            the rows to plot (assumed to align with the rows of ``score``).

    Returns:
        None.
    """
    first_axis, second_axis = score[:, 0], score[:, 1]
    coeff_rows = coeff.shape[0]  # not used in the visible part of the function

    # The scale factors come from the range of ALL rows, not only the rows
    # that end up being plotted.
    x_factor = 1.0 / (first_axis.max() - first_axis.min())
    y_factor = 1.0 / (second_axis.max() - second_axis.min())

    # Plot only the rows labelled 0 (non-diabetes)
    selected = score[diabetes_data == 0]
    plt.scatter(
        selected[:, 0] * x_factor,
        selected[:, 1] * y_factor,
        color='blue',
        s=5,
        label='Non-Diabetes',
    )
@smzn
smzn / gist:82f54c497c8160e87e07ac1b525a41c3
Created January 25, 2024 08:25
biplot糖尿病でない
def biplot_non_diabetes_only(score, coeff, diabetes_data):
    """Scatter-plot the non-diabetes rows of ``score`` on the current figure.

    The first two columns of ``score`` are used as coordinates, each
    multiplied by the reciprocal of that column's range.

    Args:
        score: 2-D array-like supporting ``score[:, k]`` and boolean row
            indexing; columns 0 and 1 are plotted.
        coeff: Object exposing ``.shape``; only ``coeff.shape[0]`` is read
            in the part of the function shown here.
        diabetes_data: Array-like compared element-wise with ``0`` to select
            the rows to plot (assumed to align with the rows of ``score``).

    Returns:
        None.
    """
    first_axis, second_axis = score[:, 0], score[:, 1]
    coeff_rows = coeff.shape[0]  # not used in the visible part of the function

    # The scale factors come from the range of ALL rows, not only the rows
    # that end up being plotted.
    x_factor = 1.0 / (first_axis.max() - first_axis.min())
    y_factor = 1.0 / (second_axis.max() - second_axis.min())

    # Plot only the rows labelled 0 (non-diabetes)
    selected = score[diabetes_data == 0]
    plt.scatter(
        selected[:, 0] * x_factor,
        selected[:, 1] * y_factor,
        color='blue',
        s=5,
        label='Non-Diabetes',
    )
def biplot_diabetes_smaller_arrows(score, coeff, diabetes_data):
    """Scatter-plot the diabetes rows of ``score`` on the current figure.

    The first two columns of ``score`` are used as coordinates, each
    multiplied by the reciprocal of that column's range. No arrows are
    drawn by the part of the function shown in this excerpt.

    Args:
        score: 2-D array-like supporting ``score[:, k]`` and boolean row
            indexing; columns 0 and 1 are plotted.
        coeff: Object exposing ``.shape``; only ``coeff.shape[0]`` is read
            in the part of the function shown here.
        diabetes_data: Array-like compared element-wise with ``1`` to select
            the rows to plot (assumed to align with the rows of ``score``).

    Returns:
        None.
    """
    first_axis, second_axis = score[:, 0], score[:, 1]
    coeff_rows = coeff.shape[0]  # not used in the visible part of the function

    # The scale factors come from the range of ALL rows, not only the rows
    # that end up being plotted.
    x_factor = 1.0 / (first_axis.max() - first_axis.min())
    y_factor = 1.0 / (second_axis.max() - second_axis.min())

    # Plot only the rows labelled 1 (diabetes)
    selected = score[diabetes_data == 1]
    plt.scatter(
        selected[:, 0] * x_factor,
        selected[:, 1] * y_factor,
        color='red',
        s=5,
        label='Diabetes',
    )
# Line plot of the cumulative variance, one marker per entry, with a dashed
# reference line at 0.80.
plt.figure(figsize=(10, 6))
plt.plot(cumulative_variance, marker='o')

# Write each value (two decimals) just above its marker.
for i, v in enumerate(cumulative_variance):
    plt.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=8)

# Dashed red horizontal line marking the 80% cumulative-variance level.
plt.axhline(y=0.80, linestyle='--', color='r')
import matplotlib.pyplot as plt
import seaborn as sns
# Draw the PCA loadings table as an annotated heatmap on a new 14x8 figure.
plt.figure(figsize=(14, 8))
sns.heatmap(data=loadings_df_80_transposed, annot=True, cmap='coolwarm')

# Axis labels and title are set after the heatmap call so they are applied to
# the axes the heatmap was drawn on.
plt.xlabel('Features')
plt.ylabel('Principal Components')
plt.title('Heatmap of PCA Loadings')

plt.show()