# === fragment: gamma-Poisson conjugate prior setup (gist render notice removed) ===
# Gamma-Poisson conjugate model: prior specification.
from scipy.stats import gamma, poisson
import numpy as np

# Gamma prior hyperparameters (shape alpha, rate beta).
# Gamma(1, 1) is the Exp(1) distribution — a weakly informative prior
# on the Poisson rate.
alpha_prior = 1
beta_prior = 1

# Conjugate update (applied later; not visible in this fragment):
#   posterior alpha = prior alpha + sum of observed counts
#   posterior beta  = prior beta  + number of observations
# === fragment: daily bakery sales aggregation (gist render notice removed) ===
# Daily count of sales transactions (日毎の販売回数の集計).
# NOTE(review): `bakery_data` and `pd` are defined earlier in the original
# notebook/file; this fragment only transforms them.
# Convert the 'DateTime' column to datetime objects
bakery_data['DateTime'] = pd.to_datetime(bakery_data['DateTime'])
# Extract the date from the 'DateTime' column (calendar day, no time part)
bakery_data['Date'] = bakery_data['DateTime'].dt.date
# Group by the date and count the number of transactions per day.
# nunique() counts distinct transaction ids — presumably one transaction
# spans several rows (one per item); verify against the data schema.
daily_sales = bakery_data.groupby('Date')['TransactionNo'].nunique()
# === fragment: min-max scaling of cluster means (gist render notice removed) ===
from sklearn.preprocessing import MinMaxScaler

# Dropping the 'Cluster' column for scaling, so only the feature columns
# (and the diabetes rate) are rescaled.
# NOTE(review): `cluster_means_with_diabetes_rate` is defined elsewhere.
cluster_data_for_scaling = cluster_means_with_diabetes_rate.drop('Cluster', axis=1)
# Applying Min-Max scaling: rescales every column to the [0, 1] range,
# which makes columns with different units comparable in a heatmap.
scaler = MinMaxScaler()
cluster_scaled = scaler.fit_transform(cluster_data_for_scaling)
# Creating a DataFrame from the scaled data
# NOTE(review): fragment appears truncated — the DataFrame construction
# announced by the comment above is not visible here.
# === fragment: clustering pipeline imports (gist render notice removed) ===
# Full Python code to perform clustering and calculate statistics | |
import pandas as pd | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.decomposition import PCA | |
from sklearn.cluster import KMeans | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Assuming 'data' is the original DataFrame and 'features' are the feature columns |
# === fragment: 3-D PCA biplot helper (gist render notice removed) ===
from mpl_toolkits.mplot3d import Axes3D | |
def biplot_3d_with_arrows(score, coeff, diabetes_data, ax):
    """Render a 3-D PCA scatter of diabetes cases on *ax*.

    Parameters (as used in the visible portion):
      score         -- array of PC scores, indexed as (sample, component);
                       components 0-2 are plotted, so it needs >= 3 columns.
      coeff         -- loading matrix; only its row count is read into ``n``
                       (unused in the visible portion).
      diabetes_data -- per-sample labels; 1 marks diabetes cases.
      ax            -- a matplotlib 3-D Axes to draw on.

    NOTE(review): this fragment is truncated — the loading-arrow drawing
    implied by the function name (and by ``n``) is not visible here.
    """
    xs = score[:,0]
    ys = score[:,1]
    zs = score[:,2]
    n = coeff.shape[0]
    # Plotting points for diabetes and non-diabetes cases
    ax.scatter(score[diabetes_data == 1, 0], score[diabetes_data == 1, 1], score[diabetes_data == 1, 2], color='red', s=5, label='Diabetes')
# === fragment: 2-D biplot, non-diabetes only (gist render notice removed) ===
def biplot_non_diabetes_only(score, coeff, diabetes_data):
    """2-D PCA scatter of only the non-diabetes (label == 0) samples.

    score: PC scores, (sample, component), needs >= 2 columns;
    coeff: loading matrix — only its row count is read into ``n`` (unused
    in the visible portion); diabetes_data: per-sample labels, 0 = non-diabetes.

    NOTE(review): fragment is truncated — arrows/legend/show are not
    visible here.
    """
    xs = score[:,0]
    ys = score[:,1]
    n = coeff.shape[0]
    # Scale factors so each plotted axis spans one unit (x*scalex has range 1).
    scalex = 1.0/(xs.max() - xs.min())
    scaley = 1.0/(ys.max() - ys.min())
    # Plotting points for non-diabetes cases only
    non_diabetes_points = score[diabetes_data == 0]
    plt.scatter(non_diabetes_points[:, 0] * scalex, non_diabetes_points[:, 1] * scaley, color='blue', s=5, label='Non-Diabetes')
# === fragment: duplicate of the non-diabetes biplot (gist render notice removed) ===
def biplot_non_diabetes_only(score, coeff, diabetes_data):
    """2-D PCA scatter of only the non-diabetes (label == 0) samples.

    NOTE(review): this is byte-for-byte identical to another
    ``biplot_non_diabetes_only`` fragment in this file; if both end up in
    one module, this later definition shadows the earlier one.
    NOTE(review): fragment is truncated — arrows/legend/show are not
    visible here.
    """
    xs = score[:,0]
    ys = score[:,1]
    n = coeff.shape[0]
    # Scale factors so each plotted axis spans one unit.
    scalex = 1.0/(xs.max() - xs.min())
    scaley = 1.0/(ys.max() - ys.min())
    # Plotting points for non-diabetes cases only
    non_diabetes_points = score[diabetes_data == 0]
    plt.scatter(non_diabetes_points[:, 0] * scalex, non_diabetes_points[:, 1] * scaley, color='blue', s=5, label='Non-Diabetes')
# === fragment: 2-D biplot, diabetes only (gist render notice removed) ===
def biplot_diabetes_smaller_arrows(score, coeff, diabetes_data):
    """2-D PCA scatter of only the diabetes (label == 1) samples.

    score: PC scores, (sample, component), needs >= 2 columns;
    coeff: loading matrix — only its row count is read into ``n`` (unused
    in the visible portion); diabetes_data: per-sample labels, 1 = diabetes.

    NOTE(review): fragment is truncated — the "smaller arrows" drawing
    implied by the name is not visible here.
    """
    xs = score[:,0]
    ys = score[:,1]
    n = coeff.shape[0]
    # Scale factors so each plotted axis spans one unit.
    scalex = 1.0/(xs.max() - xs.min())
    scaley = 1.0/(ys.max() - ys.min())
    # Plotting points for diabetes cases only
    diabetes_points = score[diabetes_data == 1]
    plt.scatter(diabetes_points[:, 0] * scalex, diabetes_points[:, 1] * scaley, color='red', s=5, label='Diabetes')
# === fragment: cumulative explained-variance plot (gist render notice removed) ===
# Creating a plot for the cumulative variance with a horizontal line at 80%
# NOTE(review): `cumulative_variance` and `plt` come from earlier in the file
# (presumably the cumulative sum of PCA explained-variance ratios — confirm).
plt.figure(figsize=(10, 6))
plt.plot(cumulative_variance, marker='o')
# Adding annotations for each point (value printed just above each marker)
for i, v in enumerate(cumulative_variance):
    plt.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=8)
# Adding a horizontal line at 80% cumulative variance as the cutoff guide
plt.axhline(y=0.80, color='r', linestyle='--')
# === fragment: PCA loadings heatmap (gist render notice removed) ===
import matplotlib.pyplot as plt
import seaborn as sns

# Visualize the PCA loading matrix as an annotated heatmap
# (rows = principal components, columns = original features).
# NOTE(review): `loadings_df_80_transposed` is built elsewhere — its name
# suggests the components kept at an 80% variance threshold; confirm.
plt.figure(figsize=(14, 8))
sns.heatmap(loadings_df_80_transposed, cmap='coolwarm', annot=True)
plt.xlabel('Features')
plt.ylabel('Principal Components')
plt.title('Heatmap of PCA Loadings')
plt.show()