Skip to content

Instantly share code, notes, and snippets.

@makispl
Last active Aug 31, 2021
Embed
What would you like to do?
# Load the pre-processed training plays. Every GAME_ID value is passed
# through str so the identifiers are handled as text.
plays_df = pd.read_csv(
    '../data/interim/plays_17_18_19_pre_proc_train.csv',
    converters={'GAME_ID': str},
)
# Rescale a copy of the feature table with min-max scaling, leaving `data`
# itself untouched. NOTE(review): despite the `_stnd` suffix this is
# normalisation (MinMaxScaler), not standardisation.
scaler = MinMaxScaler()
data_stnd = scaler.fit_transform(data.copy())

# Fit a PCA without limiting the number of components, then show how much
# variance each component explains.
pca = PCA().fit(data_stnd)
pca.explained_variance_ratio_
# Plot the cumulative explained variance against the number of principal
# components, to help decide how many components to keep.
cumulative_variance = pca.explained_variance_ratio_.cumsum()
# Derive the x-axis from the fitted PCA instead of hard-coding 18 components,
# so the plot still works when the number of input features changes.
component_counts = range(1, len(cumulative_variance) + 1)

with plt.style.context('fivethirtyeight'):
    # Create exactly one figure; a second plt.figure() call would leave an
    # empty figure behind and draw on the new one.
    fig = plt.figure(figsize=(10, 8))
    plt.plot(
        component_counts,
        cumulative_variance,
        markerfacecolor='red',
        marker='o',
        linestyle='--',
    )
    plt.xlabel('Principal Components', fontsize=18)
    plt.ylabel('Explained Variance %', fontsize=18)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
# Refit the PCA keeping only 4 components (per the original author's note,
# these explain > 80% of the variance), then project the scaled data onto
# those components.
pca = PCA(n_components=4).fit(data_stnd)
pca_scores = pca.transform(data_stnd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment