Created
May 23, 2023 14:09
-
-
Save tyjeon24/1b84259f7dda4cd3c784b732bf14fad6 to your computer and use it in GitHub Desktop.
Quick-start code for using PCA.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Pipeline(steps=[('standardscaler', StandardScaler()), ('pca', PCA())])\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>주성분_개수</th>\n", | |
" <th>설명력</th>\n", | |
" <th>누적설명력</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>0.729624</td>\n", | |
" <td>0.729624</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>0.228508</td>\n", | |
" <td>0.958132</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>0.036689</td>\n", | |
" <td>0.994821</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>0.005179</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 주성분_개수 설명력 누적설명력\n", | |
"0 1 0.729624 0.729624\n", | |
"1 2 0.228508 0.958132\n", | |
"2 3 0.036689 0.994821\n", | |
"3 4 0.005179 1.000000" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# How to use PCA: standardize the iris measurements, fit PCA, and tabulate\n", | |
"# the explained-variance ratio of each principal component.\n", | |
"\n", | |
"from sklearn.decomposition import PCA\n", | |
"from sklearn.preprocessing import StandardScaler\n", | |
"from sklearn.pipeline import make_pipeline\n", | |
"import seaborn as sns\n", | |
"import pandas as pd\n", | |
"\n", | |
"# Drop the string label column so only the numeric measurements are fed to the pipeline.\n", | |
"df = sns.load_dataset(\"iris\")\n", | |
"df = df.drop(columns=\"species\")\n", | |
"\n", | |
"# PCA() without n_components keeps every component, so the table below covers all of them.\n", | |
"scaler = StandardScaler()\n", | |
"pca = PCA()\n", | |
"pipeline = make_pipeline(scaler, pca)\n", | |
"print(pipeline)\n", | |
"\n", | |
"# Fitting the pipeline fits `pca` in place, so its fitted attributes can be read directly.\n", | |
"pipeline.fit(df)\n", | |
"\n", | |
"# One row per component count. The count is 1-based: row k answers \"how much variance\n", | |
"# do the first k components explain?\" (0 components would explain nothing).\n", | |
"# A cumulative ratio of 0.99 means that using every component from the top down to\n", | |
"# that row explains 99% of the variance in the data.\n", | |
"pca_result = (\n", | |
"    pd.DataFrame()\n", | |
"    .assign(주성분_개수=range(1, pca.n_components_ + 1))\n", | |
"    .assign(설명력=pca.explained_variance_ratio_)\n", | |
"    .assign(누적설명력=pca.explained_variance_ratio_.cumsum())\n", | |
")\n", | |
"\n", | |
"pca_result" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "base", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.9" | |
}, | |
"orig_nbformat": 4 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment