Skip to content

Instantly share code, notes, and snippets.

@krassowski
Created March 18, 2020 17:55
Show Gist options
  • Save krassowski/3c8db573f6c186006fc196ed17f1a895 to your computer and use it in GitHub Desktop.
Save krassowski/3c8db573f6c186006fc196ed17f1a895 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"from sklearn import datasets\n",
"from sklearn.pipeline import make_pipeline, Pipeline\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def calc_inertia(matrix):\n",
"    \"\"\"Return the sum of the squared entries of `matrix`.\"\"\"\n",
"    squared = matrix ** 2\n",
"    # Reduce twice so that a two-level reduction (e.g. per-column sums of a\n",
"    # pandas table, then their total) and a one-shot reduction both end in a scalar.\n",
"    return squared.sum().sum()\n",
"\n",
"\n",
"def sum_of_squared_errors(true, pred):\n",
"    \"\"\"Residual sum of squares between `true` and `pred`.\"\"\"\n",
"    residuals = true - pred\n",
"    squared_residuals = residuals ** 2\n",
"    return squared_residuals.sum().sum()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def calc_r2(pipeline: Pipeline, data, components_n=10):\n",
"    \"\"\"Compare three cumulative R2 estimates for the leading PCA components.\n",
"\n",
"    Assumes that the PCA is the last step of the pipeline (and that it does\n",
"    not whiten its output). The pipeline is fitted on `data` as a side effect.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    pipeline : Pipeline whose final step is a PCA.\n",
"    data : samples-by-features table accepted by `pipeline.fit`.\n",
"    components_n : upper bound on the number of components to report;\n",
"        capped at the number of components the PCA actually fitted.\n",
"\n",
"    Returns\n",
"    -------\n",
"    DataFrame with one row per k = 1..components_n and the columns\n",
"    'n' (k), 'r2_ratio' (inertia of the rank-k reconstruction over the\n",
"    total inertia), 'r2_ress' (1 - residual sum of squares over the total\n",
"    inertia) and 'r2_sklearn' (cumulative `explained_variance_ratio_`).\n",
"    \"\"\"\n",
"    pipeline.fit(data)\n",
"\n",
"    pca = pipeline.steps[-1][1]\n",
"\n",
"    # Push the data through the already fitted preprocessing steps instead of\n",
"    # fitting them a second time; this also works when the PCA is the only step.\n",
"    x_processed = data\n",
"    for _, step in pipeline.steps[:-1]:\n",
"        if step is None or isinstance(step, str):  # None / 'passthrough' placeholders\n",
"            continue\n",
"        x_processed = step.transform(x_processed)\n",
"\n",
"    scores = pca.transform(x_processed)\n",
"    loadings = pca.components_\n",
"\n",
"    # PCA centres its input, so scores @ loadings rebuilds the *centred*\n",
"    # matrix. Compare against that same centred matrix; otherwise any\n",
"    # preprocessing that does not centre the features skews the residuals.\n",
"    x_centered = x_processed - pca.mean_\n",
"    total_inertia = calc_inertia(x_centered)\n",
"\n",
"    # Computed once instead of re-summing the ratios on every iteration.\n",
"    cumulative_ratio = pca.explained_variance_ratio_.cumsum()\n",
"    components_n = min(components_n, pca.n_components_)\n",
"\n",
"    result = []\n",
"    for k in range(1, components_n + 1):\n",
"        # Rank-k reconstruction from the first k components.\n",
"        x_hat = scores[:, :k] @ loadings[:k, :]\n",
"\n",
"        # Residual sum of squares\n",
"        ress = sum_of_squared_errors(x_centered, x_hat)\n",
"        explained = calc_inertia(x_hat)\n",
"\n",
"        result.append({\n",
"            'n': k,\n",
"            'r2_ratio': explained / total_inertia,\n",
"            'r2_ress': 1 - ress / total_inertia,\n",
"            'r2_sklearn': cumulative_ratio[k - 1]\n",
"        })\n",
"\n",
"    # Explicit columns keep the layout stable even when no component is reported.\n",
"    return DataFrame(result, columns=['n', 'r2_ratio', 'r2_ress', 'r2_sklearn'])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Bundled iris dataset; its `data` attribute is the matrix analysed below.\n",
"iris = datasets.load_iris()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Standardise the features, then run a PCA with its default settings.\n",
"pipeline = make_pipeline(StandardScaler(), PCA())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n</th>\n",
" <th>r2_ratio</th>\n",
" <th>r2_ress</th>\n",
" <th>r2_sklearn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.729624</td>\n",
" <td>0.729624</td>\n",
" <td>0.729624</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.958132</td>\n",
" <td>0.958132</td>\n",
" <td>0.958132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.994821</td>\n",
" <td>0.994821</td>\n",
" <td>0.994821</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n r2_ratio r2_ress r2_sklearn\n",
"0 1 0.729624 0.729624 0.729624\n",
"1 2 0.958132 0.958132 0.958132\n",
"2 3 0.994821 0.994821 0.994821\n",
"3 4 1.000000 1.000000 1.000000"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit the pipeline on the iris features and tabulate the three R2 estimates.\n",
"pca_r2 = calc_r2(pipeline, data=iris.data)\n",
"pca_r2"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment