Last active
January 15, 2020 21:49
-
-
Save simecek/65e80cf9693099c7ec062bd7e52c9d0d to your computer and use it in GitHub Desktop.
Iteration_on_combinations.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Iteration_on_combinations.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyOVllpsnDPlkrxKNRk8GwRq", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/simecek/65e80cf9693099c7ec062bd7e52c9d0d/iteration_on_combinations.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "IXRY-TIA5M5x", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"A small example of how to iterate over pairs/combinations of DataFrame columns instead of writing multiple nested for-loops.\n", | |
"\n", | |
"The original source is https://stackoverflow.com/a/45350450." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "jPyQ57qdsSVq", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from scipy.stats import ttest_ind\n", | |
"from itertools import combinations" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5g8h6Yy-shPT", | |
"colab_type": "code", | |
"outputId": "146e90cb-1629-4321-8407-97442a6e3779", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 639 | |
} | |
}, | |
"source": [ | |
"# generate a random N x M array, then add j/4 to column j so the columns have different expected means\n", | |
"N, M = 20, 4\n", | |
"A = np.random.randn(N, M) + np.arange(M)/4\n", | |
"\n", | |
"# converts numpy array to pandas df\n", | |
"df = pd.DataFrame(A)\n", | |
"df" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-0.019667</td>\n", | |
" <td>-0.160105</td>\n", | |
" <td>1.650978</td>\n", | |
" <td>1.589697</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.126397</td>\n", | |
" <td>-0.465326</td>\n", | |
" <td>0.237224</td>\n", | |
" <td>1.785721</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.662371</td>\n", | |
" <td>-0.096171</td>\n", | |
" <td>0.841056</td>\n", | |
" <td>0.581495</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>-1.596632</td>\n", | |
" <td>2.135681</td>\n", | |
" <td>2.111951</td>\n", | |
" <td>0.748818</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1.255028</td>\n", | |
" <td>0.811000</td>\n", | |
" <td>2.096242</td>\n", | |
" <td>0.269831</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0.754257</td>\n", | |
" <td>0.624083</td>\n", | |
" <td>-0.832509</td>\n", | |
" <td>1.283291</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>0.053834</td>\n", | |
" <td>0.182164</td>\n", | |
" <td>1.839340</td>\n", | |
" <td>0.810557</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>-0.279346</td>\n", | |
" <td>1.365270</td>\n", | |
" <td>-0.292281</td>\n", | |
" <td>-0.609617</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>0.667880</td>\n", | |
" <td>-1.146813</td>\n", | |
" <td>1.310888</td>\n", | |
" <td>1.638820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>0.996876</td>\n", | |
" <td>1.073842</td>\n", | |
" <td>-0.601994</td>\n", | |
" <td>0.998753</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>-1.618132</td>\n", | |
" <td>0.877366</td>\n", | |
" <td>-0.582637</td>\n", | |
" <td>1.425230</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>0.884677</td>\n", | |
" <td>-0.164225</td>\n", | |
" <td>0.730524</td>\n", | |
" <td>-0.947566</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0.054816</td>\n", | |
" <td>-1.029457</td>\n", | |
" <td>0.057920</td>\n", | |
" <td>2.209974</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>0.476399</td>\n", | |
" <td>-0.090746</td>\n", | |
" <td>1.771726</td>\n", | |
" <td>-0.626088</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0.212336</td>\n", | |
" <td>-0.158800</td>\n", | |
" <td>1.573141</td>\n", | |
" <td>1.706248</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>-0.200484</td>\n", | |
" <td>0.635122</td>\n", | |
" <td>1.023190</td>\n", | |
" <td>0.086667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>0.587187</td>\n", | |
" <td>-0.956648</td>\n", | |
" <td>0.728709</td>\n", | |
" <td>1.081525</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>-0.020284</td>\n", | |
" <td>1.561295</td>\n", | |
" <td>-0.210088</td>\n", | |
" <td>0.774460</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>-1.266576</td>\n", | |
" <td>1.435791</td>\n", | |
" <td>1.033223</td>\n", | |
" <td>0.236054</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>1.178102</td>\n", | |
" <td>-0.024724</td>\n", | |
" <td>-0.977254</td>\n", | |
" <td>0.538408</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2 3\n", | |
"0 -0.019667 -0.160105 1.650978 1.589697\n", | |
"1 1.126397 -0.465326 0.237224 1.785721\n", | |
"2 0.662371 -0.096171 0.841056 0.581495\n", | |
"3 -1.596632 2.135681 2.111951 0.748818\n", | |
"4 1.255028 0.811000 2.096242 0.269831\n", | |
"5 0.754257 0.624083 -0.832509 1.283291\n", | |
"6 0.053834 0.182164 1.839340 0.810557\n", | |
"7 -0.279346 1.365270 -0.292281 -0.609617\n", | |
"8 0.667880 -1.146813 1.310888 1.638820\n", | |
"9 0.996876 1.073842 -0.601994 0.998753\n", | |
"10 -1.618132 0.877366 -0.582637 1.425230\n", | |
"11 0.884677 -0.164225 0.730524 -0.947566\n", | |
"12 0.054816 -1.029457 0.057920 2.209974\n", | |
"13 0.476399 -0.090746 1.771726 -0.626088\n", | |
"14 0.212336 -0.158800 1.573141 1.706248\n", | |
"15 -0.200484 0.635122 1.023190 0.086667\n", | |
"16 0.587187 -0.956648 0.728709 1.081525\n", | |
"17 -0.020284 1.561295 -0.210088 0.774460\n", | |
"18 -1.266576 1.435791 1.033223 0.236054\n", | |
"19 1.178102 -0.024724 -0.977254 0.538408" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 22 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "AKPbk8ei1QHI", | |
"colab_type": "code", | |
"outputId": "93969cc8-1c9e-4154-b271-707e7b18776a", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 166 | |
} | |
}, | |
"source": [ | |
"pairwise_pvalues = pd.DataFrame(columns=df.columns, index=df.columns, dtype=float)\n", | |
"\n", | |
"for (label1, column1), (label2, column2) in combinations(df.items(), 2):\n", | |
" pairwise_pvalues.loc[label1, label2] = ttest_ind(column1, column2)[1]\n", | |
" pairwise_pvalues.loc[label2, label1] = pairwise_pvalues.loc[label1, label2]\n", | |
"\n", | |
"pairwise_pvalues.round(3)" | |
], | |
"execution_count": 0, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NaN</td>\n", | |
" <td>0.659</td>\n", | |
" <td>0.116</td>\n", | |
" <td>0.039</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.659</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.252</td>\n", | |
" <td>0.111</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.116</td>\n", | |
" <td>0.252</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.730</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.039</td>\n", | |
" <td>0.111</td>\n", | |
" <td>0.730</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2 3\n", | |
"0 NaN 0.659 0.116 0.039\n", | |
"1 0.659 NaN 0.252 0.111\n", | |
"2 0.116 0.252 NaN 0.730\n", | |
"3 0.039 0.111 0.730 NaN" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 23 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment