Skip to content

Instantly share code, notes, and snippets.

@simecek
Last active January 15, 2020 21:49
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simecek/65e80cf9693099c7ec062bd7e52c9d0d to your computer and use it in GitHub Desktop.
Save simecek/65e80cf9693099c7ec062bd7e52c9d0d to your computer and use it in GitHub Desktop.
Iteration_on_combinations.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Iteration_on_combinations.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyOVllpsnDPlkrxKNRk8GwRq",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/simecek/65e80cf9693099c7ec062bd7e52c9d0d/iteration_on_combinations.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IXRY-TIA5M5x",
"colab_type": "text"
},
"source": [
"Just a small example of how to iterate through pairs/combinations of DataFrame columns instead of writing nested for-loops.\n",
"\n",
"The original source is https://stackoverflow.com/a/45350450."
]
},
{
"cell_type": "code",
"metadata": {
"id": "jPyQ57qdsSVq",
"colab_type": "code",
"colab": {}
},
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from scipy.stats import ttest_ind\n",
"from itertools import combinations"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5g8h6Yy-shPT",
"colab_type": "code",
"outputId": "146e90cb-1629-4321-8407-97442a6e3779",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 639
}
},
"source": [
"# generate an N x M array of standard-normal samples and shift column j by j/4\n",
"N, M = 20, 4\n",
"A = np.random.randn(N, M) + np.arange(M)/4\n",
"\n",
"# convert the NumPy array to a pandas DataFrame\n",
"df = pd.DataFrame(A)\n",
"df"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-0.019667</td>\n",
" <td>-0.160105</td>\n",
" <td>1.650978</td>\n",
" <td>1.589697</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.126397</td>\n",
" <td>-0.465326</td>\n",
" <td>0.237224</td>\n",
" <td>1.785721</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.662371</td>\n",
" <td>-0.096171</td>\n",
" <td>0.841056</td>\n",
" <td>0.581495</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-1.596632</td>\n",
" <td>2.135681</td>\n",
" <td>2.111951</td>\n",
" <td>0.748818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.255028</td>\n",
" <td>0.811000</td>\n",
" <td>2.096242</td>\n",
" <td>0.269831</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.754257</td>\n",
" <td>0.624083</td>\n",
" <td>-0.832509</td>\n",
" <td>1.283291</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.053834</td>\n",
" <td>0.182164</td>\n",
" <td>1.839340</td>\n",
" <td>0.810557</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>-0.279346</td>\n",
" <td>1.365270</td>\n",
" <td>-0.292281</td>\n",
" <td>-0.609617</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.667880</td>\n",
" <td>-1.146813</td>\n",
" <td>1.310888</td>\n",
" <td>1.638820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.996876</td>\n",
" <td>1.073842</td>\n",
" <td>-0.601994</td>\n",
" <td>0.998753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>-1.618132</td>\n",
" <td>0.877366</td>\n",
" <td>-0.582637</td>\n",
" <td>1.425230</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.884677</td>\n",
" <td>-0.164225</td>\n",
" <td>0.730524</td>\n",
" <td>-0.947566</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.054816</td>\n",
" <td>-1.029457</td>\n",
" <td>0.057920</td>\n",
" <td>2.209974</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.476399</td>\n",
" <td>-0.090746</td>\n",
" <td>1.771726</td>\n",
" <td>-0.626088</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.212336</td>\n",
" <td>-0.158800</td>\n",
" <td>1.573141</td>\n",
" <td>1.706248</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>-0.200484</td>\n",
" <td>0.635122</td>\n",
" <td>1.023190</td>\n",
" <td>0.086667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.587187</td>\n",
" <td>-0.956648</td>\n",
" <td>0.728709</td>\n",
" <td>1.081525</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>-0.020284</td>\n",
" <td>1.561295</td>\n",
" <td>-0.210088</td>\n",
" <td>0.774460</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>-1.266576</td>\n",
" <td>1.435791</td>\n",
" <td>1.033223</td>\n",
" <td>0.236054</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>1.178102</td>\n",
" <td>-0.024724</td>\n",
" <td>-0.977254</td>\n",
" <td>0.538408</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3\n",
"0 -0.019667 -0.160105 1.650978 1.589697\n",
"1 1.126397 -0.465326 0.237224 1.785721\n",
"2 0.662371 -0.096171 0.841056 0.581495\n",
"3 -1.596632 2.135681 2.111951 0.748818\n",
"4 1.255028 0.811000 2.096242 0.269831\n",
"5 0.754257 0.624083 -0.832509 1.283291\n",
"6 0.053834 0.182164 1.839340 0.810557\n",
"7 -0.279346 1.365270 -0.292281 -0.609617\n",
"8 0.667880 -1.146813 1.310888 1.638820\n",
"9 0.996876 1.073842 -0.601994 0.998753\n",
"10 -1.618132 0.877366 -0.582637 1.425230\n",
"11 0.884677 -0.164225 0.730524 -0.947566\n",
"12 0.054816 -1.029457 0.057920 2.209974\n",
"13 0.476399 -0.090746 1.771726 -0.626088\n",
"14 0.212336 -0.158800 1.573141 1.706248\n",
"15 -0.200484 0.635122 1.023190 0.086667\n",
"16 0.587187 -0.956648 0.728709 1.081525\n",
"17 -0.020284 1.561295 -0.210088 0.774460\n",
"18 -1.266576 1.435791 1.033223 0.236054\n",
"19 1.178102 -0.024724 -0.977254 0.538408"
]
},
"metadata": {
"tags": []
},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "AKPbk8ei1QHI",
"colab_type": "code",
"outputId": "93969cc8-1c9e-4154-b271-707e7b18776a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 166
}
},
"source": [
"pairwise_pvalues = pd.DataFrame(columns=df.columns, index=df.columns, dtype=float)\n",
"\n",
"for (label1, column1), (label2, column2) in combinations(df.items(), 2):\n",
" pairwise_pvalues.loc[label1, label2] = ttest_ind(column1, column2)[1]\n",
" pairwise_pvalues.loc[label2, label1] = pairwise_pvalues.loc[label1, label2]\n",
"\n",
"pairwise_pvalues.round(3)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NaN</td>\n",
" <td>0.659</td>\n",
" <td>0.116</td>\n",
" <td>0.039</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.659</td>\n",
" <td>NaN</td>\n",
" <td>0.252</td>\n",
" <td>0.111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.116</td>\n",
" <td>0.252</td>\n",
" <td>NaN</td>\n",
" <td>0.730</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.039</td>\n",
" <td>0.111</td>\n",
" <td>0.730</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3\n",
"0 NaN 0.659 0.116 0.039\n",
"1 0.659 NaN 0.252 0.111\n",
"2 0.116 0.252 NaN 0.730\n",
"3 0.039 0.111 0.730 NaN"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment