Skip to content

Instantly share code, notes, and snippets.

@armish armish/Compare GMTs.ipynb Secret
Created Jan 29, 2016

Embed
What would you like to do?
Compare GMTs with an asymmetric, naive score
{
"cells": [
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Collection 1</th>\n",
" <th>Collection 2</th>\n",
" <th>Average Similarity</th>\n",
" <th>Similarity Std Dev</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>custom.bindea.gmt</td>\n",
" <td>custom.bindea.gmt</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>custom.bindea.gmt</td>\n",
" <td>custom.cancer.immunity.symbols.gmt</td>\n",
" <td>0.003064</td>\n",
" <td>0.007804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>custom.bindea.gmt</td>\n",
" <td>custom.senbabaoglu.gmt</td>\n",
" <td>0.846611</td>\n",
" <td>0.294187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>custom.bindea.gmt</td>\n",
" <td>c7.all.v5.1.symbols.gmt</td>\n",
" <td>0.032423</td>\n",
" <td>0.025545</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>custom.cancer.immunity.symbols.gmt</td>\n",
" <td>custom.bindea.gmt</td>\n",
" <td>0.016477</td>\n",
" <td>0.012815</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>custom.cancer.immunity.symbols.gmt</td>\n",
" <td>custom.cancer.immunity.symbols.gmt</td>\n",
" <td>0.987879</td>\n",
" <td>0.017142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>custom.cancer.immunity.symbols.gmt</td>\n",
" <td>custom.senbabaoglu.gmt</td>\n",
" <td>0.015584</td>\n",
" <td>0.011808</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>custom.cancer.immunity.symbols.gmt</td>\n",
" <td>c7.all.v5.1.symbols.gmt</td>\n",
" <td>0.017495</td>\n",
" <td>0.005607</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>custom.senbabaoglu.gmt</td>\n",
" <td>custom.bindea.gmt</td>\n",
" <td>0.766578</td>\n",
" <td>0.371519</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>custom.senbabaoglu.gmt</td>\n",
" <td>custom.cancer.immunity.symbols.gmt</td>\n",
" <td>0.003268</td>\n",
" <td>0.007636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>custom.senbabaoglu.gmt</td>\n",
" <td>custom.senbabaoglu.gmt</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>custom.senbabaoglu.gmt</td>\n",
" <td>c7.all.v5.1.symbols.gmt</td>\n",
" <td>0.032190</td>\n",
" <td>0.024325</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>c7.all.v5.1.symbols.gmt</td>\n",
" <td>custom.bindea.gmt</td>\n",
" <td>0.011427</td>\n",
" <td>0.008323</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>c7.all.v5.1.symbols.gmt</td>\n",
" <td>custom.cancer.immunity.symbols.gmt</td>\n",
" <td>0.003923</td>\n",
" <td>0.004344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>c7.all.v5.1.symbols.gmt</td>\n",
" <td>custom.senbabaoglu.gmt</td>\n",
" <td>0.011700</td>\n",
" <td>0.008200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>c7.all.v5.1.symbols.gmt</td>\n",
" <td>c7.all.v5.1.symbols.gmt</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Collection 1 Collection 2 \\\n",
"0 custom.bindea.gmt custom.bindea.gmt \n",
"1 custom.bindea.gmt custom.cancer.immunity.symbols.gmt \n",
"2 custom.bindea.gmt custom.senbabaoglu.gmt \n",
"3 custom.bindea.gmt c7.all.v5.1.symbols.gmt \n",
"4 custom.cancer.immunity.symbols.gmt custom.bindea.gmt \n",
"5 custom.cancer.immunity.symbols.gmt custom.cancer.immunity.symbols.gmt \n",
"6 custom.cancer.immunity.symbols.gmt custom.senbabaoglu.gmt \n",
"7 custom.cancer.immunity.symbols.gmt c7.all.v5.1.symbols.gmt \n",
"8 custom.senbabaoglu.gmt custom.bindea.gmt \n",
"9 custom.senbabaoglu.gmt custom.cancer.immunity.symbols.gmt \n",
"10 custom.senbabaoglu.gmt custom.senbabaoglu.gmt \n",
"11 custom.senbabaoglu.gmt c7.all.v5.1.symbols.gmt \n",
"12 c7.all.v5.1.symbols.gmt custom.bindea.gmt \n",
"13 c7.all.v5.1.symbols.gmt custom.cancer.immunity.symbols.gmt \n",
"14 c7.all.v5.1.symbols.gmt custom.senbabaoglu.gmt \n",
"15 c7.all.v5.1.symbols.gmt c7.all.v5.1.symbols.gmt \n",
"\n",
" Average Similarity Similarity Std Dev \n",
"0 1.000000 0.000000 \n",
"1 0.003064 0.007804 \n",
"2 0.846611 0.294187 \n",
"3 0.032423 0.025545 \n",
"4 0.016477 0.012815 \n",
"5 0.987879 0.017142 \n",
"6 0.015584 0.011808 \n",
"7 0.017495 0.005607 \n",
"8 0.766578 0.371519 \n",
"9 0.003268 0.007636 \n",
"10 1.000000 0.000000 \n",
"11 0.032190 0.024325 \n",
"12 0.011427 0.008323 \n",
"13 0.003923 0.004344 \n",
"14 0.011700 0.008200 \n",
"15 1.000000 0.000000 "
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gmts = [\"custom.bindea.gmt\", \"custom.cancer.immunity.symbols.gmt\", \"custom.senbabaoglu.gmt\",\"c7.all.v5.1.symbols.gmt\"]\n",
"folder = \"./gmts/\"\n",
"\n",
"def compareGMTs(gmt1, gmt2):\n",
" genesets1 = readGMT(gmt1)\n",
" genesets2 = readGMT(gmt2)\n",
" \n",
" scores = []\n",
" for setname1 in genesets1:\n",
" set1 = genesets1[setname1]\n",
" \n",
" tmpScores = []\n",
" for setname2 in genesets2:\n",
" set2 = genesets2[setname2]\n",
" \n",
" overlap = len(set(set1).intersection(set2))\n",
" score = float(overlap) / max(len(set1), len(set2))\n",
" tmpScores.append(score)\n",
" \n",
" scores.append(max(tmpScores))\n",
" \n",
" return scores\n",
"\n",
"def readGMT(filename):\n",
" genesets = {}\n",
" \n",
" gmtfile = open(folder + filename)\n",
" for line in gmtfile.readlines():\n",
" values = line.split(\"\\t\")\n",
" genesets[values[0]] = sorted(values[2:])\n",
" \n",
" return genesets\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"comparisons = []\n",
"for gmt1 in gmts:\n",
" for gmt2 in gmts:\n",
" scores = compareGMTs(gmt1, gmt2)\n",
" summary = (gmt1, gmt2, np.mean(scores), np.std(scores))\n",
" comparisons.append(summary)\n",
"\n",
"pd.DataFrame(data=comparisons, columns=[\"Collection 1\", \"Collection 2\", \"Average Similarity\", \"Similarity Std Dev\"])\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@armish

This comment has been minimized.

Copy link
Owner Author

commented Jan 29, 2016

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.