Skip to content

Instantly share code, notes, and snippets.

@k3yavi
Created February 7, 2019 23:31
Show Gist options
  • Save k3yavi/50692c546718012bf6928bc4a011b226 to your computer and use it in GitHub Desktop.
Save k3yavi/50692c546718012bf6928bc4a011b226 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"%matplotlib inline\n",
"from tqdm import tqdm\n",
"from collections import defaultdict\n",
"import scipy.stats as stats"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Sample accessions: first column of the headerless, tab-separated run list.\n",
"srrs = (\n",
"    pd.read_csv(\"/mnt/scratch1/hirak/mappings-matter/data/srr.109.hg\", header=None, sep=\"\\t\")\n",
"    .iloc[:, 0]\n",
"    .tolist()\n",
")\n",
"# Drop this one run from the list (raises ValueError if it is absent).\n",
"srrs.remove(\"SRR1609058\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Per-tool sub-directory name; each one is expected to hold that tool's quant.sf.\n",
"# Valid tool keys: quasi, hsa, hsa_rsem, star, bt2_rsem, bt2_reg\n",
"tnames = dict(\n",
"    quasi=\"salmon_aux_dup_5k_range_4_no_orphan_no_dovetail_23\",\n",
"    hsa=\"vm_empirical_2\",\n",
"    hsa_rsem=\"strict_vm_empirical_2\",\n",
"    star=\"salmon_star_aln_aux_5k\",\n",
"    bt2_rsem=\"salmon_bt2_last\",\n",
"    bt2_reg=\"salmon_bt2_normal_last\",\n",
")\n",
"\n",
"# The two tools being compared (keys of tnames).\n",
"tool1, tool2 = 'quasi', 'hsa'\n",
"\n",
"# quant.sf column that is compared between the tools\n",
"subs = \"NumReads\"\n",
"# root directory of the per-sample quantification results\n",
"base = \"/mnt/scratch1/hirak/mappings-matter/data/quants/\"\n",
"\n",
"# sample -> two-column comparison table; starts empty\n",
"quants = {}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 108/108 [01:27<00:00, 1.31it/s]\n"
]
}
],
"source": [
"# Build one two-column table (tool1 vs tool2) of `subs` values per sample.\n",
"for exp in tqdm(srrs):\n",
"    exp += '/'  # sample directory component; also used (with the slash) as the dict key\n",
"    per_tool = [\n",
"        pd.read_csv(base + exp + tnames[tool] + \"/quant.sf\", sep=\"\\t\").set_index(\"Name\")[subs]\n",
"        for tool in (tool1, tool2)\n",
"    ]\n",
"\n",
"    # align on transcript name; a transcript missing from one tool is filled with 0\n",
"    ct = pd.concat(per_tool, axis=1, sort=True).fillna(0)\n",
"    ct.columns = [tool1, tool2]\n",
"    quants[exp] = ct"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Tally, per transcript, the number of samples in which the two tools disagree.\n",
"topx_threshold = 0.1  # relative difference above which a transcript counts as different\n",
"min_reads = 100  # only consider transcripts with > min_reads predicted by at least one tool\n",
"counts = defaultdict(int)\n",
"for ct in quants.values():\n",
"    df = ct[ct.max(axis=1) > min_reads]\n",
"    # relative difference: |a - b| / |a + b|\n",
"    diff = np.abs(df[tool1] - df[tool2]) / np.abs(df[tool1] + df[tool2])\n",
"    # only membership matters for the tally, so the values are not sorted\n",
"    for x in diff.index[diff > topx_threshold]:\n",
"        counts[x] += 1"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# keep transcripts that differ in more than 20 samples (strictly > 20)\n",
"# roi: one row per transcript, column 0 = number of samples, most frequent first\n",
"roi = pd.Series(counts).to_frame(0).sort_values(by=0, ascending=False)\n",
"roi_txps = roi.index[roi[0] > 20]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# transcript -> gene lookup: column 0 maps to column 1 of the headerless TSV\n",
"txp_gene_table = pd.read_csv(\"/mnt/scratch1/hirak/mappings-matter/data/gtf/txp2gene.tsv\", header=None, sep=\"\\t\")\n",
"t2g = txp_gene_table.set_index(0)[1].to_dict()\n",
"# distinct genes of the transcripts of interest (KeyError if a transcript has no mapping)\n",
"roi_genes = {t2g[txp] for txp in roi_txps}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(934, 686)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (number of transcripts of interest, number of distinct genes they map to)\n",
"(len(roi_txps), len(roi_genes))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Gene-level and transcript-level uniqueness tables: tab-separated with a header row,\n",
"# each indexed by its 'gene' column.\n",
"# NOTE(review): the transcript file is also indexed by a column named 'gene' --\n",
"# presumably it holds transcript ids there; confirm against the file.\n",
"unq_genes = pd.read_csv(\n",
"    \"/mnt/scratch1/hirak/mappings-matter/data/gtf/hg_gene_canonicalized_uniqueness.txt\", sep=\"\\t\"\n",
").set_index(\"gene\")\n",
"unq_txps = pd.read_csv(\n",
"    \"/mnt/scratch1/hirak/mappings-matter/data/gtf/hg_txp_canonicalized_uniqueness.txt\", sep=\"\\t\"\n",
").set_index(\"gene\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# uniqueness ratio per row: unique / total (added as a new 'ratio' column on both tables)\n",
"for table in (unq_genes, unq_txps):\n",
"    table['ratio'] = table['unique'].div(table['total'])"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f323fb2c780>"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFKxJREFUeJzt3X+QXWd93/H3B8n8qERtg/GOIruRO4gMDkwM3nHNZKZdYSYIp4OcGZyxpwGbuFGaGoa2nhSR/gEJYUraErd0XFIRuRKEIDv8iDW2U+oa31I62EQCY9m4DAq4WLaC62BUFhe3cr794x51NmKlvbq7917fZ9+vmTt7znOec87z1a4+e/a5596bqkKS1K7nTXoAkqTRMuglqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjVs76QEAnHPOObVp06ah9v3hD3/IunXrVnZAz3HWvDpY8+qwnJoPHDjwZFW9bKl+z4mg37RpE/v37x9q316vx9zc3MoO6DnOmlcHa14dllNzkv8xSD+nbiSpcQa9JDVu4KBPsibJV5Pc3q1fkOS+JN9MckuS53ftL+jWD3XbN41m6JKkQZzOFf27gIcXrP8OcGNVbQaeAq7r2q8DnqqqlwM3dv0kSRMyUNAnOQ/4eeD3u/UArwc+1XXZA1zRLW/r1um2X9b1lyRNwKBX9P8a+KfAX3brLwW+X1XHuvXDwMZueSPwKEC3/WjXX5I0AUveXpnk7wJPVNWBJHPHmxfpWgNsW3jc7cB2gJmZGXq93iDj/THz8/ND7zutrHl1sObVYSw1V9UpH8A/p3/F/gjw58DTwCeAJ4G1XZ/XAZ/rlj8HvK5bXtv1y6nOcfHFF9ew7rnnnqH3nVbWvDpY8+qwnJqB/bVEhlfV0lM3VfWeqjqvqjYBVwGfr6q/B9wDvKXrdg1wW7e8r1un2/75bkCSpAlYzitj3w3sTfLbwFeBXV37LuDjSQ4B36P/y0GSnrM27bhjYufevXX0b/lwWkFfVT2g1y1/C7hkkT4/Aq5cgbFJklaAr4yVpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktS4JYM+yQuTfDnJ15I8lOQ3u/bdSb6d5P7ucVHXniQfTnIoyQNJXjvqIiRJJzfIZ8Y+A7y+quaTnAF8McmfdNt+vao+dUL/NwGbu8ffAj7SfZUkTcCSV/TVN9+tntE96hS7bAM+1u13L3BWkg3LH6okaRipOlVmd52SNcAB4OXATVX17iS7gdfRv+K/G9hRVc8kuR34YFV9sdv3buDdVbX/hGNuB7YDzMzMXLx3796hCpifn2f9+vVD7TutrHl1sObxOfjY0bGf87gLzlwzdM1btmw5UFWzS/UbZOqGqnoWuCjJWcBnk7wKeA/w58DzgZ3Au4HfArLYIRY55s5uP2ZnZ2tubm6QofyYXq/HsPtOK2teHax5fK7dccfYz3nc7q3rRl7zad11U1XfB3rA1qo60k3PPAP8B+CSrtth4PwFu50HPL4CY5UkDWGQu25e1l3Jk+RFwBuA/3583j1JgCuAB7td9gFv6+6+uRQ4WlVHRjJ6SdKSBpm62QDs6ebpnwfcWlW3J/l8kpfRn6q5H/gHXf87gcuBQ8DTwNtXftiSpEEtGfRV9QDwmkXaX3+S/gVcv/yhSZJWgq+MlaTGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYN8pmxL0zy5SRfS/JQkt/s2i9Icl+Sbya5Jcnzu/YXdOuHuu2bRluCJOlUBrmifwZ4fVX9DHARsLX70O/fAW6sqs3AU8B1Xf/rgKeq6uXAjV0/SdKELB
n01TffrZ7RPQp4PfCprn0PcEW3vK1bp9t+WZKs2IglSadloDn6JGuS3A88AdwF/Bnw/ao61nU5DGzsljcCjwJ0248CL13JQUuSBrd2kE5V9SxwUZKzgM8Cr1ysW/d1sav3OrEhyXZgO8DMzAy9Xm+QofyY+fn5ofedVta8Oljz+Nzw6mNLdxqRcdQ8UNAfV1XfT9IDLgXOSrK2u2o/D3i863YYOB84nGQtcCbwvUWOtRPYCTA7O1tzc3NDFdDr9Rh232llzauDNY/PtTvuGPs5j9u9dd3Iax7krpuXdVfyJHkR8AbgYeAe4C1dt2uA27rlfd063fbPV9WPXdFLksZjkCv6DcCeJGvo/2K4tapuT/J1YG+S3wa+Cuzq+u8CPp7kEP0r+atGMG5J0oCWDPqqegB4zSLt3wIuWaT9R8CVKzI6SdKy+cpYSWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJatwgnxl7fpJ7kjyc5KEk7+ra35fksST3d4/LF+zzniSHknwjyRtHWYAk6dQG+czYY8ANVfWVJC8GDiS5q9t2Y1X9q4Wdk1xI/3Nifxr4CeA/J3lFVT27kgOXJA1mySv6qjpSVV/pln8APAxsPMUu24C9VfVMVX0bOMQiny0rSRqP05qjT7KJ/geF39c1vSPJA0luTnJ217YReHTBboc59S8GSdIIpaoG65isB/4L8IGq+kySGeBJoID3Axuq6peT3AR8qar+oNtvF3BnVX36hONtB7YDzMzMXLx3796hCpifn2f9+vVD7TutrHl1sObxOfjY0bGf87gLzlwzdM1btmw5UFWzS/UbZI6eJGcAnwY+UVWfAaiq7y7Y/lHg9m71MHD+gt3PAx4/8ZhVtRPYCTA7O1tzc3ODDOXH9Ho9ht13Wlnz6mDN43PtjjvGfs7jdm9dN/KaB7nrJsAu4OGq+t0F7RsWdPsF4MFueR9wVZIXJLkA2Ax8eeWGLEk6HYNc0f8s8FbgYJL7u7bfAK5OchH9qZtHgF8FqKqHktwKfJ3+HTvXe8eNJE3OkkFfVV8EssimO0+xzweADyxjXJKkFeIrYyWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxg3xm7PlJ7knycJKHkryra39JkruSfLP7enbXniQfTnIoyQNJXjvqIiRJJzfIFf0x4IaqeiVwKXB9kguBHcDdVbUZuLtbB3gT/Q8E3wxsBz6y4qOWJA1syaCvqiNV9ZVu+QfAw8BGYBuwp+u2B7iiW94GfKz67gXOSrJhxUcuSRrIac3RJ9kEvAa4D5ipqiPQ/2UAnNt12wg8umC3w12bJGkC1g7aMcl64NPAP6qq/5XkpF0XaatFjred/tQOMzMz9Hq9QYfyV8zPzw+977Sy5tXBmsfnhlcfG/s5jxtHzQMFfZIz6If8J6rqM13zd5NsqKoj3dTME137YeD8BbufBzx+4jGraiewE2B2drbm5uaGKqDX6zHsvtPKmlcHax6fa3fcMfZzHrd767qR1zzIXTcBdgEPV9XvLti0D7imW74GuG1B+9u6u28uBY4en+KRJI3fIFf0Pwu8FTiY5P6u7TeADwK3JrkO+A5wZbftTuBy4BDwNPD2FR2xJOm0LBn0VfVFFp93B7hskf4FXL/McUmSVoivjJWkxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGDfKZsTcneSLJgwva3pfksST3d4/LF2x7T5JDSb6R5I2jGrgkaTCDXNHvBrYu0n5jVV3UPe4ESHIhcBXw090+/y7JmpUarCTp9C0Z9FX1BeB7Ax5vG7
C3qp6pqm/T/4DwS5YxPknSMi1njv4dSR7opnbO7to2Ao8u6HO4a5MkTUiqaulOySbg9qp6Vbc+AzwJFPB+YENV/XKSm4AvVdUfdP12AXdW1acXOeZ2YDvAzMzMxXv37h2qgPn5edavXz/UvtPKmlcHax6fg48dHfs5j7vgzDVD17xly5YDVTW7VL+1wxy8qr57fDnJR4Hbu9XDwPkLup4HPH6SY+wEdgLMzs7W3NzcMEOh1+sx7L7TyppXB2sen2t33DH2cx63e+u6kdc81NRNkg0LVn8BOH5Hzj7gqiQvSHIBsBn48vKGKElajiWv6JN8EpgDzklyGHgvMJfkIvpTN48AvwpQVQ8luRX4OnAMuL6qnh3N0CVJg1gy6Kvq6kWad52i/weADyxnUJJWp4OPHZ3oNEqrfGWsJDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGLRn0SW5O8kSSBxe0vSTJXUm+2X09u2tPkg8nOZTkgSSvHeXgJUlLG+SKfjew9YS2HcDdVbUZuLtbB3gTsLl7bAc+sjLDlCQNa8mgr6ovAN87oXkbsKdb3gNcsaD9Y9V3L3BWkg0rNVhJ0ulbO+R+M1V1BKCqjiQ5t2vfCDy6oN/hru3IiQdIsp3+VT8zMzP0er2hBjI/Pz/0vtPKmleH1VjzzIvghlcfm/Qwxmoc3+dhg/5kskhbLdaxqnYCOwFmZ2drbm5uqBP2ej2G3XdaWfPqsBpr/refuI0PHVzpWHpu27113ci/z8PedfPd41My3dcnuvbDwPkL+p0HPD788CRJyzVs0O8DrumWrwFuW9D+tu7um0uBo8eneCRJk7Hk30hJPgnMAeckOQy8F/ggcGuS64DvAFd23e8ELgcOAU8Dbx/BmCVJp2HJoK+qq0+y6bJF+hZw/XIHJUlaOb4yVpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhq3rI9bT/II8APgWeBYVc0meQlwC7AJeAT4xap6annDlDQum3bcMbFz3/DqiZ26aStxRb+lqi6qqtlufQdwd1VtBu7u1iVJEzKKqZttwJ5ueQ9wxQjOIUka0HKDvoD/lORAku1d20xVHQHovp67zHNIkpYhVTX8zslPVNXjSc4F7gLeCeyrqrMW9Hmqqs5eZN/twHaAmZmZi/fu3TvUGObn51m/fv1Q+04ra14dJlXzwceOjv2cx828CL77vyd2+om44Mw1Q3+ft2zZcmDBtPlJLSvo/8qBkvcB88CvAHNVdSTJBqBXVT91qn1nZ2dr//79Q5231+sxNzc31L7TyppXh0nVPNknY4/xoYPLukdk6uzeum7o73OSgYJ+6KmbJOuSvPj4MvBzwIPAPuCarts1wG3DnkOStHzL+dU5A3w2yfHj/GFV/cckfwrcmuQ64DvAlcsfpiRpWEMHfVV9C/iZRdr/ArhsOYOSJK0cXxkrSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXFT/zZxBx87yrUTere9Rz748xM5r1aHSf5sqy1e0UtS4wx6SWqcQS9JjZv6OXpp1Cb1iUs3vHoip1WDDPplmFwAHPMJaEkDM+g1FbwDRRreyII+yVbg3wBrgN+vqg+O6lwaH6cxpOkzkidjk6wBbgLeBFwIXJ3kwlGcS5J0aqO66+YS4FBVfauq/g+wF9g2onNJkk5hVEG/EXh0wfrhrk2SNGapqpU/aHIl8Maq+vvd+luBS6rqnQv6bAe2d6s/BXxjyNOdAzy5jOFOI2teHax5dVhOzT9ZVS9bqtOonow9DJy/YP084PGFHapqJ7BzuSdKsr+qZpd7nGlizauDNa8O46h5VFM3fwpsTn
JBkucDVwH7RnQuSdIpjOSKvqqOJXkH8Dn6t1feXFUPjeJckqRTG9l99FV1J3DnqI6/wLKnf6aQNa8O1rw6jLzmkTwZK0l67vDdKyWpcVMT9Em2JvlGkkNJdiyy/QVJbum235dk0/hHubIGqPmfJPl6kgeS3J3kJycxzpW0VM0L+r0lSSWZ+js0Bqk5yS923+uHkvzhuMe40gb42f4bSe5J8tXu5/vySYxzpSS5OckTSR48yfYk+XD37/FAkteu6ACq6jn/oP+E7p8BfxN4PvA14MIT+vxD4Pe65auAWyY97jHUvAX4a93yr62Gmrt+Lwa+ANwLzE563GP4Pm8Gvgqc3a2fO+lxj6HmncCvdcsXAo9MetzLrPlvA68FHjzJ9suBPwECXArct5Lnn5Yr+kHeUmEbsKdb/hRwWZKMcYwrbcmaq+qeqnq6W72X/usVptmgb53xfuBfAD8a5+BGZJCafwW4qaqeAqiqJ8Y8xpU2SM0F/PVu+UxOeB3OtKmqLwDfO0WXbcDHqu9e4KwkG1bq/NMS9IO8pcL/71NVx4CjwEvHMrrRON23kbiO/hXBNFuy5iSvAc6vqtvHObARGuT7/ArgFUn+W5J7u3eGnWaD1Pw+4JeSHKZ/9947adtI3zZmWt6PfrEr8xNvFxqkzzQZuJ4kvwTMAn9npCMavVPWnOR5wI3AteMa0BgM8n1eS3/6Zo7+X23/Ncmrqur7Ix7bqAxS89XA7qr6UJLXAR/vav7L0Q9vIkaaX9NyRb/kWyos7JNkLf0/9071p9Jz3SA1k+QNwD8D3lxVz4xpbKOyVM0vBl4F9JI8Qn8uc9+UPyE76M/2bVX1f6vq2/TfF2rzmMY3CoPUfB1wK0BVfQl4If33hGnVQP/fhzUtQT/IWyrsA67plt8CfL66Zzmm1JI1d9MY/55+yE/7vC0sUXNVHa2qc6pqU1Vtov+8xJurav9khrsiBvnZ/mP6T7yT5Bz6UznfGusoV9YgNX8HuAwgySvpB/3/HOsox2sf8Lbu7ptLgaNVdWSlDj4VUzd1krdUSPJbwP6q2gfsov/n3SH6V/JXTW7Eyzdgzf8SWA/8Ufe883eq6s0TG/QyDVhzUwas+XPAzyX5OvAs8OtV9ReTG/XyDFjzDcBHk/xj+lMY107zhVuST9Kfejune97hvcAZAFX1e/Sfh7gcOAQ8Dbx9Rc8/xf92kqQBTMvUjSRpSAa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mN+39nnzZ8P5d8qgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# distribution of the uniqueness ratio over the genes of interest\n",
"unq_genes.loc[list(roi_genes), 'ratio'].hist()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f32415e0198>"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEk5JREFUeJzt3X+QXWV9x/H3VyJqiZJgZCeTpK6O0erAqLBD4zBjN8ZaiB3CH9DBQYlM2sxYytjKVGn7h/01U6xDqTCOdivW4EQDpbXJIOowgR1Lp6EmRQk/dFgxhW3SRE2IXUEt+u0f94mzhk3u2ftjb/bZ92tm557znOee83w3y2effe69h8hMJEn1esGgByBJ6i+DXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS5RYMeAMCyZctyeHi4o+f+8Ic/5IwzzujtgE5x1rwwWPPC0E3Ne/bs+V5mvqJdv1Mi6IeHh9m9e3dHzx0fH2d0dLS3AzrFWfPCYM0LQzc1R8R/Nenn0o0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUaBX1ELImIOyPimxHxWES8JSLOioh7IuLx8ri09I2IuDkiJiLioYg4r78lSJJOpumM/mPAlzPzV4A3Ao8B1wM7M3M1sLPsA1wMrC5fm4FP9HTEkqRZaRv0EfEy4K3ArQCZ+ZPMfBrYAGwp3bYAl5btDcBt2bILWBIRy3s+cklSI01m9K8Gvgv8Q0Q8GBGfiogzgKHMPABQHs8u/VcAT017/mRpkyQNQLT7n4NHxAiwC7gwMx+IiI8BPwCuzcwl0/odycylEfFF4K8y8/7SvhP4YGbuOe68m2kt7TA0NHT+tm3bOirg0OGjHHy2o6d27dwVZw7kulNTUyxevHgg1x4Ua14YrHl21q5duyczR9r1a3ILhElgMjMfKPt30lqPPxgRyzPzQFmaOTSt/6ppz18J7D/+pJk5BowBjIyMZKcfAb5l63Zu3DuYOznsu3J0INf1Y+ILgzUvDHNRc9ulm8z8H+CpiHhdaVoHPArsADaWto3A9rK9A7iqvPtmDXD02BKPJGnuNZ0KXwtsjYjTgSeAq2n9krgjIjYBTwKXl753A+uBCeCZ0leSNCCNgj4zvw7MtA60boa+CVzT5bgkST3iJ2MlqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVrlHQR8S+iNgbEV+PiN2l7ayIuCciHi+PS0t7RMTNETEREQ9FxHn9LECSdHKzmdGvzcw3ZeZI2b8e2JmZq4GdZR/gYmB1+doMfKJXg5UkzV43SzcbgC1lewtw6bT227JlF7AkIpZ3cR1JUhciM9t3ivgOcARI4O8ycywins7MJdP6HMnMpRFxF3BDZt5f2ncCH8rM3cedczOtGT9DQ0Pnb9u2raMCDh0+ysFnO3pq185dceZArjs1NcXixYsHcu1BseaFwZpnZ+3atXumrbKc0KKG57swM/dHxNnAPRHxzZP0jRnanvfbJDPHgDGAkZGRHB0dbTiUX3TL1u3cuLdpGb2178rRgVx3fHycTr9f85U1LwzW3B+Nlm4yc395PAR8AbgAOHhsSaY8HirdJ4FV056+EtjfqwFLkmanbdBHxBkR8dJj28A7gIeBHcDG0m0jsL1s7wCuKu++WQMczcwDPR+5JKmRJmseQ8AXIuJY/89l5pcj4mvAHRGxCXgSuLz0vxtYD0wAzwBX93zUkqTG2gZ9Zj4BvHGG9u8D62ZoT+CanoxOktQ1PxkrSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNeki
pn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZVrHPQRcVpEPBgRd5X9V0XEAxHxeETcHhGnl/YXlf2Jcny4P0OXJDUxmxn9+4HHpu1/BLgpM1cDR4BNpX0TcCQzXwPcVPpJkgakUdBHxErgncCnyn4AbwPuLF22AJeW7Q1ln3J8XekvSRqApjP6vwU+CPys7L8ceDoznyv7k8CKsr0CeAqgHD9a+kuSBmBRuw4R8ZvAoczcExGjx5pn6JoNjk0/72ZgM8DQ0BDj4+NNxvs8Qy+B6859rn3HPuh0zN2ampoa2LUHxZoXBmvuj7ZBD1wIXBIR64EXAy+jNcNfEhGLyqx9JbC/9J8EVgGTEbEIOBM4fPxJM3MMGAMYGRnJ0dHRjgq4Zet2btzbpIze23fl6ECuOz4+Tqffr/nKmhcGa+6Ptks3mflHmbkyM4eBK4B7M/NK4D7gstJtI7C9bO8o+5Tj92bm82b0kqS50c376D8EfCAiJmitwd9a2m8FXl7aPwBc390QJUndmNWaR2aOA+Nl+wngghn6/Ai4vAdjkyT1gJ+MlaTKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVLm2QR8RL46I/4iIb0TEIxHxZ6X9VRHxQEQ8HhG3R8Tppf1FZX+iHB/ubwmSpJNpMqP/MfC2zHwj8CbgoohYA3wEuCkzVwNHgE2l/ybgSGa+Brip9JMkDUjboM+WqbL7wvKVwNuAO0v7FuDSsr2h7FOOr4uI6NmIJUmzEpnZvlPEacAe4DXAx4GPArvKrJ2IWAV8KTPPiYiHgYsyc7Ic+zbwq5n5vePOuRnYDDA0NHT+tm3bOirg0OGjHHy2o6d27dwVZw7kulNTUyxevHgg1x4Ua14YrHl21q5duyczR9r1W9TkZJn5U+BNEbEE+ALw+pm6lceZZu/P+22SmWPAGMDIyEiOjo42Gcrz3LJ1OzfubVRGz+27cnQg1x0fH6fT79d8Zc0LgzX3x6zedZOZTwPjwBpgSUQcS9iVwP6yPQmsAijHzwQO92KwkqTZa/Kum1eUmTwR8RLg7cBjwH3AZaXbRmB72d5R9inH780m60OSpL5osuaxHNhS1ulfANyRmXdFxKPAtoj4S+BB4NbS/1bgsxExQWsmf0Ufxi1Jaqht0GfmQ8CbZ2h/ArhghvYfAZf3ZHSSpK75yVhJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqlzboI+IVRFxX0Q8FhGPRMT7S/tZEXFPRDxeHpeW9oiImyNiIiIeiojz+l2EJOnEmszonwOuy8zXA2uAayLiDcD1wM7MXA3sLPsAFwOry9dm4BM9H7UkqbG2QZ+ZBzLzP8v2/wKPASuADcCW0m0LcGnZ3gDcli27gCURsbznI5ckNTKrNfqIGAbeDDwADGXmAWj9MgDOLt1WAE9Ne9pkaZMkDcCiph0jYjHwT8DvZ+YPIuKEXWdoyxnOt5nW0g5DQ0OMj483HcovGHoJXHfucx09t1udjrlbU1NTA7v2oFjzwmDN/dEo6CPihbRCfmtm/nNpPhgRyzPzQFmaOVTaJ4FV056+Eth//DkzcwwYAxgZGcnR0dGOCrhl63Zu3Nv491VP7btydCDXHR8fp9Pv13xlzQuDNfdHk3fdBHAr8Fhm/s20QzuAjWV7I7B9WvtV5d03a4Cjx5Z4JElzr8lU+ELgPcDeiPh6aftj4AbgjojYBDwJXF6O3Q2sByaAZ4Crez
piSdKstA36zLyfmdfdAdbN0D+Ba7oclySpR/xkrCRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMq1DfqI+HREHIqIh6e1nRUR90TE4+VxaWmPiLg5IiYi4qGIOK+fg5cktddkRv8Z4KLj2q4HdmbmamBn2Qe4GFhdvjYDn+jNMCVJnWob9Jn5VeDwcc0bgC1lewtw6bT227JlF7AkIpb3arCSpNmLzGzfKWIYuCszzyn7T2fmkmnHj2Tm0oi4C7ghM+8v7TuBD2Xm7hnOuZnWrJ+hoaHzt23b1lEBhw4f5eCzHT21a+euOHMg152ammLx4sUDufagWPPCYM2zs3bt2j2ZOdKu36KOzn5iMUPbjL9JMnMMGAMYGRnJ0dHRji54y9bt3Li312U0s+/K0YFcd3x8nE6/X/OVNS8M1twfnb7r5uCxJZnyeKi0TwKrpvVbCezvfHiSpG51GvQ7gI1leyOwfVr7VeXdN2uAo5l5oMsxSpK60HbNIyI+D4wCyyJiEvgwcANwR0RsAp4ELi/d7wbWAxPAM8DVfRizJGkW2gZ9Zr7rBIfWzdA3gWu6HZQkqXcG8ypmJYav/+JArvuZi84YyHUlzU/eAkGSKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5bwFgmbF2z5I848zekmqnDP6eWjvfx/lvQOaWUuafwx6zQuD+uW274Z3zvk1pV5z6UaSKueMXjqJQb34DL4Ard5xRi9JlTPoJalyLt1IpyhfgFavOKOXpMo5o5f0C3wBuj7O6CWpcs7oJZ0yfF2iP5zRS1Ll+hL0EXFRRHwrIiYi4vp+XEOS1EzPl24i4jTg48CvA5PA1yJiR2Y+2utrSVIv1P4CdD9m9BcAE5n5RGb+BNgGbOjDdSRJDfQj6FcAT03bnyxtkqQBiMzs7QkjLgd+IzN/u+y/B7ggM689rt9mYHPZfR3wrQ4vuQz4XofPna+seWGw5oWhm5pfmZmvaNepH2+vnARWTdtfCew/vlNmjgFj3V4sInZn5ki355lPrHlhsOaFYS5q7sfSzdeA1RHxqog4HbgC2NGH60iSGuj5jD4zn4uI3wO+ApwGfDozH+n1dSRJzfTlk7GZeTdwdz/OPYOul3/mIWteGKx5Yeh7zT1/MVaSdGrxFgiSVLl5E/TtbqsQES+KiNvL8QciYnjuR9lbDWr+QEQ8GhEPRcTOiHjlIMbZS01vnxERl0VERsS8f4dGk5oj4rfKv/UjEfG5uR5jrzX42f7liLgvIh4sP9/rBzHOXomIT0fEoYh4+ATHIyJuLt+PhyLivJ4OIDNP+S9aL+p+G3g1cDrwDeANx/X5XeCTZfsK4PZBj3sOal4L/FLZft9CqLn0eynwVWAXMDLocc/Bv/Nq4EFgadk/e9DjnoOax4D3le03APsGPe4ua34rcB7w8AmOrwe+BASwBnigl9efLzP6JrdV2ABsKdt3AusiIuZwjL3WtubMvC8znym7u2h9ZmE+a3r7jL8A/hr40VwOrk+a1Pw7wMcz8whAZh6a4zH2WpOaE3hZ2T6TGT6LM59k5leBwyfpsgG4LVt2AUsiYnmvrj9fgr7JbRV+3icznwOOAi+fk9H1x2xvJbGJ1oxgPmtbc0S8GViVmXfN5cD6qMm/82uB10bEv0XEroi4aM5G1x9Nav5T4N0RMUnrHXzXUre+3jpmvvyPR2aamR//dqEmfeaTxvVExLuBEeDX+jqi/jtpzRHxAuAm4L1zNaA50OTfeRGt5ZtRWn+1/WtEnJOZT/d5bP3SpOZ3AZ/JzBsj4i3AZ0vNP+v/8Aair/k1X2b0TW6r8PM+EbGI1p97J/tT6VTX6FYSEfF24E+ASzLzx3
M0tn5pV/NLgXOA8YjYR2stc8c8f0G26c/29sz8v8z8Dq37Qq2eo/H1Q5OaNwF3AGTmvwMvpnVPmFo1+u+9U/Ml6JvcVmEHsLFsXwbcm+VVjnmqbc1lGePvaIX8fF+3hTY1Z+bRzFyWmcOZOUzrdYlLMnP3YIbbE01+tv+F1gvvRMQyWks5T8zpKHurSc1PAusAIuL1tIL+u3M6yrm1A7iqvPtmDXA0Mw/06uTzYukmT3BbhYj4c2B3Zu4AbqX1590ErZn8FYMbcfca1vxRYDHwj+V15ycz85KBDbpLDWuuSsOavwK8IyIeBX4K/GFmfn9wo+5Ow5qvA/4+Iv6A1hLGe+fzxC0iPk9r6W1Zed3hw8ALATLzk7Reh1gPTADPAFf39Prz+HsnSWpgvizdSJI6ZNBLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS5/wfd8L1BcIQPxAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# distribution of the uniqueness ratio over the transcripts of interest\n",
"unq_txps.loc[list(roi_txps), 'ratio'].hist()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Null model: 1000 random transcript sets, each the same size as roi_txps,\n",
"# drawn without replacement from the transcripts indexed in unq_txps.\n",
"import random\n",
"\n",
"rng = random.Random(0)  # fixed seed so the null sets are reproducible across runs\n",
"population = list(unq_txps.index.unique())\n",
"nulls = []\n",
"for _ in range(1000):\n",
"    # random.sample raises ValueError if more transcripts are requested than exist\n",
"    # (the previous rejection loop would have spun forever in that case)\n",
"    null_txps = set(rng.sample(population, len(roi_txps)))\n",
"    nulls.append(null_txps)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADdNJREFUeJzt3W2MpeVdx/HvryxQTStQdmjI7upguk0gTSxkQ9Y00QqN4cGwvAADsbIlGzepaKo0KuqL+vQCNEpD0rSuQro0WqBoZEMxDeEhqBF0kBZ5CGGLCBtIdyqw2pBWaf++OBd1XAbmnp1z5nCu/X6Sybnv677m3P9rZ/a397nuh01VIUnq1zumXYAkabIMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnNky7AICNGzfW/Pz8tMuQpJny8MMPf7Oq5lbq97YI+vn5eRYWFqZdhiTNlCT/PqSfUzeS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktS5t8WdsWsxf82Xp7bvZ6+9cGr7lqShPKKXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6tzgoE9yTJJHktzZ1k9L8lCSp5PcmuS41n58W9/fts9PpnRJ0hCrOaL/BPDkkvXrgOuraivwMrCrte8CXq6q9wHXt36SpCkZFPRJNgMXAn/e1gOcA9zeuuwFLm7LO9o6bfu5rb8kaQqGHtF/Gvh14Htt/WTglap6ra0fADa15U3A8wBt+6HWX5I0BSsGfZKfAQ5W1cNLm5fpWgO2LX3f3UkWkiwsLi4OKlaStHpDjug/BFyU5FngFkZTNp8GTkyyofXZDLzQlg8AWwDa9hOAlw5/06raU1Xbqmrb3NzcmgYhSXpzKwZ9Vf1mVW2uqnngMuDeqvo54D7gktZtJ3BHW97X1mnb762qNxzRS5LWx1quo/8N4Ook+xnNwd/Y2m8ETm7tVwPXrK1ESdJabFi5y/+pqvuB+9vyM8DZy/T5NnDpGGqTJI2Bd8ZKUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMrBn2Sdyb5pyRfS/J4kt9t7acleSjJ00luTXJcaz++re9v2+cnOwRJ0lsZckT/HeCcqvox4IPAeUm2A9cB11fVVuBlYFfrvwt4uareB1zf+kmSpmTFoK+Rb7XVY9tXAecAt7f2vcDFbXlHW6dtPzdJxlaxJGlVBs3RJzkmyVeBg8DdwNeBV6rqtdblALCpLW8Cngdo2w8BJ4+zaEnScIOCvqq+W1UfBDYDZwOnL9etvS539F6HNyTZnWQhycLi4uLQeiVJq7Sqq26q6hXgfmA7cGKSDW3TZuCFtnwA2ALQtp8AvLTMe+2pqm1VtW1ubu7IqpckrWjIVTdzSU5syz8AfAR4ErgPuKR12wnc0Zb3tXXa9nur6g1H9JKk9bFh5S6cCuxNcgyjfxhuq6o7kzwB3JLkD4BHgBtb/xuBLyTZz+hI/rIJ1C1JGmjFoK+qR4Ezl2l/htF8/eHt3wYuHUt1kqQ1885YSeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0E
tS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzq0Y9Em2JLkvyZNJHk/yidb+niR3J3m6vZ7U2pPkhiT7kzya5KxJD0KS9OaGHNG/Bnyyqk4HtgNXJTkDuAa4p6q2Ave0dYDzga3tazfw2bFXLUkabMWgr6oXq+pf2vJ/AU8Cm4AdwN7WbS9wcVveAdxcIw8CJyY5deyVS5IGWdUcfZJ54EzgIeC9VfUijP4xAE5p3TYBzy/5tgOt7fD32p1kIcnC4uLi6iuXJA0yOOiTvAv4K+BXquo/36rrMm31hoaqPVW1raq2zc3NDS1DkrRKg4I+ybGMQv4vquqvW/M3Xp+Saa8HW/sBYMuSb98MvDCeciVJqzXkqpsANwJPVtWfLNm0D9jZlncCdyxpv6JdfbMdOPT6FI8kaf1tGNDnQ8DPA/+a5Kut7beAa4HbkuwCngMubdvuAi4A9gOvAleOtWJJ0qqsGPRV9fcsP+8OcO4y/Qu4ao11SZLGxDtjJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktS5FYM+yU1JDiZ5bEnbe5LcneTp9npSa0+SG5LsT/JokrMmWbwkaWVDjug/D5x3WNs1wD1VtRW4p60DnA9sbV+7gc+Op0xJ0pFaMeir6gHgpcOadwB72/Je4OIl7TfXyIPAiUlOHVexkqTV23CE3/feqnoRoKpeTHJKa98EPL+k34HW9uKRlyhJkzV/zZentu9nr71w4vsY98nYLNNWy3ZMdidZSLKwuLg45jIkSa870qD/xutTMu31YGs/AGxZ0m8z8MJyb1BVe6pqW1Vtm5ubO8IyJEkrOdKg3wfsbMs7gTuWtF/Rrr7ZDhx6fYpHkjQdK87RJ/ki8GFgY5IDwKeAa4HbkuwCngMubd3vAi4A9gOvAldOoGZJ0iqsGPRVdfmbbDp3mb4FXLXWoiRJ4+OdsZLUOYNekjpn0EtS5470hikxvZss1uMGC0n98Ihekjpn0EtS5wx6Seqcc/RaFc9LSLPHI3pJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6Seqcd8bOoGndnSppNnlEL0mdM+glqXMGvSR1zqCXpM4Z9JLUOa+6kd7CNK9w8hn8GheDXjPhaLyk1P/kReNi0Ev6f/wU0x+DXtLbxtH4yW09eDJWkjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6N5GgT3JekqeS7E9yzST2IUkaZuxBn+QY4DPA+cAZwOVJzhj3fiRJw0ziiP5sYH9VPVNV/w3cAuyYwH4kSQNMIug3Ac8vWT/Q2iRJUzCJp1dmmbZ6Q6dkN7C7rX4ryVNHuL+NwDeP8HtnlWM+Ojjmo0CuW9OYf2RIp0kE/QFgy5L1zcALh3eqqj3AnrXuLMlCVW1b6/vMEsd8dHDMR4f1GPMkpm7+Gdia5LQkxwGXAfsmsB9J0gBjP6KvqteS/BLwFeAY4Kaqenzc+5EkDTOR/2Gqqu4C7prEey9jzdM/M8gxHx0c89Fh4mNO1RvOk0qSOuIjECSpczMT9Cs9ViHJ8UlubdsfSjK//lWO14AxX53kiSSPJrknyaBLrd7Ohj4+I8klSSrJzF+hMWTMSX62/awfT/KX613juA343f7hJPcleaT9fl8wjTrHJclNSQ4meexNtifJDe3P49EkZ421gKp6238xOqn7deBHgeOArwFnHNbnF4HPteXLgFunXfc6jPmngB9syx8/Gsbc+r0beAB4ENg27b
rX4ee8FXgEOKmtnzLtutdhzHuAj7flM4Bnp133Gsf8E8BZwGNvsv0C4G8Z3Ye0HXhonPuflSP6IY9V2AHsbcu3A+cmWe7mrVmx4pir6r6qerWtPsjonoVZNvTxGb8P/CHw7fUsbkKGjPkXgM9U1csAVXVwnWsctyFjLuCH2vIJLHMvziypqgeAl96iyw7g5hp5EDgxyanj2v+sBP2Qxyp8v09VvQYcAk5el+omY7WPktjF6Ihglq045iRnAluq6s71LGyChvyc3w+8P8k/JHkwyXnrVt1kDBnz7wAfTXKA0RV8v7w+pU3NRB8dM5HLKydgyGMVBj16YYYMHk+SjwLbgJ+caEWT95ZjTvIO4HrgY+tV0DoY8nPewGj65sOMPrX9XZIPVNUrE65tUoaM+XLg81X1x0l+HPhCG/P3Jl/eVEw0v2bliH7IYxW+3yfJBkYf997qo9Lb3aBHSST5CPDbwEVV9Z11qm1SVhrzu4EPAPcneZbRXOa+GT8hO/R3+46q+p+q+jfgKUbBP6uGjHkXcBtAVf0j8E5Gz8Hp1aC/70dqVoJ+yGMV9gE72/IlwL3VznLMqBXH3KYx/pRRyM/6vC2sMOaqOlRVG6tqvqrmGZ2XuKiqFqZT7lgM+d3+G0Yn3kmykdFUzjPrWuV4DRnzc8C5AElOZxT0i+ta5fraB1zRrr7ZDhyqqhfH9eYzMXVTb/JYhSS/ByxU1T7gRkYf7/YzOpK/bHoVr93AMf8R8C7gS+2883NVddHUil6jgWPuysAxfwX46SRPAN8Ffq2q/mN6Va/NwDF/EvizJL/KaArjY7N84Jbki4ym3ja28w6fAo4FqKrPMToPcQGwH3gVuHKs+5/hPztJ0gCzMnUjSTpCBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ37X8ULYZAiBQb9AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# histogram of one null sample; keeps only the per-bin counts\n",
"# (null_txps is whatever set the sampling loop assigned last)\n",
"counts = plt.hist(unq_txps.loc[list(null_txps), 'ratio'])[0]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 1/1000 [00:00<01:54, 8.75it/s]/home/avi/miniconda3/envs/zero/lib/python3.6/site-packages/numpy/lib/histograms.py:754: RuntimeWarning: invalid value encountered in greater_equal\n",
" keep = (tmp_a >= first_edge)\n",
"/home/avi/miniconda3/envs/zero/lib/python3.6/site-packages/numpy/lib/histograms.py:755: RuntimeWarning: invalid value encountered in less_equal\n",
" keep &= (tmp_a <= last_edge)\n",
"100%|██████████| 1000/1000 [03:46<00:00, 1.44it/s]\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAERJJREFUeJzt3X+sX3V9x/HnWwriprNACyNt2WVaE4gGJDesxqHSGiPVUBJh1vijkGYVh8YJYXa/YjdNhiMOR6bUTgiFTCmTKQ2rc6SU4BbLvAzkhx3hikhvirYqVA1Rh773x/dTd6W3vefe74/T7+c+H8nN95zP+fS+Px96efXczznf843MRJJUrxe0PQBJUn8Z9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKzWt7AAALFizIkZGRtochSUPlvvvu+35mLpyu3xER9CMjI4yNjbU9DEkaKhHxnSb9Gi3dRMQTEfFQRDwQEWOl7fiIuDMiHiuvx5X2iIhrI2I8Ih6MiLNmPw1JUrdmskZ/bmaemZmjZX89sD0zlwLbyz7AecDS8rUOuK5Xg5UkzVw3F2NXAZvL9mbggkntN2XHTmB+RJzcRR1JUheaBn0C/x4R90XEutJ2UmY+BVBeTyzti4Ddk/7sRGn7NRGxLiLGImJs3759sxu9JGlaTS/GvjYz90TEicCdEfE/h+kbU7Qd9ND7zNwEbAIYHR31ofiS1CeNzugzc0953Qt8ETgb+N6BJZnyurd0nwCWTPrji4E9vRqwJGlmpg36iPjNiHjJgW3gTcDDwFZgTem2Bri9bG8F3lPuvlkG7D+wxCNJGrwmSzcnAV+MiAP9P5eZ/xYRXwdujYi1wJPARaX/NmAlMA48C1zS81FLkhqbNugz83HgjCnafwCsmKI9gct6MjpJUteOiHfGduNTl97VWu3LNi5vrbYkNTX0Qb/87jZ/edjVYm1Jambog37bGS9rrfZprVWWpOZ8TLEkVc6gl6TKDf3SzbHHXd72ECTpiOYZvSRVzqCXpMoN/dKNt1dK0uF5Ri9JlTPoJalyQ7904xumJOnwPKOXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyQ38f/dWXfqy12le0VlmSmvOMXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXKNgz4ijoqI+yPijrJ/akTcGxGPRcSWiDimtL+w7I+X4yP9GbokqYmZnNF/ENg1af/jwDWZuRR4Glhb2tcCT2fmy4FrSj9JUksaBX1ELAbeAny27AewHPhC6bIZuKBsryr7lOMrSn9JUguantF/EvgT4Jdl/wTgmcx8ruxPAIvK9iJgN0A5vr/0lyS1YNqgj4i3Ansz877JzVN0zQbHJn/fdRExFhFj+/btazRYSdLMNTmjfy1wfkQ8AdxCZ8nmk8D8iDjwUYSLgT1lewJYAlCOvxT44fO/aWZuyszRzBxduHBhV5OQJB3atEGfmX+amYszcwRYDdyVme8EdgAXlm5rgNvL9tayTzl+V2YedEYvSRqMbu6j/zBweUSM01mDv760Xw+cUNovB9Z3N0RJUjfmTd/l/2Xm3cDdZftx4Owp+vwUuKgHY5Mk9YDvjJWkyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUuWmDPiKOjYj/iohvRMQjEfFXpf3UiLg3Ih6LiC0RcUxpf2HZHy/HR/o7BUnS4TQ5o/8ZsDwzzwDOBN4cEcuAjwPXZOZS4G
lgbem/Fng6M18OXFP6SZJaMm3QZ8dPyu7R5SuB5cAXSvtm4IKyvarsU46viIjo2YglSTPSaI0+Io6KiAeAvcCdwLeAZzLzudJlAlhUthcBuwHK8f3ACVN8z3URMRYRY/v27etuFpKkQ2oU9Jn5i8w8E1gMnA2cNlW38jrV2Xse1JC5KTNHM3N04cKFTccrSZqhGd11k5nPAHcDy4D5ETGvHFoM7CnbE8ASgHL8pcAPezFYSdLMNbnrZmFEzC/bLwLeCOwCdgAXlm5rgNvL9tayTzl+V2YedEYvSRqMedN34WRgc0QcRecfhlsz846I+CZwS0R8DLgfuL70vx64OSLG6ZzJr+7DuCVJDU0b9Jn5IPDqKdofp7Ne//z2nwIX9WR0kqSu+c5YSaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmq3LRBHxFLImJHROyKiEci4oOl/fiIuDMiHiuvx5X2iIhrI2I8Ih6MiLP6PQlJ0qE1OaN/DrgiM08DlgGXRcTpwHpge2YuBbaXfYDzgKXlax1wXc9HLUlqbNqgz8ynMvO/y/aPgV3AImAVsLl02wxcULZXATdlx05gfkSc3PORS5IamdEafUSMAK8G7gVOysynoPOPAXBi6bYI2D3pj02UNklSCxoHfUS8GLgN+OPM/NHhuk7RllN8v3URMRYRY/v27Ws6DEnSDDUK+og4mk7I/1Nm/ktp/t6BJZnyure0TwBLJv3xxcCe53/PzNyUmaOZObpw4cLZjl+SNI0md90EcD2wKzP/btKhrcCasr0GuH1S+3vK3TfLgP0HlngkSYM3r0Gf1wLvBh6KiAdK258BVwG3RsRa4EngonJsG7ASGAeeBS7p6YglSTMybdBn5n8w9bo7wIop+idwWZfjkiT1iO+MlaTKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqlyTxxRLUtVG1v9ra7WfuOotfa/hGb0kVc6gl6TKuXQjac77wLeva7G6SzeSpC4Z9JJUOZduJM15N678Tmu1rxhADYNe0pz35V2fbnsIfWXQS5rzfp8ftVb7iQHUMOglzXlXPvOitofQVwa9pDlv1fyj2x5CX3nXjSRVzqCXpMq5dCNpzvvssdtbq72Bc/pewzN6SaqcQS9JlXPppgsbNmyYU3UlDSfP6CWpcp7Rd+G2U29rpe4GNrRSV6rVxtdf0FrtDQOoYdB34da/ea6dwmvaKStpOBn0XTht9Z62hyBJ05p2jT4iboiIvRHx8KS24yPizoh4rLweV9ojIq6NiPGIeDAizurn4CVJ02tyRn8j8A/ATZPa1gPbM/OqiFhf9j8MnAcsLV+/B1xXXqu0gQ+1VFeSmps26DPznogYeV7zKuANZXszcDedoF8F3JSZCeyMiPkRcXJmPtWrAR9JznndzS1V3tBSXUnDaLZr9CcdCO/MfCoiTizti4Ddk/pNlLaDgj4i1gHrAE455ZRZDqNd74x27rr5bitVJQ2rXl+MjSnacqqOmbkJ2AQwOjo6ZZ8j3ZUb/6Kdwufe0U5dSUNptkH/vQNLMhFxMrC3tE8ASyb1WwxUe2vKme/d1fYQJGlasw36rXTu5r6qvN4+qf39EXELnYuw+2tdn2/Tb+94oLXa3z33zNZqS5qdaYM+Ij5P58LrgoiYAD5CJ+BvjYi1wJPARaX7NmAlMA48C1zShzEfMdpao5ekmWhy1807DnFoxRR9E7is20ENi7bW6NtdMvpWi7UlzYbvjO3C20/9cCt1F9/z1lbqArC8vdKSZseg78Kjb7q4lbrn7W7vdtSHWq
ssabYM+iH05V2fbnsIkoaIz6OXpMp5Rt+Ftu66WXjau1upC/CQizfS0DHoh9ClX/v71mp/Yls7F4Kv2OK7gaXZculGkirnGf0Q+ujbj2+t9l9uuby12pJmx6DXjGx8zQdbqXuZ1wakWTPoNRTaer6Pz/ZRDQx6zUhbH4h+7nWtlJWq4MVYSaqcZ/SakT2f/nnbQ5A0Qwa9hsLYV37cSt3t+bJW6gKsWO6TQtUbLt1IUuUMekmqnEs3mpG2nu+z40uH+vyb/lpx7PdbqQvwqt2vaqXu2779tlbqAtx2akuf2nbKze3UHRCDXkNhy+q3t1J3A9e0Uhfau5V1y+pWyqqPDHrpCLXtjPYuBKsuBr2GwsbXX9BK3XOyxV/p2/xo4JZcvO13Wql79aWtlB0Yg146jLauSQBcSTsfPq/6GPTSEerM97ZzSv/Ve0Zbqav+MeilI1Rbv01cypdaqQtw9aUfa612zQx6Sb/mnNe1d11iI+1ci6mdQS/p17R5XUL94TtjJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZXrS9BHxJsj4tGIGI+I9f2oIUlqpudBHxFHAZ8CzgNOB94REaf3uo4kqZl+nNGfDYxn5uOZ+XPgFmBVH+pIkhroR9AvAnZP2p8obZKkFvTjoWYxRVse1CliHbCu7P4kIh6dZb0FQHuf4NwO5zw3OOc5ILqbc6OP5OpH0E8ASybtLwb2PL9TZm4CNnVbLCLGMnNOfVKCc54bnPPcMIg592Pp5uvA0og4NSKOAVYDW/tQR5LUQM/P6DPzuYh4P/AV4Cjghsx8pNd1JEnN9OWDRzJzG7CtH997Cl0v/wwh5zw3OOe5oe9zjsyDrpNKkiriIxAkqXJDE/TTPVYhIl4YEVvK8XsjYmTwo+ytBnO+PCK+GREPRsT2iGh0q9WRrOnjMyLiwojIiBj6OzSazDki/qD8XT8SEZ8b9Bh7rcHP9ikRsSMi7i8/3yvbGGevRMQNEbE3Ih4+xPGIiGvLf48HI+Ksng4gM4/4LzoXdb8F/C5wDPAN4PTn9fkjYGPZXg1saXvcA5jzucBvlO33zYU5l34vAe4BdgKjbY97AH/PS4H7gePK/oltj3sAc94EvK9snw480fa4u5zz64CzgIcPcXwl8GU670NaBtzby/rDckbf5LEKq4DNZfsLwIqImOrNW8Ni2jln5o7MfLbs7qTznoVh1vTxGR8F/hb46SAH1ydN5vyHwKcy82mAzNw74DH2WpM5J/BbZfulTPFenGGSmfcAPzxMl1XATdmxE5gfESf3qv6wBH2Txyr8qk9mPgfsB04YyOj6Y6aPklhL54xgmE0754h4NbAkM+8Y5MD6qMnf8yuAV0TEf0bEzoh488BG1x9N5rwBeFdETNC5g+8Dgxlaa/r66Ji+3F7ZB00eq9Do0QtDpPF8IuJdwCjw+r6OqP8OO+eIeAFwDXDxoAY0AE3+nufRWb55A53f2r4aEa/MzGf6PLZ+aTLndwA3ZuYnIuI1wM1lzr/s//Ba0df8GpYz+iaPVfhVn4iYR+fXvcP9qnSka/QoiYh4I/DnwPmZ+bMBja1fppvzS4BXAndHxBN01jK3DvkF2aY/27dn5v9m5reBR+kE/7BqMue1wK0Amfk14Fg6z4SpVaP/32drWIK+yWMVtgJryvaFwF1ZrnIMqWnnXJYxPkMn5Id93RammXNm7s/MBZk5kpkjdK5LnJ+ZY+0Mtyea/Gx/ic6FdyJiAZ2lnMcHOsreajLnJ4EVABFxGp2g3zfQUQ7WVuA95e6bZcD+zHyqV998KJZu8hCPVYiIvwbGMnMrcD2dX+/G6ZzJr25vxN1rOOergRcD/1yuOz+Zmee3NuguNZxzVRrO+SvAmyLim8AvgCsz8wftjbo7Ded8BfCPEfEhOksYFw/ziVtEfJ7O0tuCct3hI8DRAJm5kc51iJXAOPAscElP6w/xfztJUgPDsnQjSZolg16SKmfQS1LlDH
pJqpxBL0mVM+glqXIGvSRVzqCXpMr9HyB7MizO6Mx0AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bin each null sample into 10 bins of transcript uniqueness ratio.\n",
"# np.histogram computes the bin counts directly, without drawing a bar chart per sample.\n",
"counts = []\n",
"for null_sample in tqdm(nulls):\n",
"    # list(): newer pandas rejects a set as a .loc indexer; NaN ratios are left out of the bins\n",
"    ratios = unq_txps.loc[list(null_sample), 'ratio'].dropna().values\n",
"    # bin edges span this sample's own min..max, matching plt.hist's default range\n",
"    # NOTE(review): consider range=(0, 1) so bins are comparable across samples -- confirm\n",
"    count, _ = np.histogram(ratios, bins=10)\n",
"    counts.append(count.astype(float))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1000, 10)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(counts), len(counts[0])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"#taking mean of the counts in each bin\n",
"null = pd.DataFrame(counts).mean().values"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEClJREFUeJzt3X+snmV9x/H3Ryq6qbP8KIS0ZdVYN42JSk5IjYlT6wziQvkDFswclTRr4phx02xj2x/u1x+4ZUNJDK4TZzH+gLE5GmU6UiBuy2AeBiI/NFTG6EkZPQp0M0Qd+t0fz1U9toee+/Q8zzmci/crOXnu+7qv57m/V8/pp9e57ue5m6pCktSv56x0AZKkyTLoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ1bs9IFAJx66qm1adOmlS5DklaVO+6441tVtW6hfs+IoN+0aRPT09MrXYYkrSpJ/mtIP5duJKlzBr0kdc6gl6TOGfSS1DmDXpI6Nyjok6xNcn2Srye5P8nrkpyc5KYkD7THk1rfJLkyyb4kdyc5a7JDkCQdy9AZ/YeBL1bVzwOvBu4HLgP2VtVmYG/bB3gbsLl97QSuGmvFkqRFWTDok/wM8AbgaoCq+n5VPQFsA3a3bruB89v2NuCaGrkNWJvkjLFXLkkaZMiM/qXALPA3Se5M8rEkLwBOr6pHANrjaa3/emD/nOfPtDZJ0goY8snYNcBZwHuq6vYkH+bHyzTzyTxtR/0P5El2Mlra4cwzzxxQxvw2XfaF437uUj10+dtX7NySNNSQGf0MMFNVt7f96xkF/6OHl2Ta48E5/TfOef4G4MCRL1pVu6pqqqqm1q1b8FYNkqTjtGDQV9V/A/uT/Fxr2grcB+wBtre27cANbXsPcHF7980W4NDhJR5J0vIbelOz9wCfSnIi8CBwCaN/JK5LsgN4GLiw9b0ROBfYBzzZ+kqSVsigoK+qu4CpeQ5tnadvAZcusS5J0pj4yVhJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdGxT0SR5K8rUkdyWZbm0nJ7kpyQPt8aTWniRXJtmX5O4kZ01yAJKkY1vMjP5NVfWaqppq+5cBe6tqM7C37QO8DdjcvnYCV42rWEnS4i1l6WYbsLtt7wbOn9N+TY3cBqxNcsYSziNJWoKhQV/APyW5I8nO1nZ6VT0C0B5Pa+3rgf1znjvT2n5Ckp1JppNMz87OHl/1kqQFrRnY7/VVdSDJacBNSb5+jL6Zp62OaqjaBewCmJqaOuq4JGk8Bs3oq+pAezwIfA44G3j08JJMezzYus8AG+c8fQNwYFwFS5IWZ8GgT/KCJC86vA28FbgH2ANsb922Aze07T3Axe3dN1uAQ4eXeCRJy2/I0s3pwOeSHO7/6ar6YpKvANcl2QE8DFzY+t8InAvsA54ELhl71ZKkwRYM+qp6EHj1PO3fBrbO017ApWOpTpK0ZH4yVpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Nzjok5yQ5M4kn2/7L0lye5IHklyb5MTW/ry2v68d3zSZ0iVJQyxmRv9e4P45+x8ErqiqzcDjwI7WvgN4vKpeBlzR+kmSVsigoE+yAXg78LG2H+DNwPWty27g/La9re3Tjm9t/SVJK2DojP5DwO8AP2z7pwBPVNVTbX8GWN+21wP7AdrxQ62/JGkFLBj0SX4JOFhVd8xtnqdrDTg293
V3JplOMj07OzuoWEnS4g2Z0b8eOC/JQ8BnGS3ZfAhYm2RN67MBONC2Z4CNAO34i4HHjnzRqtpVVVNVNbVu3bolDUKS9PQWDPqq+r2q2lBVm4CLgJur6leAW4ALWrftwA1te0/bpx2/uaqOmtFLkpbHUt5H/7vA+5LsY7QGf3Vrvxo4pbW/D7hsaSVKkpZizcJdfqyqbgVubdsPAmfP0+e7wIVjqE2SNAZ+MlaSOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnFgz6JM9P8u9Jvprk3iR/1NpfkuT2JA8kuTbJia39eW1/Xzu+abJDkCQdy5AZ/feAN1fVq4HXAOck2QJ8ELiiqjYDjwM7Wv8dwONV9TLgitZPkrRCFgz6GvlO231u+yrgzcD1rX03cH7b3tb2ace3JsnYKpYkLcqgNfokJyS5CzgI3AR8E3iiqp5qXWaA9W17PbAfoB0/BJwyz2vuTDKdZHp2dnZpo5AkPa1BQV9VP6iq1wAbgLOBV8zXrT3ON3uvoxqqdlXVVFVNrVu3bmi9kqRFWtS7bqrqCeBWYAuwNsmadmgDcKBtzwAbAdrxFwOPjaNYSdLiDXnXzboka9v2TwFvAe4HbgEuaN22Aze07T1tn3b85qo6akYvSVoeaxbuwhnA7iQnMPqH4bqq+nyS+4DPJvlT4E7g6tb/auCTSfYxmslfNIG6JUkDLRj0VXU38Np52h9ktF5/ZPt3gQvHUp0kacn8ZKwkdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdW7BoE+yMcktSe5Pcm+S97b2k5PclOSB9nhSa0+SK5PsS3J3krMmPQhJ0tMbMqN/Cnh/Vb0C2AJcmuSVwGXA3qraDOxt+wBvAza3r53AVWOvWpI02IJBX1WPVNV/tO3/Be4H1gPbgN2t227g/La9DbimRm4D1iY5Y+yVS5IGWdQafZJNwGuB24HTq+oRGP1jAJzWuq0H9s952kxrkyStgMFBn+SFwN8Bv1lV/3OsrvO01TyvtzPJdJLp2dnZoWVIkhZpUNAneS6jkP9UVf19a3708JJMezzY2meAjXOevgE4cORrVtWuqpqqqql169Ydb/2SpAUMeddNgKuB+6vqL+cc2gNsb9vbgRvmtF/c3n2zBTh0eIlHkrT81gzo83rgV4GvJbmrtf0+cDlwXZIdwMPAhe3YjcC5wD7gSeCSsVYsSVqUBYO+qv6F+dfdAbbO07+AS5dYlyRpTPxkrCR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4tGPRJPp7kYJJ75rSdnOSmJA+0x5Nae5JcmWRfkruTnDXJ4iVJCxsyo/8EcM4RbZcBe6tqM7C37QO8DdjcvnYCV42nTEnS8Vow6Kvqy8BjRzRvA3a37d3A+XPar6mR24C1Sc4YV7GSpMU73jX606vqEYD2eFprXw/sn9NvprUdJcnOJNNJpmdnZ4+zDEnSQsZ9MTbztNV8HatqV1VNVdXUunXrxlyGJOmw4w36Rw8vybTHg619Btg4p98G4MDxlydJWqrjDfo9wPa2vR24YU77xe3dN1uAQ4eXeCRJK2PNQh2SfAZ4I3BqkhngA8DlwHVJdgAPAxe27jcC5wL7gCeBSyZQsyRpERYM+qp6x9Mc2j
pP3wIuXWpRkqTxWTDo9fQ2XfaFFTnvQ5e/fUXOK2l18hYIktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjrnLRC0KN72QVp9nNFLUuec0a9CKzWrlrQ6GfRaFVwyko6fSzeS1Dln9NIxrOQymb9NaFyc0UtS5wx6SeqcSzfSM5QXoDUuzuglqXPO6CX9BC9A98cZvSR1zhm9pGcMr0tMhjN6SercRII+yTlJvpFkX5LLJnEOSdIwY1+6SXIC8BHgF4EZ4CtJ9lTVfeM+lySNQ+8XoCcxoz8b2FdVD1bV94HPAtsmcB5J0gCTCPr1wP45+zOtTZK0AibxrpvM01ZHdUp2Ajvb7neSfOM4z3cq8K3jfO5q5ZifHRzzs0A+uKQx/+yQTpMI+hlg45z9DcCBIztV1S5g11JPlmS6qqaW+jqriWN+dnDMzw7LMeZJLN18Bdic5CVJTgQuAvZM4DySpAHGPqOvqqeS/AbwJeAE4ONVde+4zyNJGmYin4ytqhuBGyfx2vNY8vLPKuSYnx0c87PDxMecqqOuk0qSOuItECSpc6sm6Be6rUKS5yW5th2/Pcmm5a9yvAaM+X1J7ktyd5K9SQa91eqZbOjtM5JckKSSrPp3aAwZc5Jfbt/re5N8erlrHLcBP9tnJrklyZ3t5/vclahzXJJ8PMnBJPc8zfEkubL9edyd5KyxFlBVz/gvRhd1vwm8FDgR+CrwyiP6/Drw0bZ9EXDtSte9DGN+E/DTbfvdz4Yxt34vAr4M3AZMrXTdy/B93gzcCZzU9k9b6bqXYcy7gHe37VcCD6103Usc8xuAs4B7nub4ucA/Mvoc0hbg9nGef7XM6IfcVmEbsLttXw9sTTLfh7dWiwXHXFW3VNWTbfc2Rp9ZWM2G3j7jT4A/A767nMVNyJAx/xrwkap6HKCqDi5zjeM2ZMwF/EzbfjHzfBZnNamqLwOPHaPLNuCaGrkNWJvkjHGdf7UE/ZDbKvyoT1U9BRwCTlmW6iZjsbeS2MFoRrCaLTjmJK8FNlbV55ezsAka8n1+OfDyJP+a5LYk5yxbdZMxZMx/CLwzyQyjd/C9Z3lKWzETvXXMavmPR4bcVmHQrRdWkcHjSfJOYAr4hYlWNHnHHHOS5wBXAO9aroKWwZDv8xpGyzdvZPRb2z8neVVVPTHh2iZlyJjfAXyiqv4iyeuAT7Yx/3Dy5a2IiebXapnRD7mtwo/6JFnD6Ne9Y/2q9Ew36FYSSd4C/AFwXlV9b5lqm5SFxvwi4FXArUkeYrSWuWeVX5Ad+rN9Q1X9X1X9J/ANRsG/Wg0Z8w7gOoCq+jfg+Yzug9OrQX/fj9dqCfoht1XYA2xv2xcAN1e7yrFKLTjmtozxV4xCfrWv28ICY66qQ1V1alVtqqpNjK5LnFdV0ytT7lgM+dn+B0YX3klyKqOlnAeXtcrxGjLmh4GtAElewSjoZ5e1yuW1B7i4vftmC3Coqh4Z14uviqWbeprbKiT5Y2C6qvYAVzP69W4fo5n8RStX8dINHPOfAy8E/rZdd364qs5bsaKXaOCYuzJwzF8C3prkPuAHwG9X1bdXruqlGTjm9wN/neS3GC1hvGs1T9ySfIbR0tup7brDB4DnAlTVRxldhzgX2Ac8CVwy1vOv4j87SdIAq2XpRpJ0nAx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI69/8clUc0lbWQwAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#binning roi_txps into 10 bins based on uniqueness\n",
"rois,_,_ = plt.hist(unq_txps.loc[list(roi_txps)]['ratio'])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MannwhitneyuResult(statistic=35.0, pvalue=0.13651816987559418)\n",
"WilcoxonResult(statistic=13.0, pvalue=0.13941397332153205)\n",
"KruskalResult(statistic=1.2857142857142776, pvalue=0.25683925795785484)\n"
]
}
],
"source": [
"from scipy.stats import mannwhitneyu\n",
"data1, data2 = rois, null\n",
"print(stats.mannwhitneyu(data1, data2))\n",
"print(stats.wilcoxon(data1, data2))\n",
"print(stats.kruskal(data1, data2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment