Skip to content

Instantly share code, notes, and snippets.

@inodb
Last active May 7, 2018 20:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save inodb/26e77fb017d6b7b639a0d17d58b79728 to your computer and use it in GitHub Desktop.
Save inodb/26e77fb017d6b7b639a0d17d58b79728 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Check range mrna v2 continous in pancan\n",
"\n",
"Data file generated on cgds_gdac db pipelines with:\n",
"```\n",
"echo \"select * from genetic_alteration where genetic_profile_id in (select genetic_profile_id from genetic_profile where stable_id like '%pan_can%%v2%mrna%' and datatype='CONTINUOUS')\" | mysql -h localhost -u username -ppassword database_name > genetic_alteration_pancan.tsv\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"first_1k = pd.read_csv(\"1000_values.txt\",header=None)[0]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"all_values = pd.read_csv(\"all_values.txt\",header=None)[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"780000"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(first_1k)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"206143299"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(all_values)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-0.99121057347755-7092440.0 (mean: 1004.0774317541006, min >0 : 4.823772759010581e-07)\n"
]
}
],
"source": [
"def print_stats(x):\n",
" print(\"{} - {} (mean: {}, min >0 : {})\".format(\n",
" x.min(),\n",
" x.max(),\n",
" x.mean(),\n",
" x[x>0].min()\n",
" ))\n",
"print_stats(all_values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.hist(all_values[~pd.isnull(all_values)], log=True)\n",
"plt.title('All values mrna v2 TCGA pancan values')\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of values below 0: 1190144 (0.58%) of 206143299 total\n"
]
}
],
"source": [
"print(\"Number of values below 0: {} ({:.2f}%) of {} total\".format(\n",
" len(all_values[all_values < 0]),\n",
" len(all_values[all_values < 0]) * 100.0 / len(all_values),\n",
" len(all_values)\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5,1,'Distribution values < 1 (17.17% of total)')"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAGHpJREFUeJzt3X20XVV57/Hvz8RA5R1JbYCEhHtiNFpr7SliWwsqQxIkxBfqTYoY2ghCL/WOWofEYutLeyv2dpSOXrEaFeMLBBG1JhBLsYihowEJvaIJaSBEkGOAhIZEUAQCT/+Y88jKyX5/Oftk+vuMccbZe6615nrO3Gs/e+1nzX22IgIzMyvXcwYdgJmZ9ZcTvZlZ4ZzozcwK50RvZlY4J3ozs8I50ZuZFc6Jvs8kfULSn/eorxmSHpM0Kd+/SdI7etF37u8bkpb0qr9ekBSShgYdRz9IWinpjYOOY7xI+iVJqyXtlvTlAez/ZEkjlfvfkfSS8Y5jEJzouyDpXkmPS3pU0i5J/y7pfEk/H9eIOD8i/rLFvk5ptE5E/DAiDo6Ip3sQ+wclfXFM//Mj4nPd9v2LSNJLJV0v6WFJTT+cIullwK8BX8/3p0laJWlbfnGbOWb9jflFfvRnj6TVdfruuC9Jh+W/Y5ekK0ZPKvKyT0l6U3sjs5czgRcAz4+I36sR9z7HZCPtrl/D3wIf7mL7/YYTffcWRMQhwHHAJcBFwGd6vRNJk3vdp7Unn5EeUmfxU8DVwNIWu3sncEU8+4nFZ4B/Bt5Sa+WIeEl+kT8YOAT4IVDvrLibvt4J/H9SQp4JvAlA0quAaRHxtRb/vlqOA+6KiD1d9NFLq4DXSJo26ED6LiL80+EPcC9wypi2E0hPtJfm+yuAv8q3jwKuBXYBO4GbSS+2X8jbPA48BryX9CQLUuL4IbC20jY593cT8BHgO8Bu0tnhkXnZycBIrXiBecCTpOT0GHBHpb935NvPAd4P3AdsBz4PHJaXjcaxJMf2MHBxnTE6EXgQmFRpexPwvcp4rctj8gDwMWBKZd0AhsbGl++fA/xb5f6LgBvy2G4G3lpZdhpwJ/Ao8CPgPW08zicCnwQeAV7VZN2h9LRq2udW4HdqtE/Of/PMBtuelB+3g5rso+2+gH8ETs23L8nH4iTgFuB/tPB3vTg/TruAjcAZuf1DY465pWO2q3dMHk1KyDuBLcC5Tdb/A2BTfpy3Au+s7ONk9n1O3AAsGa+cMagfn9H3WER8BxgBXl1j8Z/mZVNJZ0x/ljaJs0kJc0GkM62/qWxzEunJc2qdXb4d+EPSE2IP8A8txPjPwF8DX8r7+7Uaq52Tf14DHA8cTErCVb8DzAFeB/yFpBfX2NctwE+A11aafx+4Mt9+GvgT0ovgq3Jff9TsbxhL0kGkJ+2VwC8Di4GPV2qwnyE96Q8BXgrc2KS/aZLeK2kTcAWwDXhFRKxrN7Y6sc4ivRh1YglwTUT8pNtYavS1AThF0i+RjuGNwLuAb0TEPY06kvRcYDXwL6TH4I+BKyTNiYgPsPcxt9e73gbH5ErSc+ZoUunnryW9rsH624HTgUNJSf9SSa9oEPYmUgmtaE70/bENOLJG+1PANOC4iHgqIm6OfFrRwAcj4icR8Xid5V+IiA35ifrnwFurddUunAX8XURsjYjHgPcBi8aUkD4UEY9HxB3AHdR/wqwkJV5y6eO03EZE3B4Rt0TEnoi4l3TmfFIH8Z4O3BsRn819/QfwFVJygDT2cyUdGhGP5OX7yBe8ryWd/b+IVMoYiogPRcQPOoirlsPz70fb3VDS80h/04pug6jT12eAw4BbSe847wDOBv5e0j9KWivpr+p0eSLphOCSiHgyIm4kvYNd3GF800knExdFxM8i4rvAp3M8NUXEdRFxTyTfJr3o1DrpGvUozz4exXKi749jSG81x/q/pLef/yJpq6RlLfR1fxvL7wOeSzo77tbRub9q35NJ70RGPVi5/VPSk7yWK4E3SzoAeDPwHxFxH4CkF0q6VtKDkn5MOkvrJP7jgFfmi4i7JO0ivVj9Sl7+FtILzH2Svp1rzrUcBLyEdBZ5B7CphRfjdu3Kv+vV+xt5M+nY+nYP4tinr5xQz4uIl0XEMuBS0jvPs0glnJNI4zyvRn9HA/dHxDOVtvtIz4dOHA3sjIjqC2LD/iTNl3SLpJ35GDiNxsfTITz7eBTLib7HJP0m6UD8t7HLIuLRiPjTiDgeWAC8W9LrRhfX6bJZkpleuT2DdOb6MKlc8rxKXJNIJaNW+91GSp7VvvcADzXZbh8RcSfpCTqfvcs2kGrC/wnMjohDSUlFdbra62/i2SQO6QXv2xFxeOXn4Ii4IMdwW0QsJJUU/ol04bRWrJtIpap3Ab8B3J1nsJyZX6i6lt993QO8sIPNlwCf79GLT8O+cjJXLpP8KrA+r7seeFmNTbYB06uzzkjHzY9ajGdsHNuAI8dcAK/2t9f6+fH5Cmk2zQsi4nBgDfWPJ0hl0TtajG+/5UTfI5IOlXQ6cBXwxYj4fo11Tpc0JEnAj0n16dGpkg+REky73iZpbn4b/mFSvfVp4C7gQElvyLXT9wPVRPUQMHPMk7JqJfAnkmZJOphn66Gdzpi4kpQ8f5e9Z4scQhqLxyS9CLigQR/fJb0zeJ7S3PrqDJdrgRdKOlvSc/PPb0p6saQpks6SdFhEPMWzY19Tftv/rYh4O+mF9OvA/wYeyNMi96HkQGBKvn9gkxeGNYwpUeXtR7c5IN+vLj+WdM2k6RTYbvvK619Cun4C8APgZElTgN8mXegc61bSi/F78/ifTDqhuapZvNlex2RE3A/8O/CRPJ4vIz3mV9RanzT2BwA7gD2S5gOvr7ez/Pj8BunaTtGc6Lu3WtKjpDPKi4G/I10EqmU28E3SLIF1wMcj4qa87CPA+3PZ4T1t7P8LpBrrg8CBpGRKROwmXdT8NOkM6CekcsSo0WT7X5Jq1asvz32vJT3Jf0a6uNaplaRZDzdGxMOV9veQzvIfBT4FfKlBH5eSZlo8REpQo0948tv71wOLSGeCDwIf5dlkdzZwby4PnQ+8rZWg87uwz0TEq0kzhLbXWfU40qypjfn+4zS+2LocOCu/6I8anXUF6V3O2OsyZwPral0UVZoPX61Fd9xX9mek6Z+jpcFPkkogO0jH0T7TLCPiSeAM0ju3h4GPA2+PiP+ss4+xah2Ti0mzvLblfX4gIm6otX4+Bt5Ferf2COm4WtVgf2cAN0XEthbj22+p9+VHM2uFpCuBqyPinwYdyy8iSbeSpnluGHQs/eZEb2ZWOJduzMwK50RvZlY4J3ozs8JNiH+UddRRR8XMmTMHHYaZ2X7l9ttvfzgipjZbb6CJXtICYMHQ0BDr168fZChmZvsdSfc1X2vApZuIWB0R5x122GGDDMPMrGiu0ZuZFc6J3syscE70ZmaFc6I3MyvcQBO9pAWSlu/evXuQYZiZFc2zbszMCufSjZlZ4SbEJ2PNmpm57LqB7PfeS94wkP2a9ZLP6M3MCudEb2ZWOCd6M7PCOdGbmRXOid7MrHD+wJSZWeH8gSkzs8K5dGNmVjgnejOzwjnRm5kVzonezKxwTvRmZoVzojczK5wTvZlZ4ZzozcwK50RvZlY4J3ozs8L1/BumJL0aOCv3PTcifqvX+zAzs9a1dEYv6XJJ2yVtGNM+T9JmSVskLQOIiJsj4nzgWuBzvQ/ZzMza0WrpZgUwr9ogaRJwGTAfmAssljS3ssrvAyt7EKOZmXWhpUQfEWuBnWOaTwC2RMTWiHgSuApYCCBpBrA7In5cr09J50laL2n9jh07OovezMya6uZi7DHA/ZX7I7kNYCnw2UYbR8TyiBiOiOGpU6d2EYaZmTXSzcVY1WgLgIj4QEsdSAuABUNDQ12EYWZmjXRzRj8CTK/cPxbY1k4H/uIRM7P+6ybR3wbMljRL0hRgEbCqN2GZmVmvtDq9ciWwDpgjaUTS0ojYA1wIXA9sAq6OiI3t7NzfGWtm1n8t1egjYnGd9jXAmk53HhGrgdXDw8PndtqHmZk15n+BYGZWuIEmepduzMz6b6CJ3rNuzMz6z6UbM7PCuXRjZlY4l27MzArn0o2ZWeGc6M3MCucavZlZ4VyjNzMrnEs3ZmaFc6I3MyucE72ZWeF8MdbMrHC+GGtmVjiXbszMCudEb2ZWOCd6M7PCOdGbmRXOs27MzArnWTdmZoWbPOgAzGximbnsuoHt+95L3jCwfZfMNXozs8I50ZuZFc6J3syscK7Rm01Qg6yVW1l8Rm9mVjgnejOzwvkDU2ZmhfMHpszMCufSjZlZ4TzrxswmjEHNNCr9E7k+ozczK5zP6M3sF17p/9/HZ/RmZoVzojczK5wTvZlZ4ZzozcwK50RvZlY4J3ozs8L1fHqlpOcAfwkcCqyPiM/1eh9mZta6ls7oJV0uabukDWPa50naLGmLpGW5eSFwDPAUMNLbcM3MrF2tlm5WAPOqDZImAZcB84G5wGJJc4E5wLqIeDdwQe9CNTOzTrSU6CNiLbBzTPMJwJaI2BoRTwJXkc7mR4BH8jpP1+tT0nmS1ktav2PHjvYjNzOzlnRzMfYY4P7K/ZHc9lXgVEn/D1hbb+OIWB4RwxExPHXq1C7CMDOzRrq5GKsabRERPwWWttSBtABYMDQ01EUYZmbWSDdn9CPA9Mr9Y4Ft7XTgLx4xM+u/bhL9bcBsSbMkTQEWAat6E5aZmfVKq9MrVwLrgDmSRiQtjYg9wIXA9cAm4OqI2NjOzv2dsWZm/ddSjT4iFtdpXwOs6XTnEbEaWD08PHxup32YmVlj/hcIZmaFG2iid+nGzKz/BproPevGzKz/XLoxMyvcQL8c3B+YsolukF8abdYrLt2YmRXOpRszs8I50ZuZFc7TK83MCucavZlZ4Vy6MTMrnBO9mVnhBjqP3vY/nldutv/xxVgzs8L5YqyZWeFcozczK5wTvZlZ4ZzozcwK50RvZlY4z7oxMyucZ92YmRXOpRszs8I50ZuZFc6J3syscE70ZmaFc6I3MyucE72ZWeGc6M3MCucPTJmZFc4fmDIzK5xLN2ZmhXOiNzMrnBO9mVnhnOjNzArnRG9mVjgnejOzwjnRm5kVbvKgA7D2zVx23aBDMLP9iM/ozcwK1/NEL+lkSTdL+oSkk3vdv5mZtaelRC/pcknbJW0Y0z5P0mZJWyQty80BPAYcCIz0NlwzM2tXq2f0K4B51QZJk4DLgPnAXGCxpLnAzRExH7gI+FDvQjUzs060lOgjYi2wc0zzCcCWiNgaEU8CVwELI+KZvPwR4ICeRWpmZh3pZtbNMcD9lfsjwCslvRk4FTgc+Fi9jSWdB5wHMGPGjC7CMDOzRrpJ9KrRFhHxVeCrzTaOiOXAcoDh4eHoIg4zM2ugm1k3I8D0yv1jgW3tdOAvHjEz679uEv1twGxJsyRNARYBq9rpwF88YmbWf61Or1wJrAPmSBqRtDQi9gAXAtcDm4CrI2Jj/0I1M7NOtFSjj4jFddrXAGs63bmkBcCCoaGhTrswM7Mm/J2xZmaFG2ii98VYM7P+8xm9mVnh/N8rzcwK50RvZlY41+jNzArnGr2ZWeFcujEzK5wTvZlZ4VyjNzMrnGv0ZmaFc+nGzKxwTvRmZoVzojczK5wvxpqZFc4XY83MCufSjZlZ4ZzozcwK50RvZlY4J3ozs8J51o2ZWeE868bMrHCTBx3A/mzmsusGHYKZWVOu0ZuZFc6J3syscE70ZmaFc6I3MyucE72ZWeGc6M3MCucPTJmZFc4fmDIzK5xLN2ZmhXOiNzMrnBO9mVnhnOjNzArnRG9mVjgnejOzwjnRm5kVzonezKxwTvRmZoXrS6KXdJCk2yWd3o/+zcysdS0lekmXS9ouacOY9nmSNkvaImlZZdFFwNW9DNTMzDrT6hn9CmBetUHSJOAyYD4wF1gsaa6kU4A7gYd6GKeZmXWopS8Hj4i1kmaOaT4B2BIRWwEkXQUsBA4GDiIl/8clrYmIZ3oWsZmZtaWlRF/HMcD9lfsjwCsj4kIASecAD9dL8pLOA84DmDFjRhdhmJlZI91cjFWNtvj5jYgVEXFtvY0jYnlEDEfE8NSpU7sIw8zMGukm0Y8A0yv3jwW2tdOBv3jEzKz/ukn0twGzJc2SNAVYBKxqpwN/8YiZWf+1Or1yJbAOmCNpRNLSiNgDXAhcD2wCro6Ije3s3Gf0Zmb91+qsm8V12tcAazrdeUSsBlYPDw+f22kfZmbWmP8FgplZ4Qaa6F26MTPrv4Emel+MNTPrP5duzMwK50RvZlY41+jNzArnGr2ZWeFcujEzK5wTvZlZ4VyjNzMrnGv0ZmaFc+nGzKxwTvRmZoVzjd7MrHCu0ZuZFc6lGzOzwjnRm5kVzonezKxwTvRmZoVzojczK5ynV5qZFW7yIHceEauB1cPDw+d22sfMZdf1MCIzs/K4dGNmVjgnejOzwjnRm5kVzonezKxwTvRmZoVzojczK5zn0ZuZFc7/ptjMrHAu3ZiZFU4RMegYkLQDuK+DTY8CHu5xOL3guNo3UWNzXO1xXO3rJrbjImJqs5UmRKLvlKT1ETE86DjGclztm6ixOa72OK72jUdsLt2YmRXOid7MrHD7e6JfPugA6nBc7ZuosTmu9jiu9vU9tv26Rm9mZs3t72f0ZmbWhBO9mVnhJnyil/R7kjZKekZS3SlIkuZJ2ixpi6RllfZZkm6VdLekL0ma0qO4jpR0Q+73BklH1FjnNZK+W/n5maQ35mUrJP2gsuzl4xVXXu/pyr5XVdoHOV4vl7QuP97fk/Q/K8t6Ol71jpfK8gPy378lj8fMyrL35fbNkk7tJo4O4nq3pDvz+PyrpOMqy2o+puMY2zmSdlRieEdl2ZL82N8tack4x3VpJaa7JO2qLOvbmEm6XNJ2SRvqLJekf8hxf0/SKyrLejteETGhf4AXA3OAm4DhOutMAu4BjgemAHcAc/Oyq4FF+fYngAt6FNffAMvy7WXAR5usfySwE3hevr8COLMP49VSXMBjddoHNl7AC4HZ+fbRwAPA4b0er0bHS2WdPwI+kW8vAr6Ub8/N6x8AzMr9TBrHuF5TOYYuGI2r0WM6jrGdA3ysxrZHAlvz7yPy7SPGK64x6/8xcPk4jdnvAq8ANtRZfhrwDUDAicCt/RqvCX9GHxGbImJzk9VOALZExNaIeBK4ClgoScBrgWvyep8D3tij0Bbm/lrt90zgGxHx0x7tv5524/q5QY9XRNwVEXfn29uA7UDTT/11oObx0iDea4DX5fFZCFwVEU9ExA+ALbm/cYkrIr5VOYZuAY7t0b67jq2BU4EbImJnRDwC3ADMG1Bci4GVPdp3QxGxlnRyV89C4POR3AIcLmkafRivCZ/oW3QMcH/l/khuez6wKyL2jGnvhRdExAMA+fcvN1l/EfseYP8nv2W7VNIB4xzXgZLWS7pltJzEBBovSSeQztDuqTT3arzqHS8118njsZs0Pq1s28+4qpaSzghH1XpMe6XV2N6SH6NrJE1vc9t+xkUuc80Cbqw093PMmqkXe8/Ha3I3G/eKpG8Cv1Jj0cUR8fVWuqjRFg3au46r1T5yP9OAXwWurzS/D3iQlMyWAxcBHx7HuGZExDZJxwM3Svo+8OMa6w1qvL4ALImIZ3Jzx+NVaxc12sb+nX05pppouW9JbwOGgZMqzfs8phFxT63t+xTbamBlRDwh6XzSO6LXtrhtP+MatQi4JiKerrT1c8yaGbdjbEIk+og4pcsuRoDplfvHAttI/yjocEmT81nZaHvXcUl6SNK0iHggJ6btDbp6K/C1iHiq0vcD+eYTkj4LvGc848qlESJiq6SbgF8HvsKAx0vSocB1wPvz29nRvjserxrqHS+11hmRNBk4jPQ2vJVt+xkXkk4hvXieFBFPjLbXeUx7lbSaxhYR/1W5+yngo5VtTx6z7U3jFVfFIuB/VRv6PGbN1Iu95+NVSunmNmC20oyRKaQHdFWkKxvfItXHAZYArbxDaMWq3F8r/e5TF8zJbrQu/kag5pX5fsQl6YjR0oeko4DfBu4c9Hjlx+5rpLrll8cs6+V41TxeGsR7JnBjHp9VwCKlWTmzgNnAd7qIpa24JP068EngjIjYXmmv+Zj2KK5WY5tWuXsGsCnfvh54fY7xCOD17P3utq9x5djmkC5srqu09XvMmlkFvD3PvjkR2J1PaHo/Xv264tyrH+BNpFe4J4CHgOtz+9HAmsp6pwF3kV6NL660H096Im4Bvgwc0KO4ng/8K3B3/n1kbh8GPl1ZbybwI+A5Y7a/Efg+KWF9ETh4vOICfivv+478e+lEGC/gbcBTwHcrPy/vx3jVOl5IpaAz8u0D89+/JY/H8ZVtL87bbQbm9/h4bxbXN/PzYHR8VjV7TMcxto8AG3MM3wJeVNn2D/NYbgH+YDzjyvc/CFwyZru+jhnp5O6BfEyPkK6pnA+cn5cLuCzH/X0qswp7PV7+FwhmZoUrpXRjZmZ1ONGbmRXOid7MrHBO9GZmhXOiNzMrnBO9mVnhnOjNzAr33/k62j0lMBm2AAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1c481b5048>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.hist(all_values[(all_values < 1)], log=True)\n",
"plt.title(\"Distribution values < 1 ({:.2f}% of total)\".format(len(all_values[(all_values < 1)]) * 100.0 / len(all_values)))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of values that are 0: 25329688 (12.29%)\n"
]
}
],
"source": [
"print(\"Number of values that are 0: {} ({:.2f}%)\".format(\n",
" len(all_values[all_values == 0]),\n",
" len(all_values[all_values == 0]) * 100.0 / len(all_values)\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of profiles:\n"
]
}
],
"source": [
"print(\"Number of profiles:\")"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 33\r\n"
]
}
],
"source": [
"!cut -f1 genetic_alteration_pancan.tsv | tail -n+2 | cut -f1 | sort -u | wc -l"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of profiles with negative values:\n"
]
}
],
"source": [
"print(\"Number of profiles with negative values:\")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 8\r\n"
]
}
],
"source": [
"!grep '-' genetic_alteration_pancan.tsv | cut -f1 | sort -u | wc -l"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEMCAYAAAA/Jfb8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAGedJREFUeJzt3Xu8XGV97/HPl3DzwjXZgpBgQIM1RQTJAWuLxgP1hMsh51AviYjFIlEr9nBEKxx9YYrHA+rp6fECYkSaFoUI1EvEKLSvSrFoKkHBElI4MUSzDcgGAohUIfg7fzzPxpXJXNbee/aemSff9+s1rz1rrWfW+s2ame8886w1sxURmJlZWXbqdQFmZtZ9DnczswI53M3MCuRwNzMrkMPdzKxADnczswI53K2rJJ0m6cZe1zFeks6Q9M9TtK2B3lf9StJ8ScOV6bWS5vewpJ5wuA8oSc+TdLWkzZIelXSLpGOmuIbZkkLSzqPzIuKLEfHaqayjVyQtlfSF8d5+R9pXYyFpo6Tju7W+iPjdiLipW+sbFA73LqqG3BR4LnArcBSwL/A3wDckPXcKazCzfhURvlQuwEbgfcCPgF8Cnwf2A74J/AL4B2Cf3HY2EMCZwE+Bmyvz/jjPexD4QGX9RwPfAx4B7gM+DezaopZvAWc3zLsDOLVF+8eAo1osmwb8D+DH+X7cBszKy15JeqN4NP99ZeV2NwEfBm7Jt7sRmJGX/TTf18fz5feAM4B/rtw+gHcA/w/YAlwCKC9bCnyh0nZ03+2cpw8AVgIPA+uBsyptlwP/szI9HxiuTL8f+Fmu+W7guBb7ZXrexmPA9/N9rdb/CWBTXn4bcGyevwB4Engq3/c78vy3AuvydjcAb2/zXKu9r5rcdilwHfClvK0fAC+rLD+v8ljfBfzXxu0C/ztv517ghMryfYG/Bjbn5V/N8/cBrgdG8vzrgZl1nitN6p+Rb/9Ifny/Q+psXgn8Bvj3vF//vPGxrbxOj8/Xn5WfD1vyfX1fw3Oh2nanyr55CLgG2Dcv2x34Qp7/COm1sF+vM2ncWdbrAvrtkp8Iq0mBfiDwQH7hHAnsBvwj8KHcdnZ+Qf4t8Jz8JBud97k8/TLg18BL8m2OAl4B7JzbrgPOaVHLW4BbKtNz85NutyZtjwB+BezVYl3vA/4VeDGgXNf0/ELeApyea1qcp6fn292UXwiH5vtzE3Bxw/3fubKdM9g+sK4H9gYOIgXDgrxsKe3D/Z+AS/OL7oh82+PysuW0CPd8HzcBB1TW+8IW+2VFfoE/BziM9IZQrf/NeT/tDJwL3A/s3qz+PO8k4IV5H78aeAJ4eYtt195XTW67lPTG8jpgF+C9pJDeJS9/PenNcSfgjaSOyvMr230KOIv0pv9OUpCPvul+g/SmsU9e96vz/OnAHwHPBvYAriUHf6fnSpP6LwIuy+vfBTi2sv2N5DBufGwbXqejgX0x6c1hX2AWcCetw/0c0ut7Jun1/Fng6rzs7cDX8/2bRnqt7tnrTBp3lvV043AFKTzvrNH2r4Db8+Ue4JFJqmkjcFpl+u+Az1Sm381vezKz8wvykMry0XnVHs33gUUttncO8JUWy/bIL8oX5OmPAFc0abcnKbjPb3O/7gYWNpl/OvD9hnnfA87I128CPlhZ9qfAtxrua6dw/4PK9DXAefn6UlqEe36RPg3sUVl+EbA8X19O63B/UX5eHU8Ouxb7ZBop5H6nMu9/Vetvcpst5B5yY/0t2n8V+G8tltXeV01uuxRYXZneifRJ8NgW7W8fffzzdtdXlj07b3t/4PmknvM+NV4rRwBbKtMtnytNbnsh8DXgRS1eg2MJ9w1U3gSBJbQO93VUPsXl+/tUfs79CfBd4PBO930QLr0ec19O+njbUUT894g4IiKOAD4FfHkS6/p55fq/N5luHNfe1GQd91euPzF6G0mHSrpe0v2SHiOFyYxmRUTEL0i9qEV51iLgi9U2kp5F6m2sjoiL2tynWaReVaMDgJ80zPsJ6VNL2/syBuO5/QHAw3kftKqrqYhYT3rTXAo8IGmFpAOaNB0ivairj982+0LSuZLW5YPWjwB70eLxyu1PkLRa0sO5/Ynt2jcxln31TN0R8RtgmLTfkPQWSbdLeiTXcVhDHfdXbvtEvvpc0vPk4YjY0uS+PVvSZyX9JD93bwb2ljRtHPV/nDTUdqOkDZLOa3M/OzmANo9hgxcAX6nsl3WkTsR+pCGhG4AV+USFj0naZQJ19VRPwz0ibiaNtz1D0gslfUvSbZK+I+l3mtx0MXD1lBRZT4yh7WeAfwPmRMSepHFwtWl/NbBY0u+RPup+e3SBpN1IPcOfkT5StrOJNFzQaDPpCV91UF5nJ2O53838ktRrHLV/Q137StqjRV3tbktEXBURf0C6bwF8tMn2R4CtpECrbgMASceSxu7fQOrJ7k06LjH6eG1z//Pj8Xeksez9cvtVtH98J+KZuiXtRBpq2CzpBaRhwbNJw2t7k4Yq6tSxibTf926y7FzSkNcx+bn7qtHNj7XwiPhFRJwbEYcA/xl4j6TjRhc3NN/msc5vJkOV5ffR4jFsYhPp+MLelcvuEfGziHgqIv4iIuaSjkOdTBoaHUi97rk3swx4d0QcRRpHvLS6MD9xDyaNfQ+iPUgH5x7Pb1zv7NB+FSmgLgS+lHto5B7FdaRPEm8Znd/G5cCHJc1Rcrik6Xn9h0p6k6SdJb2RNLZ/fY37MkL6CH9IjbbN3A68StJBkvYCzh9dEBGbSB+RL5K0u6TDSQeuv1i57YmS9pW0P6mnDoCkF0v6jzlsf0XaR083bjwiniZ9Alyae6VzSQfCR+1BCv8RYGdJF5CGwEb9HJidgxVgV9I47giwVdIJwGSe6niUpFPzWVrnkI7trCYdP4hcB5LeSuq5dxQR95FOHrhU0j6SdpE0GuJ7kPblI5L2BT403sIlnSzpRZJEej08zW8fo5+z7XPqHmB3SSfl5/0HSft51DXA+bnemaSh01YuAz6ScwRJQ5IW5uuvkfTS/ObxGGm4ZrvnzaDoq3DPp/G9ErhW0u2kgx3Pb2i2CLguvzAH0XuBN5HOJvgc6cBVSxHxa1IAHQ9cVVk02rN4LenF9ni+HNtiVf+H9CK4kfTE/TzwrIh4KK/nXNJZAn8OnBwRD3a6I/nj/EeAW/LH3Fd0uk3D7f+edP9/RDoTpfENZTFpHH4z8BXSgey/z8uuJJ05tDHfp+p+3I10kO1B0jDB80ifkJo5mzR0cD9pmPCvK8tuIAXdPaSP+r9i24//1+a/D0n6QR5C+jPSft5CepxXttwBE/c10sHS0QPip+be513AX5KOnfwceCnpDJa6TicF27+Rjl2MvnH+X9KnxwdJbyLfmkDtc0hnnj2e67w0fnsu+kXAB/Nz6r0R8Shp/P5y0ie3X5KGoEb9BenxuZf0XLiyzXY/QXpMbpT0i3w/Rr8fsj+pw/QYabjmn0hnzwyk0aPTvStAmg1cHxGHSdoTuDsiGgO92v6HwLsi4rtTVKJZ35G0lHQw8s29rsX6U1/13CPiMeBeSa8HyMMHLxtdLunFpNOzvtejEs3MBkJPw13S1aSgfrGkYUlnAqcBZ0q6A1gLLKzcZDGwInr9ccPMrM/1fFjGzMy6r6+GZczMrDsc7mZmBZrKXzHcxowZM2L27Nm92ryZ2UC67bbbHoyIoU7tehbus2fPZs2aNb3avJnZQJLU7ucVnuFhGTOzAjnczcwK5HA3MyuQw93MrEAOdzOzAjnczcwK5HA3MyuQw93MrEA9+xKT2aCbfd43ms7fePFJU1yJ2fYc7mZd5tC3fuBhGTOzAjnczcwK5HA3MyuQw93MrEAOdzOzAjnczcwK5HA3MyuQw93MrEAOdzOzAjnczcwK5HA3MytQx9+WkXQFcDLwQEQc1mT5acD78+TjwDsj4o6uVmnWQ61+K8asn9XpuS8HFrRZfi/w6og4HPgwsKwLdZmZ2QR07LlHxM2SZrdZ/t3K5Gpg5sTLMjOziej2mPuZwDdbLZS0RNIaSWtGRka6vGkzMxvVtXCX9BpSuL+/VZuIWBYR8yJi3tDQULc2bWZmDbryzzokHQ5cDpwQEQ91Y51mpWl3YNb/yMO6bcI9d0kHAV8GTo+IeyZekpmZTVSdUyGvBuYDMyQNAx8CdgGIiMuAC4DpwKWSALZGxLzJKtjMzDqrc7bM4g7L3wa8rWsVmZnZhPkbqmZmBXK4m5kVyOFuZlYgh7uZWYEc7mZmBXK4m5kVyOFuZlYgh7uZWYEc7mZmBXK4m5kVyOFuZlYgh7uZWYEc7mZmBerKP+swG3Tt/pGG2SByz93MrEAOdzOzAjnczcwK5HA3MyuQw93MrEAOdzOzAjnczcwK5HA3MytQx3CXdIWkByTd2WK5JH1S0npJP5L08u6XaWZmY1Gn574cWNBm+QnAnHxZAnxm4mWZmdlEdAz3iLgZeLhNk4XA30ayGthb0vO7VaCZmY1dN8bcDwQ2VaaH8zwzM+uRboS7msyLpg2lJZLWSFozMjLShU2bmVkz3Qj3YWBWZXomsLlZw4hYFhHzImLe0NBQFzZtZmbNdOMnf1cCZ0taARwDPBoR93VhvWY7jFY/Obzx4pOmuBIrRcdwl3Q1MB+YIWkY+BCwC0BEXAasAk4E1gNPAG+drGLNzKyejuEeEYs7LA/gXV2ryMzMJszfUDUzK5DD3cysQA53M7MCOdzNzArkcDczK5DD3cysQA53M7MCOdzNzArUjZ8fMBsYrb7mb1Ya99zNzArkcDczK5DD3cysQA53M7MCOdzNzArkcDczK5DD3cysQA53M7MCOdzNzArkcDczK5DD3cysQA53M7MCOdzNzArkcDczK1CtcJe0QNLdktZLOq/J8oMkfVvSDyX9SNKJ3S/VzMzq6hjukqYBlwAnAHOBxZLmNjT7IHBNRBwJLAIu7XahZmZWX52e+9HA+ojYEBFPAiuAhQ1tAtgzX98L2Ny9Es3MbKzq/CemA4FNlelh4JiGNkuBGyW9G3gOcHxXqjMzs3Gp03NXk3nRML0YWB4RM4ETgSslbbduSUskrZG0ZmRkZOzVmplZLXXCfRiYVZmeyfbDLmcC1wBExPeA3YEZjSuKiGURMS8i5g0NDY2vYjMz66hOuN8KzJF0sKRdSQdMVza0+SlwHICkl5DC3V1zM7Me6RjuEbEVOBu4AVhHOitmraQLJZ2Sm50LnCXpDuBq4IyIaBy6MTOzKVLngCoRsQpY1TDvgsr1u4Df725pZmY2XrXC3WzQzD7vG70uwayn/PMDZmYFcribmRXIwzJmfazV8NLGi0+a4kps0LjnbmZWIIe7mVmBHO5mZgVyuJuZFcjhbmZWIIe7mVmBHO5mZgVyuJuZFcjhbmZWIIe7mVmBHO5mZgVyuJuZFcjhbmZWIIe7mVmBHO5mZgXy77nbQPO/0zNrzj13M7MCOdzNzArkcDczK1CtcJe0QNLdktZLOq9FmzdIukvSWklXdbdMMzMbi44HVCVNAy4B/hAYBm6VtDIi7qq0mQOcD/x+RGyR9LzJKtjMzDqr03M/GlgfERsi4klgBbCwoc1ZwCURsQUgIh7obplmZjYWdcL9QGBTZXo4z6s6FDhU0i2SVkta0GxFkpZIWiNpzcjIyPgqNjOzjuqEu5rMi4bpnYE5wHxgMXC5pL23u1HEsoiYFxHzhoaGxlqrmZnVVCfch4FZlemZwOYmbb4WEU9FxL3A3aSwNzOzHqgT7rcCcyQdLGlXYBGwsqHNV4HXAEiaQRqm2dDNQs3MrL6O4R4RW4GzgRuAdcA1EbFW0oWSTsnNbgAeknQX8G3gfRHx0GQVbWZm7dX6bZmIWAWsaph3QeV6AO/JFzMz6zH/cJgNBP9AmNnY+OcHzMwK5HA3MyuQw93MrEAOdzOzAjnczcwK5HA3MyuQT4U0G0CtTg3dePFJU1yJ9Sv33M3MCuRwNzMrkIdlrG/4W6hm3eOeu5lZgRzuZmYFcribmRXI4W5mViCHu5lZgRzuZmYF8qmQNuV8yqPZ5HPP3cysQA53M7MCOdzNzArkcDczK5DD3cysQLXOlpG0APgEMA24PCIubtHudcC1wH+IiDVdq9IGks+KMeudjj13SdOAS4ATgLnAYklzm7TbA/gz4F+6XaSZmY1NnWGZo4H1EbEhIp4EVgALm7T7MPAx4FddrM/MzMahTrgfCGyqTA/nec+QdCQwKyKub7ciSUskrZG0ZmRkZMzFmplZPXXCXU3mxTMLpZ2AvwLO7bSiiFgWEfMiYt7Q0FD9Ks3MbEzqHFAdBmZVpmcCmyvTewCHATdJAtgfWCnpFB9U3TH4wKlZ/6nTc78VmCPpYEm7AouAlaMLI+LRiJgREbMjYjawGnCwm5n1UMdwj4itwNnADcA64JqIWCvpQkmnTHaBZmY2drXOc4+IVcCqhnkXtGg7f+JlWT/y8IvZ4PA3VM3MCuRwNzMrkP9Zh23DQy9mZXC4mxWk1ZvzxotPmuJKrNc8LGNmViD33HdQHn4xK5t77mZmBXLPvXDuoZvtmNxzNzMrkMPdzKxAHpYphIdfzKzKPXczswK55z5g3EM3szrcczczK5DD3cysQB6W6UMeejGziXLP3cysQO6595B76GY2WdxzNzMrkHvuU8A9dDObau65m5kVyD33FvwfbcxskDnczXYA7YYG3WEpU61wl7QA+AQwDbg8Ii5uWP4e4G3AVmAE+JOI+EmXa+17Hls3s37Rccxd0jTgEuAEYC6wWNLchmY/BOZFxOHAdcDHul2omZnVV+eA6tHA+ojYEBFPAiuAhdUGEfHtiHgiT64GZna3TDMzG4s6wzIHApsq08PAMW3anwl8s9kCSUuAJQAHHXRQzRL7i4dezGwQ1Om5q8m8aNpQejMwD/h4s+URsSwi5kXEvKGhofpVmpnZmNTpuQ8DsyrTM4HNjY0kHQ98AHh1RPy6O+WZmdl41Om53wrMkXSwpF2BRcDKagNJRwKfBU6JiAe6X6aZmY1Fx3CPiK3A2cANwDrgmohYK+lCSafkZh8HngtcK+l2SStbrM7MzKZArfPcI2IVsKph3gWV68d3uS4zM5sA/7aMmVmBHO5mZgVyuJuZFWiH/+EwfynJdnT+BdQyueduZlYgh7uZWYEc7mZmBXK4m5kVyOFuZlYgh7uZWYEc7mZmBdrhz3M3s+Z8/vtgc8/dzKxADnczswI53M3MCuRwNzMrkMPdzKxAPlvGzMbEZ9EMBvfczcwK5HA3MyuQh2XMrCs8XNNfdphw939cMrMdyQ4T7mbWG+06Vu7VT55aY+6SFki6W9J6Sec1Wb6bpC/l5f8iaXa3CzUzs/o69twlTQMuAf4QGAZulbQyIu6qNDsT2BIRL5K0CPgo8MbJKBg8xGJWCo/TT546wzJHA+sjYgOApBXAQqAa7guBpfn6dcCnJSkioou1mtkOYqwdOL8ZbK9OuB8IbKpMDwPHtGoTEVslPQpMBx6sNpK0BFiSJx+XdDewF/BopVl1evR6498ZjeuuqXFbdZbVqa9T3eOpt12trZa3q7VTjVNda936OtXtfet9+6g+2rNaW9XXqe6J1PuCWq0jou0FeD1weWX6dOBTDW3WAjMr0z8Gpndad267rNX06PUmf9fUWXenbdVZVqe+GnWPud52tbZa3q7Wydy346nV+9b7dhD3bad5vdy3jZc6B1SHgVmV6ZnA5lZtJO1Meod5uMa6Ab7eZvrrLf6OV7vbt1pWp75W1ydSb6fbNlvertbG6W7u2/HU2my+9229Wuos977trBu1Ns7r5b7dhvI7QusGKazvAY4DfgbcCrwpItZW2rwLeGlEvCMfUD01It4w1sprFy2tiYh5k7X+bhukegepVhisegepVhisegepVpiaejuOuUcaQz8buAGYBlwREWslXUj6aLES+DxwpaT1pB77osksGlg2yevvtkGqd5BqhcGqd5BqhcGqd5BqhSmot2PP3czMBo9/OMzMrEAOdzOzAjnczcwKVFy4SzpE0uclXdfrWpqR9BxJfyPpc5JO63U9nfT7/mwk6b/kffs1Sa/tdT3tSHqJpMskXSfpnb2up5P83L1N0sm9rqUTSfMlfSfv3/m9rqcdSTtJ+oikT0n6426tt6/CXdIVkh6QdGfD/LY/XFYVERsi4szJrXRbY6z7VOC6iDgLOGUq66zUVbveXuzPRmOs96t5357BJP6+UZdqXRcR7wDeAEz5aXzjeL29H7hmaqvcpq6x1BvA48DupO/h9HOtC0nf8n+qq7WO9VtSk3kBXgW8HLizMm8a6RuvhwC7AncAc4GXAtc3XJ5Xud11fVr3+cARuc1V/b6fe7E/u1TvXwIv7/daSW/w3yV9d6RvawWOJ53ifAZwcr8/D4Cd8vL9gC/2ea3nAW/Pbbr2OuurnntE3Mz232x95ofLIuJJYAWwMCL+NSJObrg8MOVFM7a6Se/MM3Obnuz/Mdbbc2OpV8lHgW9GxA/6udbcfmVEvBKY8iG6Mdb6GuAVwJuAsyRN+XN3jPnwm7x8C7DbFJYJjCsTtuQ2T3erhr4K9xaa/XDZga0aS5ou6TLgSEnnT3ZxbbSq+8vAH0n6DBP/OYVualpvH+3PRq3277tJvczXSXpHLwprotW+nS/pk5I+C6zqTWnbaVprRHwgIs4BrgI+VwnPXmu1b0/N+/VK4NM9qWx77TLhP0n6FHBztzY2CP+JSU3mtfzmVUQ8BPTDi7pp3RHxS+CtU11MDa3q7Zf92ahVvZ8EPjnVxXTQqtabgJumtpSO2r7eImL51JVSS6t9+2VSaPaTVrU+QfqfGF01CD33Oj9c1o8GrW7XO3lc6+QZpHqntNZBCPdbgTmSDpa0K+mgzsoe11THoNXteiePa508g1Tv1NY61UeROxxhvhq4j9+eEnRmnn8i6Zcpfwx8oNd1Dnrdrte1Dlqtg1ZvP9TqHw4zMyvQIAzLmJnZGDnczcwK5HA3MyuQw93MrEAOdzOzAjnczcwK5HA3MyuQw93MrEAOdzOzAv1/8vjX3UQb0Q0AAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10be16400>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.hist(all_values[~pd.isnull(all_values)], bins=np.logspace(np.log10(0.1),np.log10(1000000), 50))\n",
"plt.gca().set_xscale(\"log\")\n",
"plt.title(\"mrna v2 continuous data in pancan studies\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:anaconda3]",
"language": "python",
"name": "conda-env-anaconda3-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment