Skip to content

Instantly share code, notes, and snippets.

@ctb
Created September 27, 2012 23:18
Show Gist options
  • Save ctb/3797036 to your computer and use it in GitHub Desktop.
Save ctb/3797036 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "hw3-solutions-1-and-2"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": "## Week 3, problem 1 -- load in files, calculate averages/stddevs, and plot"
},
{
"cell_type": "code",
"collapsed": false,
"input": "import csv\nimport math\n\n# function to load x, y values from a CSV file. Note, should take 'filename' and use 'csv' module.\ndef load_xy(filename):\n fp = open(filename, 'rb')\n r = csv.reader(fp)\n \n xx = []\n yy = []\n for x, y in r:\n x = float(x)\n y = float(y)\n xx.append(x)\n yy.append(y)\n \n return xx, yy\n\ndef average(yy):\n # could do like this:\n #return sum(yy) / float(len(yy))\n\n # or:\n total = 0.\n for y in yy:\n total += y\n return total / float(len(yy))\n\ndef stddev(yy):\n avg = average(yy)\n diffsq = 0.\n \n for y in yy:\n diffsq += (y - avg)**2\n return math.sqrt(diffsq) / float(len(yy))",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": "x1, y1 = load_xy('/usr/local/notebooks/quartet/q1.csv')\nx2, y2 = load_xy('/usr/local/notebooks/quartet/q2.csv')\nx3, y3 = load_xy('/usr/local/notebooks/quartet/q3.csv')\nx4, y4 = load_xy('/usr/local/notebooks/quartet/q4.csv')\n\nprint 'q1:', average(y1), stddev(y1)\nprint 'q2:', average(y2), stddev(y2)\nprint 'q3:', average(y3), stddev(y3)\nprint 'q4:', average(y4), stddev(y4)\n",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "q1: 7.50090909091 0.584034775577\nq2: 7.50090909091 0.584060246164\nq3: 7.5 0.583705744956\nq4: 7.50090909091 0.583750278525\n"
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": "plot(x1, y1, 'r.')\nplot(x2, y2, 'b+')\nplot(x3, y3, 'gx')\nplot(x4, y4, 'y*')",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 12,
"text": "[<matplotlib.lines.Line2D at 0x7fb96448c0d0>]"
},
{
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAXEAAAD9CAYAAABKgkezAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGlhJREFUeJzt3X9QlPeBBvBnGU3gxjSLtZr0YCNWjaAoaKgExFk9JDkt\n1UhRyY0YwarYdgQNk6lYAe8go2tM7TSYgwQc7cSMMUy0WRa1cVajJrKmyaCw7RqFCs1dfgibILCJ\nynt/7IkSFnR339f3/bLPZ4YBNvDyDGSfvPnu94dOkiQJREQkpCC1AxARke9Y4kREAmOJExEJjCVO\nRCQwljgRkcBY4kREAhu0xLOysjBmzBhER0f3+2cvvfQSgoKC0NbWplg4IiIa3KAlvnLlStTW1vZ7\nvKWlBceOHcNjjz2mWDAiIrq7QUs8KSkJoaGh/R7fsGEDtm/frlgoIiK6N16PiR86dAhhYWGYOnWq\nEnmIiMgLw7z54q6uLpSWluLYsWO9jw20al+n0/mXjIgoQHmzG4pXd+KXLl1Cc3Mzpk2bhoiICLS2\ntmLGjBn44osvBgyi9bfCwkLVMzAnc4qakTnlf/OWV3fi0dHR+Pzzz3s/j4iIwEcffYSRI0d6/YOJ\niMh/g96JZ2RkICEhAQ6HA+Hh4aiqqurzzzlkQkSkrkHvxPfv3z/oN1++fFnWMGowGo1qR7gnzCkv\nEXKKkBFgTrXpJF8GYe7lwjqdT+M7RESBzNvu5LJ7IiKBscSJiATGEiciEhhLnIhIYCxxIiKBscSJ\niATGEiciEhhLnIhIYCxxIiKBscSJiATGEiciEhhLnIhIYCxxIiKBscSJiATGEifyg9lhhtPl7POY\n0+WE2WFWKREFGpY4kR8SDYkoOF7QW+ROlxMFxwuQaEhUORkFCh4KQeSnW8Wdn5AP0xkTSuaWQB+s\nVzsWCcrb7mSJE8mg2dmMiF0RaFrfhLH6sWrHIYHxZB+i+8zpcsJ0xoSm9U0wnTH1GyMnUhJLnMgP\nt4ZSSuaWYKx+LErmlvQZI6ehS5IkbNmyQfURB5Y4kR9OXzndZwxcH6xHydwSnL5yWuVkpLSamrfh\ncLwGi6Va1RwcEyci8kJZ2U5UV7+OceOuIyPjIvbvn4DLl4dj8eJsrFu3we/re9udw/z+iUREASQn\nJw8GQziqqzdCpwNu3HAhL68U8+enqZKHwylERF7Q6XTQ6XTo6nJiz54odHU5ex9TA0uciMhLdvt5\nZGZWoarqAjIzq9DYWK9aFo6JExFpCOeJExEFkEFLPCsrC2PGjEF0dHTvY/n5+YiMjMT06dORm5uL\n7u5uxUMSEZFng5b4ypUrUVtb2+exlJQUNDQ04Ny5c+js7MQbb7yhaEAiIhrYoCWelJSE0NDQPo/N\nmzcPQUFBCAoKwlNPPYUTJ04oGpCIiAbm15h4RUUFUlNT5cpCRERe8nmxz9atW/HQQw8hPT19wK8p\nKirq/dhoNMJoNPr644iIhiSr1Qqr1erz9991imFzczNSU1Nx/vz53sf27NmDiooKvPfeewgODvZ8\nYU4xJCLymuLL7mtra2EymXDy5MkBC5yIiO6PQe/EMzIycOLECXz11VcYM2YMiouL8eKLL+K7777D\nyJEjAQBPPvkkysrK+l+Yd+JERF7jyT5ERALjik0iogDCEiciEhhLnIhIYCxxIiKBscSJiATGEici\nEhhLnIhIYCxxIiKBscSJiATGEiciEhhLnIhIYCxxIiKBscSJiATGEiciEhhLnAKP0wksWeJ+TyQ4\nljgFFqcTSEkB3nrL/Z5FToJjiVNgWb0asNncH9ts7s+JBMaTfSiw3LoTt9mAuDjg6FFAr1c7FVEv\nHs9GdDdOp/sOvLycBU6awxInIhIYz9gkIgogLHEiIoGxxImIBMYSJyISGEuciEhgLHEiIoGxxImI\nBMYSJyIS2KAlnpWVhTFjxiA6Orr3sY6ODixcuBAGgwGLFi3CtWvXFA9JRESeDVriK1euRG1tbZ/H\ndu/eDYPBgIsXLyIsLAyvvvqqogGJiGhgg5Z4UlISQkND+zxWV1eH7OxsPPjgg8jKysLZs2cVDUhE\nRAPzekzcZrNh0qRJAIBJkyahrq5O9lBEAY8HV9A9GubtN3izMUtRUVHvx0ajEUaj0dsfRxR47twu\nt7mZ2+UOcVarFVar1efv97rE4+LiYLfbERsbC7vdjri4uAG/9s4SJ6J75OngigMH1M1Eivn+DW5x\ncbFX3+/1cMrMmTNRWVmJ7u5uVFZWIj4+3ttLENFgysvdB1YA7vfl5ermIU0btMQzMjKQkJAAh8OB\n8PBwVFVVIScnB1euXMHjjz+Of/7zn1i7du39ykoUGPR69xBKejqHUuiueCgEEZGG8FAIIqIAwhIn\nIhIYS5yISGAscSIigbHEiYgExhInIhIYS5w0z48VyURDHkucNI8lTjQwljjJSpTCFSUn0d14vQEW\n0WCsVkCOzSqt1ttFe+d+QEajfNfnppo0FLDESZO+X9bcEJPIM5Y4+U3pu2a5iJKTyBsscfKb0nfN\nchUs7+5pKOILm6R5vEsmGhhLXACSJGHLlg1CbO0rSuFqNafZYYbT1fdcTafLCbPDrFIi0jqWuABq\nat6Gw/EaLJZq+S7qdMJqLJL9IF6tluP3KZFTjmmLiYZEFBwv6C1yp8uJguMFSDQk+n9xGpJY4hpW\nVrYTycmTcejQJqxZ04F33vktkpMno6xsp38X/v+DeK0n4D6Qlyeqy0KOEtcH61EytwQFxwvQ7GxG\nwfEClMwtgT6Yp/uQZyxxDcvJyUNubhFu3HBBpwNu3HAhL68YOTl5/l3Y00G8pBn6YD3yE/IRsSsC\n+Qn5LHAaFGenaJhOp4NOp0NXlxN79kTB5WrpfcxXVitgHbcX+PE8FH/2S+DH/wqMWw6jVZyhEC1R\nYtqi0+WE6YwJTeubYDpj4p04DU5SiIKXDigm0xbJbD4o9fT0SGbzQclk2iLPhdvbpcKoA5LU3i7P\n9UgqLPT/Gu3d7dI68zqpvbvd4+c09HnbnTwoWQCSJKGwcCOKi1/y6y78+4qKOFdaTnL8Ps0OMxIN\niX3uvJ0uJ05fOY0FExf4d3ESAg9KHoIUmZ0CDp/ITY7f54KJC/oNneiD9SxwGhDvxDWsrGwnqqtf\nx7hx15GRcRH790/A5cvDsXhxNtat26B2PCJSgLfdyRc2NSwnJw8GQziqqzfeMTulFPPnp6kdjYg0\ngsMpGvb92SldXU6/Z6cIx+kElizhXHaiAbDENc5uP4/MzCpUVV1AZmYVGhvr1Y50//z/oiS89RYX\nJRENgGPipF1LlrgL/Jb0dODAAZ8vx5kfJALOThmiAvI4sfJyIC7O/XFcnPtzP4i2L0lA/s3Jaz6X\neEVFBRISEjBjxgzk5ubKmYk8CMgntF4PHD3qvgM/etT9uT+XE2xfkoD8m5PXfCrxtrY2lJaW4tix\nY7DZbHA4HDhy5Ijc2Ug0SrwIqde7h1D8LPDey3FfEhpifJpiGBISAkmS8PXXXwMAurq6EBoaKmsw\nEuw4sVsvQtpsQHOzLHfOStD6viRC/c1JG3xd319TUyMNHz5cGjFihLRp0ya/1//TwHp6eqTk5Dyp\np6dH7SgDS0+XJOD2W3q62on6EW1fEjn2YiHxeNudPt2Jf/nll8jJyUFjYyNCQ0ORnp4Os9mMBQv6\nvsJfdMdGEkajEUbeSvikpuZtjBjxGiyWRO0u9Ckvd9+B22yyvAiphNNXTve58741Rs7ZKaQmq9UK\nqx8vgPg0xdBsNmPfvn148803AQC7d+9Gc3Mztm3bdvvCnGLoN+GW3Tud7r3Jy8v9HkrhdED3sArv\newLPfZlimJSUhHPnzqGtrQ3ffvstLBYLUlJSfLkUDUKxQyGUIuOLkKJNB1QCC5zuhU8l/oMf/ACb\nN2/GM888g1mzZmHatGmYM2eO3NkC3p3L7rdvH4nOzsBZdi/adEAitXDFpsbt2FGI7u5OXLjw35gy\nZS1CQv4Fzz9ffPdvHCKanc2I2BWBpvVNGKsfq3YcIsVxxeYQUla2E7W1B9HSchhr115DS8sh1NYe\n9P+gZEF8fzrgraEVIrqNJa5hwo2Jy+jWGHjJ3BKM1Y/tHVrRbJFzt0VSCUtcw0TZitbsMPcrV6fL\nCbPD7PM1B5sOqDncbZFUxBLXOBG2olViJolQx5StXu2eHw+4369erW4eCih8YZNkcau48xPyNbmc\nXVF3bjkQF6fZLQdIDN52J0ucZBPQM0lkXOhEgY2zUzQgELcQDfiZJDLvtkh0rwK7xBWaURBoJS7c\nTBKiISRwS5wzCmQj1EwSoiEmcMfEZT6/8fv7QBcWuj/W2j7Q3FiKSNu87k4Ztr/1SMFLy6O9XZLi\n4tx7X8fFuT+Xidz7QPf09Ei/+508+4mLtqc2UaDxtjsDdzhF5vMblVRT8zYcjtdgsVT7fS1uLEU0\ntATucIqC5NoHWsn9xAN6OiCRhnGe+BAiSRLM5oN4+61cIOgzoOfH+MWSXZg/P82vpfcBvTCHSOM4\nT3wI0el00HV148qlz9D+FdBy6TPourpkKXBOByQaGngnrmG/KlqFU/v2YXL4d/hlIVBRDDS0PIBZ\ny5fjlaLXfLomZ6cQaRuHU4aQ9u52rHwxDfqPTuG5/OvYYxoO54wkVP32IEJDQtWOR0QK4HDKEBIa\nEopnY5+DMxioKBmB9mDgP6avYIETUS+WuMZduXQJi5b9AW8kXsMzy/6Af3x6Se1IRKQhHE7ROKfL\niU3vbcIDJ7/Dd7MfQOm/lXImCdEQxuGUIeTWTBLjd/H4338cwJzr8ZxJQkR98E5cw35VtAqNJ09h\nwvie3sU+Fz8NQtTsWT7PTiEibeOduEqUOGfyj4UV2LjhP/sclPz8xv/CHwsr/I1LREMES1wmSpwz\nKcpByUSkHpa4TJTaWEqEg5KJSD0cE5cZN5YiIn9wTFxFAX/OJBHddwFf4nKdh8mNpYhIDT6XeGdn\nJ1asWIGJEyciKioKH374oZy5FHXnTJJbJe7vTBKeM0lEavC5xAsLC2EwGFBfX4/6+npERkbKmUtR\nSswkWTBxQb8XMfXBeu4MSESK8vmFzZiYGHzwwQcICQnxfGENv7BptQK1VieOowC2XfmIW2/CXJTg\naaNeU4caE1HguS9b0ba2tiI5ORnx8fGw2+1YvHgx1q9fj+DgYJ+DqIEzSYhIa7ztzmG+/BCXywWH\nwwGTyYTk5GSsWbMGBw4cQGZmZp+vKyoq6v3YaDTCqKHb3FszSdZLTTyijIhUY7VaYfVjhoXPwymR\nkZGw2+0AAIvFgr1792L//v23L6zhO/E7Z5J88qEeMfFOnvpORJpw3+aJT5gwAWfPnkVPTw/MZjOS\nk5N9vdSglNiT5M6ZJEaj9meSSJKELVs2aPY/ikSkHp9LfMeOHVi/fj2mT5+O4OBgLFu2TM5cvTiT\nBKipeRsOx2uwWKrVjkJEGiPEsnuny4kCy/PIP/g/MP3iUZT8+46AGPYoK9uJ6urXMW7c9d6taC9f\nHo7Fi7Oxbt0GteMRkQLuywub95veBeT/3oaIufVo+v1U6OcACL7rtwkvJycPBkM4qqs39m5Fm5dX\nivnz09SORkQaIcSye+fa52AaUY+m3wOmEfVwrn1O7Uj3xa1tZzs72/HCCyPR2dnOrWiJqA/Nl7jT\n5UTB0lEoaY/FWCdQ0h6LgqWjAmZPErv9PKZMWYOIiOuYMmUtt6Iloj40PyZudpiRaEiE3gVg9Wqg\nvBzOYPcME62+ECkXjokTBZ77smLTmyBOlzMgClcJkiTBbD6I6uqNyMxswd694UhL24n589M4pEI0\nRGlqP3E5pgMGMh7PRkR3o2iJcxWk/+z281i+vBLh4SlYvrySY+JE1IeiwylN7dxYSg5m80Hs25eF\nzMwqTi8kGuI0NZzCI8r8U1a2E8nJk3Ho0CasWdOBd975LZKTJ6OsbKfa0YhIIxQtcR5R5p+cnDzk\n5hbhxg3XHYt9ipGTk6d2NCLSCEVLXOsbS2kdX9gkortRfLGPljeWEoHdfh6ZmVWoqrqAzMwqvrBJ\nRH1ofrEPEVEg0dQLm0REpCyWOBGRwFjicnM6gSVL3O+JiBQmVIn7cZbo/eF0AikpwFtvud+zyIlI\nYSxxOa1eDdhs7o9tNvfnREQKEqrENa+8HIiLc38cF+f+nIhIQZo/ns1qvX0HXlx8+3Gj0f2mKXo9\ncPRo777n0HPjLyJSllDzxIuK3G9EREMV54kTEQUQoUpcc8MnREQqE2o4hYhoqONwChFRAGGJExEJ\nTNkS54pFIiJFKVviXHpORKQoZUucS8+JiBTlc4nfvHkTsbGxSE1NHfiLuPSciEhRPpf4rl27EBUV\nNfh5j0ePcuk5EZGCfCrx1tZW1NTUYNWqVYPPZ2SBExEpyqcSz8vLg8lkQlAQZygSEanJ610M3333\nXYwePRqxsbGw3mWD76I7dqsyGo0wct08EVEfVqv1rl06GK+X3W/atAn79u3DsGHD4HK58M033yAt\nLQ179+7te2Euuyci8pq33enX3iknTpzAjh078Oc//9nvIEREpMLeKYPOTiEiIkVxF0MiIg3hLoZE\nRAGEJU5EJDCWOBGRwFjiREQCY4kTEQmMJU5EJDCWOBGRwFjiREQCY4kTEQmMJU5EJDCWOBGRwLze\nT9wbt7YTNxrdb0REJC9ugEVEpCHcAIuIKICwxImIBMYSJyISGEuciEhgLHEiIoGxxImIBMYSJyIS\nGEuciEhgLHEiIoGxxImIBMYSJyISGEuciEhgLHEiIoGxxImIBMYSJyISmE8l3tLSgjlz5mDy5Mkw\nGo1444035M5131itVrUj3BPmlJcIOUXICDCn2nwq8eHDh+Pll19GQ0MDDh48iM2bN6Ojo0PubPeF\nKH9Y5pSXCDlFyAgwp9p8KvFHHnkEMTExAIBRo0Zh8uTJOHfunKzBiIjo7vweE//000/R0NCAn/70\np3LkISIiL/h1xmZHRweMRiO2bNmChQsX9r2wTud3OCKiQORNLft82v3169eRlpaG5cuX9ytwb0MQ\nEZFvfLoTlyQJK1aswKhRo7Bz504lchER0T3wqcRPnTqF2bNnY+rUqb3DJi+++CKefvpp2QMSEdHA\nfHphc9asWejp6cEnn3yCjz/+GB9//HGfAr958yZiY2ORmpoqW1C5dXZ2YsWKFZg4cSKioqLw4Ycf\nqh3Jo4qKCiQkJGDGjBnIzc1VO06vrKwsjBkzBtHR0b2PdXR0YOHChTAYDFi0aBGuXbumYkI3Tznz\n8/MRGRmJ6dOnIzc3F93d3Som9JzxlpdeeglBQUFoa2tTIVlfA+WsqqpCZGQkJk+ejBdeeEGldLd5\nytnY2Iif/exniImJQWpqKux2u4oJ3QZab+Pt80iRFZu7du1CVFSUpl/cLCwshMFgQH19Perr6xEZ\nGal2pH7a2tpQWlqKY8eOwWazweFw4MiRI2rHAgCsXLkStbW1fR7bvXs3DAYDLl68iLCwMLz66qsq\npbvNU86UlBQ0NDTg3Llz6OzsVH2xmqeMgPtJfuzYMTz22GMqpOrPU84LFy6gvLwchw8fRkNDA55/\n/nmV0t3mKefWrVuRmZmJTz75BM8++yy2bt2qUrrbBlpv4+3zSPYSb21tRU1NDVatWqXpFzf/8pe/\nYNOmTQgODsawYcPw8MMPqx2pn5CQEEiShK+//hrd3d3o6upCaGio2rEAAElJSf2y1NXVITs7Gw8+\n+CCysrJw9uxZldLd5innvHnzEBQUhKCgIDz11FM4ceKESuncPGUEgA0bNmD79u0qJPLMU06LxYLs\n7GxMmDABAPCjH/1IjWh9eMr58MMP4+rVq+jp6cHVq1c18TzytN7GZrN5/TySvcTz8vJgMpkQFKTd\nbVlaW1vhcrmQk5ODmTNnYtu2bXC5XGrH6ickJAS7d+/G2LFj8cgjjyAxMVHT8/FtNhsmTZoEAJg0\naRLq6upUTnR3FRUVmhz2O3ToEMLCwjB16lS1owzq6NGjuHDhAp544gmsWrUKjY2NakfyyGQyYdeu\nXQgNDcUrr7yCbdu2qR2pjzvX23j7PJK1ad99912MHj0asbGxmr4Ld7lccDgcSEtLg9VqRUNDAw4c\nOKB2rH6+/PJL5OTkoLGxEc3Nzfjggw9gNpvVjjUgLf/NPdm6dSseeughpKenqx2lj66uLpSWlqK4\nuLj3Ma3+bl0uF9ra2vD+++9j4cKF+PWvf612JI+ysrLwm9/8BlevXsXatWuRnZ2tdqReHR0dWLp0\nKV5++WWMGDHC67+1rCV+5swZHD58GBEREcjIyMDx48eRmZkp54+Qxfjx4/H4448jNTUVISEhyMjI\ngMViUTtWP3V1dYiPj8f48ePxwx/+EOnp6Th58qTasQYUFxfX+4KR3W5HXFycyokGtmfPHhw5cgR/\n+tOf1I7Sz6VLl9Dc3Ixp06YhIiICra2tmDFjBr744gu1o/UTHx+PpUuXIiQkBKmpqfjb3/6myf+r\nPXXqFLKysjBs2DBkZ2dr5nnkab2Nt88jWUu8tLQULS0taGpqwptvvom5c+di7969cv4I2UyYMAFn\nz55FT08PzGYzkpOT1Y7UT1JSEs6dO4e2tjZ8++23sFgsSElJUTvWgGbOnInKykp0d3ejsrIS8fHx\nakfyqLa2FiaTCYcPH0ZwcLDacfqJjo7G559/jqamJjQ1NSEsLAx//etfMXr0aLWj9fPkk0/CYrFA\nkiScPXsWP/nJTzT5O50zZw4OHz4MwD1UNW/ePJUTuf/vKjs7G1OmTOkz88zr55GkEKvVKqWmpip1\neb/9/e9/l2bOnClNmzZN2rhxo3Tt2jW1I3lUVVUlzZ49W3riiSekzZs3Szdv3lQ7kiRJkrRs2TLp\n0UcflR544AEpLCxMqqyslL755hvp5z//uRQeHi4tXLhQ6ujoUDtmb87hw4dLYWFh0uuvvy6NHz9e\nMhgMUkxMjBQTEyPl5ORoIuOdv8s7RURESFevXlUp3W2ect64cUNas2aNNGnSJGnRokVSXV2d2jH7\n/c0rKyulCxcuSMuWLZOmTp0qPfvss5Ldblc7pvT+++9LOp1OmjZtWu+/ixaLxevnkV97pxARkbq0\nO4WEiIjuiiVORCQwljgRkcBY4kREAmOJExEJjCVORCSw/wOQIj7Hvl3PmQAAAABJRU5ErkJggg==\n",
"text": "<matplotlib.figure.Figure at 0x7fb96420b510>"
}
],
"prompt_number": 12
},
{
"cell_type": "markdown",
"metadata": {},
"source": "## Problem 2 -- invert dictionaries"
},
{
"cell_type": "code",
"collapsed": false,
"input": "def invert_d(dd):\n e = {}\n for k in dd:\n value = dd[k]\n e[value] = k\n return e\n",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": "# test --\n\nfirst_d = { 'first': 1, 'second': 2 }\ninverted_test = { 1: 'first', 2: 'second' }\n\nassert invert_d(first_d) == inverted_test",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": "bad_d = { 'one': 1, 'uno': 1 }\n\nprint invert_d(bad_d)",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "{1: 'one'}\n"
}
],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": "What's happening with 'bad_d', above, is that the value '1' belongs to two keys, 'one' and 'uno'. When you invert this so that the value 'uno' belongs to the key 1, and the value 'one' belongs to the key 1, you run into the problem that keys have to be unique -- so either 'uno' or 'one' will be lost. Which one depends on the order in which the 'for' loop iterates over the dictionary -- the *last* key (of 'one' and 'uno') to be returned from bad_d will be assigned to the inverted dictionary as the value for the key 1."
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
{
"metadata": {
"name": "hw3-solutions-3"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": "from cStringIO import StringIO\n\n# load sequences in from a file handle, 'fp' -- the result of an 'open'.\ndef load_sequences(fp):\n sequences = {}\n\n name = None\n seq = \"\"\n\n for line in fp:\n # new record? \n if line.startswith('>'): # new sequence records start with >\n sequences[name] = seq # save current record \n\n name = line # the new record name is this line! \n name = name[1:] # strip off the '>' \n name = name.strip() # remove leading & trailing whitespace \n seq = \"\" # start 'seq' over again.\n else:\n line = line.strip() # NOT a new record; remove whitespace. \n if line: # if any sequence at all... \n seq += line # add to my current sequence. \n\n return sequences",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert results['a'] == 'ACTG'\nassert results['c'] == 'TGCA'\nassert results['e'] == 'GGGG'\n\nprint 'my spot checks tell me that everything is working! good luck!'",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "{'a': 'ACTG', None: '', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT'}\nmy spot checks tell me that everything is working! good luck!\n"
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": "## Test cell 1 -- test multiline sequences"
},
{
"cell_type": "code",
"collapsed": false,
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\nAGCT\n>c\nTGCA\n\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert results['b'] == 'AGCTAGCT'\n\nprint 'my spot checks tell me that everything is working! good luck!'",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "{'a': 'ACTG', None: '', 'b': 'AGCTAGCT'}\nmy spot checks tell me that everything is working! good luck!\n"
}
],
"prompt_number": 7
},
{
"cell_type": "markdown",
"metadata": {},
"source": "## Test cell 2 -- test blank lines after sequences"
},
{
"cell_type": "code",
"collapsed": false,
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n\n\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert results['a'] == 'ACTG'\n\nprint 'my spot checks tell me that everything is working! good luck!'",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "{'a': 'ACTG', None: '', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT'}\nmy spot checks tell me that everything is working! good luck!\n"
}
],
"prompt_number": 8
},
{
"cell_type": "markdown",
"metadata": {},
"source": "## Test cell 3 -- "
},
{
"cell_type": "code",
"collapsed": false,
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nassert results == {}\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\n\n\nprint 'my spot checks tell me that everything is working! good luck!'",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "my spot checks tell me that everything is working! good luck!\n"
}
],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": "## Bug #1 -- empty sequence always created if there are any sequences"
},
{
"cell_type": "code",
"collapsed": false,
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert None not in results\n\n",
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "AssertionError",
"evalue": "",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-13-96e0c0482260>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;31m# check a few things; here is where you want to edit/change the assert statements to check for new things\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[1;32massert\u001b[0m \u001b[0mNone\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mresults\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 23\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAssertionError\u001b[0m: "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": "{'a': 'ACTG', None: '', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT'}\n"
}
],
"prompt_number": 13
},
{
"cell_type": "markdown",
"metadata": {},
"source": "## Bug #2 -- last sequence is always omitted"
},
{
"cell_type": "code",
"collapsed": false,
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert results['f'] == 'CCCC'\n",
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'f'",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-15-c56d38a83f5c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;31m# check a few things; here is where you want to edit/change the assert statements to check for new things\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[1;32massert\u001b[0m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'f'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'CCCC'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m: 'f'"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": "{'a': 'ACTG', None: '', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT'}\n"
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": "from cStringIO import StringIO\n\n# load sequences in from a file handle, 'fp' -- the result of an 'open'.\ndef FIXED_load_sequences(fp):\n sequences = {}\n\n name = None\n seq = \"\"\n\n for line in fp:\n # new record? \n if line.startswith('>'): # new sequence records start with >\n if name is not None:\n sequences[name] = seq ### save current record **if it's not the first time through the code ** \n \n name = line # the new record name is this line! \n name = name[1:] # strip off the '>' \n name = name.strip() # remove leading & trailing whitespace \n seq = \"\" # start 'seq' over again.\n else:\n line = line.strip() # NOT a new record; remove whitespace. \n if line: # if any sequence at all... \n seq += line # add to my current sequence.\n \n if name is not None: ### **add final sequence to the dict**\n sequences[name] = seq\n\n return sequences",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = FIXED_load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert None not in results\nassert results['f'] == 'CCCC'\n",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "{'a': 'ACTG', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT', 'f': 'CCCC'}\n"
}
],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment