Created
September 27, 2012 23:19
-
-
Save ctb/3797039 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "hw3-solutions-3" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "from cStringIO import StringIO\n\n# load sequences in from a file handle, 'fp' -- the result of an 'open'.\ndef load_sequences(fp):\n sequences = {}\n\n name = None\n seq = \"\"\n\n for line in fp:\n # new record? \n if line.startswith('>'): # new sequence records start with >\n sequences[name] = seq # save current record \n\n name = line # the new record name is this line! \n name = name[1:] # strip off the '>' \n name = name.strip() # remove leading & trailing whitespace \n seq = \"\" # start 'seq' over again.\n else:\n line = line.strip() # NOT a new record; remove whitespace. \n if line: # if any sequence at all... \n seq += line # add to my current sequence. \n\n return sequences", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert results['a'] == 'ACTG'\nassert results['c'] == 'TGCA'\nassert results['e'] == 'GGGG'\n\nprint 'my spot checks tell me that everything is working! good luck!'", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "{'a': 'ACTG', None: '', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT'}\nmy spot checks tell me that everything is working! good luck!\n" | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Test cell 1 -- test multiline sequences" | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\nAGCT\n>c\nTGCA\n\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert results['b'] == 'AGCTAGCT'\n\nprint 'my spot checks tell me that everything is working! good luck!'", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "{'a': 'ACTG', None: '', 'b': 'AGCTAGCT'}\nmy spot checks tell me that everything is working! good luck!\n" | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Test cell 2 -- test blank lines after sequences" | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n\n\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert results['a'] == 'ACTG'\n\nprint 'my spot checks tell me that everything is working! good luck!'", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "{'a': 'ACTG', None: '', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT'}\nmy spot checks tell me that everything is working! good luck!\n" | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Test cell 3 -- " | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nassert results == {}\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\n\n\nprint 'my spot checks tell me that everything is working! good luck!'", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "my spot checks tell me that everything is working! good luck!\n" | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Bug #1 -- empty sequence always created if there are any sequences" | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert None not in results\n\n", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "AssertionError", | |
"evalue": "", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-13-96e0c0482260>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;31m# check a few things; here is where you want to edit/change the assert statements to check for new things\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[1;32massert\u001b[0m \u001b[0mNone\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mresults\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 23\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;31mAssertionError\u001b[0m: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "{'a': 'ACTG', None: '', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT'}\n" | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Bug #2 -- last sequence is always omitted" | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert results['f'] == 'CCCC'\n", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "KeyError", | |
"evalue": "'f'", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-15-c56d38a83f5c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;31m# check a few things; here is where you want to edit/change the assert statements to check for new things\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[1;32massert\u001b[0m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'f'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'CCCC'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mKeyError\u001b[0m: 'f'" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "{'a': 'ACTG', None: '', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT'}\n" | |
} | |
], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "from cStringIO import StringIO\n\n# load sequences in from a file handle, 'fp' -- the result of an 'open'.\ndef FIXED_load_sequences(fp):\n sequences = {}\n\n name = None\n seq = \"\"\n\n for line in fp:\n # new record? \n if line.startswith('>'): # new sequence records start with >\n if name is not None:\n sequences[name] = seq ### save current record **if it's not the first time through the code ** \n \n name = line # the new record name is this line! \n name = name[1:] # strip off the '>' \n name = name.strip() # remove leading & trailing whitespace \n seq = \"\" # start 'seq' over again.\n else:\n line = line.strip() # NOT a new record; remove whitespace. \n if line: # if any sequence at all... \n seq += line # add to my current sequence.\n \n if name is not None: ### **add final sequence to the dict**\n sequences[name] = seq\n\n return sequences", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 19 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# a first, simple test example; this is the data you will want to change.\ndata = \"\"\">a\nACTG\n>b\nAGCT\n>c\nTGCA\n>d\nTTTT\n>e\nGGGG\n>f\nCCCC\n\n\"\"\"\ndata_fp = StringIO(data) # make the data _look_ like it's coming from a file, even though it's really an in-memory string.\n\nresults = FIXED_load_sequences(data_fp)\nprint results\n\n# check a few things; here is where you want to edit/change the assert statements to check for new things\nassert None not in results\nassert results['f'] == 'CCCC'\n", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "{'a': 'ACTG', 'c': 'TGCA', 'b': 'AGCT', 'e': 'GGGG', 'd': 'TTTT', 'f': 'CCCC'}\n" | |
} | |
], | |
"prompt_number": 20 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment