Created
October 6, 2013 23:30
-
-
Save BenLangmead/6860491 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def rotations(t):\n", | |
" ''' Return list of rotations of input string t '''\n", | |
" tt = t * 2\n", | |
" return [ tt[i:i+len(t)] for i in xrange(0, len(t)) ]\n", | |
"\n", | |
"def bwm(t):\n", | |
" ''' Return lexicographically sorted list of t\u2019s rotations '''\n", | |
" return sorted(rotations(t))\n", | |
"\n", | |
"def bwtViaBwm(t):\n", | |
" ''' Given T, returns BWT(T) by way of the BWM '''\n", | |
" return ''.join(map(lambda x: x[-1], bwm(t)))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"t = 'abaaba$'\n", | |
"b = bwtViaBwm(t)\n", | |
"b" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
"'abba$aa'" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def rankBwt(bw):\n", | |
" ''' Given BWT string bw, return parallel list of B-ranks. Also\n", | |
" returns tots: map from character to # times it appears. '''\n", | |
" tots = dict()\n", | |
" ranks = []\n", | |
" for c in bw:\n", | |
" if c not in tots: tots[c] = 0\n", | |
" ranks.append(tots[c])\n", | |
" tots[c] += 1\n", | |
" return ranks, tots" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ranks, tots = rankBwt(b)\n", | |
"print zip(b, ranks) # print characters of BWT(T) in order, along with rank" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"[('a', 0), ('b', 0), ('b', 1), ('a', 1), ('$', 0), ('a', 2), ('a', 3)]\n" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def firstCol(tots):\n", | |
" ''' Return map from character to the range of rows prefixed by\n", | |
" the character. '''\n", | |
" first = {}\n", | |
" totc = 0\n", | |
" for c, count in sorted(tots.iteritems()):\n", | |
" first[c] = (totc, totc + count)\n", | |
" totc += count\n", | |
" return first" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"firstCol(tots)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"{'$': (0, 1), 'a': (1, 5), 'b': (5, 7)}" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# confirm that the representation of the first column above is sensible\n", | |
"print('\\n'.join(bwm(t)))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"$abaaba\n", | |
"a$abaab\n", | |
"aaba$ab\n", | |
"aba$aba\n", | |
"abaaba$\n", | |
"ba$abaa\n", | |
"baaba$a\n" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def reverseBwt(bw):\n", | |
" ''' Make T from BWT(T) '''\n", | |
" ranks, tots = rankBwt(bw)\n", | |
" first = firstCol(tots)\n", | |
" rowi = 0 # start in first row\n", | |
" t = '$' # start with rightmost character\n", | |
" while bw[rowi] != '$':\n", | |
" c = bw[rowi]\n", | |
" t = c + t # prepend to answer\n", | |
" # jump to row that starts with c of same rank\n", | |
" rowi = first[c][0] + ranks[rowi]\n", | |
" return t" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"reverseBwt(b)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
"'abaaba$'" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"reverseBwt(bwtViaBwm('In_the_jingle_jangle_morning$'))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
"'In_the_jingle_jangle_morning$'" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment