Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Created October 6, 2013 23:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save BenLangmead/6860491 to your computer and use it in GitHub Desktop.
Save BenLangmead/6860491 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"def rotations(t):\n",
" ''' Return list of rotations of input string t '''\n",
" tt = t * 2\n",
" return [ tt[i:i+len(t)] for i in xrange(0, len(t)) ]\n",
"\n",
"def bwm(t):\n",
" ''' Return lexicographically sorted list of t\u2019s rotations '''\n",
" return sorted(rotations(t))\n",
"\n",
"def bwtViaBwm(t):\n",
" ''' Given T, returns BWT(T) by way of the BWM '''\n",
" return ''.join(map(lambda x: x[-1], bwm(t)))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = 'abaaba$'\n",
"b = bwtViaBwm(t)\n",
"b"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": [
"'abba$aa'"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def rankBwt(bw):\n",
" ''' Given BWT string bw, return parallel list of B-ranks. Also\n",
" returns tots: map from character to # times it appears. '''\n",
" tots = dict()\n",
" ranks = []\n",
" for c in bw:\n",
" if c not in tots: tots[c] = 0\n",
" ranks.append(tots[c])\n",
" tots[c] += 1\n",
" return ranks, tots"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ranks, tots = rankBwt(b)\n",
"print zip(b, ranks) # print characters of BWT(T) in order, along with rank"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[('a', 0), ('b', 0), ('b', 1), ('a', 1), ('$', 0), ('a', 2), ('a', 3)]\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def firstCol(tots):\n",
" ''' Return map from character to the range of rows prefixed by\n",
" the character. '''\n",
" first = {}\n",
" totc = 0\n",
" for c, count in sorted(tots.iteritems()):\n",
" first[c] = (totc, totc + count)\n",
" totc += count\n",
" return first"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"firstCol(tots)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"{'$': (0, 1), 'a': (1, 5), 'b': (5, 7)}"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# confirm that the representation of the first column above is sensible\n",
"print('\\n'.join(bwm(t)))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"$abaaba\n",
"a$abaab\n",
"aaba$ab\n",
"aba$aba\n",
"abaaba$\n",
"ba$abaa\n",
"baaba$a\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def reverseBwt(bw):\n",
" ''' Make T from BWT(T) '''\n",
" ranks, tots = rankBwt(bw)\n",
" first = firstCol(tots)\n",
" rowi = 0 # start in first row\n",
" t = '$' # start with rightmost character\n",
" while bw[rowi] != '$':\n",
" c = bw[rowi]\n",
" t = c + t # prepend to answer\n",
" # jump to row that starts with c of same rank\n",
" rowi = first[c][0] + ranks[rowi]\n",
" return t"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reverseBwt(b)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"'abaaba$'"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reverseBwt(bwtViaBwm('In_the_jingle_jangle_morning$'))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"'In_the_jingle_jangle_morning$'"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment