Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Created October 2, 2013 18:33
Show Gist options
  • Save BenLangmead/6798379 to your computer and use it in GitHub Desktop.
Save BenLangmead/6798379 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"def rotations(t):\n",
" # Return list of rotations of input string t\n",
" tt = t * 2\n",
" return [ tt[i:i+len(t)] for i in xrange(0, len(t)) ]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rotations('cat')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": [
"['cat', 'atc', 'tca']"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def bwm(t):\n",
" # Return lexicographically sorted list of t\u2019s rotations\n",
" return sorted(rotations(t))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bwm('abaaba$')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"['$abaaba', 'a$abaab', 'aaba$ab', 'aba$aba', 'abaaba$', 'ba$abaa', 'baaba$a']"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print('\\n'.join(bwm('abaaba$')))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"$abaaba\n",
"a$abaab\n",
"aaba$ab\n",
"aba$aba\n",
"abaaba$\n",
"ba$abaa\n",
"baaba$a\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def bwtViaBwm(t):\n",
" # Given T, returns BWT(T) by way of the BWM\n",
" return ''.join(map(lambda x: x[-1], bwm(t)))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bwtViaBwm('abaaba$') # we can see the result equals the last column of the matrix above"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"'abba$aa'"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def suffixArray(s):\n",
" satups = sorted([(s[i:], i) for i in xrange(0, len(s))])\n",
" return map(lambda x: x[1], satups)\n",
"\n",
"def bwtViaSa(t):\n",
" # Given T, returns BWT(T) by way of the suffix array\n",
" bw = []\n",
" for si in suffixArray(t):\n",
" if si == 0:\n",
" bw.append('$')\n",
" else:\n",
" bw.append(t[si-1])\n",
" return ''.join(bw) # return string-ized version of list bw"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bwtViaBwm('abaaba$'), bwtViaSa('abaaba$') # same result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"('abba$aa', 'abba$aa')"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment