Created
October 1, 2013 19:33
-
-
Save BenLangmead/6783863 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Not a great way to build a suffix array, but we'll use it\n", | |
"# for the small examples here\n", | |
"def naiveBuildSA(t):\n", | |
" satups = sorted([(t[i:], i) for i in xrange(0, len(t))])\n", | |
" return map(lambda x: x[1], satups)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Simple function calculating LCP of two string\n", | |
"def lcp(x, y):\n", | |
" for i in xrange(min(len(x), len(y))):\n", | |
" if x[i] != y[i]: return i\n", | |
" return min(len(x), len(y))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"lcp('start', 'stark')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"4" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"lcp('start', 'star')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": [ | |
"4" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"lcp('yes', 'no')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": [ | |
"0" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Naive way to calculate LCP1 array given string and its suffix array\n", | |
"def naiveLCP1(t, sa):\n", | |
" return [ lcp(t[sa[i]:], t[sa[i+1]:]) for i in xrange(len(sa)-1) ]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"t = 'abaaba$'\n", | |
"naiveLCP1(t, naiveBuildSA(t))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
"[0, 1, 1, 3, 0, 2]" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"t = 'abracadabracada$'\n", | |
"naiveLCP1(t, naiveBuildSA(t))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 8, | |
"text": [ | |
"[0, 1, 8, 1, 5, 1, 3, 0, 7, 0, 4, 0, 2, 0, 6]" | |
] | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"naiveBuildSA(t)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
"[15, 14, 7, 0, 10, 3, 12, 5, 8, 1, 11, 4, 13, 6, 9, 2]" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Calculates (l, c) LCPs and (c, r) LCPs from LCP1 array. Returns pair where\n", | |
"# first element is list of LCPs for (l, c) combos and second is LCPs for\n", | |
"# (c, r) combos.\n", | |
"def precomputeLcps(lcp1):\n", | |
" llcp, rlcp = [None] * len(lcp1), [None] * len(lcp1)\n", | |
" lcp1 += [0]\n", | |
" def precomputeLcpsHelper(l, r):\n", | |
" if l == r-1: return lcp1[l]\n", | |
" c = (l + r) // 2\n", | |
" llcp[c-1] = precomputeLcpsHelper(l, c)\n", | |
" rlcp[c-1] = precomputeLcpsHelper(c, r)\n", | |
" return min(llcp[c-1], rlcp[c-1])\n", | |
" precomputeLcpsHelper(0, len(lcp1))\n", | |
" return llcp, rlcp" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"precomputeLcps(naiveLCP1(t, naiveBuildSA(t)))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 11, | |
"text": [ | |
"([0, 0, 8, 0, 5, 1, 3, 0, 7, 0, 4, 0, 2, 0, 6],\n", | |
" [1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment