Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Created October 1, 2013 19:33
Show Gist options
  • Save BenLangmead/6783863 to your computer and use it in GitHub Desktop.
Save BenLangmead/6783863 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Not a great way to build a suffix array, but we'll use it\n",
"# for the small examples here\n",
"def naiveBuildSA(t):\n",
" satups = sorted([(t[i:], i) for i in xrange(0, len(t))])\n",
" return map(lambda x: x[1], satups)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Simple function calculating LCP of two string\n",
"def lcp(x, y):\n",
" for i in xrange(min(len(x), len(y))):\n",
" if x[i] != y[i]: return i\n",
" return min(len(x), len(y))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lcp('start', 'stark')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"4"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lcp('start', 'star')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"4"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lcp('yes', 'no')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"0"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Naive way to calculate LCP1 array given string and its suffix array\n",
"def naiveLCP1(t, sa):\n",
" return [ lcp(t[sa[i]:], t[sa[i+1]:]) for i in xrange(len(sa)-1) ]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = 'abaaba$'\n",
"naiveLCP1(t, naiveBuildSA(t))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"[0, 1, 1, 3, 0, 2]"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = 'abracadabracada$'\n",
"naiveLCP1(t, naiveBuildSA(t))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"[0, 1, 8, 1, 5, 1, 3, 0, 7, 0, 4, 0, 2, 0, 6]"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"naiveBuildSA(t)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"[15, 14, 7, 0, 10, 3, 12, 5, 8, 1, 11, 4, 13, 6, 9, 2]"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Calculates (l, c) LCPs and (c, r) LCPs from LCP1 array. Returns pair where\n",
"# first element is list of LCPs for (l, c) combos and second is LCPs for\n",
"# (c, r) combos.\n",
"def precomputeLcps(lcp1):\n",
" llcp, rlcp = [None] * len(lcp1), [None] * len(lcp1)\n",
" lcp1 += [0]\n",
" def precomputeLcpsHelper(l, r):\n",
" if l == r-1: return lcp1[l]\n",
" c = (l + r) // 2\n",
" llcp[c-1] = precomputeLcpsHelper(l, c)\n",
" rlcp[c-1] = precomputeLcpsHelper(c, r)\n",
" return min(llcp[c-1], rlcp[c-1])\n",
" precomputeLcpsHelper(0, len(lcp1))\n",
" return llcp, rlcp"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"precomputeLcps(naiveLCP1(t, naiveBuildSA(t)))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"([0, 0, 8, 0, 5, 1, 3, 0, 7, 0, 4, 0, 2, 0, 6],\n",
" [1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment