Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Created November 14, 2013 20:02
Show Gist options
  • Save BenLangmead/7473392 to your computer and use it in GitHub Desktop.
Save BenLangmead/7473392 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"def z(s):\n",
" ''' Use Z-algorithm to preprocess given string. See\n",
" Gusfield for complete description of algorithm. '''\n",
" \n",
" Z = [len(s)] + [0] * len(s)\n",
" assert len(s) > 1\n",
" \n",
" # Initial comparison of s[1:] with prefix\n",
" for i in xrange(1, len(s)):\n",
" if s[i] == s[i-1]:\n",
" Z[1] += 1\n",
" else:\n",
" break\n",
" \n",
" r, l = 0, 0\n",
" if Z[1] > 0:\n",
" r, l = Z[1], 1\n",
" \n",
" for k in xrange(2, len(s)):\n",
" assert Z[k] == 0\n",
" if k > r:\n",
" # Case 1\n",
" for i in xrange(k, len(s)):\n",
" if s[i] == s[i-k]:\n",
" Z[k] += 1\n",
" else:\n",
" break\n",
" r, l = k + Z[k] - 1, k\n",
" else:\n",
" # Case 2\n",
" # Calculate length of beta\n",
" nbeta = r - k + 1\n",
" Zkp = Z[k - l]\n",
" if nbeta > Zkp:\n",
" # Case 2a: Zkp wins\n",
" Z[k] = Zkp\n",
" else:\n",
" # Case 2b: Compare characters just past r\n",
" nmatch = 0\n",
" for i in xrange(r+1, len(s)):\n",
" if s[i] == s[i - k]:\n",
" nmatch += 1\n",
" else:\n",
" break\n",
" l, r = k, r + nmatch\n",
" Z[k] = r - k + 1\n",
" return Z"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"z('abracadabra')\n",
"# abracadabra (11)\n",
"# a (1)\n",
"# a (1)\n",
"# abra (4)\n",
"# a (1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": [
"[11, 0, 0, 1, 0, 1, 0, 4, 0, 0, 1, 0]"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"z('aaaaa')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"[5, 4, 3, 2, 1, 0]"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def zMatch(p, t):\n",
" s = p + \"$\" + t\n",
" Z = z(s)\n",
" occurrences = []\n",
" for i in xrange(len(p) + 1, len(s)):\n",
" if Z[i] >= len(p):\n",
" occurrences.append(i - (len(p) + 1))\n",
" return occurrences"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"zMatch('needle', 'haystack needle haystack')\n",
"# 012345678901234567890123\n",
"# 1 2"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[9]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"zMatch('needle', 'haystack needle needle')\n",
"# 0123456789012345678901\n",
"# 1 2"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"[9, 16]"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment