Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Created November 12, 2013 20:35
Show Gist options
  • Save BenLangmead/7438200 to your computer and use it in GitHub Desktop.
Save BenLangmead/7438200 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from fm import FmIndex"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def stringNeighborsHamming(st, alph, mms=1):\n",
" ''' Given string \"st\", alphabet \"alph\", and a maximum Hamming\n",
" distance \"mms\", return all strings within that distance\n",
" from \"st\". '''\n",
" ret = []\n",
" def __stringNeighborsHamming(st, mms, ii):\n",
" for i in xrange(ii, len(st)):\n",
" if mms > 0:\n",
" # Mismatch at position i\n",
" for a in alph:\n",
" if a != st[i]:\n",
" newst = st[:i] + a + st[i+1:]\n",
" __stringNeighborsHamming(newst, mms - 1, ii+1)\n",
" ret.append(st)\n",
" __stringNeighborsHamming(st, mms, 0)\n",
" return ret"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"stringNeighborsHamming('CAT', alph='ACGT', mms=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"['AAT', 'GAT', 'TAT', 'CCT', 'CGT', 'CTT', 'CAA', 'CAC', 'CAG', 'CAT']"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def cotraverseFmHamming(p, t, mms=1):\n",
" ''' Given a pattern p, a text t, and a maximum Hamming distance\n",
" mms, find all occurrences of p in t within the maximum Hamming\n",
" distance by conducting a co-traversal of p's neighborhood and\n",
" t's suffix trie. '''\n",
" ret = []\n",
" index = FmIndex(t[::-1]) # build FM index over reverse(t)\n",
" alph = index.first.keys() # get alphabet from index\n",
" alph.remove('$')\n",
" def __cotraverseFmHamming(p, mms, ii, l, r):\n",
" i = ii\n",
" while i < len(p) and r > l:\n",
" if mms > 0:\n",
" # Try all mismatches at position i\n",
" for a in alph:\n",
" if a != p[i]:\n",
" # Say l, r delimit the range of rows with S\n",
" # s a prefix. index.nextRange(l, r, a)\n",
" # returns range with a + S as a prefix.\n",
" newl, newr = index.nextRange(l, r, a)\n",
" newst = p[:i] + a + p[i+1:]\n",
" __cotraverseFmHamming(newst, mms-1, i+1, newl, newr)\n",
" newl, newr = index.nextRange(l, r, p[i])\n",
" l, r = newl, newr\n",
" i += 1\n",
" if i == len(p):\n",
" for row in xrange(l, r):\n",
" ret.append((p, index.slen - index.resolve(row) - len(p) - 1))\n",
" __cotraverseFmHamming(p, mms, 0, 0, index.slen)\n",
" return ret"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cotraverseFmHamming(\"CAA\", \"ACATAG\", 1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[('CAT', 1)]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cotraverseFmHamming('needle', 'needle_neddle_nedle', mms=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"[('neddle', 7), ('needle', 0)]"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment