Created
November 12, 2013 20:35
-
-
Save BenLangmead/7438200 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from fm import FmIndex" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def stringNeighborsHamming(st, alph, mms=1):\n", | |
" ''' Given string \"st\", alphabet \"alph\", and a maximum Hamming\n", | |
" distance \"mms\", return all strings within that distance\n", | |
" from \"st\". '''\n", | |
" ret = []\n", | |
" def __stringNeighborsHamming(st, mms, ii):\n", | |
" for i in xrange(ii, len(st)):\n", | |
" if mms > 0:\n", | |
" # Mismatch at position i\n", | |
" for a in alph:\n", | |
" if a != st[i]:\n", | |
" newst = st[:i] + a + st[i+1:]\n", | |
" __stringNeighborsHamming(newst, mms - 1, ii+1)\n", | |
" ret.append(st)\n", | |
" __stringNeighborsHamming(st, mms, 0)\n", | |
" return ret" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"stringNeighborsHamming('CAT', alph='ACGT', mms=1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"['AAT', 'GAT', 'TAT', 'CCT', 'CGT', 'CTT', 'CAA', 'CAC', 'CAG', 'CAT']" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def cotraverseFmHamming(p, t, mms=1):\n", | |
" ''' Given a pattern p, a text t, and a maximum Hamming distance\n", | |
" mms, find all occurrences of p in t within the maximum Hamming\n", | |
" distance by conducting a co-traversal of p's neighborhood and\n", | |
" t's suffix trie. '''\n", | |
" ret = []\n", | |
" index = FmIndex(t[::-1]) # build FM index over reverse(t)\n", | |
" alph = index.first.keys() # get alphabet from index\n", | |
" alph.remove('$')\n", | |
" def __cotraverseFmHamming(p, mms, ii, l, r):\n", | |
" i = ii\n", | |
" while i < len(p) and r > l:\n", | |
" if mms > 0:\n", | |
" # Try all mismatches at position i\n", | |
" for a in alph:\n", | |
" if a != p[i]:\n", | |
" # Say l, r delimit the range of rows with S\n", | |
" # s a prefix. index.nextRange(l, r, a)\n", | |
" # returns range with a + S as a prefix.\n", | |
" newl, newr = index.nextRange(l, r, a)\n", | |
" newst = p[:i] + a + p[i+1:]\n", | |
" __cotraverseFmHamming(newst, mms-1, i+1, newl, newr)\n", | |
" newl, newr = index.nextRange(l, r, p[i])\n", | |
" l, r = newl, newr\n", | |
" i += 1\n", | |
" if i == len(p):\n", | |
" for row in xrange(l, r):\n", | |
" ret.append((p, index.slen - index.resolve(row) - len(p) - 1))\n", | |
" __cotraverseFmHamming(p, mms, 0, 0, index.slen)\n", | |
" return ret" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"cotraverseFmHamming(\"CAA\", \"ACATAG\", 1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": [ | |
"[('CAT', 1)]" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"cotraverseFmHamming('needle', 'needle_neddle_nedle', mms=1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"[('neddle', 7), ('needle', 0)]" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment