Skip to content

Instantly share code, notes, and snippets.

@d2207197
Last active August 29, 2015 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save d2207197/aac0083f7958c3b0979c to your computer and use it in GitHub Desktop.
Save d2207197/aac0083f7958c3b0979c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 236,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from functools import wraps, lru_cache\n",
"\n",
"def listify(func):\n",
" \"Decorator to convert generator output to a list\"\n",
" @wraps(func)\n",
" def listify(*args, **kw):\n",
" return list(func(*args, **kw))\n",
" return listify\n",
"\n",
"import sys\n",
"import time\n",
"from contextlib import redirect_stdout\n",
"def trace(slowdown = False):\n",
" def trace(func):\n",
" \"print function input and output\"\n",
" @wraps(func)\n",
" def trace(*args, indent = [0],**kw):\n",
" with redirect_stdout(sys.stderr):\n",
" print(' '*indent[0], end='')\n",
"\n",
" print('{}('.format(func.__name__), end='')\n",
" if args:\n",
" print(*map(repr, args), sep=', ', end='')\n",
" if kw:\n",
" print(',', *('{}={}'.format(k,repr(v)) for k, v in kw), sep=', ', end='')\n",
" print(')')\n",
" if slowdown: time.sleep(0.5)\n",
" indent[0] += 1\n",
" out = func(*args, **kw)\n",
" indent[0] -= 1\n",
" with redirect_stdout(sys.stderr):\n",
" print(' '*indent[0], end='')\n",
"\n",
" print('{}('.format(func.__name__), end='')\n",
" if args:\n",
" print(*map(repr, args), sep=', ', end='')\n",
" if kw:\n",
" print(',', *('{}={}'.format(k,repr(v)) for k, v in kw), sep=', ', end='')\n",
" print(') ->', out)\n",
" return out\n",
" return trace\n",
" return trace\n",
"\n",
"\n",
"@lru_cache()\n",
"# @trace\n",
"@listify\n",
"def allpartition(seq, *, max_length=3):\n",
" max_length = min(len(seq), max_length)\n",
" for length in range(max_length, 1 - 1, -1):\n",
" yield seq[0:length], seq[length:]"
]
},
{
"cell_type": "code",
"execution_count": 232,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('abc', 'd'), ('ab', 'cd'), ('a', 'bcd')]"
]
},
"execution_count": 232,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"allpartition('abcd')"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('ABCDEFGHIJKLMNOPQRSTUVWXYZ', ''),\n",
" ('ABCDEFGHIJKLMNOPQRSTUVWXY', 'Z'),\n",
" ('ABCDEFGHIJKLMNOPQRSTUVWX', 'YZ'),\n",
" ('ABCDEFGHIJKLMNOPQRSTUVW', 'XYZ'),\n",
" ('ABCDEFGHIJKLMNOPQRSTUV', 'WXYZ'),\n",
" ('ABCDEFGHIJKLMNOPQRSTU', 'VWXYZ'),\n",
" ('ABCDEFGHIJKLMNOPQRST', 'UVWXYZ'),\n",
" ('ABCDEFGHIJKLMNOPQRS', 'TUVWXYZ'),\n",
" ('ABCDEFGHIJKLMNOPQR', 'STUVWXYZ'),\n",
" ('ABCDEFGHIJKLMNOPQ', 'RSTUVWXYZ'),\n",
" ('ABCDEFGHIJKLMNOP', 'QRSTUVWXYZ'),\n",
" ('ABCDEFGHIJKLMNO', 'PQRSTUVWXYZ'),\n",
" ('ABCDEFGHIJKLMN', 'OPQRSTUVWXYZ'),\n",
" ('ABCDEFGHIJKLM', 'NOPQRSTUVWXYZ'),\n",
" ('ABCDEFGHIJKL', 'MNOPQRSTUVWXYZ'),\n",
" ('ABCDEFGHIJK', 'LMNOPQRSTUVWXYZ'),\n",
" ('ABCDEFGHIJ', 'KLMNOPQRSTUVWXYZ'),\n",
" ('ABCDEFGHI', 'JKLMNOPQRSTUVWXYZ'),\n",
" ('ABCDEFGH', 'IJKLMNOPQRSTUVWXYZ'),\n",
" ('ABCDEFG', 'HIJKLMNOPQRSTUVWXYZ'),\n",
" ('ABCDEF', 'GHIJKLMNOPQRSTUVWXYZ'),\n",
" ('ABCDE', 'FGHIJKLMNOPQRSTUVWXYZ'),\n",
" ('ABCD', 'EFGHIJKLMNOPQRSTUVWXYZ'),\n",
" ('ABC', 'DEFGHIJKLMNOPQRSTUVWXYZ'),\n",
" ('AB', 'CDEFGHIJKLMNOPQRSTUVWXYZ'),\n",
" ('A', 'BCDEFGHIJKLMNOPQRSTUVWXYZ')]"
]
},
"execution_count": 179,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from string import ascii_uppercase\n",
"allpartition(ascii_uppercase, max_length = 1000)"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from itertools import combinations, chain"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from collections import namedtuple\n",
"\n",
"class SeqScore(namedtuple('aa', ['seq', 'score'])):\n",
" def __add__(self, other):\n",
" return SeqScore(self.seq + ' ' + other.seq, self.score * other.score)\n",
"def tm(seq):\n",
" return SeqScore(seq, 1/(sum(map(ord, seq)) % 13 + 1))"
]
},
{
"cell_type": "code",
"execution_count": 233,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"@lru_cache()\n",
"@trace\n",
"def allsep(seq):\n",
" out = []\n",
" for part1, part2 in allpartition(seq):\n",
" seqscore1 = tm(part1)\n",
" if part2:\n",
" out.extend(seqscore1 + seqscore2 for seqscore2 in allsep(part2))\n",
" else:\n",
" out.append(seqscore1)\n",
" \n",
" return tuple(sorted(out, key = lambda x: -x.score)[:3])"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<function __main__.trace.<locals>.trace.<locals>.trace>"
]
},
"execution_count": 234,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"allsep('abc')"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 859 µs, sys: 0 ns, total: 859 µs\n",
"Wall time: 868 µs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"allsep('g') -> (SeqScore(seq='g', score=0.07692307692307693),)\n",
"allsep('fg') -> (SeqScore(seq='fg', score=0.09090909090909091), SeqScore(seq='f g', score=0.00641025641025641))\n",
"allsep('efg') -> (SeqScore(seq='efg', score=0.125), SeqScore(seq='ef g', score=0.008547008547008548), SeqScore(seq='e fg', score=0.008264462809917356))\n",
"allsep('cefg') -> (SeqScore(seq='cef g', score=0.019230769230769232), SeqScore(seq='ce fg', score=0.015151515151515152), SeqScore(seq='c efg', score=0.013888888888888888))\n",
"allsep('bcefg') -> (SeqScore(seq='bc efg', score=0.041666666666666664), SeqScore(seq='bce fg', score=0.006993006993006994), SeqScore(seq='bc ef g', score=0.002849002849002849))\n",
"allsep('abcefg') -> (SeqScore(seq='ab cef g', score=0.019230769230769232), SeqScore(seq='ab ce fg', score=0.015151515151515152), SeqScore(seq='abc efg', score=0.013888888888888888))\n"
]
},
{
"data": {
"text/plain": [
"(SeqScore(seq='ab cef g', score=0.019230769230769232),\n",
" SeqScore(seq='ab ce fg', score=0.015151515151515152),\n",
" SeqScore(seq='abc efg', score=0.013888888888888888))"
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time allsep('abcefg')"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def allsep(seq):\n",
" out = []\n",
" for part1, part2 in allpartition(seq):\n",
" seqscore1 = tm(part1)\n",
" if part2:\n",
" out.extend(seqscore1 + seqscore2 for seqscore2 in allsep(part2))\n",
" else:\n",
" out.append(seqscore1)\n",
" \n",
" return tuple(sorted(out, key = lambda x: -x.score)[:3])"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 411 µs, sys: 0 ns, total: 411 µs\n",
"Wall time: 417 µs\n"
]
},
{
"data": {
"text/plain": [
"(SeqScore(seq='ab cef g', score=0.019230769230769232),\n",
" SeqScore(seq='ab ce fg', score=0.015151515151515152),\n",
" SeqScore(seq='abc efg', score=0.013888888888888888))"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time allsep('abcefg')"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://gist.github.com/aac0083f7958c3b0979c\r\n"
]
}
],
"source": [
"!gist --update aac0083f7958c3b0979c allsep.ipynb "
]
},
{
"cell_type": "code",
"execution_count": 237,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"allpartition(['1', '2', '3', '4'], 2)\n",
" allpartition(['2', '3', '4'], 2)\n",
" allpartition(['3', '4'], 2)\n",
" allpartition(['4'], 2)\n",
" allpartition(['4'], 2) -> [[['4']]]\n",
" allpartition(['3', '4'], 2) -> [[['3'], ['4']]]\n",
" allpartition(['2', '3', '4'], 2) -> [[['2'], ['3'], ['4']]]\n",
"allpartition(['1', '2', '3', '4'], 2) -> [[['1'], ['2'], ['3'], ['4']]]\n"
]
},
{
"data": {
"text/plain": [
"[[['1'], ['2'], ['3'], ['4']]]"
]
},
"execution_count": 237,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from itertools import chain\n",
"\n",
"@trace(slowdown= True)\n",
"@listify\n",
"def allpartition(sequence, k):\n",
" time.sleep(0.5)\n",
" if len(sequence) == 1:\n",
" yield [sequence]\n",
" else:\n",
" for i in range(1, k):\n",
" part1 = [sequence[:i]]\n",
" for part2 in allpartition(sequence[i:], k):\n",
" yield list(chain(part1, part2))\n",
"\n",
"a = list('1234')\n",
"allpartition(a, 2)\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 211,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import time"
]
},
{
"cell_type": "code",
"execution_count": 202,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"time.sleep(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment