Last active
November 10, 2015 04:26
-
-
Save Carreau/b8ed0853ab93a1943319 to your computer and use it in GitHub Desktop.
Advance Python hacker within
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Playing With Python (python 3.5 of course)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A bit of advance Python programming." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Disclamer : This notebook contain **a lot** of bad ideas." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Aparte" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Please try Xonsh, a shell that allow you to use python syntax on modules\n", | |
"\n", | |
"```\n", | |
"pip install xonsh\n", | |
"```\n", | |
"\n", | |
"```\n", | |
"$ [print('enjoy') for i in range(10)]\n", | |
"```\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Context manager" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The classical context manager you are used to:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1\n", | |
"2\n", | |
"3\n", | |
"4\n", | |
"hello\n", | |
"everyone\n" | |
] | |
} | |
], | |
"source": [ | |
"with open('myfile.txt') as f:\n", | |
" print(f.read())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": false | |
}, | |
"source": [ | |
"The context manager ensure that whatever happend when the file is open, the\n", | |
"file will be closed after." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"## How to write a context manager ?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"### simple method:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def fib(*,max=100):\n", | |
" yield 1\n", | |
" yield 1\n", | |
" previous, current = 1,1\n", | |
" while True:\n", | |
" previous, current = current, previous+current\n", | |
" yield current\n", | |
" if current > max:\n", | |
" raise StopIteration\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1\n", | |
"1\n", | |
"2\n", | |
"3\n", | |
"5\n", | |
"8\n", | |
"13\n", | |
"21\n", | |
"34\n", | |
"55\n", | |
"89\n", | |
"144\n" | |
] | |
} | |
], | |
"source": [ | |
"for i in fib():\n", | |
" print(i)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def g_running_mean(iterator):\n", | |
" rsum = 0\n", | |
" rnumber = 0\n", | |
" for data in iterator:\n", | |
" rsum += data\n", | |
" rnumber += 1\n", | |
" yield rsum/rnumber\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from itertools import islice\n", | |
"\n", | |
"def things(start):\n", | |
" value = start\n", | |
" while value != 1:\n", | |
" yield value\n", | |
" if value % 2 == 0:\n", | |
" value = value*3+1\n", | |
" else : \n", | |
" value = value // 2\n", | |
"\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def group_by_2(iterable):\n", | |
" iterator = iter(iterable)\n", | |
" while True:\n", | |
" yield (next(iterator), next(iterator))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from contextlib import contextmanager" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"@contextmanager\n", | |
"def mycontext(a):\n", | |
" print('entering mycontext')\n", | |
" try:\n", | |
" yield a\n", | |
" except Exception as e:\n", | |
" print('exiting mycontext')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"entering mycontext\n", | |
"I KNOW HOW TO WRITE A CONTEXT MANAGER !\n" | |
] | |
} | |
], | |
"source": [ | |
"with mycontext('i know how to write a context manager !') as value:\n", | |
" print(value.upper())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"entering mycontext\n", | |
"exiting mycontext\n" | |
] | |
} | |
], | |
"source": [ | |
"with mycontext(0) as v:\n", | |
" print(1/v)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Longer method (not more complicated)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"class MyContext(object):\n", | |
" \n", | |
" def __init__(self, value=None):\n", | |
" self.v = value\n", | |
" \n", | |
" def __enter__(self):\n", | |
" print('entering')\n", | |
" return self.v\n", | |
" \n", | |
" \n", | |
" def __exit__(self, exc_type, exc_value, traceback):\n", | |
" print('exit', exc_type, exc_value)\n", | |
" #return False/True\n", | |
" \n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"entering\n", | |
"1.0\n", | |
"exit None None\n" | |
] | |
} | |
], | |
"source": [ | |
"with MyContext(1) as c:\n", | |
" print(1/c)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"entering\n", | |
"exit <class 'ZeroDivisionError'> division by zero\n" | |
] | |
}, | |
{ | |
"ename": "ZeroDivisionError", | |
"evalue": "division by zero", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-13-1546bd4d75bc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mMyContext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;31mZeroDivisionError\u001b[0m: division by zero" | |
] | |
} | |
], | |
"source": [ | |
"with MyContext(0) as c:\n", | |
" print(1/c)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The context manager may not receive any value, and is allowed to not be assigned:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"entering\n", | |
"still works\n", | |
"exit None None\n" | |
] | |
} | |
], | |
"source": [ | |
"c = MyContext()\n", | |
"\n", | |
"with c:\n", | |
" print('still works')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The exit part is still executed !" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Useful example\n", | |
"\n", | |
"Log everytime a file is open and for how long:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import datetime\n", | |
"import traceback\n", | |
"import io" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"class LogUsage(object):\n", | |
" \n", | |
" def __init__(self, filename, **options):\n", | |
" self.filename = filename \n", | |
" self.options = options\n", | |
" \n", | |
" def __enter__(self):\n", | |
" self._start = datetime.datetime.now()\n", | |
" \n", | |
" self._f = io.open(self.filename, **self.options)\n", | |
" self._logfile = io.open(self.filename+'.log', 'at')\n", | |
" self._logfile.writelines(['\\nstart processing ', self.filename, ' at ', str(self._start)])\n", | |
" return self._f\n", | |
" \n", | |
" \n", | |
" def __exit__(self, exc_type, exc_value, tb):\n", | |
" self._f.close()\n", | |
" if exc_type:\n", | |
" self._logfile.writelines(' something went wrong ! \\n')\n", | |
" self._logfile.writelines(traceback.format_exception(exc_type, exc_value, tb))\n", | |
" delta = (datetime.datetime.now() - self._start)\n", | |
" self._logfile.writelines(['\\nstop processing ', self.filename, ' after ', str(delta.seconds),'.',str(int(delta.microseconds/1e3)), ' seconds'])\n", | |
" self._logfile.close()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import sys\n", | |
"from IPython.core.ultratb import ColorTB\n", | |
"\n", | |
"def highlight_tb():\n", | |
" ColorTB('LightBG')(sys.exc_info())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1\n", | |
"2\n", | |
"3\n", | |
"4\n", | |
"hello\n", | |
"Traceback \u001b[0;36m(most recent call last)\u001b[0m:\n", | |
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-20-b838b6927900>\"\u001b[0;36m, line \u001b[0;32m6\u001b[0;36m, in \u001b[0;35m<module>\u001b[0;36m\u001b[0m\n", | |
"\u001b[0;31m time.sleep(float(l)/10)\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m\u001b[0;31m:\u001b[0m could not convert string to float: 'hello\\n'\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"try:\n", | |
" import time \n", | |
" with LogUsage('myfile.txt') as f:\n", | |
" for l in f.readlines():\n", | |
" print(l, end='')\n", | |
" time.sleep(float(l)/10)\n", | |
"except Exception as e:\n", | |
" highlight_tb()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\r\n", | |
"start processing myfile.txt at 2015-10-26 17:00:50.855749\r\n", | |
"stop processing myfile.txt after 1.376 seconds\r\n", | |
"start processing myfile.txt at 2015-10-26 17:01:06.959869\r\n", | |
"stop processing myfile.txt after 1.376 seconds\r\n", | |
"start processing myfile.txt at 2015-10-26 17:02:43.057339\r\n", | |
"stop processing myfile.txt after 1.372 seconds\r\n", | |
"start processing myfile.txt at 2015-10-26 17:04:29.358947something went wrong !Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-11-9fc91c0489dd>\", line 5, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.17 seconds\r\n", | |
"start processing myfile.txt at 2015-10-28 11:53:08.991259something went wrong !Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-3-9fc91c0489dd>\", line 5, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.21 seconds\r\n", | |
"start processing myfile.txt at 2015-10-28 15:43:59.458339something went wrong !Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-30-9fc91c0489dd>\", line 5, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.17 seconds\r\n", | |
"start processing myfile.txt at 2015-10-28 15:44:54.151116 something went wrong ! \r\n", | |
"Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-33-9fc91c0489dd>\", line 5, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.24 seconds\r\n", | |
"start processing myfile.txt at 2015-10-28 16:15:41.165153 something went wrong ! \r\n", | |
"Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-16-9fc91c0489dd>\", line 5, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.9 seconds\r\n", | |
"start processing myfile.txt at 2015-10-28 16:54:21.322234 something went wrong ! \r\n", | |
"Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-68-9fc91c0489dd>\", line 5, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.14 seconds\r\n", | |
"start processing myfile.txt at 2015-10-28 16:58:32.333745 something went wrong ! \r\n", | |
"Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-18-9fc91c0489dd>\", line 5, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.15 seconds\r\n", | |
"start processing myfile.txt at 2015-10-29 13:47:48.396624\r\n", | |
"stop processing myfile.txt after 1.10 seconds\r\n", | |
"start processing myfile.txt at 2015-10-29 13:48:48.658500\r\n", | |
"stop processing myfile.txt after 1.21 seconds\r\n", | |
"start processing myfile.txt at 2015-10-29 13:50:26.635655\r\n", | |
"stop processing myfile.txt after 1.12 seconds\r\n", | |
"start processing myfile.txt at 2015-10-29 13:51:19.575909 something went wrong ! \r\n", | |
"Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-21-b70ea1902583>\", line 6, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.16 seconds\r\n", | |
"start processing myfile.txt at 2015-10-29 13:51:55.779654 something went wrong ! \r\n", | |
"Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-23-b838b6927900>\", line 6, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.17 seconds\r\n", | |
"start processing myfile.txt at 2015-10-29 14:04:09.134502 something went wrong ! \r\n", | |
"Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-18-b838b6927900>\", line 6, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.16 seconds\r\n", | |
"start processing myfile.txt at 2015-10-29 14:04:21.209909 something went wrong ! \r\n", | |
"Traceback (most recent call last):\r\n", | |
" File \"<ipython-input-20-b838b6927900>\", line 6, in <module>\r\n", | |
" time.sleep(float(l)/10)\r\n", | |
"ValueError: could not convert string to float: 'hello\\n'\r\n", | |
"\r\n", | |
"stop processing myfile.txt after 1.13 seconds" | |
] | |
} | |
], | |
"source": [ | |
"cat myfile.txt.log" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Exercise: \n", | |
"\n", | |
"Write a context manager that write (& check and read) the checksum of a file, before giving it to you. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import hashlib" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"mymd5 = hashlib.md5(b'somehting').hexdigest()\n", | |
"\n", | |
"# Oops corrupted data...\n", | |
"hashlib.md5(b'something').hexdigest() == mymd5" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#def/class open_md5():\n", | |
"# \n", | |
"# ..." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Homework" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"(dont' do this at home) Write a context manager that temporarly replace the builtin open with our above logger. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import builtins" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"True\n" | |
] | |
} | |
], | |
"source": [ | |
"print(open == builtins.open)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"example on how to swith builtins, and why it is complicated to follow. \n", | |
"Try to guess what the folowign will print. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"hi\n", | |
"=====\n", | |
"<built-in function print>\n", | |
"<built-in function print>\n", | |
"<built-in function print>\n", | |
"=====\n", | |
"<built-in function print>\n" | |
] | |
} | |
], | |
"source": [ | |
"_saved_open = open\n", | |
"builtins.open = print\n", | |
"open('hi')\n", | |
"open('=====')\n", | |
"\n", | |
"open(open)\n", | |
"print(open)\n", | |
"print(print)\n", | |
"\n", | |
"open('=====')\n", | |
"\n", | |
"print(builtins.open)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"builtins.open = _saved_open" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import io\n", | |
"assert open == io.open" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"builtins.open == io.open" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Did you do it with the class of the `@contextmanager` ?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# A reminder about `__things__` metods" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`__things__` are called `dunder`s (I guess for double under), you might hear that in talks, and it's easier\n", | |
"and quicker to say than `underscore underscore init underscprffft underschrulpf`:\n", | |
"\n", | |
" - `__init__` -> dunder init\n", | |
" - `__enter__` -> dunder enter\n", | |
" - `__exit__` -> dunder enter\n", | |
" - `__...__` -> dunder ...\n", | |
" \n", | |
"One or a goup of these are often refered to as magic methods.\n", | |
"\n", | |
"> a sufficiently advanced technology is indistinguishable from magic \n", | |
"\n", | |
"Aka: you probably do not want to name your methods with a dunder (`__...__`),\n", | |
"or start it with a double underscore (`__...`)\n", | |
"\n", | |
"In case you do not remember the **convention** is to start private methods with a **single** underscore,\n", | |
"(and to use a **double underscore** if your want your code to break in unexpected ways)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Many Python object are **Things** because they implement a group of such magics methods with `__dunder__`.\n", | |
"\n", | |
 TIP: on many">
"> TIP: on many websites dunders are shown in italic; wrap them in backticks to avoid that\n", | |
"\n", | |
"\n", | |
"Dunder are used for operator overloading, and some builtins function, and also python **private** internals.\n", | |
"\n", | |
"Don't be afraid to break all the things !\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Examples:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"class Callable(object):\n", | |
" \n", | |
" def __init__(self, increment):\n", | |
" self.increment = increment\n", | |
" \n", | |
" def __call__(self, other):\n", | |
" return other + self.increment\n", | |
" \n", | |
" def __getattr__(self, string):\n", | |
" d = {'one':1,'two':2,'three':3}\n", | |
" v = d[string]\n", | |
" return self.increment + v\n", | |
" \n", | |
" \n", | |
"by3 = Callable(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"12" | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"by3(9)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"13" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Callable(6)(7)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"7" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"Callable(6).one" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"You can do bad things" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from SPQR import X,M" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"we are in 2015, and I'm 29, I though was born in...." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"MCMLXXXVI" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"M.M.X.V - X.X.I.X" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# really bad idea" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### More bad idea : can you have the above be **also** a contextmanager ?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### useless example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import webbrowser\n", | |
"class GH(object):\n", | |
" \n", | |
" def __matmul__(self, other):\n", | |
" webbrowser.open('https://github.com/{}'.format(other.name))\n", | |
" \n", | |
" \n", | |
"github = GH()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"class User(object):\n", | |
" \n", | |
" def __init__(self, name):\n", | |
" self.name = name\n", | |
" \n", | |
" def __matmul__(self, other):\n", | |
" return other.__matmul__(self)\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"me = User('carreau')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"me @ github" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### iterator/iterable/generator " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"what type is range ?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"range" | |
] | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"range" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[type, type, builtin_function_or_method]" | |
] | |
}, | |
"execution_count": 43, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(map(type, (range, GH, open)))" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"`range` is of type `type`, so `range` is a class" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Traceback \u001b[0;36m(most recent call last)\u001b[0m:\n", | |
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-44-a906f0c290fe>\"\u001b[0;36m, line \u001b[0;32m2\u001b[0;36m, in \u001b[0;35m<module>\u001b[0;36m\u001b[0m\n", | |
"\u001b[0;31m class MyRange(range):\u001b[0m\n", | |
"\u001b[0;31mTypeError\u001b[0m\u001b[0;31m:\u001b[0m type 'range' is not an acceptable base type\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"try:\n", | |
" class MyRange(range):\n", | |
" pass\n", | |
"except Exception as e:\n", | |
" highlight_tb()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"But you cannot inherit from `range`." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"So iterable define `__iter__` or `__getitem__`, you think that itarable can have lenght, but you can defien the lenght manually:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"class Meat(object):\n", | |
" \n", | |
" def __len__(self):\n", | |
" return 42\n", | |
" \n", | |
" def __getitem__(self, index):\n", | |
" if index > 50:\n", | |
" raise StopIteration\n", | |
" if index % 2:\n", | |
" return 'lamb'\n", | |
" else :\n", | |
" return 'porc'\n", | |
" \n", | |
" def __iter__(self):\n", | |
" def it():\n", | |
" yield 'banana'\n", | |
" yield 'apple'\n", | |
" yield 'fries'\n", | |
" yield 'burger'\n", | |
" yield 'brussels sprouts'\n", | |
" yield 'strawberries'\n", | |
" return it()\n", | |
" \n", | |
" \n", | |
"meat = Meat()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"42" | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(meat)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n", | |
"porc\n", | |
"lamb\n" | |
] | |
} | |
], | |
"source": [ | |
"for i in range(len(meat)):\n", | |
" print(meat[i])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"banana\n", | |
"apple\n", | |
"fries\n", | |
"burger\n", | |
"brussels sprouts\n", | |
"strawberries\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in meat:\n", | |
" print(m)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"('banana', 'banana')\n", | |
"('apple', 'apple')\n", | |
"('fries', 'fries')\n", | |
"('burger', 'burger')\n", | |
"('brussels sprouts', 'brussels sprouts')\n", | |
"('strawberries', 'strawberries')\n" | |
] | |
} | |
], | |
"source": [ | |
"iter1 = iter(meat)\n", | |
"iter2 = iter(meat)\n", | |
"for m in zip(iter1, iter2):\n", | |
" print(m)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"see what happend if you iterate on the iterator instead of the iterable...." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"iter1 = iter(meat)\n", | |
"iter2 = iter1\n", | |
"z= zip(iter1, iter2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"('banana', 'apple')\n", | |
"('fries', 'burger')\n", | |
"('brussels sprouts', 'strawberries')\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in z:\n", | |
" print(m)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`iter2` advance to next when `iter1` advance, and vice versa. " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### bad idea : overwrite `__getitem__` to be the same as `__call__`\n", | |
"\n", | |
"Question : What can't you do ?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"class MyPrint(object):\n", | |
" \n", | |
" def __call__(self, args):\n", | |
" print(args)\n", | |
" \n", | |
"MyPrint.__getitem__ = MyPrint.__call__\n", | |
"\n", | |
"p = MyPrint()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"3\n" | |
] | |
} | |
], | |
"source": [ | |
"p(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(3, 7, 9)\n" | |
] | |
} | |
], | |
"source": [ | |
"p[3,7,9]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"hello\n" | |
] | |
} | |
], | |
"source": [ | |
"p['hello']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### WTF is yield ?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### What's with the lone `*`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"f = fib()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Call `next` on the **iterator** so that it advances correctly (calling `fib()` again creates a fresh generator that starts over)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 57, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"next(f)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 58, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"next(fib())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Typical \"you should use a generator\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def function(data):\n", | |
" \n", | |
" myarray = []\n", | |
" for value in data:\n", | |
" myarray.append(compute(data))\n", | |
" \n", | |
" return myarray\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### example running mean:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"31.333333333333332\n", | |
"46.84615384615385\n", | |
"70.42857142857143\n", | |
"106.4\n", | |
"161.4375\n", | |
"245.88235294117646\n", | |
"375.77777777777777\n", | |
"576.0526315789474\n", | |
"885.5\n", | |
"1364.5714285714287\n" | |
] | |
} | |
], | |
"source": [ | |
"n=11\n", | |
"for v in islice(g_running_mean(fib(max=1e6)), n, n+10):\n", | |
" print(v)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[999.0,\n", | |
" 749.0,\n", | |
" 582.3333333333334,\n", | |
" 467.75,\n", | |
" 448.8,\n", | |
" 405.0,\n", | |
" 427.0,\n", | |
" 408.5,\n", | |
" 378.55555555555554,\n", | |
" 347.6,\n", | |
" 319.09090909090907,\n", | |
" 301.0833333333333,\n", | |
" 281.84615384615387,\n", | |
" 263.5,\n", | |
" 246.73333333333332,\n", | |
" 233.625,\n", | |
" 220.94117647058823,\n", | |
" 211.72222222222223,\n", | |
" 202.0,\n", | |
" 192.55,\n", | |
" 183.66666666666666,\n", | |
" 176.1818181818182,\n", | |
" 168.91304347826087,\n", | |
" 162.04166666666666,\n", | |
" 156.08,\n", | |
" 150.30769230769232,\n", | |
" 145.44444444444446,\n", | |
" 140.57142857142858,\n", | |
" 135.86206896551724,\n", | |
" 131.76666666666668,\n", | |
" 127.70967741935483,\n", | |
" 124.3125,\n", | |
" 120.81818181818181,\n", | |
" 117.38235294117646,\n", | |
" 114.4,\n", | |
" 111.38888888888889,\n", | |
" 108.89189189189189,\n", | |
" 106.26315789473684,\n", | |
" 103.64102564102564,\n", | |
" 101.375,\n", | |
" 99.04878048780488,\n", | |
" 97.14285714285714,\n", | |
" 95.09302325581395,\n", | |
" 93.02272727272727,\n", | |
" 91.24444444444444,\n", | |
" 89.3913043478261,\n", | |
" 87.8936170212766,\n", | |
" 86.25,\n", | |
" 84.57142857142857,\n", | |
" 83.14,\n", | |
" 81.62745098039215,\n", | |
" 80.42307692307692,\n", | |
" 79.0754716981132,\n", | |
" 77.68518518518519,\n", | |
" 76.50909090909092,\n", | |
" 75.25,\n", | |
" 74.26315789473684,\n", | |
" 73.13793103448276,\n", | |
" 71.96610169491525,\n", | |
" 70.98333333333333,\n", | |
" 69.91803278688525,\n", | |
" 69.09677419354838,\n", | |
" 68.14285714285714,\n", | |
" 67.140625,\n", | |
" 66.3076923076923,\n", | |
" 65.39393939393939,\n", | |
" 64.70149253731343,\n", | |
" 63.88235294117647,\n", | |
" 63.01449275362319,\n", | |
" 62.3,\n", | |
" 61.50704225352113,\n", | |
" 60.916666666666664,\n", | |
" 60.205479452054796,\n", | |
" 59.445945945945944,\n", | |
" 58.82666666666667,\n", | |
" 58.13157894736842,\n", | |
" 57.62337662337662,\n", | |
" 57.0,\n", | |
" 56.32911392405063,\n", | |
" 55.7875,\n", | |
" 55.17283950617284,\n", | |
" 54.73170731707317,\n", | |
" 54.18072289156626,\n", | |
" 53.583333333333336,\n", | |
" 53.10588235294118,\n", | |
" 52.55813953488372,\n", | |
" 52.172413793103445,\n", | |
" 51.68181818181818,\n", | |
" 51.146067415730336,\n", | |
" 50.72222222222222,\n", | |
" 50.23076923076923,\n", | |
" 49.891304347826086,\n", | |
" 49.45161290322581,\n", | |
" 48.96808510638298,\n", | |
" 48.589473684210525,\n", | |
" 48.145833333333336,\n", | |
" 47.845360824742265,\n", | |
" 47.44897959183673,\n", | |
" 47.01010101010101,\n", | |
" 46.67,\n", | |
" 46.26732673267327,\n", | |
" 46.0,\n", | |
" 45.640776699029125,\n", | |
" 45.24038461538461,\n", | |
" 44.93333333333333,\n", | |
" 44.56603773584906,\n", | |
" 44.32710280373832,\n", | |
" 44.0,\n", | |
" 43.63302752293578,\n", | |
" 43.35454545454545,\n", | |
" 43.01801801801802,\n", | |
" 42.80357142857143,\n", | |
" 42.50442477876106,\n", | |
" 42.166666666666664,\n", | |
" 41.91304347826087,\n", | |
" 41.60344827586207,\n", | |
" 41.41025641025641,\n", | |
" 41.13559322033898,\n", | |
" 40.8235294117647,\n", | |
" 40.59166666666667,\n", | |
" 40.30578512396694,\n", | |
" 40.131147540983605,\n", | |
" 39.8780487804878,\n", | |
" 39.58870967741935,\n", | |
" 39.376,\n", | |
" 39.111111111111114,\n", | |
" 38.95275590551181,\n", | |
" 38.71875,\n", | |
" 38.44961240310077,\n", | |
" 38.253846153846155,\n", | |
" 38.00763358778626,\n", | |
" 37.86363636363637,\n", | |
" 37.64661654135338,\n", | |
" 37.3955223880597,\n", | |
" 37.214814814814815,\n", | |
" 36.98529411764706,\n", | |
" 36.85401459854015,\n", | |
" 36.65217391304348,\n", | |
" 36.41726618705036,\n", | |
" 36.25,\n", | |
" 36.0354609929078,\n", | |
" 35.91549295774648,\n", | |
" 35.72727272727273,\n", | |
" 35.50694444444444,\n", | |
" 35.351724137931036,\n", | |
" 35.15068493150685,\n", | |
" 35.04081632653061,\n", | |
" 34.86486486486486,\n", | |
" 34.65771812080537,\n", | |
" 34.513333333333335,\n", | |
" 34.324503311258276,\n", | |
" 34.223684210526315,\n", | |
" 34.05882352941177,\n", | |
" 33.86363636363637,\n", | |
" 33.729032258064514,\n", | |
" 33.55128205128205,\n", | |
" 33.45859872611465,\n", | |
" 33.30379746835443,\n", | |
" 33.119496855345915,\n", | |
" 32.99375,\n", | |
" 32.82608695652174,\n", | |
" 32.74074074074074,\n", | |
" 32.59509202453988,\n", | |
" 32.420731707317074,\n", | |
" 32.303030303030305,\n", | |
" 32.144578313253014,\n", | |
" 32.06586826347306,\n", | |
" 31.928571428571427,\n", | |
" 31.763313609467456,\n", | |
" 31.652941176470588,\n", | |
" 31.502923976608187,\n", | |
" 31.430232558139537,\n", | |
" 31.30057803468208,\n", | |
" 31.14367816091954,\n", | |
" 31.04,\n", | |
" 30.897727272727273,\n", | |
" 30.83050847457627,\n", | |
" 30.707865168539325,\n", | |
" 30.558659217877096,\n", | |
" 30.461111111111112,\n", | |
" 30.325966850828728,\n", | |
" 30.263736263736263,\n", | |
" 30.147540983606557,\n", | |
" 30.005434782608695,\n", | |
" 29.913513513513514,\n", | |
" 29.78494623655914,\n", | |
" 29.727272727272727,\n", | |
" 29.617021276595743,\n", | |
" 29.48148148148148,\n", | |
" 29.394736842105264,\n", | |
" 29.272251308900522,\n", | |
" 29.21875,\n", | |
" 29.1139896373057,\n", | |
" 28.984536082474225,\n", | |
" 28.902564102564103,\n", | |
" 28.785714285714285,\n", | |
" 28.736040609137056,\n", | |
" 28.636363636363637,\n", | |
" 28.512562814070353,\n", | |
" 28.435,\n", | |
" 28.323383084577113,\n", | |
" 28.277227722772277,\n", | |
" 28.182266009852217,\n", | |
" 28.063725490196077,\n", | |
" 27.990243902439026,\n", | |
" 27.883495145631066,\n", | |
" 27.840579710144926,\n", | |
" 27.75,\n", | |
" 27.636363636363637,\n", | |
" 27.566666666666666,\n", | |
" 27.46445497630332,\n", | |
" 27.42452830188679,\n", | |
" 27.338028169014084,\n", | |
" 27.22897196261682,\n", | |
" 27.162790697674417,\n", | |
" 27.064814814814813,\n", | |
" 27.027649769585253,\n", | |
" 26.94495412844037,\n", | |
" 26.840182648401825,\n", | |
" 26.777272727272727,\n", | |
" 26.683257918552037,\n", | |
" 26.64864864864865,\n", | |
" 26.569506726457398,\n", | |
" 26.46875,\n", | |
" 26.40888888888889,\n", | |
" 26.31858407079646,\n", | |
" 26.2863436123348,\n", | |
" 26.210526315789473,\n", | |
" 26.11353711790393,\n", | |
" 26.056521739130435,\n", | |
" 25.96969696969697,\n", | |
" 25.939655172413794,\n", | |
" 25.86695278969957,\n", | |
" 25.773504273504273,\n", | |
" 25.719148936170214,\n", | |
" 25.635593220338983,\n", | |
" 25.60759493670886,\n", | |
" 25.537815126050422,\n", | |
" 25.447698744769873,\n", | |
" 25.395833333333332,\n", | |
" 25.315352697095435,\n", | |
" 25.289256198347108,\n", | |
" 25.22222222222222,\n", | |
" 25.135245901639344,\n", | |
" 25.085714285714285,\n", | |
" 25.008130081300813,\n", | |
" 24.983805668016196,\n", | |
" 24.919354838709676,\n", | |
" 24.835341365461847,\n", | |
" 24.788,\n", | |
" 24.713147410358566,\n", | |
" 24.69047619047619,\n", | |
" 24.628458498023715,\n", | |
" 24.54724409448819,\n", | |
" 24.501960784313727,\n", | |
" 24.4296875,\n", | |
" 24.408560311284045,\n", | |
" 24.348837209302324,\n", | |
" 24.27027027027027,\n", | |
" 24.226923076923075,\n", | |
" 24.157088122605366,\n", | |
" 24.137404580152673,\n", | |
" 24.079847908745247,\n", | |
" 24.00378787878788,\n", | |
" 23.962264150943398,\n", | |
" 23.894736842105264,\n", | |
" 23.876404494382022,\n", | |
" 23.82089552238806,\n", | |
" 23.74721189591078,\n", | |
" 23.70740740740741,\n", | |
" 23.642066420664207,\n", | |
" 23.625,\n", | |
" 23.571428571428573,\n", | |
" 23.5,\n", | |
" 23.46181818181818,\n", | |
" 23.39855072463768,\n", | |
" 23.382671480144403,\n", | |
" 23.33093525179856,\n", | |
" 23.261648745519715,\n", | |
" 23.225,\n", | |
" 23.16370106761566,\n", | |
" 23.148936170212767,\n", | |
" 23.098939929328623,\n", | |
" 23.031690140845072,\n", | |
" 22.996491228070177,\n", | |
" 22.937062937062937,\n", | |
" 22.923344947735192,\n", | |
" 22.875,\n", | |
" 22.80968858131488,\n", | |
" 22.775862068965516,\n", | |
" 22.718213058419245,\n", | |
" 22.705479452054796,\n", | |
" 22.658703071672356,\n", | |
" 22.595238095238095,\n", | |
" 22.56271186440678,\n", | |
" 22.506756756756758,\n", | |
" 22.494949494949495,\n", | |
" 22.449664429530202,\n", | |
" 22.387959866220736,\n", | |
" 22.356666666666666]" | |
] | |
}, | |
"execution_count": 61, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(islice(g_running_mean(things(999)), 300))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(0, 1)\n", | |
"(2, 3)\n", | |
"(4, 5)\n", | |
"(6, 7)\n", | |
"(8, 9)\n", | |
"(10, 11)\n", | |
"(12, 13)\n", | |
"(14, 15)\n" | |
] | |
} | |
], | |
"source": [ | |
"for x in group_by_2(range(17)):\n", | |
" print(x)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def ungroup_by_2(iterator):\n", | |
" for x,y in iterator:\n", | |
" yield y\n", | |
" yield x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[1, 0, 3, 2, 5, 4, 7, 6, 9, 8]" | |
] | |
}, | |
"execution_count": 64, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(ungroup_by_2(group_by_2(range(10))))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def fizzbuzz(sequence):\n", | |
" for x in sequence:\n", | |
" if (x % 5 == 0) and (x % 7 == 0):\n", | |
" yield 'fizzbuzz'\n", | |
" elif x % 5 == 0 :\n", | |
" yield 'fizz'\n", | |
" elif x % 7 == 0 :\n", | |
" yield 'buzz'\n", | |
" else:\n", | |
" yield x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"fizzbuzz\n", | |
"1\n", | |
"2\n", | |
"3\n", | |
"4\n", | |
"fizz\n", | |
"6\n", | |
"buzz\n", | |
"8\n", | |
"9\n", | |
"fizz\n", | |
"11\n", | |
"12\n", | |
"13\n", | |
"buzz\n", | |
"fizz\n", | |
"16\n", | |
"17\n", | |
"18\n", | |
"19\n", | |
"fizz\n", | |
"buzz\n", | |
"22\n", | |
"23\n", | |
"24\n", | |
"fizz\n", | |
"26\n", | |
"27\n", | |
"buzz\n", | |
"29\n", | |
"fizz\n", | |
"31\n", | |
"32\n", | |
"33\n", | |
"34\n", | |
"fizzbuzz\n", | |
"36\n", | |
"37\n", | |
"38\n", | |
"39\n", | |
"fizz\n", | |
"41\n", | |
"buzz\n", | |
"43\n", | |
"44\n", | |
"fizz\n", | |
"46\n", | |
"47\n", | |
"48\n", | |
"buzz\n" | |
] | |
} | |
], | |
"source": [ | |
"for x in fizzbuzz(range(50)):\n", | |
" print(x)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## https://www.youtube.com/watch?v=cMo4fnCbSPc\n", | |
"\n", | |
"Big Data in Little Laptop: A Streaming Story in Python | EuroSciPy 2015 | Juan Nunez-Iglesias" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Yield from" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def example(a, b):\n", | |
" yield from a\n", | |
" yield from b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1, 2, 3, 5, 8, 13, 21, 34]" | |
] | |
}, | |
"execution_count": 68, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(example(range(10), fib(max=30)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def example_yield(a, b):\n", | |
" yield a\n", | |
" yield b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[range(0, 10), <generator object fib at 0x1051e6e08>]" | |
] | |
}, | |
"execution_count": 70, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(example_yield(range(10), fib(max=30)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# switch to 3.4" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Play with `__code__`" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"In Python 3, it is relatively easy to swap the implementations of two functions, or to inspect what a function can do. \n", | |
"To do so, we want to access the `__code__` attribute of the function objects. " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Example:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def add(a, b):\n", | |
" return a+b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def sub(a, b): \n", | |
" return a-b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def closure(a,b):\n", | |
" return add(a,b)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"closure(2,1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def swap(a,b):\n", | |
" a.__code__, b.__code__ = b.__code__, a.__code__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"swap(add,sub)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"closure(2,1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"add(2,1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sub(2,1)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`__code__` actually allows you to access the bytecode of the object (which is an implementation detail of CPython)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import dis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 2 0 LOAD_FAST 0 (a)\n", | |
" 3 LOAD_FAST 1 (b)\n", | |
" 6 BINARY_SUBTRACT\n", | |
" 7 RETURN_VALUE\n" | |
] | |
} | |
], | |
"source": [ | |
"dis.dis(add.__code__)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"OK, let's have a quick look at something a bit higher level." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def myadd(a, b, optional_behavior=False):\n", | |
" if optional_behavior is True:\n", | |
" print('hey')\n", | |
" return a+b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The slowest run took 11.33 times longer than the fastest. This could mean that an intermediate result is being cached \n", | |
"10000000 loops, best of 3: 121 ns per loop\n", | |
"The slowest run took 5.94 times longer than the fastest. This could mean that an intermediate result is being cached \n", | |
"10000000 loops, best of 3: 183 ns per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit add(258,259)\n", | |
"%timeit myadd(258,259)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 2 0 LOAD_FAST 0 (a)\n", | |
" 3 LOAD_FAST 1 (b)\n", | |
" 6 BINARY_SUBTRACT\n", | |
" 7 RETURN_VALUE\n" | |
] | |
} | |
], | |
"source": [ | |
"dis.dis(add)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 2 0 LOAD_FAST 2 (optional_behavior)\n", | |
" 3 LOAD_CONST 1 (True)\n", | |
" 6 COMPARE_OP 8 (is)\n", | |
" 9 POP_JUMP_IF_FALSE 25\n", | |
"\n", | |
" 3 12 LOAD_GLOBAL 0 (print)\n", | |
" 15 LOAD_CONST 2 ('hey')\n", | |
" 18 CALL_FUNCTION 1 (1 positional, 0 keyword pair)\n", | |
" 21 POP_TOP\n", | |
" 22 JUMP_FORWARD 0 (to 25)\n", | |
"\n", | |
" 4 >> 25 LOAD_FAST 0 (a)\n", | |
" 28 LOAD_FAST 1 (b)\n", | |
" 31 BINARY_ADD\n", | |
" 32 RETURN_VALUE\n" | |
] | |
} | |
], | |
"source": [ | |
"dis.dis(myadd)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Need some utility functions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def flatten(nseq):\n", | |
" if len(nseq) > 1:\n", | |
" x = [nseq[0]]\n", | |
" x.extend(flatten(nseq[1]))\n", | |
" return x\n", | |
" else:\n", | |
" return nseq\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"In the case where I have a nested sequence, this flattens it" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[1, 2, 3]" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"flatten([1,[2,[3]]])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Onto an actual problem" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"I have 2 sequences, and I want to determine the **length** of the longest common subsequence.\n", | |
"For example, for \"H**ac**k**e**r Withi**n**\" vs \"D**a**ta S**c**i**en**ce\" the common subsequence is `a` `c` `e` `n`, of length 4.\n", | |
"\n", | |
"Below is an implementation of an algorithm that calculates such a length." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import time \n", | |
"def diffs(seq1, seq2, debug=False, backtrack=True):\n", | |
" current = [0]*(len(seq2)+1)\n", | |
" prev = [0]*(len(seq2)+1)\n", | |
" cp = [[]]\n", | |
" if backtrack is True:\n", | |
" cc = [[(0,0,0)]]*(len(seq2)+1)\n", | |
" cp = [[(0,0,0)]]*(len(seq2)+1)\n", | |
" \n", | |
" if debug is True:\n", | |
" print(' '+', '.join([c for c in seq2]))\n", | |
" \n", | |
" for j,c1 in enumerate(seq1):\n", | |
" for i,c2 in enumerate(seq2):\n", | |
" cr = current[i]\n", | |
" pr = prev[i+1]\n", | |
" if (c1 == c2):\n", | |
" l = [prev[i]+1, cr, pr]\n", | |
" m = max(l)\n", | |
" current[i+1] = m\n", | |
" if backtrack is True:\n", | |
" ind = l.index(m)\n", | |
" cc[i+1] = [(j,i,m), cp[i]]\n", | |
" else:\n", | |
" current[i+1] = cr if cr > pr else pr\n", | |
" if backtrack in [True,'yes', 1]:\n", | |
" ind,m = (0,cr) if cr > pr else (1, pr)\n", | |
" if ind == 0:\n", | |
" cc[i+1] = cc[i]\n", | |
" else :\n", | |
" cc[i+1] = cp[i+1]\n", | |
" current, prev = prev, current\n", | |
" if backtrack is True:\n", | |
" cc, cp = cp, cc\n", | |
"\n", | |
" if debug:\n", | |
" print(c1, prev[1:])\n", | |
" return flatten(cp[-1]), prev[-1] \n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([(12, 9, 4), (4, 8, 3), (2, 6, 2), (1, 3, 1), (0, 0, 0)], 4)" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"diffs('Hacker Within', 'Data Science')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" D, a, t, a, , S, c, i, e, n, c, e\n", | |
"H [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"a [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n", | |
"c [0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]\n", | |
"k [0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]\n", | |
"e [0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3]\n", | |
"r [0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3]\n", | |
" [0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]\n", | |
"W [0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]\n", | |
"i [0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]\n", | |
"t [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]\n", | |
"h [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]\n", | |
"i [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]\n", | |
"n [0, 1, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4]\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"([(12, 9, 4), (4, 8, 3), (2, 6, 2), (1, 3, 1), (0, 0, 0)], 4)" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"diffs('Hacker Within', 'Data Science', debug=True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Problem, backtracking is computationally expensive" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Solution: make it optional!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"N=50" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1 loops, best of 3: 229 ms per loop\n", | |
"10 loops, best of 3: 140 ms per loop\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit diffs('Hacker Within'*N, 'Data Science'*N)\n", | |
"%timeit diffs('Hacker Within'*N, 'Data Science'*N, backtrack=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"OK, we shaved off ~40% by making backtracking optional, though I'm sure we can do better. \n", | |
"Also, we have a tight loop; I'm sure just evaluating the `if` takes some time." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied (use --upgrade to upgrade): line-profiler in ./anaconda3/envs/py34/lib/python3.4/site-packages\r\n" | |
] | |
} | |
], | |
"source": [ | |
"import sys\n", | |
"!{sys.executable} -m pip install line-profiler" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%load_ext line_profiler" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%lprun -f diffs diffs('Hi There, what is your name'*30, 'Hello Hacker within, How are you ?'*30, backtrack=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Oh, just one of the `if` statements can take up to 18% of the time our function takes to run. " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### AST to the rescue" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The AST (Abstract Syntax Tree) can help us deal with that." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import inspect\n", | |
"import ast" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from ast import *\n", | |
"\n", | |
"def dump(node, annotate_fields=True, include_attributes=False, indent=' '):\n", | |
" \"\"\"\n", | |
" Return a formatted dump of the tree in *node*. This is mainly useful for\n", | |
" debugging purposes. The returned string will show the names and the values\n", | |
" for fields. This makes the code impossible to evaluate, so if evaluation is\n", | |
" wanted *annotate_fields* must be set to False. Attributes such as line\n", | |
" numbers and column offsets are not dumped by default. If this is wanted,\n", | |
" *include_attributes* can be set to True.\n", | |
" \"\"\"\n", | |
" def _format(node, level=0):\n", | |
" if isinstance(node, AST):\n", | |
" fields = [(a, _format(b, level)) for a, b in iter_fields(node)]\n", | |
" if include_attributes and node._attributes:\n", | |
" fields.extend([(a, _format(getattr(node, a), level))\n", | |
" for a in node._attributes])\n", | |
" return ''.join([\n", | |
" node.__class__.__name__,\n", | |
" '(',\n", | |
" ', '.join(('%s=%s' % field for field in fields)\n", | |
" if annotate_fields else\n", | |
" (b for a, b in fields)),\n", | |
" ')'])\n", | |
" elif isinstance(node, list):\n", | |
" lines = ['[']\n", | |
" lines.extend((indent * (level + 2) + _format(x, level + 2) + ','\n", | |
" for x in node))\n", | |
" if len(lines) > 1:\n", | |
" lines.append(indent * (level + 1) + ']')\n", | |
" else:\n", | |
" lines[-1] += ']'\n", | |
" return '\\n'.join(lines)\n", | |
" return repr(node)\n", | |
" \n", | |
" if not isinstance(node, AST):\n", | |
" raise TypeError('expected AST, got %r' % node.__class__.__name__)\n", | |
" return _format(node)\n", | |
"\n", | |
"def parseprint(code, filename=\"<string>\", mode=\"exec\", **kwargs):\n", | |
" \"\"\"Parse some code from a string and pretty-print it.\"\"\"\n", | |
" node = parse(code, mode=mode) # An ode to the code\n", | |
" print(dump(node, **kwargs))\n", | |
" \n", | |
"def pn(tree):\n", | |
" print(dump(tree))\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"FunctionDef(name='diffs', args=arguments(args=[\n", | |
" arg(arg='seq1', annotation=None),\n", | |
" arg(arg='seq2', annotation=None),\n", | |
" arg(arg='debug', annotation=None),\n", | |
" arg(arg='backtrack', annotation=None),\n", | |
" ], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[\n", | |
" NameConstant(value=False),\n", | |
" NameConstant(value=True),\n", | |
" ]), body=[\n", | |
" Assign(targets=[\n", | |
" Name(id='current', ctx=Store()),\n", | |
" ], value=BinOp(left=List(elts=[\n", | |
" Num(n=0),\n", | |
" ], ctx=Load()), op=Mult(), right=BinOp(left=Call(func=Name(id='len', ctx=Load()), args=[\n", | |
" Name(id='seq2', ctx=Load()),\n", | |
" ], keywords=[], starargs=None, kwargs=None), op=Add(), right=Num(n=1)))),\n", | |
" Assign(targets=[\n", | |
" Name(id='prev', ctx=Store()),\n", | |
" ], value=BinOp(left=List(elts=[\n", | |
" Num(n=0),\n", | |
" ], ctx=Load()), op=Mult(), right=BinOp(left=Call(func=Name(id='len', ctx=Load()), args=[\n", | |
" Name(id='seq2', ctx=Load()),\n", | |
" ], keywords=[], starargs=None, kwargs=None), op=Add(), right=Num(n=1)))),\n", | |
" Assign(targets=[\n", | |
" Name(id='cp', ctx=Store()),\n", | |
" ], value=List(elts=[\n", | |
" List(elts=[], ctx=Load()),\n", | |
" ], ctx=Load())),\n", | |
" If(test=Compare(left=Name(id='backtrack', ctx=Load()), ops=[\n", | |
" Is(),\n", | |
" ], comparators=[\n", | |
" NameConstant(value=True),\n", | |
" ]), body=[\n", | |
" Assign(targets=[\n", | |
" Name(id='cc', ctx=Store()),\n", | |
" ], value=BinOp(left=List(elts=[\n", | |
" List(elts=[\n", | |
" Tuple(elts=[\n", | |
" Num(n=0),\n", | |
" Num(n=0),\n", | |
" Num(n=0),\n", | |
" ], ctx=Load()),\n", | |
" ], ctx=Load()),\n", | |
" ], ctx=Load()), op=Mult(), right=BinOp(left=Call(func=Name(id='len', ctx=Load()), args=[\n", | |
" Name(id='seq2', ctx=Load()),\n", | |
" ], keywords=[], starargs=None, kwargs=None), op=Add(), right=Num(n=1)))),\n", | |
" Assign(targets=[\n", | |
" Name(id='cp', ctx=Store()),\n", | |
" ], value=BinOp(left=List(elts=[\n", | |
" List(elts=[\n", | |
" Tuple(elts=[\n", | |
" Num(n=0),\n", | |
" Num(n=0),\n", | |
" Num(n=0),\n", | |
" ], ctx=Load()),\n", | |
" ], ctx=Load()),\n", | |
" ], ctx=Load()), op=Mult(), right=BinOp(left=Call(func=Name(id='len', ctx=Load()), args=[\n", | |
" Name(id='seq2', ctx=Load()),\n", | |
" ], keywords=[], starargs=None, kwargs=None), op=Add(), right=Num(n=1)))),\n", | |
" ], orelse=[]),\n", | |
" If(test=Compare(left=Name(id='debug', ctx=Load()), ops=[\n", | |
" Is(),\n", | |
" ], comparators=[\n", | |
" NameConstant(value=True),\n", | |
" ]), body=[\n", | |
" Expr(value=Call(func=Name(id='print', ctx=Load()), args=[\n", | |
" BinOp(left=Str(s=' '), op=Add(), right=Call(func=Attribute(value=Str(s=', '), attr='join', ctx=Load()), args=[\n", | |
" ListComp(elt=Name(id='c', ctx=Load()), generators=[\n", | |
" comprehension(target=Name(id='c', ctx=Store()), iter=Name(id='seq2', ctx=Load()), ifs=[]),\n", | |
" ]),\n", | |
" ], keywords=[], starargs=None, kwargs=None)),\n", | |
" ], keywords=[], starargs=None, kwargs=None)),\n", | |
" ], orelse=[]),\n", | |
" For(target=Tuple(elts=[\n", | |
" Name(id='j', ctx=Store()),\n", | |
" Name(id='c1', ctx=Store()),\n", | |
" ], ctx=Store()), iter=Call(func=Name(id='enumerate', ctx=Load()), args=[\n", | |
" Name(id='seq1', ctx=Load()),\n", | |
" ], keywords=[], starargs=None, kwargs=None), body=[\n", | |
" For(target=Tuple(elts=[\n", | |
" Name(id='i', ctx=Store()),\n", | |
" Name(id='c2', ctx=Store()),\n", | |
" ], ctx=Store()), iter=Call(func=Name(id='enumerate', ctx=Load()), args=[\n", | |
" Name(id='seq2', ctx=Load()),\n", | |
" ], keywords=[], starargs=None, kwargs=None), body=[\n", | |
" Assign(targets=[\n", | |
" Name(id='cr', ctx=Store()),\n", | |
" ], value=Subscript(value=Name(id='current', ctx=Load()), slice=Index(value=Name(id='i', ctx=Load())), ctx=Load())),\n", | |
" Assign(targets=[\n", | |
" Name(id='pr', ctx=Store()),\n", | |
" ], value=Subscript(value=Name(id='prev', ctx=Load()), slice=Index(value=BinOp(left=Name(id='i', ctx=Load()), op=Add(), right=Num(n=1))), ctx=Load())),\n", | |
" If(test=Compare(left=Name(id='c1', ctx=Load()), ops=[\n", | |
" Eq(),\n", | |
" ], comparators=[\n", | |
" Name(id='c2', ctx=Load()),\n", | |
" ]), body=[\n", | |
" Assign(targets=[\n", | |
" Name(id='l', ctx=Store()),\n", | |
" ], value=List(elts=[\n", | |
" BinOp(left=Subscript(value=Name(id='prev', ctx=Load()), slice=Index(value=Name(id='i', ctx=Load())), ctx=Load()), op=Add(), right=Num(n=1)),\n", | |
" Name(id='cr', ctx=Load()),\n", | |
" Name(id='pr', ctx=Load()),\n", | |
" ], ctx=Load())),\n", | |
" Assign(targets=[\n", | |
" Name(id='m', ctx=Store()),\n", | |
" ], value=Call(func=Name(id='max', ctx=Load()), args=[\n", | |
" Name(id='l', ctx=Load()),\n", | |
" ], keywords=[], starargs=None, kwargs=None)),\n", | |
" Assign(targets=[\n", | |
" Subscript(value=Name(id='current', ctx=Load()), slice=Index(value=BinOp(left=Name(id='i', ctx=Load()), op=Add(), right=Num(n=1))), ctx=Store()),\n", | |
" ], value=Name(id='m', ctx=Load())),\n", | |
" If(test=Compare(left=Name(id='backtrack', ctx=Load()), ops=[\n", | |
" Is(),\n", | |
" ], comparators=[\n", | |
" NameConstant(value=True),\n", | |
" ]), body=[\n", | |
" Assign(targets=[\n", | |
" Name(id='ind', ctx=Store()),\n", | |
" ], value=Call(func=Attribute(value=Name(id='l', ctx=Load()), attr='index', ctx=Load()), args=[\n", | |
" Name(id='m', ctx=Load()),\n", | |
" ], keywords=[], starargs=None, kwargs=None)),\n", | |
" Assign(targets=[\n", | |
" Subscript(value=Name(id='cc', ctx=Load()), slice=Index(value=BinOp(left=Name(id='i', ctx=Load()), op=Add(), right=Num(n=1))), ctx=Store()),\n", | |
" ], value=List(elts=[\n", | |
" Tuple(elts=[\n", | |
" Name(id='j', ctx=Load()),\n", | |
" Name(id='i', ctx=Load()),\n", | |
" Name(id='m', ctx=Load()),\n", | |
" ], ctx=Load()),\n", | |
" Subscript(value=Name(id='cp', ctx=Load()), slice=Index(value=Name(id='i', ctx=Load())), ctx=Load()),\n", | |
" ], ctx=Load())),\n", | |
" ], orelse=[]),\n", | |
" ], orelse=[\n", | |
" Assign(targets=[\n", | |
" Subscript(value=Name(id='current', ctx=Load()), slice=Index(value=BinOp(left=Name(id='i', ctx=Load()), op=Add(), right=Num(n=1))), ctx=Store()),\n", | |
" ], value=IfExp(test=Compare(left=Name(id='cr', ctx=Load()), ops=[\n", | |
" Gt(),\n", | |
" ], comparators=[\n", | |
" Name(id='pr', ctx=Load()),\n", | |
" ]), body=Name(id='cr', ctx=Load()), orelse=Name(id='pr', ctx=Load()))),\n", | |
" If(test=Compare(left=Name(id='backtrack', ctx=Load()), ops=[\n", | |
" In(),\n", | |
" ], comparators=[\n", | |
" List(elts=[\n", | |
" NameConstant(value=True),\n", | |
" Str(s='yes'),\n", | |
" Num(n=1),\n", | |
" ], ctx=Load()),\n", | |
" ]), body=[\n", | |
" Assign(targets=[\n", | |
" Tuple(elts=[\n", | |