Skip to content

Instantly share code, notes, and snippets.

@Carreau
Last active November 10, 2015 04:26
Show Gist options
  • Save Carreau/b8ed0853ab93a1943319 to your computer and use it in GitHub Desktop.
Save Carreau/b8ed0853ab93a1943319 to your computer and use it in GitHub Desktop.
Advance Python hacker within
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Playing With Python (python 3.5 of course)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A bit of advance Python programming."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Disclamer : This notebook contain **a lot** of bad ideas."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Aparte"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Please try Xonsh, a shell that allow you to use python syntax on modules\n",
"\n",
"```\n",
"pip install xonsh\n",
"```\n",
"\n",
"```\n",
"$ [print('enjoy') for i in range(10)]\n",
"```\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Context manager"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The classical context manager you are used to:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"2\n",
"3\n",
"4\n",
"hello\n",
"everyone\n"
]
}
],
"source": [
"with open('myfile.txt') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"The context manager ensure that whatever happend when the file is open, the\n",
"file will be closed after."
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## How to write a context manager ?"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### simple method:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def fib(*,max=100):\n",
" yield 1\n",
" yield 1\n",
" previous, current = 1,1\n",
" while True:\n",
" previous, current = current, previous+current\n",
" yield current\n",
" if current > max:\n",
" raise StopIteration\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"1\n",
"2\n",
"3\n",
"5\n",
"8\n",
"13\n",
"21\n",
"34\n",
"55\n",
"89\n",
"144\n"
]
}
],
"source": [
"for i in fib():\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def g_running_mean(iterator):\n",
" rsum = 0\n",
" rnumber = 0\n",
" for data in iterator:\n",
" rsum += data\n",
" rnumber += 1\n",
" yield rsum/rnumber\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from itertools import islice\n",
"\n",
"def things(start):\n",
" value = start\n",
" while value != 1:\n",
" yield value\n",
" if value % 2 == 0:\n",
" value = value*3+1\n",
" else : \n",
" value = value // 2\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def group_by_2(iterable):\n",
" iterator = iter(iterable)\n",
" while True:\n",
" yield (next(iterator), next(iterator))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from contextlib import contextmanager"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"@contextmanager\n",
"def mycontext(a):\n",
" print('entering mycontext')\n",
" try:\n",
" yield a\n",
" except Exception as e:\n",
" print('exiting mycontext')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entering mycontext\n",
"I KNOW HOW TO WRITE A CONTEXT MANAGER !\n"
]
}
],
"source": [
"with mycontext('i know how to write a context manager !') as value:\n",
" print(value.upper())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entering mycontext\n",
"exiting mycontext\n"
]
}
],
"source": [
"with mycontext(0) as v:\n",
" print(1/v)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Longer method (not more complicated)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class MyContext(object):\n",
" \n",
" def __init__(self, value=None):\n",
" self.v = value\n",
" \n",
" def __enter__(self):\n",
" print('entering')\n",
" return self.v\n",
" \n",
" \n",
" def __exit__(self, exc_type, exc_value, traceback):\n",
" print('exit', exc_type, exc_value)\n",
" #return False/True\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entering\n",
"1.0\n",
"exit None None\n"
]
}
],
"source": [
"with MyContext(1) as c:\n",
" print(1/c)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entering\n",
"exit <class 'ZeroDivisionError'> division by zero\n"
]
},
{
"ename": "ZeroDivisionError",
"evalue": "division by zero",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-1546bd4d75bc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mMyContext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mZeroDivisionError\u001b[0m: division by zero"
]
}
],
"source": [
"with MyContext(0) as c:\n",
" print(1/c)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The context manager may not receive any value, and is allowed to not be assigned:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"entering\n",
"still works\n",
"exit None None\n"
]
}
],
"source": [
"c = MyContext()\n",
"\n",
"with c:\n",
" print('still works')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The exit part is still executed !"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Useful example\n",
"\n",
"Log everytime a file is open and for how long:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import datetime\n",
"import traceback\n",
"import io"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class LogUsage(object):\n",
" \n",
" def __init__(self, filename, **options):\n",
" self.filename = filename \n",
" self.options = options\n",
" \n",
" def __enter__(self):\n",
" self._start = datetime.datetime.now()\n",
" \n",
" self._f = io.open(self.filename, **self.options)\n",
" self._logfile = io.open(self.filename+'.log', 'at')\n",
" self._logfile.writelines(['\\nstart processing ', self.filename, ' at ', str(self._start)])\n",
" return self._f\n",
" \n",
" \n",
" def __exit__(self, exc_type, exc_value, tb):\n",
" self._f.close()\n",
" if exc_type:\n",
" self._logfile.writelines(' something went wrong ! \\n')\n",
" self._logfile.writelines(traceback.format_exception(exc_type, exc_value, tb))\n",
" delta = (datetime.datetime.now() - self._start)\n",
" self._logfile.writelines(['\\nstop processing ', self.filename, ' after ', str(delta.seconds),'.',str(int(delta.microseconds/1e3)), ' seconds'])\n",
" self._logfile.close()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sys\n",
"from IPython.core.ultratb import ColorTB\n",
"\n",
"def highlight_tb():\n",
" ColorTB('LightBG')(sys.exc_info())"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"2\n",
"3\n",
"4\n",
"hello\n",
"Traceback \u001b[0;36m(most recent call last)\u001b[0m:\n",
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-20-b838b6927900>\"\u001b[0;36m, line \u001b[0;32m6\u001b[0;36m, in \u001b[0;35m<module>\u001b[0;36m\u001b[0m\n",
"\u001b[0;31m time.sleep(float(l)/10)\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m\u001b[0;31m:\u001b[0m could not convert string to float: 'hello\\n'\n",
"\n"
]
}
],
"source": [
"try:\n",
" import time \n",
" with LogUsage('myfile.txt') as f:\n",
" for l in f.readlines():\n",
" print(l, end='')\n",
" time.sleep(float(l)/10)\n",
"except Exception as e:\n",
" highlight_tb()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r\n",
"start processing myfile.txt at 2015-10-26 17:00:50.855749\r\n",
"stop processing myfile.txt after 1.376 seconds\r\n",
"start processing myfile.txt at 2015-10-26 17:01:06.959869\r\n",
"stop processing myfile.txt after 1.376 seconds\r\n",
"start processing myfile.txt at 2015-10-26 17:02:43.057339\r\n",
"stop processing myfile.txt after 1.372 seconds\r\n",
"start processing myfile.txt at 2015-10-26 17:04:29.358947something went wrong !Traceback (most recent call last):\r\n",
" File \"<ipython-input-11-9fc91c0489dd>\", line 5, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.17 seconds\r\n",
"start processing myfile.txt at 2015-10-28 11:53:08.991259something went wrong !Traceback (most recent call last):\r\n",
" File \"<ipython-input-3-9fc91c0489dd>\", line 5, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.21 seconds\r\n",
"start processing myfile.txt at 2015-10-28 15:43:59.458339something went wrong !Traceback (most recent call last):\r\n",
" File \"<ipython-input-30-9fc91c0489dd>\", line 5, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.17 seconds\r\n",
"start processing myfile.txt at 2015-10-28 15:44:54.151116 something went wrong ! \r\n",
"Traceback (most recent call last):\r\n",
" File \"<ipython-input-33-9fc91c0489dd>\", line 5, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.24 seconds\r\n",
"start processing myfile.txt at 2015-10-28 16:15:41.165153 something went wrong ! \r\n",
"Traceback (most recent call last):\r\n",
" File \"<ipython-input-16-9fc91c0489dd>\", line 5, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.9 seconds\r\n",
"start processing myfile.txt at 2015-10-28 16:54:21.322234 something went wrong ! \r\n",
"Traceback (most recent call last):\r\n",
" File \"<ipython-input-68-9fc91c0489dd>\", line 5, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.14 seconds\r\n",
"start processing myfile.txt at 2015-10-28 16:58:32.333745 something went wrong ! \r\n",
"Traceback (most recent call last):\r\n",
" File \"<ipython-input-18-9fc91c0489dd>\", line 5, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.15 seconds\r\n",
"start processing myfile.txt at 2015-10-29 13:47:48.396624\r\n",
"stop processing myfile.txt after 1.10 seconds\r\n",
"start processing myfile.txt at 2015-10-29 13:48:48.658500\r\n",
"stop processing myfile.txt after 1.21 seconds\r\n",
"start processing myfile.txt at 2015-10-29 13:50:26.635655\r\n",
"stop processing myfile.txt after 1.12 seconds\r\n",
"start processing myfile.txt at 2015-10-29 13:51:19.575909 something went wrong ! \r\n",
"Traceback (most recent call last):\r\n",
" File \"<ipython-input-21-b70ea1902583>\", line 6, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.16 seconds\r\n",
"start processing myfile.txt at 2015-10-29 13:51:55.779654 something went wrong ! \r\n",
"Traceback (most recent call last):\r\n",
" File \"<ipython-input-23-b838b6927900>\", line 6, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.17 seconds\r\n",
"start processing myfile.txt at 2015-10-29 14:04:09.134502 something went wrong ! \r\n",
"Traceback (most recent call last):\r\n",
" File \"<ipython-input-18-b838b6927900>\", line 6, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.16 seconds\r\n",
"start processing myfile.txt at 2015-10-29 14:04:21.209909 something went wrong ! \r\n",
"Traceback (most recent call last):\r\n",
" File \"<ipython-input-20-b838b6927900>\", line 6, in <module>\r\n",
" time.sleep(float(l)/10)\r\n",
"ValueError: could not convert string to float: 'hello\\n'\r\n",
"\r\n",
"stop processing myfile.txt after 1.13 seconds"
]
}
],
"source": [
"cat myfile.txt.log"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exercise: \n",
"\n",
"Write a context manager that write (& check and read) the checksum of a file, before giving it to you. "
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import hashlib"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mymd5 = hashlib.md5(b'somehting').hexdigest()\n",
"\n",
"# Oops corrupted data...\n",
"hashlib.md5(b'something').hexdigest() == mymd5"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#def/class open_md5():\n",
"# \n",
"# ..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Homework"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"(dont' do this at home) Write a context manager that temporarly replace the builtin open with our above logger. "
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import builtins"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"print(open == builtins.open)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"example on how to swith builtins, and why it is complicated to follow. \n",
"Try to guess what the folowign will print. "
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hi\n",
"=====\n",
"<built-in function print>\n",
"<built-in function print>\n",
"<built-in function print>\n",
"=====\n",
"<built-in function print>\n"
]
}
],
"source": [
"_saved_open = open\n",
"builtins.open = print\n",
"open('hi')\n",
"open('=====')\n",
"\n",
"open(open)\n",
"print(open)\n",
"print(print)\n",
"\n",
"open('=====')\n",
"\n",
"print(builtins.open)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"builtins.open = _saved_open"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import io\n",
"assert open == io.open"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"builtins.open == io.open"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Did you do it with the class of the `@contextmanager` ?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A reminder about `__things__` metods"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`__things__` are called `dunder`s (I guess for double under), you might hear that in talks, and it's easier\n",
"and quicker to say than `underscore underscore init underscprffft underschrulpf`:\n",
"\n",
" - `__init__` -> dunder init\n",
" - `__enter__` -> dunder enter\n",
" - `__exit__` -> dunder enter\n",
" - `__...__` -> dunder ...\n",
" \n",
"One or a goup of these are often refered to as magic methods.\n",
"\n",
"> a sufficiently advanced technology is indistinguishable from magic \n",
"\n",
"Aka: you probably do not want to name your methods with a dunder (`__...__`),\n",
"or start it with a double underscore (`__...`)\n",
"\n",
"In case you do not remember the **convention** is to start private methods with a **single** underscore,\n",
"(and to use a **double underscore** if your want your code to break in unexpected ways)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Many Python object are **Things** because they implement a group of such magics methods with `__dunder__`.\n",
"\n",
 TIP: on many website">
"> TIP: on many websites dunders are shown in italic; use backticks\n",
"\n",
"\n",
"Dunder are used for operator overloading, and some builtins function, and also python **private** internals.\n",
"\n",
"Don't be afraid to break all the things !\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Examples:"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class Callable(object):\n",
" \n",
" def __init__(self, increment):\n",
" self.increment = increment\n",
" \n",
" def __call__(self, other):\n",
" return other + self.increment\n",
" \n",
" def __getattr__(self, string):\n",
" d = {'one':1,'two':2,'three':3}\n",
" v = d[string]\n",
" return self.increment + v\n",
" \n",
" \n",
"by3 = Callable(3)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"12"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"by3(9)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"13"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Callable(6)(7)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Callable(6).one"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can do bad things"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from SPQR import X,M"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"we are in 2015, and I'm 29, I though was born in...."
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"MCMLXXXVI"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"M.M.X.V - X.X.I.X"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# really bad idea"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### More bad idea : can you have the above be **also** a contextmanager ?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### useless example"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import webbrowser\n",
"class GH(object):\n",
" \n",
" def __matmul__(self, other):\n",
" webbrowser.open('https://github.com/{}'.format(other.name))\n",
" \n",
" \n",
"github = GH()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class User(object):\n",
" \n",
" def __init__(self, name):\n",
" self.name = name\n",
" \n",
" def __matmul__(self, other):\n",
" return other.__matmul__(self)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"me = User('carreau')"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"me @ github"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### iterator/iterable/generator "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"what type is range ?"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"range"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"range"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[type, type, builtin_function_or_method]"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(map(type, (range, GH, open)))"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"`range` is of type `type`, so `range` is a class"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Traceback \u001b[0;36m(most recent call last)\u001b[0m:\n",
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-44-a906f0c290fe>\"\u001b[0;36m, line \u001b[0;32m2\u001b[0;36m, in \u001b[0;35m<module>\u001b[0;36m\u001b[0m\n",
"\u001b[0;31m class MyRange(range):\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m\u001b[0;31m:\u001b[0m type 'range' is not an acceptable base type\n",
"\n"
]
}
],
"source": [
"try:\n",
" class MyRange(range):\n",
" pass\n",
"except Exception as e:\n",
" highlight_tb()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"But you cannot inherit from `range`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So iterable define `__iter__` or `__getitem__`, you think that itarable can have lenght, but you can defien the lenght manually:"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class Meat(object):\n",
" \n",
" def __len__(self):\n",
" return 42\n",
" \n",
" def __getitem__(self, index):\n",
" if index > 50:\n",
" raise StopIteration\n",
" if index % 2:\n",
" return 'lamb'\n",
" else :\n",
" return 'porc'\n",
" \n",
" def __iter__(self):\n",
" def it():\n",
" yield 'banana'\n",
" yield 'apple'\n",
" yield 'fries'\n",
" yield 'burger'\n",
" yield 'brussels sprouts'\n",
" yield 'strawberries'\n",
" return it()\n",
" \n",
" \n",
"meat = Meat()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"42"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(meat)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n",
"porc\n",
"lamb\n"
]
}
],
"source": [
"for i in range(len(meat)):\n",
" print(meat[i])"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"banana\n",
"apple\n",
"fries\n",
"burger\n",
"brussels sprouts\n",
"strawberries\n"
]
}
],
"source": [
"for m in meat:\n",
" print(m)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('banana', 'banana')\n",
"('apple', 'apple')\n",
"('fries', 'fries')\n",
"('burger', 'burger')\n",
"('brussels sprouts', 'brussels sprouts')\n",
"('strawberries', 'strawberries')\n"
]
}
],
"source": [
"iter1 = iter(meat)\n",
"iter2 = iter(meat)\n",
"for m in zip(iter1, iter2):\n",
" print(m)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"see what happend if you iterate on the iterator instead of the iterable...."
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"iter1 = iter(meat)\n",
"iter2 = iter1\n",
"z= zip(iter1, iter2)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('banana', 'apple')\n",
"('fries', 'burger')\n",
"('brussels sprouts', 'strawberries')\n"
]
}
],
"source": [
"for m in z:\n",
" print(m)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`iter2` advance to next when `iter1` advance, and vice versa. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### bad idea : overwrite `__getitem__` to be the same as `__call__`\n",
"\n",
"Question : What can't you do ?"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class MyPrint(object):\n",
" \n",
" def __call__(self, args):\n",
" print(args)\n",
" \n",
"MyPrint.__getitem__ = MyPrint.__call__\n",
"\n",
"p = MyPrint()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n"
]
}
],
"source": [
"p(3)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(3, 7, 9)\n"
]
}
],
"source": [
"p[3,7,9]"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hello\n"
]
}
],
"source": [
"p['hello']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### WTF is yield ?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### What's with the alone `*`"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"f = fib()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"call next on the **iterator** so advance correctly"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(f)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(fib())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Typical \"you should use a generator\""
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def function(data):\n",
" \n",
" myarray = []\n",
" for value in data:\n",
" myarray.append(compute(data))\n",
" \n",
" return myarray\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### example running mean:"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"31.333333333333332\n",
"46.84615384615385\n",
"70.42857142857143\n",
"106.4\n",
"161.4375\n",
"245.88235294117646\n",
"375.77777777777777\n",
"576.0526315789474\n",
"885.5\n",
"1364.5714285714287\n"
]
}
],
"source": [
"n=11\n",
"for v in islice(g_running_mean(fib(max=1e6)), n, n+10):\n",
" print(v)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"[999.0,\n",
" 749.0,\n",
" 582.3333333333334,\n",
" 467.75,\n",
" 448.8,\n",
" 405.0,\n",
" 427.0,\n",
" 408.5,\n",
" 378.55555555555554,\n",
" 347.6,\n",
" 319.09090909090907,\n",
" 301.0833333333333,\n",
" 281.84615384615387,\n",
" 263.5,\n",
" 246.73333333333332,\n",
" 233.625,\n",
" 220.94117647058823,\n",
" 211.72222222222223,\n",
" 202.0,\n",
" 192.55,\n",
" 183.66666666666666,\n",
" 176.1818181818182,\n",
" 168.91304347826087,\n",
" 162.04166666666666,\n",
" 156.08,\n",
" 150.30769230769232,\n",
" 145.44444444444446,\n",
" 140.57142857142858,\n",
" 135.86206896551724,\n",
" 131.76666666666668,\n",
" 127.70967741935483,\n",
" 124.3125,\n",
" 120.81818181818181,\n",
" 117.38235294117646,\n",
" 114.4,\n",
" 111.38888888888889,\n",
" 108.89189189189189,\n",
" 106.26315789473684,\n",
" 103.64102564102564,\n",
" 101.375,\n",
" 99.04878048780488,\n",
" 97.14285714285714,\n",
" 95.09302325581395,\n",
" 93.02272727272727,\n",
" 91.24444444444444,\n",
" 89.3913043478261,\n",
" 87.8936170212766,\n",
" 86.25,\n",
" 84.57142857142857,\n",
" 83.14,\n",
" 81.62745098039215,\n",
" 80.42307692307692,\n",
" 79.0754716981132,\n",
" 77.68518518518519,\n",
" 76.50909090909092,\n",
" 75.25,\n",
" 74.26315789473684,\n",
" 73.13793103448276,\n",
" 71.96610169491525,\n",
" 70.98333333333333,\n",
" 69.91803278688525,\n",
" 69.09677419354838,\n",
" 68.14285714285714,\n",
" 67.140625,\n",
" 66.3076923076923,\n",
" 65.39393939393939,\n",
" 64.70149253731343,\n",
" 63.88235294117647,\n",
" 63.01449275362319,\n",
" 62.3,\n",
" 61.50704225352113,\n",
" 60.916666666666664,\n",
" 60.205479452054796,\n",
" 59.445945945945944,\n",
" 58.82666666666667,\n",
" 58.13157894736842,\n",
" 57.62337662337662,\n",
" 57.0,\n",
" 56.32911392405063,\n",
" 55.7875,\n",
" 55.17283950617284,\n",
" 54.73170731707317,\n",
" 54.18072289156626,\n",
" 53.583333333333336,\n",
" 53.10588235294118,\n",
" 52.55813953488372,\n",
" 52.172413793103445,\n",
" 51.68181818181818,\n",
" 51.146067415730336,\n",
" 50.72222222222222,\n",
" 50.23076923076923,\n",
" 49.891304347826086,\n",
" 49.45161290322581,\n",
" 48.96808510638298,\n",
" 48.589473684210525,\n",
" 48.145833333333336,\n",
" 47.845360824742265,\n",
" 47.44897959183673,\n",
" 47.01010101010101,\n",
" 46.67,\n",
" 46.26732673267327,\n",
" 46.0,\n",
" 45.640776699029125,\n",
" 45.24038461538461,\n",
" 44.93333333333333,\n",
" 44.56603773584906,\n",
" 44.32710280373832,\n",
" 44.0,\n",
" 43.63302752293578,\n",
" 43.35454545454545,\n",
" 43.01801801801802,\n",
" 42.80357142857143,\n",
" 42.50442477876106,\n",
" 42.166666666666664,\n",
" 41.91304347826087,\n",
" 41.60344827586207,\n",
" 41.41025641025641,\n",
" 41.13559322033898,\n",
" 40.8235294117647,\n",
" 40.59166666666667,\n",
" 40.30578512396694,\n",
" 40.131147540983605,\n",
" 39.8780487804878,\n",
" 39.58870967741935,\n",
" 39.376,\n",
" 39.111111111111114,\n",
" 38.95275590551181,\n",
" 38.71875,\n",
" 38.44961240310077,\n",
" 38.253846153846155,\n",
" 38.00763358778626,\n",
" 37.86363636363637,\n",
" 37.64661654135338,\n",
" 37.3955223880597,\n",
" 37.214814814814815,\n",
" 36.98529411764706,\n",
" 36.85401459854015,\n",
" 36.65217391304348,\n",
" 36.41726618705036,\n",
" 36.25,\n",
" 36.0354609929078,\n",
" 35.91549295774648,\n",
" 35.72727272727273,\n",
" 35.50694444444444,\n",
" 35.351724137931036,\n",
" 35.15068493150685,\n",
" 35.04081632653061,\n",
" 34.86486486486486,\n",
" 34.65771812080537,\n",
" 34.513333333333335,\n",
" 34.324503311258276,\n",
" 34.223684210526315,\n",
" 34.05882352941177,\n",
" 33.86363636363637,\n",
" 33.729032258064514,\n",
" 33.55128205128205,\n",
" 33.45859872611465,\n",
" 33.30379746835443,\n",
" 33.119496855345915,\n",
" 32.99375,\n",
" 32.82608695652174,\n",
" 32.74074074074074,\n",
" 32.59509202453988,\n",
" 32.420731707317074,\n",
" 32.303030303030305,\n",
" 32.144578313253014,\n",
" 32.06586826347306,\n",
" 31.928571428571427,\n",
" 31.763313609467456,\n",
" 31.652941176470588,\n",
" 31.502923976608187,\n",
" 31.430232558139537,\n",
" 31.30057803468208,\n",
" 31.14367816091954,\n",
" 31.04,\n",
" 30.897727272727273,\n",
" 30.83050847457627,\n",
" 30.707865168539325,\n",
" 30.558659217877096,\n",
" 30.461111111111112,\n",
" 30.325966850828728,\n",
" 30.263736263736263,\n",
" 30.147540983606557,\n",
" 30.005434782608695,\n",
" 29.913513513513514,\n",
" 29.78494623655914,\n",
" 29.727272727272727,\n",
" 29.617021276595743,\n",
" 29.48148148148148,\n",
" 29.394736842105264,\n",
" 29.272251308900522,\n",
" 29.21875,\n",
" 29.1139896373057,\n",
" 28.984536082474225,\n",
" 28.902564102564103,\n",
" 28.785714285714285,\n",
" 28.736040609137056,\n",
" 28.636363636363637,\n",
" 28.512562814070353,\n",
" 28.435,\n",
" 28.323383084577113,\n",
" 28.277227722772277,\n",
" 28.182266009852217,\n",
" 28.063725490196077,\n",
" 27.990243902439026,\n",
" 27.883495145631066,\n",
" 27.840579710144926,\n",
" 27.75,\n",
" 27.636363636363637,\n",
" 27.566666666666666,\n",
" 27.46445497630332,\n",
" 27.42452830188679,\n",
" 27.338028169014084,\n",
" 27.22897196261682,\n",
" 27.162790697674417,\n",
" 27.064814814814813,\n",
" 27.027649769585253,\n",
" 26.94495412844037,\n",
" 26.840182648401825,\n",
" 26.777272727272727,\n",
" 26.683257918552037,\n",
" 26.64864864864865,\n",
" 26.569506726457398,\n",
" 26.46875,\n",
" 26.40888888888889,\n",
" 26.31858407079646,\n",
" 26.2863436123348,\n",
" 26.210526315789473,\n",
" 26.11353711790393,\n",
" 26.056521739130435,\n",
" 25.96969696969697,\n",
" 25.939655172413794,\n",
" 25.86695278969957,\n",
" 25.773504273504273,\n",
" 25.719148936170214,\n",
" 25.635593220338983,\n",
" 25.60759493670886,\n",
" 25.537815126050422,\n",
" 25.447698744769873,\n",
" 25.395833333333332,\n",
" 25.315352697095435,\n",
" 25.289256198347108,\n",
" 25.22222222222222,\n",
" 25.135245901639344,\n",
" 25.085714285714285,\n",
" 25.008130081300813,\n",
" 24.983805668016196,\n",
" 24.919354838709676,\n",
" 24.835341365461847,\n",
" 24.788,\n",
" 24.713147410358566,\n",
" 24.69047619047619,\n",
" 24.628458498023715,\n",
" 24.54724409448819,\n",
" 24.501960784313727,\n",
" 24.4296875,\n",
" 24.408560311284045,\n",
" 24.348837209302324,\n",
" 24.27027027027027,\n",
" 24.226923076923075,\n",
" 24.157088122605366,\n",
" 24.137404580152673,\n",
" 24.079847908745247,\n",
" 24.00378787878788,\n",
" 23.962264150943398,\n",
" 23.894736842105264,\n",
" 23.876404494382022,\n",
" 23.82089552238806,\n",
" 23.74721189591078,\n",
" 23.70740740740741,\n",
" 23.642066420664207,\n",
" 23.625,\n",
" 23.571428571428573,\n",
" 23.5,\n",
" 23.46181818181818,\n",
" 23.39855072463768,\n",
" 23.382671480144403,\n",
" 23.33093525179856,\n",
" 23.261648745519715,\n",
" 23.225,\n",
" 23.16370106761566,\n",
" 23.148936170212767,\n",
" 23.098939929328623,\n",
" 23.031690140845072,\n",
" 22.996491228070177,\n",
" 22.937062937062937,\n",
" 22.923344947735192,\n",
" 22.875,\n",
" 22.80968858131488,\n",
" 22.775862068965516,\n",
" 22.718213058419245,\n",
" 22.705479452054796,\n",
" 22.658703071672356,\n",
" 22.595238095238095,\n",
" 22.56271186440678,\n",
" 22.506756756756758,\n",
" 22.494949494949495,\n",
" 22.449664429530202,\n",
" 22.387959866220736,\n",
" 22.356666666666666]"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(islice(g_running_mean(things(999)), 300))"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(0, 1)\n",
"(2, 3)\n",
"(4, 5)\n",
"(6, 7)\n",
"(8, 9)\n",
"(10, 11)\n",
"(12, 13)\n",
"(14, 15)\n"
]
}
],
"source": [
"for x in group_by_2(range(17)):\n",
" print(x)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def ungroup_by_2(iterator):\n",
" for x,y in iterator:\n",
" yield y\n",
" yield x"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[1, 0, 3, 2, 5, 4, 7, 6, 9, 8]"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(ungroup_by_2(group_by_2(range(10))))"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def fizzbuzz(sequence):\n",
" for x in sequence:\n",
" if (x % 5 == 0) and (x % 7 == 0):\n",
" yield 'fizzbuzz'\n",
" elif x % 5 == 0 :\n",
" yield 'fizz'\n",
" elif x % 7 == 0 :\n",
" yield 'buzz'\n",
" else:\n",
" yield x"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fizzbuzz\n",
"1\n",
"2\n",
"3\n",
"4\n",
"fizz\n",
"6\n",
"buzz\n",
"8\n",
"9\n",
"fizz\n",
"11\n",
"12\n",
"13\n",
"buzz\n",
"fizz\n",
"16\n",
"17\n",
"18\n",
"19\n",
"fizz\n",
"buzz\n",
"22\n",
"23\n",
"24\n",
"fizz\n",
"26\n",
"27\n",
"buzz\n",
"29\n",
"fizz\n",
"31\n",
"32\n",
"33\n",
"34\n",
"fizzbuzz\n",
"36\n",
"37\n",
"38\n",
"39\n",
"fizz\n",
"41\n",
"buzz\n",
"43\n",
"44\n",
"fizz\n",
"46\n",
"47\n",
"48\n",
"buzz\n"
]
}
],
"source": [
"for x in fizzbuzz(range(50)):\n",
" print(x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## https://www.youtube.com/watch?v=cMo4fnCbSPc\n",
"\n",
"Big Data in Little Laptop: A Streaming Story in Python | EuroSciPy 2015 | Juan Nunez-Iglesias"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Yield from"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def example(a, b):\n",
" yield from a\n",
" yield from b"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1, 2, 3, 5, 8, 13, 21, 34]"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(example(range(10), fib(max=30)))"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def example_yield(a, b):\n",
" yield a\n",
" yield b"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[range(0, 10), <generator object fib at 0x1051e6e08>]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(example_yield(range(10), fib(max=30)))"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# switch to 3.4"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Play with `__code__`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In Python 3, it is relatively easy to swap the implementation of two functions, or to inspect what a function does. \n",
"To do so, we access the `__code__` attribute of function objects. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Example:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def add(a, b):\n",
" return a+b"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def sub(a, b): \n",
" return a-b"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def closure(a,b):\n",
" return add(a,b)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"closure(2,1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def swap(a,b):\n",
" a.__code__, b.__code__ = b.__code__, a.__code__"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"swap(add,sub)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"closure(2,1)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"add(2,1)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sub(2,1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`__code__` actually allows you to access the bytecode of the object (which is an implementation detail of CPython)."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import dis"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 2 0 LOAD_FAST 0 (a)\n",
" 3 LOAD_FAST 1 (b)\n",
" 6 BINARY_SUBTRACT\n",
" 7 RETURN_VALUE\n"
]
}
],
"source": [
"dis.dis(add.__code__)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"OK, let's have a quick look at something a bit higher level."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def myadd(a, b, optional_behavior=False):\n",
" if optional_behavior is True:\n",
" print('hey')\n",
" return a+b"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The slowest run took 11.33 times longer than the fastest. This could mean that an intermediate result is being cached \n",
"10000000 loops, best of 3: 121 ns per loop\n",
"The slowest run took 5.94 times longer than the fastest. This could mean that an intermediate result is being cached \n",
"10000000 loops, best of 3: 183 ns per loop\n"
]
}
],
"source": [
"%timeit add(258,259)\n",
"%timeit myadd(258,259)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 2 0 LOAD_FAST 0 (a)\n",
" 3 LOAD_FAST 1 (b)\n",
" 6 BINARY_SUBTRACT\n",
" 7 RETURN_VALUE\n"
]
}
],
"source": [
"dis.dis(add)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 2 0 LOAD_FAST 2 (optional_behavior)\n",
" 3 LOAD_CONST 1 (True)\n",
" 6 COMPARE_OP 8 (is)\n",
" 9 POP_JUMP_IF_FALSE 25\n",
"\n",
" 3 12 LOAD_GLOBAL 0 (print)\n",
" 15 LOAD_CONST 2 ('hey')\n",
" 18 CALL_FUNCTION 1 (1 positional, 0 keyword pair)\n",
" 21 POP_TOP\n",
" 22 JUMP_FORWARD 0 (to 25)\n",
"\n",
" 4 >> 25 LOAD_FAST 0 (a)\n",
" 28 LOAD_FAST 1 (b)\n",
" 31 BINARY_ADD\n",
" 32 RETURN_VALUE\n"
]
}
],
"source": [
"dis.dis(myadd)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Need some utility functions"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def flatten(nseq):\n",
" if len(nseq) > 1:\n",
" x = [nseq[0]]\n",
" x.extend(flatten(nseq[1]))\n",
" return x\n",
" else:\n",
" return nseq\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When I have a right-nested sequence of the form `[item, [item, [item]]]`, this flattens it into a single flat list."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[1, 2, 3]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"flatten([1,[2,[3]]])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Onto an actual problem"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I have 2 sequences, and I want to determine the **length** of their longest common subsequence.\n",
"For example, for \"H**ac**k**e**r Withi**n**\" vs \"D**a**ta S**c**i**en**ce\", the common subsequence is `a` `c` `e` `n`, of length 4.\n",
"\n",
"Below is an implementation of an algorithm that calculates such a length."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import time \n",
"def diffs(seq1, seq2, debug=False, backtrack=True):\n",
" current = [0]*(len(seq2)+1)\n",
" prev = [0]*(len(seq2)+1)\n",
" cp = [[]]\n",
" if backtrack is True:\n",
" cc = [[(0,0,0)]]*(len(seq2)+1)\n",
" cp = [[(0,0,0)]]*(len(seq2)+1)\n",
" \n",
" if debug is True:\n",
" print(' '+', '.join([c for c in seq2]))\n",
" \n",
" for j,c1 in enumerate(seq1):\n",
" for i,c2 in enumerate(seq2):\n",
" cr = current[i]\n",
" pr = prev[i+1]\n",
" if (c1 == c2):\n",
" l = [prev[i]+1, cr, pr]\n",
" m = max(l)\n",
" current[i+1] = m\n",
" if backtrack is True:\n",
" ind = l.index(m)\n",
" cc[i+1] = [(j,i,m), cp[i]]\n",
" else:\n",
" current[i+1] = cr if cr > pr else pr\n",
" if backtrack in [True,'yes', 1]:\n",
" ind,m = (0,cr) if cr > pr else (1, pr)\n",
" if ind == 0:\n",
" cc[i+1] = cc[i]\n",
" else :\n",
" cc[i+1] = cp[i+1]\n",
" current, prev = prev, current\n",
" if backtrack is True:\n",
" cc, cp = cp, cc\n",
"\n",
" if debug:\n",
" print(c1, prev[1:])\n",
" return flatten(cp[-1]), prev[-1] \n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"([(12, 9, 4), (4, 8, 3), (2, 6, 2), (1, 3, 1), (0, 0, 0)], 4)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"diffs('Hacker Within', 'Data Science')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" D, a, t, a, , S, c, i, e, n, c, e\n",
"H [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"a [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n",
"c [0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]\n",
"k [0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]\n",
"e [0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3]\n",
"r [0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3]\n",
" [0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]\n",
"W [0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]\n",
"i [0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]\n",
"t [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]\n",
"h [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]\n",
"i [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]\n",
"n [0, 1, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4]\n"
]
},
{
"data": {
"text/plain": [
"([(12, 9, 4), (4, 8, 3), (2, 6, 2), (1, 3, 1), (0, 0, 0)], 4)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"diffs('Hacker Within', 'Data Science', debug=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Problem, backtracking is computationally expensive"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Solution: make it optional!"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"N=50"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 loops, best of 3: 229 ms per loop\n",
"10 loops, best of 3: 140 ms per loop\n"
]
}
],
"source": [
"%timeit diffs('Hacker Within'*N, 'Data Science'*N)\n",
"%timeit diffs('Hacker Within'*N, 'Data Science'*N, backtrack=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"OK, we shaved off ~40% by making backtracking optional, though I'm sure we can do better. \n",
"Also, we have a tight loop; I'm sure that just evaluating the `if` takes some time."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied (use --upgrade to upgrade): line-profiler in ./anaconda3/envs/py34/lib/python3.4/site-packages\r\n"
]
}
],
"source": [
"import sys\n",
"!{sys.executable} -m pip install line-profiler"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%load_ext line_profiler"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%lprun -f diffs diffs('Hi There, what is your name'*30, 'Hello Hacker within, How are you ?'*30, backtrack=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Oh, just one of the `if` statements can take up to 18% of the time our function takes to run. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### AST to the rescue"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The AST (Abstract Syntax Tree) can help us deal with that."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import inspect\n",
"import ast"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from ast import *\n",
"\n",
"def dump(node, annotate_fields=True, include_attributes=False, indent=' '):\n",
" \"\"\"\n",
" Return a formatted dump of the tree in *node*. This is mainly useful for\n",
" debugging purposes. The returned string will show the names and the values\n",
" for fields. This makes the code impossible to evaluate, so if evaluation is\n",
" wanted *annotate_fields* must be set to False. Attributes such as line\n",
" numbers and column offsets are not dumped by default. If this is wanted,\n",
" *include_attributes* can be set to True.\n",
" \"\"\"\n",
" def _format(node, level=0):\n",
" if isinstance(node, AST):\n",
" fields = [(a, _format(b, level)) for a, b in iter_fields(node)]\n",
" if include_attributes and node._attributes:\n",
" fields.extend([(a, _format(getattr(node, a), level))\n",
" for a in node._attributes])\n",
" return ''.join([\n",
" node.__class__.__name__,\n",
" '(',\n",
" ', '.join(('%s=%s' % field for field in fields)\n",
" if annotate_fields else\n",
" (b for a, b in fields)),\n",
" ')'])\n",
" elif isinstance(node, list):\n",
" lines = ['[']\n",
" lines.extend((indent * (level + 2) + _format(x, level + 2) + ','\n",
" for x in node))\n",
" if len(lines) > 1:\n",
" lines.append(indent * (level + 1) + ']')\n",
" else:\n",
" lines[-1] += ']'\n",
" return '\\n'.join(lines)\n",
" return repr(node)\n",
" \n",
" if not isinstance(node, AST):\n",
" raise TypeError('expected AST, got %r' % node.__class__.__name__)\n",
" return _format(node)\n",
"\n",
"def parseprint(code, filename=\"<string>\", mode=\"exec\", **kwargs):\n",
" \"\"\"Parse some code from a string and pretty-print it.\"\"\"\n",
" node = parse(code, mode=mode) # An ode to the code\n",
" print(dump(node, **kwargs))\n",
" \n",
"def pn(tree):\n",
" print(dump(tree))\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FunctionDef(name='diffs', args=arguments(args=[\n",
" arg(arg='seq1', annotation=None),\n",
" arg(arg='seq2', annotation=None),\n",
" arg(arg='debug', annotation=None),\n",
" arg(arg='backtrack', annotation=None),\n",
" ], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[\n",
" NameConstant(value=False),\n",
" NameConstant(value=True),\n",
" ]), body=[\n",
" Assign(targets=[\n",
" Name(id='current', ctx=Store()),\n",
" ], value=BinOp(left=List(elts=[\n",
" Num(n=0),\n",
" ], ctx=Load()), op=Mult(), right=BinOp(left=Call(func=Name(id='len', ctx=Load()), args=[\n",
" Name(id='seq2', ctx=Load()),\n",
" ], keywords=[], starargs=None, kwargs=None), op=Add(), right=Num(n=1)))),\n",
" Assign(targets=[\n",
" Name(id='prev', ctx=Store()),\n",
" ], value=BinOp(left=List(elts=[\n",
" Num(n=0),\n",
" ], ctx=Load()), op=Mult(), right=BinOp(left=Call(func=Name(id='len', ctx=Load()), args=[\n",
" Name(id='seq2', ctx=Load()),\n",
" ], keywords=[], starargs=None, kwargs=None), op=Add(), right=Num(n=1)))),\n",
" Assign(targets=[\n",
" Name(id='cp', ctx=Store()),\n",
" ], value=List(elts=[\n",
" List(elts=[], ctx=Load()),\n",
" ], ctx=Load())),\n",
" If(test=Compare(left=Name(id='backtrack', ctx=Load()), ops=[\n",
" Is(),\n",
" ], comparators=[\n",
" NameConstant(value=True),\n",
" ]), body=[\n",
" Assign(targets=[\n",
" Name(id='cc', ctx=Store()),\n",
" ], value=BinOp(left=List(elts=[\n",
" List(elts=[\n",
" Tuple(elts=[\n",
" Num(n=0),\n",
" Num(n=0),\n",
" Num(n=0),\n",
" ], ctx=Load()),\n",
" ], ctx=Load()),\n",
" ], ctx=Load()), op=Mult(), right=BinOp(left=Call(func=Name(id='len', ctx=Load()), args=[\n",
" Name(id='seq2', ctx=Load()),\n",
" ], keywords=[], starargs=None, kwargs=None), op=Add(), right=Num(n=1)))),\n",
" Assign(targets=[\n",
" Name(id='cp', ctx=Store()),\n",
" ], value=BinOp(left=List(elts=[\n",
" List(elts=[\n",
" Tuple(elts=[\n",
" Num(n=0),\n",
" Num(n=0),\n",
" Num(n=0),\n",
" ], ctx=Load()),\n",
" ], ctx=Load()),\n",
" ], ctx=Load()), op=Mult(), right=BinOp(left=Call(func=Name(id='len', ctx=Load()), args=[\n",
" Name(id='seq2', ctx=Load()),\n",
" ], keywords=[], starargs=None, kwargs=None), op=Add(), right=Num(n=1)))),\n",
" ], orelse=[]),\n",
" If(test=Compare(left=Name(id='debug', ctx=Load()), ops=[\n",
" Is(),\n",
" ], comparators=[\n",
" NameConstant(value=True),\n",
" ]), body=[\n",
" Expr(value=Call(func=Name(id='print', ctx=Load()), args=[\n",
" BinOp(left=Str(s=' '), op=Add(), right=Call(func=Attribute(value=Str(s=', '), attr='join', ctx=Load()), args=[\n",
" ListComp(elt=Name(id='c', ctx=Load()), generators=[\n",
" comprehension(target=Name(id='c', ctx=Store()), iter=Name(id='seq2', ctx=Load()), ifs=[]),\n",
" ]),\n",
" ], keywords=[], starargs=None, kwargs=None)),\n",
" ], keywords=[], starargs=None, kwargs=None)),\n",
" ], orelse=[]),\n",
" For(target=Tuple(elts=[\n",
" Name(id='j', ctx=Store()),\n",
" Name(id='c1', ctx=Store()),\n",
" ], ctx=Store()), iter=Call(func=Name(id='enumerate', ctx=Load()), args=[\n",
" Name(id='seq1', ctx=Load()),\n",
" ], keywords=[], starargs=None, kwargs=None), body=[\n",
" For(target=Tuple(elts=[\n",
" Name(id='i', ctx=Store()),\n",
" Name(id='c2', ctx=Store()),\n",
" ], ctx=Store()), iter=Call(func=Name(id='enumerate', ctx=Load()), args=[\n",
" Name(id='seq2', ctx=Load()),\n",
" ], keywords=[], starargs=None, kwargs=None), body=[\n",
" Assign(targets=[\n",
" Name(id='cr', ctx=Store()),\n",
" ], value=Subscript(value=Name(id='current', ctx=Load()), slice=Index(value=Name(id='i', ctx=Load())), ctx=Load())),\n",
" Assign(targets=[\n",
" Name(id='pr', ctx=Store()),\n",
" ], value=Subscript(value=Name(id='prev', ctx=Load()), slice=Index(value=BinOp(left=Name(id='i', ctx=Load()), op=Add(), right=Num(n=1))), ctx=Load())),\n",
" If(test=Compare(left=Name(id='c1', ctx=Load()), ops=[\n",
" Eq(),\n",
" ], comparators=[\n",
" Name(id='c2', ctx=Load()),\n",
" ]), body=[\n",
" Assign(targets=[\n",
" Name(id='l', ctx=Store()),\n",
" ], value=List(elts=[\n",
" BinOp(left=Subscript(value=Name(id='prev', ctx=Load()), slice=Index(value=Name(id='i', ctx=Load())), ctx=Load()), op=Add(), right=Num(n=1)),\n",
" Name(id='cr', ctx=Load()),\n",
" Name(id='pr', ctx=Load()),\n",
" ], ctx=Load())),\n",
" Assign(targets=[\n",
" Name(id='m', ctx=Store()),\n",
" ], value=Call(func=Name(id='max', ctx=Load()), args=[\n",
" Name(id='l', ctx=Load()),\n",
" ], keywords=[], starargs=None, kwargs=None)),\n",
" Assign(targets=[\n",
" Subscript(value=Name(id='current', ctx=Load()), slice=Index(value=BinOp(left=Name(id='i', ctx=Load()), op=Add(), right=Num(n=1))), ctx=Store()),\n",
" ], value=Name(id='m', ctx=Load())),\n",
" If(test=Compare(left=Name(id='backtrack', ctx=Load()), ops=[\n",
" Is(),\n",
" ], comparators=[\n",
" NameConstant(value=True),\n",
" ]), body=[\n",
" Assign(targets=[\n",
" Name(id='ind', ctx=Store()),\n",
" ], value=Call(func=Attribute(value=Name(id='l', ctx=Load()), attr='index', ctx=Load()), args=[\n",
" Name(id='m', ctx=Load()),\n",
" ], keywords=[], starargs=None, kwargs=None)),\n",
" Assign(targets=[\n",
" Subscript(value=Name(id='cc', ctx=Load()), slice=Index(value=BinOp(left=Name(id='i', ctx=Load()), op=Add(), right=Num(n=1))), ctx=Store()),\n",
" ], value=List(elts=[\n",
" Tuple(elts=[\n",
" Name(id='j', ctx=Load()),\n",
" Name(id='i', ctx=Load()),\n",
" Name(id='m', ctx=Load()),\n",
" ], ctx=Load()),\n",
" Subscript(value=Name(id='cp', ctx=Load()), slice=Index(value=Name(id='i', ctx=Load())), ctx=Load()),\n",
" ], ctx=Load())),\n",
" ], orelse=[]),\n",
" ], orelse=[\n",
" Assign(targets=[\n",
" Subscript(value=Name(id='current', ctx=Load()), slice=Index(value=BinOp(left=Name(id='i', ctx=Load()), op=Add(), right=Num(n=1))), ctx=Store()),\n",
" ], value=IfExp(test=Compare(left=Name(id='cr', ctx=Load()), ops=[\n",
" Gt(),\n",
" ], comparators=[\n",
" Name(id='pr', ctx=Load()),\n",
" ]), body=Name(id='cr', ctx=Load()), orelse=Name(id='pr', ctx=Load()))),\n",
" If(test=Compare(left=Name(id='backtrack', ctx=Load()), ops=[\n",
" In(),\n",
" ], comparators=[\n",
" List(elts=[\n",
" NameConstant(value=True),\n",
" Str(s='yes'),\n",
" Num(n=1),\n",
" ], ctx=Load()),\n",
" ]), body=[\n",
" Assign(targets=[\n",
" Tuple(elts=[\n",