Skip to content

Instantly share code, notes, and snippets.

@astynax
Last active December 23, 2015 17:59
Show Gist options
  • Save astynax/6673026 to your computer and use it in GitHub Desktop.
Save astynax/6673026 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \u0420\u0430\u0431\u043e\u0442\u0430 \u0441 \u0431\u0438\u043d\u0430\u0440\u043d\u044b\u043c\u0438 (\u0438 \u043d\u0435 \u0442\u043e\u043b\u044c\u043a\u043e) \u0444\u0430\u0439\u043b\u0430\u043c\u0438\n",
"\n",
"# io + struct\n",
"\n",
"* [io](http://docs.python.org/2/library/io.html)\n",
" * BytesIO\n",
" * StringIO\n",
"* [struct](http://docs.python.org/2/library/struct.html)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import io\n",
"import struct"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**StringIO** \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442\u0441\u044f \u0442\u043e\u0433\u0434\u0430, \u043a\u043e\u0433\u0434\u0430 \u0432\u044b\u0432\u043e\u0434 \u0432 \u0444\u0430\u0439\u043b \u043d\u0443\u0436\u043d\u043e \u043f\u0435\u0440\u0435\u043d\u0430\u043f\u0440\u0430\u0432\u0438\u0442\u044c \u0432 \u0441\u0442\u0440\u043e\u043a\u0443.\n",
"\n",
"\u0420\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u043c \u0432\u0441\u0435\u0433\u0434\u0430 \u0431\u0443\u0434\u0435\u0442 **Unicode**!\n",
"\n",
"\u041f\u0440\u0438 \u044d\u0442\u043e\u043c \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a\u0438 \u043c\u043e\u0436\u043d\u043e \u043f\u0440\u0438\u0432\u043e\u0434\u0438\u0442\u044c \u043a \u0435\u0434\u0438\u043d\u043e\u043c\u0443 \u0441\u0442\u0430\u043d\u0434\u0430\u0440\u0442\u0443:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with io.StringIO(newline='\\r\\n') as sio:\n",
" sio.write(u'Title\\n')\n",
" sio.writelines([u'- One\\n', u'- Two\\n', u'- Three\\n', u'!\u042e\u043d\u0438\u043a\u043e\u0434!\\n'])\n",
" sio.seek(0, 0)\n",
" print repr(sio.read())\n",
" # print repr(sio.getvalue()) - \u043c\u043e\u0436\u043d\u043e \u0438 \u0442\u0430\u043a"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"u'Title\\r\\n- One\\r\\n- Two\\r\\n- Three\\r\\n!\\u042e\\u043d\\u0438\\u043a\\u043e\\u0434!\\r\\n'\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**BytesIO** \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442\u0441\u044f \u0442\u043e\u0433\u0434\u0430, \u043a\u043e\u0433\u0434\u0430 \u043d\u0443\u0436\u043d\u043e \u0445\u0440\u0430\u043d\u0438\u0442\u044c \u0432 \u0444\u0430\u0439\u043b\u043e\u043f\u043e\u0434\u043e\u0431\u043d\u043e\u043c \u043e\u0431\u044a\u0435\u043a\u0442\u0435 *\"\u0441\u044b\u0440\u044b\u0435\"* \u0434\u0430\u043d\u043d\u044b\u0435:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bio = io.BytesIO()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bio.write(chr(42)) # \u043e\u043f\u0435\u0440\u0430\u0446\u0438\u0438 \u0437\u0430\u043f\u0438\u0441\u0438 \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u044e\u0442 \u0434\u043b\u0438\u043d\u0443 \u0437\u0430\u043f\u0438\u0441\u0430\u043d\u043d\u044b\u0445 \u0434\u0430\u043d\u043d\u044b\u0445 \u0432 \u0431\u0430\u0439\u0442\u0430\u0445"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"1L"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bio.tell() # \u043f\u043e\u0437\u0438\u0446\u0438\u044f \u0432 \u0444\u0430\u0439\u043b\u0435 \u0432 \u0431\u0430\u0439\u0442\u0430\u0445 \u043e\u0442 \u043d\u0430\u0447\u0430\u043b\u0430"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"1L"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# \u043f\u0435\u0440\u0435\u043c\u0435\u0449\u0435\u043d\u0438\u0435 \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0435\u0442 \u043d\u043e\u0432\u0443\u044e \u043f\u043e\u0437\u0438\u0446\u0438\u044e \u043e\u0442 \u043d\u0430\u0447\u0430\u043b\u0430\n",
"bio.seek(0, 0) # \u043f\u0440\u044b\u0436\u043e\u043a \u0432 \u043d\u0430\u0447\u0430\u043b\u043e\n",
"bio.seek(0, 2) # \u043f\u0440\u044b\u0436\u043e\u043a \u0432 \u043a\u043e\u043d\u0435\u0446\n",
"bio.seek(-1, 1) # \u043f\u0440\u044b\u0436\u043e\u043a \u043d\u0430 \u0431\u0430\u0439\u0442 \u043d\u0430\u0437\u0430\u0434 \u043e\u0442 \u0442\u0435\u043a\u0443\u0449\u0435\u0439 \u043f\u043e\u0437\u0438\u0446\u0438\u0438"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"0L"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bio.write(chr(0))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"1L"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bio.getvalue()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"'\\x00'"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bio.seek(0, 0)\n",
"bio.write(chr(0) + chr(1) + chr(2))\n",
"\n",
"bio.seek(-1, 2)\n",
"prev_pos = bio.tell()\n",
"print repr(bio.getvalue()) # getvalue, \u0432 \u043e\u0442\u043b\u0438\u0447\u0438\u0438 \u043e\u0442 read, \u043d\u0435 \u0442\u0435\u0440\u0435\u0431\u0443\u0435\u0442 \u043f\u0435\u0440\u0435\u043c\u0435\u0449\u0430\u0435\u043d\u0438\u044f \u0432 \u043d\u0430\u0447\u0430\u043b\u043e\n",
"assert bio.tell() == prev_pos # getvalue \u043d\u0435 \u043c\u0435\u043d\u044f\u0435\u0442 \u043f\u043e\u0437\u0438\u0446\u0438\u044e\n",
"\n",
"bio.truncate() # \u043e\u0442\u0431\u0440\u0430\u0441\u044b\u0432\u0430\u0435\u043c \u0432\u0441\u0451 \u0441\u043e\u0436\u0434\u0435\u0440\u0436\u0438\u043c\u043e\u0435 \u043e\u0442 \u0442\u0435\u043a\u0443\u0449\u0435\u0439 \u043f\u043e\u0437\u0438\u0446\u0438\u0438 \u0434\u043e \u043a\u043e\u043d\u0446\u0430 \u0444\u0430\u0439\u043b\u0430\n",
"print repr(bio.getvalue())"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"'\\x00\\x01\\x02'\n",
"'\\x00\\x01'\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bio.seek(0, 0)\n",
"print repr(bio.read(1))\n",
"print repr(bio.read(100)) # read \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0435\u0442 \u0441\u0442\u043e\u043b\u044c\u043a\u043e \u0431\u0430\u0439\u0442, \u0441\u043a\u043e\u043b\u044c\u043a\u043e \u043c\u043e\u0436\u0435\u0442 \u043f\u0440\u043e\u0447\u0435\u0441\u0442\u044c"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"'\\x00'\n",
"'\\x01'\n"
]
}
],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## struct"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"struct.pack('BLd', 1, 1, 1) # \u0431\u0435\u0437\u0437\u043d\u0430\u043a\u043e\u0432\u044b\u0439 \u0431\u0430\u0439\u0442, \u0431\u0435\u0437\u0437\u043d\u0430\u043a\u043e\u0432\u044b\u0439 long, \u0438 double"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"'\\x01\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xf0?'"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print struct.unpack('d', struct.pack('d', 3.14))\n",
"print struct.unpack('f', struct.pack('f', 3.14)) # \u0442\u043e\u0447\u043d\u043e\u0441\u0442\u044c float-\u043e\u0432 \u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u0430"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(3.14,)\n",
"(3.140000104904175,)\n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"struct.pack('LL?', 100, 200, True) # (long, long, bool)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"'d\\x00\\x00\\x00\\xc8\\x00\\x00\\x00\\x01'"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class Storage(object):\n",
"    \"\"\"\n",
"    Compact storage of homogeneous fixed-size records.\n",
"\n",
"    Each record is packed with the same struct.Struct and appended to a\n",
"    seekable binary file-like object, so record number i starts at byte\n",
"    offset i * struct.size.\n",
"    \"\"\"\n",
"    def __init__(self, struct, io):\n",
"        \"\"\"\n",
"        struct -- struct.Struct instance describing one record\n",
"        io -- seekable binary file-like object that holds the packed records\n",
"        \"\"\"\n",
"        # The parameter names shadow the struct and io modules inside this\n",
"        # method only; they are kept so existing keyword callers still work.\n",
"        self._struct = struct\n",
"        self._io = io\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the number of complete records currently stored.\"\"\"\n",
"        self._io.seek(0, 2)\n",
"        return self._io.tell() // self._struct.size\n",
"\n",
"    def _normalize(self, idx):\n",
"        \"\"\"Map a negative index onto its non-negative equivalent.\"\"\"\n",
"        if idx < 0:\n",
"            idx += len(self)\n",
"            if idx < 0:\n",
"                raise IndexError('Storage index out of range')\n",
"        return idx\n",
"\n",
"    def add(self, data):\n",
"        \"\"\"Pack the sequence data as one record and append it at the end.\"\"\"\n",
"        self._io.seek(0, 2)\n",
"        val = self._struct.pack(*data)\n",
"        self._io.write(val)\n",
"\n",
"    def remove(self, idx):\n",
"        \"\"\"\n",
"        Delete the record at idx; later records move one slot down.\n",
"\n",
"        Raises IndexError when idx is out of range.\n",
"        \"\"\"\n",
"        idx = self._normalize(idx)\n",
"        if idx >= len(self):\n",
"            raise IndexError('Storage index out of range')\n",
"        size = self._struct.size\n",
"        # Copy everything stored after the removed record one record earlier,\n",
"        # then cut the file at the end of the copied data.\n",
"        self._io.seek((idx + 1) * size, 0)\n",
"        tail = self._io.read()\n",
"        self._io.seek(idx * size, 0)\n",
"        self._io.write(tail)\n",
"        self._io.truncate()\n",
"\n",
"    def __getitem__(self, idx):\n",
"        \"\"\"\n",
"        Return the record at idx as a tuple.\n",
"\n",
"        Negative indexes count from the end. Raises IndexError when idx is\n",
"        out of range.\n",
"        \"\"\"\n",
"        size = self._struct.size\n",
"        self._io.seek(self._normalize(idx) * size, 0)\n",
"        val = self._io.read(size)\n",
"        # A short read means idx points past the last complete record.\n",
"        if len(val) < size:\n",
"            raise IndexError('Storage index out of range')\n",
"        return self._struct.unpack(val)\n",
"\n",
"    def __iter__(self):\n",
"        \"\"\"Yield every stored record, in insertion order, as a tuple.\"\"\"\n",
"        size = self._struct.size\n",
"        off = 0\n",
"        while True:\n",
"            # Seek on every step: the file position is shared with add() and\n",
"            # __getitem__, which the consumer may call between two records.\n",
"            self._io.seek(off, 0)\n",
"            val = self._io.read(size)\n",
"            if len(val) < size:\n",
"                break\n",
"            off += size\n",
"            yield self._struct.unpack(val)\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"store = Storage(struct.Struct('LL?'), io.BytesIO())"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"store.add((100, 201, True))\n",
"store.add((120, 250, False))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print list(store)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[(100, 201, True), (120, 250, False)]\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u041f\u043e\u043f\u0440\u043e\u0431\u0443\u0435\u043c \u0433\u0440\u0443\u0431\u043e \u0441\u043f\u0440\u043e\u0444\u0438\u043b\u0438\u0440\u043e\u0432\u0430\u0442\u044c"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import sys\n",
"from random import choice, randint"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def fake_tuple():\n",
"    \"\"\"Build one random (int, int, bool) record for the profiling runs.\"\"\"\n",
"    first = randint(-1000, 1000)\n",
"    second = randint(-500, 500)\n",
"    flag = choice([True, False])\n",
"    return (first, second, flag)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"l = [fake_tuple() for _ in xrange(10000)]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sum(sum(map(sys.getsizeof, i)) for i in l) # \u0440\u0430\u0437\u043c\u0435\u0440 \u0432 \u043f\u0430\u043c\u044f\u0442\u0438"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 21,
"text": [
"360000"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"store = Storage(struct.Struct('ll?'), io.BytesIO())"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for i in l:\n",
" store.add(i)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sys.getsizeof(store) + sys.getsizeof(store._io.getvalue()) # \u0440\u0430\u0437\u043c\u0435\u0440 \u0432 \u043f\u0430\u043c\u044f\u0442\u0438"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
"90049"
]
}
],
"prompt_number": 24
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\u0432 **4** \u0440\u0430\u0437\u0430 \u043c\u0435\u043d\u044c\u0448\u0435 - \u043d\u0435\u043f\u043b\u043e\u0445\u043e! \u0442\u0435\u043f\u0435\u0440\u044c\n",
"\n",
"### \u043f\u043e\u043c\u0435\u0440\u044f\u0435\u043c \u0441\u043a\u043e\u0440\u043e\u0441\u0442\u044c"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"indexes = [randint(0, 9999) for _ in xrange(10000)] # \u043f\u043e\u0437\u0438\u0446\u0438\u0438"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit len([l[i] for i in indexes])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000 loops, best of 3: 541 \u00b5s per loop\n"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit len([store[i] for i in indexes])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 10.6 ms per loop\n"
]
}
],
"prompt_number": 27
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\u041d\u0430\u0448 \u043d\u0430\u043a\u043e\u043f\u0438\u0442\u0435\u043b\u044c \u043c\u0435\u0434\u043b\u0435\u043d\u043d\u0435\u0435 \u043f\u0440\u0438\u043c\u0435\u0440\u043d\u043e \u0432 **20** \u0440\u0430\u0437 - \u0437\u0430\u0442\u0440\u0430\u0442\u044b \u043d\u0430 \u0440\u0430\u0441\u043f\u0430\u043a\u043e\u0432\u044b\u0432\u0430\u043d\u0438\u0435"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \u0410 \u0447\u0442\u043e \u0441 \u043a\u043e\u043d\u0442\u0435\u0439\u043d\u0435\u0440\u043d\u044b\u043c\u0438 \u0442\u0438\u043f\u0430\u043c\u0438?\n",
"\n",
"### \u041f\u043e\u043f\u0440\u043e\u0431\u0443\u0435\u043c \u0440\u0435\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u0442\u044c"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def pack(x):\n",
"    \"\"\"\n",
"    Serialize x into a tagged byte string (a Python 2 str).\n",
"\n",
"    Layout, chosen by the exact type(x):\n",
"      int, bool, float -- 'S' + one-character struct format + packed value\n",
"      str              -- 'U' + native long length + the raw characters\n",
"      tuple, list      -- 'T' or 'L' + native long item count + packed items\n",
"      dict             -- 'D' + native long item count + packed key/value pairs\n",
"\n",
"    Any other type raises KeyError from the dispatch-table lookup.\n",
"    \"\"\"\n",
"    def make_scalar_packer(fmt):\n",
"        def pack_scalar(value):\n",
"            return 'S' + fmt + struct.pack(fmt, value)\n",
"        return pack_scalar\n",
"\n",
"    def pack_str(text):\n",
"        # '<len>s' packs the characters unchanged.\n",
"        payload = struct.pack('%ds' % len(text), text)\n",
"        return 'U%s%s' % (struct.pack('l', len(payload)), payload)\n",
"\n",
"    def make_container_packer(tag, pack_items):\n",
"        def pack_container(container):\n",
"            count = struct.pack('l', len(container))\n",
"            return tag + count + ''.join(pack_items(container))\n",
"        return pack_container\n",
"\n",
"    def pack_each(items):\n",
"        return map(pack, items)\n",
"\n",
"    def pack_pairs(mapping):\n",
"        return [pack(key) + pack(value)\n",
"                for (key, value) in mapping.iteritems()]\n",
"\n",
"    packers = {\n",
"        int: make_scalar_packer('l'),\n",
"        bool: make_scalar_packer('?'),\n",
"        float: make_scalar_packer('d'),\n",
"        str: pack_str,\n",
"        tuple: make_container_packer('T', pack_each),\n",
"        list: make_container_packer('L', pack_each),\n",
"        dict: make_container_packer('D', pack_pairs),\n",
"    }\n",
"    return packers[type(x)](x)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def unpack(buf):\n",
"    \"\"\"\n",
"    Read one tagged value from the binary file-like object buf.\n",
"\n",
"    The first byte selects the decoder: 'S' scalar, 'U' string, 'T' tuple,\n",
"    'L' list, 'D' dict. Containers are decoded by calling unpack again on\n",
"    the same buf once per stored item (twice per dict entry: key, then\n",
"    value). An unknown tag raises KeyError from the dispatch-table lookup.\n",
"    \"\"\"\n",
"    def read(fmt):\n",
"        layout = struct.Struct(fmt)\n",
"        fields = layout.unpack(buf.read(layout.size))\n",
"        return fields[0]\n",
"\n",
"    def read_scalar():\n",
"        # A scalar stores its own one-character struct format first.\n",
"        fmt = read('1s')\n",
"        return read(fmt)\n",
"\n",
"    def read_str():\n",
"        length = read('l')\n",
"        return read('%ds' % length)\n",
"\n",
"    def read_ctr_to(cls):\n",
"        def read_ctr():\n",
"            return cls(unpack(buf) for _ in range(read('l')))\n",
"        return read_ctr\n",
"\n",
"    def read_dict():\n",
"        pairs = ((unpack(buf), unpack(buf)) for _ in xrange(read('l')))\n",
"        return dict(pairs)\n",
"\n",
"    tag = read('1s')\n",
"    readers = {\n",
"        'S': read_scalar,\n",
"        'U': read_str,\n",
"        'T': read_ctr_to(tuple),\n",
"        'L': read_ctr_to(list),\n",
"        'D': read_dict,\n",
"    }\n",
"    return readers[tag]()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 29
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\u0418\u0441\u043f\u044b\u0442\u0430\u0435\u043c!"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"unpack(io.BytesIO(pack(('ABC',))))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 30,
"text": [
"('ABC',)"
]
}
],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"unpack(io.BytesIO(pack(\n",
" [1, 2, True, ('ABC',), {'k': [], 42: 3.14}]\n",
")))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 31,
"text": [
"[1, 2, True, ('ABC',), {42: 3.14, 'k': []}]"
]
}
],
"prompt_number": 31
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment