orbeckst/speed benchmark.ipynb

## speed benchmark.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Speed testing persistent offsets\n",
    "Trying different mechanisms to serialize a dict data structure that includes a large numpy array.\n",
    "\n",
    "This notebook was run on a MacBook Pro (Mac OS X 10.6.8) with a 2.6 GHz Core Duo CPU and an SSD."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data = {'offsets': np.sort(np.random.uniform(high=2e8, size=int(2e6))),\n",
    "        'ctime': 123456789,\n",
    "        'size': 987654321}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  Pickle Speed test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import cPickle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Default pickle "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('test-pickle.pkl', 'wb') as f:\n",
    "    cPickle.dump(data, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-r--r--  1 oliver  staff    41M 13 Dec 11:28 test-pickle.pkl\r\n"
     ]
    }
   ],
   "source": [
    "!ls -lh test-pickle.pkl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_pickle = open('test-pickle.pkl', 'rb')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 6.43 s per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit cPickle.load(fp_pickle); fp_pickle.seek(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_pickle.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Highest protocol "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('test-pickle-fast.pkl', 'wb') as f:\n",
    "    cPickle.dump(data, f, protocol=cPickle.HIGHEST_PROTOCOL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-r--r--  1 oliver  staff    15M 13 Dec 11:33 test-pickle-fast.pkl\r\n"
     ]
    }
   ],
   "source": [
    "!ls -lh test-pickle-fast.pkl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_pickle = open('test-pickle-fast.pkl', 'rb')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10 loops, best of 3: 39.7 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit cPickle.load(fp_pickle); fp_pickle.seek(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_pickle.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using `HIGHEST_PROTOCOL` is important: the file size is ~1/3 of that of the default-protocol pickle, and the loading speed-up factor is"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "161.9647355163728"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "6.43 / 39.7e-3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# JSON Speed test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data_json = data.copy()\n",
    "data_json['offsets'] = list(data_json['offsets'])\n",
    "\n",
    "with open('test-json.json', 'w') as f:\n",
    "    json.dump(data_json, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-r--r--  1 oliver  staff    38M 13 Dec 11:33 test-json.json\r\n"
     ]
    }
   ],
   "source": [
    "!ls -lh test-json.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_json = open('test-json.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 1.05 s per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit json.load(fp_json); fp_json.seek(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_json.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Marshal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import marshal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# marshal writes a binary format, so the file must be opened in binary mode\n",
    "with open('test-marshal.marsh', 'wb') as f:\n",
    "    marshal.dump(data, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-r--r--  1 oliver  staff    15M 13 Dec 11:34 test-marshal.marsh\r\n"
     ]
    }
   ],
   "source": [
    "!ls -lh test-marshal.marsh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# marshal data is binary; marshal.load expects a file opened in binary mode\n",
    "fp_marsh = open('test-marshal.marsh', 'rb')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 loops, best of 3: 17.6 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit marshal.load(fp_marsh); fp_marsh.seek(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_marsh.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## numpy native\n",
    "@mnmelo suggested using `numpy.savez` (or `numpy.savez_compressed`): \n",
    "\n",
    "We can also save the offsets directly as a numpy array. I'm not sure if this entails endianness problems, but might be faster, since it's a method native to the object.\n",
    "Since we also have to write out filesize and modification time we can create a second array with these two values and save the whole thing as a packed set of numpy arrays (using `numpy.savez`; and representing the modification time as a long int)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### np.savez (uncompressed) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.savez(\"test-savez.npz\", offsets=data['offsets'], \n",
    "         size=data['size'], ctime=data['ctime'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-r--r--  1 oliver  staff    15M 13 Dec 11:48 test-savez.npz\r\n"
     ]
    }
   ],
   "source": [
    "!ls -lh test-savez.npz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# .npz is a zip archive, so open it in binary mode\n",
    "fp_savez = open(\"test-savez.npz\", \"rb\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The slowest run took 6.25 times longer than the fastest. This could mean that an intermediate result is being cached \n",
      "10000 loops, best of 3: 126 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit np.load(fp_savez); fp_savez.seek(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data_loaded = np.load(fp_savez)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2000000,)\n",
      "123456789\n"
     ]
    }
   ],
   "source": [
    "print(data_loaded['offsets'].shape)\n",
    "print(data_loaded['ctime'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_savez.close()"
   ]
  },
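  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The file size is as small as for all the other good solutions.\n",
    "\n",
    "**Caveat:** `np.load` on a `.npz` archive returns a lazy `NpzFile` object; the arrays are only read from the file when they are accessed (e.g. `data_loaded['offsets']`). The `%timeit` result above therefore does not include reading the offsets array and is not directly comparable to the pickle timings.\n",
    "\n",
    "Loading speed compared to `cPickle.HIGHEST_PROTOCOL`: speed-up (worst-case estimate, based on the slowest `np.load` run)"
   ]
  },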
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "50.41269841269841"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "39.7e-3/(6.25 * 126e-6)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### np.savez_compressed "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# use the compressed writer here; plain np.savez stores the arrays uncompressed\n",
    "np.savez_compressed(\"test-savez-compressed.npz\", offsets=data['offsets'],\n",
    "                    size=data['size'], ctime=data['ctime'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-r--r--  1 oliver  staff    15M 13 Dec 11:50 test-savez-compressed.npz\r\n"
     ]
    }
   ],
   "source": [
    "!ls -lh test-savez-compressed.npz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# .npz is a zip archive, so open it in binary mode\n",
    "fp_savez = open(\"test-savez-compressed.npz\", \"rb\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The slowest run took 6.47 times longer than the fastest. This could mean that an intermediate result is being cached \n",
      "10000 loops, best of 3: 137 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit np.load(fp_savez); fp_savez.seek(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data_loaded = np.load(fp_savez)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2000000,)\n",
      "123456789\n"
     ]
    }
   ],
   "source": [
    "print(data_loaded['offsets'].shape)\n",
    "print(data_loaded['ctime'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fp_savez.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Speed testing persistent offsets\n",
	"Trying different mechanisms to serialize a dict data structure that includes a large numpy array.\n",
	"\n",
	"This notebook was run on a MacBook Pro (Mac OS X 10.6.8) with a 2.6 GHz Core Duo CPU and an SSD."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"data = {'offsets': np.sort(np.random.uniform(high=2e8, size=int(2e6))),\n",
	" 'ctime': 123456789,\n",
	" 'size': 987654321}"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Pickle Speed test"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import cPickle"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Default pickle "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"with open('test-pickle.pkl', 'wb') as f:\n",
	" cPickle.dump(data, f)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"-rw-r--r-- 1 oliver staff 41M 13 Dec 11:28 test-pickle.pkl\r\n"
	]
	}
	],
	"source": [
	"!ls -lh test-pickle.pkl"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_pickle = open('test-pickle.pkl', 'rb')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1 loops, best of 3: 6.43 s per loop\n"
	]
	}
	],
	"source": [
	"%timeit cPickle.load(fp_pickle); fp_pickle.seek(0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_pickle.close()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Highest protocol "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"with open('test-pickle-fast.pkl', 'wb') as f:\n",
	" cPickle.dump(data, f, protocol=cPickle.HIGHEST_PROTOCOL)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"-rw-r--r-- 1 oliver staff 15M 13 Dec 11:33 test-pickle-fast.pkl\r\n"
	]
	}
	],
	"source": [
	"!ls -lh test-pickle-fast.pkl"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_pickle = open('test-pickle-fast.pkl', 'rb')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"10 loops, best of 3: 39.7 ms per loop\n"
	]
	}
	],
	"source": [
	"%timeit cPickle.load(fp_pickle); fp_pickle.seek(0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_pickle.close()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Using `HIGHEST_PROTOCOL` is important: the file size is ~1/3 of that of the default-protocol pickle, and the loading speed-up factor is"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 65,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"161.9647355163728"
	]
	},
	"execution_count": 65,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"6.43 / 39.7e-3"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# JSON Speed test"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import json"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"data_json = data.copy()\n",
	"data_json['offsets'] = list(data_json['offsets'])\n",
	"\n",
	"with open('test-json.json', 'w') as f:\n",
	" json.dump(data_json, f)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"-rw-r--r-- 1 oliver staff 38M 13 Dec 11:33 test-json.json\r\n"
	]
	}
	],
	"source": [
	"!ls -lh test-json.json"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_json = open('test-json.json')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1 loops, best of 3: 1.05 s per loop\n"
	]
	}
	],
	"source": [
	"%timeit json.load(fp_json); fp_json.seek(0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_json.close()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Marshal"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import marshal"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 28,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# marshal writes a binary format, so the file must be opened in binary mode\n",
	"with open('test-marshal.marsh', 'wb') as f:\n",
	"    marshal.dump(data, f)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"-rw-r--r-- 1 oliver staff 15M 13 Dec 11:34 test-marshal.marsh\r\n"
	]
	}
	],
	"source": [
	"!ls -lh test-marshal.marsh"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# marshal data is binary; marshal.load expects a file opened in binary mode\n",
	"fp_marsh = open('test-marshal.marsh', 'rb')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"100 loops, best of 3: 17.6 ms per loop\n"
	]
	}
	],
	"source": [
	"%timeit marshal.load(fp_marsh); fp_marsh.seek(0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 32,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_marsh.close()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"collapsed": true
	},
	"source": [
	"## numpy native\n",
	"@mnmelo suggested using `numpy.savez` (or `numpy.savez_compressed`): \n",
	"\n",
	"We can also save the offsets directly as a numpy array. I'm not sure if this entails endianness problems, but might be faster, since it's a method native to the object.\n",
	"Since we also have to write out filesize and modification time we can create a second array with these two values and save the whole thing as a packed set of numpy arrays (using `numpy.savez`; and representing the modification time as a long int)."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### np.savez (uncompressed) "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 50,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"np.savez(\"test-savez.npz\", offsets=data['offsets'], \n",
	" size=data['size'], ctime=data['ctime'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 51,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"-rw-r--r-- 1 oliver staff 15M 13 Dec 11:48 test-savez.npz\r\n"
	]
	}
	],
	"source": [
	"!ls -lh test-savez.npz"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 52,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# .npz is a zip archive, so open it in binary mode\n",
	"fp_savez = open(\"test-savez.npz\", \"rb\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 53,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"The slowest run took 6.25 times longer than the fastest. This could mean that an intermediate result is being cached \n",
	"10000 loops, best of 3: 126 µs per loop\n"
	]
	}
	],
	"source": [
	"%timeit np.load(fp_savez); fp_savez.seek(0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 54,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"data_loaded = np.load(fp_savez)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 55,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(2000000,)\n",
	"123456789\n"
	]
	}
	],
	"source": [
	"print(data_loaded['offsets'].shape)\n",
	"print(data_loaded['ctime'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 48,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_savez.close()"
	]
	},
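	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"The file size is as small as for all the other good solutions.\n",
	"\n",
	"**Caveat:** `np.load` on a `.npz` archive returns a lazy `NpzFile` object; the arrays are only read from the file when they are accessed (e.g. `data_loaded['offsets']`). The `%timeit` result above therefore does not include reading the offsets array and is not directly comparable to the pickle timings.\n",
	"\n",
	"Loading speed compared to `cPickle.HIGHEST_PROTOCOL`: speed-up (worst-case estimate, based on the slowest `np.load` run)"
	]
	},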
	{
	"cell_type": "code",
	"execution_count": 66,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"50.41269841269841"
	]
	},
	"execution_count": 66,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"39.7e-3/(6.25 * 126e-6)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### np.savez_compressed "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 56,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# use the compressed writer here; plain np.savez stores the arrays uncompressed\n",
	"np.savez_compressed(\"test-savez-compressed.npz\", offsets=data['offsets'],\n",
	"                    size=data['size'], ctime=data['ctime'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 57,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"-rw-r--r-- 1 oliver staff 15M 13 Dec 11:50 test-savez-compressed.npz\r\n"
	]
	}
	],
	"source": [
	"!ls -lh test-savez-compressed.npz"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 58,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# .npz is a zip archive, so open it in binary mode\n",
	"fp_savez = open(\"test-savez-compressed.npz\", \"rb\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 59,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"The slowest run took 6.47 times longer than the fastest. This could mean that an intermediate result is being cached \n",
	"10000 loops, best of 3: 137 µs per loop\n"
	]
	}
	],
	"source": [
	"%timeit np.load(fp_savez); fp_savez.seek(0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 60,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"data_loaded = np.load(fp_savez)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 61,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(2000000,)\n",
	"123456789\n"
	]
	}
	],
	"source": [
	"print(data_loaded['offsets'].shape)\n",
	"print(data_loaded['ctime'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 62,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"fp_savez.close()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.11"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}