Skip to content

Instantly share code, notes, and snippets.

@jiffyclub
Created May 8, 2014 21:49
Show Gist options
  • Save jiffyclub/330846d54b51053f2dc9 to your computer and use it in GitHub Desktop.
Save jiffyclub/330846d54b51053f2dc9 to your computer and use it in GitHub Desktop.
Comparison of two ways of concatenating pandas Index objects.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:35f356f5ac018d8c56048eeba0aaef6924f28a5cf0b3f93c727990071818548d"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from functools import reduce\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Five contiguous 100,000-element integer indexes covering 0..499,999.\n",
"indexes = tuple(\n",
"    pd.Index(range(start, start + 100000))\n",
"    for start in range(0, 500000, 100000)\n",
")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Five single-element indexes holding 1 through 5, for the small-input timing.\n",
"indexes2 = tuple(pd.Index([value]) for value in range(1, 6))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def concat_indexes1(indexes):\n",
"    \"\"\"Join Index objects by folding ``append`` over them, two at a time.\n",
"\n",
"    ``indexes`` is an iterable of objects exposing ``append``; the running\n",
"    result is appended with each following element in turn.\n",
"    \"\"\"\n",
"    def pairwise_append(joined, following):\n",
"        return joined.append(following)\n",
"\n",
"    return reduce(pairwise_append, indexes)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"concat_indexes1(indexes2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"Int64Index([1, 2, 3, 4, 5], dtype='int64')"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit concat_indexes1(indexes2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"10000 loops, best of 3: 121 \u00b5s per loop\n"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit concat_indexes1(indexes)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 2.7 ms per loop\n"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def concat_indexes2(indexes):\n",
"    \"\"\"Join Index objects with one ``np.concatenate`` call, re-wrapped as an Index.\"\"\"\n",
"    combined = np.concatenate(indexes)\n",
"    return pd.Index(combined)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"concat_indexes2(indexes2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"Int64Index([1, 2, 3, 4, 5], dtype='int64')"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit concat_indexes2(indexes2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"10000 loops, best of 3: 17.2 \u00b5s per loop\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit concat_indexes2(indexes)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000 loops, best of 3: 1.1 ms per loop\n"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment