Created
April 20, 2016 21:33
-
-
Save tdhopper/91f03250892c12c6e0d35ca6d2ade1ca to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"import shelve" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df1 = pd.DataFrame({\"x\": [1,2,3,4], \"y\": [\"a\", \"b\", \"c\", \"a\"], \"z\": [\"m\", \"n\", \"m\", \"n\"]})\n", | |
"df2 = pd.DataFrame({\"x\": [4,3,2,1], \"y\": [\"d\", \"d\", \"a\", \"a\"], \"z\": [\"n\", \"n\", \"n\", \"n\"]})" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Concatenating dataframes is usually pretty easy." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>z</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>a</td>\n", | |
" <td>m</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>b</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>c</td>\n", | |
" <td>m</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>a</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>d</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>3</td>\n", | |
" <td>d</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2</td>\n", | |
" <td>a</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>1</td>\n", | |
" <td>a</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" x y z\n", | |
"0 1 a m\n", | |
"1 2 b n\n", | |
"2 3 c m\n", | |
"3 4 a n\n", | |
"4 4 d n\n", | |
"5 3 d n\n", | |
"6 2 a n\n", | |
"7 1 a n" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.concat([df1, df2], ignore_index=True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"However, the handy new Category datatype doesn't allow it if the categories aren't identical." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"for col in ['x', 'y']:\n", | |
" df1[col] = df1[col].astype('category')\n", | |
" df2[col] = df2[col].astype('category')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "incompatible categories in categorical concat", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-11-e4fd87734c99>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/Users/tdhopper/miniconda/envs/concat_categoricals/lib/python3.5/site-packages/pandas/tools/merge.py\u001b[0m in \u001b[0;36mconcat\u001b[0;34m(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)\u001b[0m\n\u001b[1;32m 833\u001b[0m \u001b[0mverify_integrity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverify_integrity\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 834\u001b[0m copy=copy)\n\u001b[0;32m--> 835\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 836\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 837\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/tdhopper/miniconda/envs/concat_categoricals/lib/python3.5/site-packages/pandas/tools/merge.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1023\u001b[0m new_data = concatenate_block_managers(\n\u001b[1;32m 1024\u001b[0m \u001b[0mmgrs_indexers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnew_axes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1025\u001b[0;31m concat_axis=self.axis, copy=self.copy)\n\u001b[0m\u001b[1;32m 1026\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1027\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_consolidate_inplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/tdhopper/miniconda/envs/concat_categoricals/lib/python3.5/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mconcatenate_block_managers\u001b[0;34m(mgrs_indexers, axes, concat_axis, copy)\u001b[0m\n\u001b[1;32m 4472\u001b[0m copy=copy),\n\u001b[1;32m 4473\u001b[0m placement=placement)\n\u001b[0;32m-> 4474\u001b[0;31m for placement, join_units in concat_plan]\n\u001b[0m\u001b[1;32m 4475\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4476\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mBlockManager\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/tdhopper/miniconda/envs/concat_categoricals/lib/python3.5/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 4472\u001b[0m copy=copy),\n\u001b[1;32m 4473\u001b[0m placement=placement)\n\u001b[0;32m-> 4474\u001b[0;31m for placement, join_units in concat_plan]\n\u001b[0m\u001b[1;32m 4475\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4476\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mBlockManager\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/tdhopper/miniconda/envs/concat_categoricals/lib/python3.5/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mconcatenate_join_units\u001b[0;34m(join_units, concat_axis, copy)\u001b[0m\n\u001b[1;32m 4577\u001b[0m \u001b[0mconcat_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconcat_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4578\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4579\u001b[0;31m \u001b[0mconcat_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_concat_compat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_concat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconcat_axis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4580\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4581\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconcat_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/tdhopper/miniconda/envs/concat_categoricals/lib/python3.5/site-packages/pandas/core/common.py\u001b[0m in \u001b[0;36m_concat_compat\u001b[0;34m(to_concat, axis)\u001b[0m\n\u001b[1;32m 2722\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0;34m'category'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtyps\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2723\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcategorical\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0m_concat_compat\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2724\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_concat_compat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_concat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2725\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2726\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mnonempty\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Users/tdhopper/miniconda/envs/concat_categoricals/lib/python3.5/site-packages/pandas/core/categorical.py\u001b[0m in \u001b[0;36m_concat_compat\u001b[0;34m(to_concat, axis)\u001b[0m\n\u001b[1;32m 1948\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcategoricals\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1949\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcategories\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_dtype_equal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1950\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"incompatible categories in categorical concat\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1951\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1952\u001b[0m \u001b[0;31m# we've already checked that all categoricals are the same, so if their\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: incompatible categories in categorical concat" | |
] | |
} | |
], | |
"source": [ | |
"pd.concat([df1, df2], ignore_index=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_possible_values(dataframes, column_names, shelf_name):\n", | |
" \"\"\"Do first pass over dataframes to find possible categorical values.\"\"\"\n", | |
" with shelve.open(shelf_name, writeback=True) as shelf:\n", | |
" for col in column_names:\n", | |
" shelf[col] = set()\n", | |
" for df in dataframes:\n", | |
" for col in column_names:\n", | |
" for val in df[col]:\n", | |
" shelf[col].add(val)\n", | |
" \n", | |
"def concat(dataframes, categorical_columns, ignore_index=False, shelf_name=\"possible_values.shl\"):\n", | |
" \"\"\"Concatenate dataframes with unordered categorical columns. \n", | |
" \n", | |
" Will mutate categorical columns of origial dataframes.\n", | |
" \n", | |
" dataframes: list of dataframes.\n", | |
" categorical_columns: list of names of unordered, categorical columns.\n", | |
" ignore_index: same as from pd.concat.\n", | |
" shelf_name: filename for shelve object to store possible values.\n", | |
" \"\"\"\n", | |
" get_possible_values(dataframes, categorical_columns, shelf_name)\n", | |
" with shelve.open(shelf_name) as shelf:\n", | |
" for df in dataframes:\n", | |
" for col in categorical_columns:\n", | |
" df[col] = pd.Categorical(df[col], categories=shelf[col], ordered=False)\n", | |
" return pd.concat(dataframes, axis=0, ignore_index=ignore_index)\\\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>z</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>a</td>\n", | |
" <td>m</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>b</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>c</td>\n", | |
" <td>m</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>a</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4</td>\n", | |
" <td>d</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>3</td>\n", | |
" <td>d</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>a</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>a</td>\n", | |
" <td>n</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" x y z\n", | |
"0 1 a m\n", | |
"1 2 b n\n", | |
"2 3 c m\n", | |
"3 4 a n\n", | |
"0 4 d n\n", | |
"1 3 d n\n", | |
"2 2 a n\n", | |
"3 1 a n" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = concat([df1, df2], ['x', 'y'])\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"x category\n", | |
"y category\n", | |
"z object\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.dtypes" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
unfortunately this doesn't work any more in my setup:
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.13.final.0
python-bits: 64
OS: Darwin
OS-release: 15.6.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: de_DE.UTF-8
LOCALE: None.None
pandas: 0.20.1
pytest: 3.0.6
pip: 9.0.1
setuptools: 34.2.0
Cython: 0.25.2
numpy: 1.12.0
scipy: 0.19.0
xarray: 0.9.1
IPython: 5.2.2
sphinx: 1.5.2
patsy: 0.4.1
dateutil: 2.5.0
pytz: 2016.10
blosc: None
bottleneck: 1.2.1
tables: 3.3.0
numexpr: 2.6.2
feather: None
matplotlib: 2.0.2
openpyxl: 2.4.2
xlrd: 1.0.0
xlwt: 1.2.0
xlsxwriter: 0.9.6
lxml: 3.6.4
bs4: 4.5.3
html5lib: 0.999
sqlalchemy: 1.1.4
pymysql: 0.7.10.None
psycopg2: None
jinja2: 2.9.6
s3fs: None
pandas_gbq: 0.2.0
pandas_datareader: None
For anyone who wants a shorter version of the above (without using shelve, which give me this complain below:
File "---.py", line 104, in get_possible_values
with shelve.open(shelf_name, writeback=True) as shelf:
AttributeError: DbfilenameShelf instance has no attribute '__exit__'
def concat(dataframes, categorical_columns, ignore_index=False):
"""Concatenate dataframes with unordered categorical columns.
Will mutate categorical columns of origial dataframes.
dataframes: list of dataframes.
categorical_columns: list of names of unordered, categorical columns.
ignore_index: same as from pd.concat.
shelf_name: filename for shelve object to store possible values.
"""
# Get all possible values for all categorical_columns
possible_values = {}
for col in categorical_columns:
possible_values[col] = set()
for df in dataframes:
for col in categorical_columns:
for val in df[col]:
possible_values[col].add(val)
# Use pd.Categorical() to re-categorizing the values in all columns
for df in dataframes:
for col in categorical_columns:
df[col] = pd.Categorical(
df[col], categories=possible_values[col], ordered=False)
return pd.concat(dataframes, axis=0, ignore_index=ignore_index)\
PS: you won't need to do all this if you are running pandas 0.19 or later. In my case I gotta live with 0.18 and this saved my life today! Thank you @tdhopper !
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There is a ticket about this here: pandas-dev/pandas#12699