Skip to content

Instantly share code, notes, and snippets.

@kokes
Created March 13, 2016 10:07
Show Gist options
  • Save kokes/ec05c336d23c129afbef to your computer and use it in GitHub Desktop.
Save kokes/ec05c336d23c129afbef to your computer and use it in GitHub Desktop.
Recreating issue #744 in pandas.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from StringIO import StringIO"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>value</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>a=0,b=4,c=6 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>a=10,b=5,c=-1 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>a=20,c=60 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>a=430 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>e</th>\n",
" <td>c=200</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" value\n",
"id \n",
"a a=0,b=4,c=6 ...\n",
"b a=10,b=5,c=-1 ...\n",
"c a=20,c=60 ...\n",
"d a=430 ...\n",
"e c=200"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f=StringIO(\"\"\"id\tvalue\n",
"a\ta=0,b=4,c=6 \n",
"b\ta=10,b=5,c=-1 \n",
"c\ta=20,c=60 \n",
"d\ta=430 \n",
"e\tc=200\"\"\")\n",
"df = pd.read_table(f, index_col=[0])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np \n",
"def value_convert(value, fields=[\"a\",\"b\",\"c\"]): \n",
" pairs = value.split(\",\") \n",
" from collections import defaultdict \n",
" d = defaultdict(int) \n",
" for pair in pairs: \n",
" k, v = pair.split(\"=\") \n",
" d[k] = v \n",
" results = np.array([d[f] for f in fields], \n",
" dtype='int64') \n",
" return results "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>value</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>[0, 4, 6]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>[10, 5, -1]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>[20, 0, 60]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>[430, 0, 0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>e</th>\n",
" <td>[0, 0, 200]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" value\n",
"id \n",
"a [0, 4, 6]\n",
"b [10, 5, -1]\n",
"c [20, 0, 60]\n",
"d [430, 0, 0]\n",
"e [0, 0, 200]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f.seek(0)\n",
"pd.read_table(f, index_col=[0], converters={'value': value_convert})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment