Created
October 18, 2014 14:05
-
-
Save satouy/cb1dd4498da9866577aa to your computer and use it in GitHub Desktop.
original Take-R-pandas-dataframe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:b71d98c5ecb07776f4d14e7508277b828a788395e0fc1bba70ebe0a839ae6f78" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"sage/R\u3068Pandas(Sage)\u3067\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u306e\u76f8\u4e92\u5909\u63db (\u7af9\u672c\u6c0f)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"http://www.pwv.co.jp/~take/TakeWiki/index.php?sage%2FR\u3068Pandas%28Sage%29\u3067\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u306e\u76f8\u4e92\u5909\u63db" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"made by Y.Sato Sat 18 Oct, 2014" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"R\u3068Pandas(Sage)\u3067\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u306e\u76f8\u4e92\u5909\u63db" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"\u30c7\u30fc\u30bf\u5909\u63db\u3067\u4f7f\u7528\u3059\u308b\u30d1\u30c3\u30b1\u30fc\u30b8\u3068\u30e9\u30a4\u30d6\u30e9\u30ea" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%load_ext sage" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# R\u3068Pandas\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u3092\u76f8\u4e92\u306b\u5909\u63db\u3059\u308b\u65b9\u6cd5\n", | |
"# Sage\u3067\u306f\u3001numpy\u3068pandas\u3092\u30a4\u30f3\u30dd\u30fc\u30c8\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"# R\u3067\u306fjsonlite\u30d1\u30c3\u30b1\u30fc\u30b8\u3092\u4f7f\u7528\n", | |
"r('library(jsonlite)')\n", | |
"# \u4f8b\u3068\u3057\u3066R Graphics Cookbook\u306e\u30c7\u30fc\u30bf\u3092\u4f7f\u7528\n", | |
"#r(\"install.packages('gcookbook')\")\n", | |
"r('library(gcookbook)')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
"[1] \"gcookbook\" \"jsonlite\" \"stats\" \"graphics\" \"grDevices\" \"utils\" \"datasets\" \"methods\" \n", | |
"[9] \"base\" " | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"R\u304b\u3089Pandas\u3078\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u5909\u63db" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# R\u304b\u3089JSON\u5f62\u5f0f\u3067\u30c7\u30fc\u30bf\u3092\u6301\u3063\u3066\u304f\u308b\u65b9\u6cd5\n", | |
"# \u4f8b\u3068\u3057\u3066\u3001gcookbook\u306e\u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf\u3092R\u304b\u3089\u53d6\u5f97\u3059\u308b\n", | |
"test_json = r('toJSON(heightweight, pretty=FALSE)')\n", | |
"# heightweight = pd.read_json(sageobj(test_json)); heightweight.head()\n", | |
"heightweight = pd.read_json(sageobj(test_json)['DATA']); heightweight.head()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ageMonth</th>\n", | |
" <th>ageYear</th>\n", | |
" <th>heightIn</th>\n", | |
" <th>sex</th>\n", | |
" <th>weightLb</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 143</td>\n", | |
" <td> 11.92</td>\n", | |
" <td> 56.3</td>\n", | |
" <td> f</td>\n", | |
" <td> 85.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 155</td>\n", | |
" <td> 12.92</td>\n", | |
" <td> 62.3</td>\n", | |
" <td> f</td>\n", | |
" <td> 105.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 153</td>\n", | |
" <td> 12.75</td>\n", | |
" <td> 63.3</td>\n", | |
" <td> f</td>\n", | |
" <td> 108.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 161</td>\n", | |
" <td> 13.42</td>\n", | |
" <td> 59.0</td>\n", | |
" <td> f</td>\n", | |
" <td> 92.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 191</td>\n", | |
" <td> 15.92</td>\n", | |
" <td> 62.5</td>\n", | |
" <td> f</td>\n", | |
" <td> 112.5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
" ageMonth ageYear heightIn sex weightLb\n", | |
"0 143 11.92 56.3 f 85.0\n", | |
"1 155 12.92 62.3 f 105.0\n", | |
"2 153 12.75 63.3 f 108.0\n", | |
"3 161 13.42 59.0 f 92.0\n", | |
"4 191 15.92 62.5 f 112.5" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# sageobj(test_json);" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# sageobj(test_json)['DATA'];" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"\u95a2\u6570\u306b\u307e\u3068\u3081\u308b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# \u3053\u308c\u3092\u95a2\u6570\u306b\u307e\u3068\u3081\u308b\n", | |
"# R\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u3092pandas\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u306b\u5909\u63db\u3059\u308b\n", | |
"def RDf2PandaDf(name):\n", | |
" json_str = r('toJSON(%s, pretty=FALSE)' % name)\n", | |
" return pd.read_json(sageobj(json_str)['DATA'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"RDf2PandaDf\u306e\u4f7f\u7528\u4f8b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print RDf2PandaDf('heightweight').head()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" ageMonth ageYear heightIn sex weightLb\n", | |
"0 143 11.92 56.3 f 85.0\n", | |
"1 155 12.92 62.3 f 105.0\n", | |
"2 153 12.75 63.3 f 108.0\n", | |
"3 161 13.42 59.0 f 92.0\n", | |
"4 191 15.92 62.5 f 112.5\n" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Pandas\u304b\u3089R\u3078\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u5909\u63db" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# \u540c\u69d8\u306b\u95a2\u6570\u306b\u307e\u3068\u3081\u308b\n", | |
"# pandas\u306e\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u3092R\u306b\u6e21\u3059\n", | |
"def PandaDf2RDf(df, name):\n", | |
" l = [dict(zip(df.columns, x)) for x in df.values.tolist()]\n", | |
" json_str = str(l)\n", | |
" json_str = json_str.replace(\"'\", '\\\\\"')\n", | |
" r('%s <- fromJSON(\"%s\")' % (name, json_str))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"PandaDf2RDf\u306e\u4f7f\u7528\u4f8b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Pandas\u306e\u30c7\u30fc\u30bf\u3092R\u306b\u6e21\u3059\n", | |
"age = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]\n", | |
"sex = ['F', 'M', 'M', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M']\n", | |
"df = pd.DataFrame({'age': age, 'sex': sex}); df.head()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>sex</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 20</td>\n", | |
" <td> F</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 22</td>\n", | |
" <td> M</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 25</td>\n", | |
" <td> M</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 27</td>\n", | |
" <td> M</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 21</td>\n", | |
" <td> F</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
" age sex\n", | |
"0 20 F\n", | |
"1 22 M\n", | |
"2 25 M\n", | |
"3 27 M\n", | |
"4 21 F" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"PandaDf2RDf(df, \"a\")\n", | |
"r('a')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 11, | |
"text": [ | |
" age sex\n", | |
"1 20 F\n", | |
"2 22 M\n", | |
"3 25 M\n", | |
"4 27 M\n", | |
"5 21 F\n", | |
"6 23 M\n", | |
"7 37 F\n", | |
"8 31 M\n", | |
"9 61 F\n", | |
"10 45 M\n", | |
"11 41 F\n", | |
"12 32 M" | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment