Created
March 30, 2016 19:01
-
-
Save TomAugspurger/37097cce6a3368a8dad7cf2c8a9e5e92 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"%load_ext rpy2.ipython" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"%%R\n", | |
"library(ggplot2)\n", | |
"\n", | |
"write.csv(diamonds, 'diamonds.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv('diamonds.csv', index_col=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>carat</th>\n", | |
" <th>cut</th>\n", | |
" <th>color</th>\n", | |
" <th>clarity</th>\n", | |
" <th>depth</th>\n", | |
" <th>table</th>\n", | |
" <th>price</th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>z</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.23</td>\n", | |
" <td>Ideal</td>\n", | |
" <td>E</td>\n", | |
" <td>SI2</td>\n", | |
" <td>61.5</td>\n", | |
" <td>55.0</td>\n", | |
" <td>326</td>\n", | |
" <td>3.95</td>\n", | |
" <td>3.98</td>\n", | |
" <td>2.43</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.21</td>\n", | |
" <td>Premium</td>\n", | |
" <td>E</td>\n", | |
" <td>SI1</td>\n", | |
" <td>59.8</td>\n", | |
" <td>61.0</td>\n", | |
" <td>326</td>\n", | |
" <td>3.89</td>\n", | |
" <td>3.84</td>\n", | |
" <td>2.31</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.23</td>\n", | |
" <td>Good</td>\n", | |
" <td>E</td>\n", | |
" <td>VS1</td>\n", | |
" <td>56.9</td>\n", | |
" <td>65.0</td>\n", | |
" <td>327</td>\n", | |
" <td>4.05</td>\n", | |
" <td>4.07</td>\n", | |
" <td>2.31</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.29</td>\n", | |
" <td>Premium</td>\n", | |
" <td>I</td>\n", | |
" <td>VS2</td>\n", | |
" <td>62.4</td>\n", | |
" <td>58.0</td>\n", | |
" <td>334</td>\n", | |
" <td>4.20</td>\n", | |
" <td>4.23</td>\n", | |
" <td>2.63</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0.31</td>\n", | |
" <td>Good</td>\n", | |
" <td>J</td>\n", | |
" <td>SI2</td>\n", | |
" <td>63.3</td>\n", | |
" <td>58.0</td>\n", | |
" <td>335</td>\n", | |
" <td>4.34</td>\n", | |
" <td>4.35</td>\n", | |
" <td>2.75</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53936</th>\n", | |
" <td>0.72</td>\n", | |
" <td>Ideal</td>\n", | |
" <td>D</td>\n", | |
" <td>SI1</td>\n", | |
" <td>60.8</td>\n", | |
" <td>57.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>5.75</td>\n", | |
" <td>5.76</td>\n", | |
" <td>3.50</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53937</th>\n", | |
" <td>0.72</td>\n", | |
" <td>Good</td>\n", | |
" <td>D</td>\n", | |
" <td>SI1</td>\n", | |
" <td>63.1</td>\n", | |
" <td>55.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>5.69</td>\n", | |
" <td>5.75</td>\n", | |
" <td>3.61</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53938</th>\n", | |
" <td>0.70</td>\n", | |
" <td>Very Good</td>\n", | |
" <td>D</td>\n", | |
" <td>SI1</td>\n", | |
" <td>62.8</td>\n", | |
" <td>60.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>5.66</td>\n", | |
" <td>5.68</td>\n", | |
" <td>3.56</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53939</th>\n", | |
" <td>0.86</td>\n", | |
" <td>Premium</td>\n", | |
" <td>H</td>\n", | |
" <td>SI2</td>\n", | |
" <td>61.0</td>\n", | |
" <td>58.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>6.15</td>\n", | |
" <td>6.12</td>\n", | |
" <td>3.74</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53940</th>\n", | |
" <td>0.75</td>\n", | |
" <td>Ideal</td>\n", | |
" <td>D</td>\n", | |
" <td>SI2</td>\n", | |
" <td>62.2</td>\n", | |
" <td>55.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>5.83</td>\n", | |
" <td>5.87</td>\n", | |
" <td>3.64</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>53940 rows × 10 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" carat cut color clarity depth table price x y z\n", | |
"1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n", | |
"2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n", | |
"3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n", | |
"4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n", | |
"5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75\n", | |
"... ... ... ... ... ... ... ... ... ... ...\n", | |
"53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50\n", | |
"53937 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61\n", | |
"53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56\n", | |
"53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74\n", | |
"53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64\n", | |
"\n", | |
"[53940 rows x 10 columns]" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Multiple Aggregations" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th colspan=\"2\" halign=\"left\">depth</th>\n", | |
" <th colspan=\"2\" halign=\"left\">carat</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>min</th>\n", | |
" <th>max</th>\n", | |
" <th>Average Weight</th>\n", | |
" <th>+/-</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>cut</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Fair</th>\n", | |
" <td>43.0</td>\n", | |
" <td>79.0</td>\n", | |
" <td>1.046137</td>\n", | |
" <td>0.516404</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Good</th>\n", | |
" <td>54.3</td>\n", | |
" <td>67.0</td>\n", | |
" <td>0.849185</td>\n", | |
" <td>0.454054</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Ideal</th>\n", | |
" <td>43.0</td>\n", | |
" <td>66.7</td>\n", | |
" <td>0.702837</td>\n", | |
" <td>0.432876</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Premium</th>\n", | |
" <td>58.0</td>\n", | |
" <td>63.0</td>\n", | |
" <td>0.891955</td>\n", | |
" <td>0.515262</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Very Good</th>\n", | |
" <td>56.8</td>\n", | |
" <td>64.9</td>\n", | |
" <td>0.806381</td>\n", | |
" <td>0.459435</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" depth carat \n", | |
" min max Average Weight +/-\n", | |
"cut \n", | |
"Fair 43.0 79.0 1.046137 0.516404\n", | |
"Good 54.3 67.0 0.849185 0.454054\n", | |
"Ideal 43.0 66.7 0.702837 0.432876\n", | |
"Premium 58.0 63.0 0.891955 0.515262\n", | |
"Very Good 56.8 64.9 0.806381 0.459435" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(\n", | |
" df.groupby('cut')\n", | |
" .agg({'carat': {'Average Weight': 'mean', '+/-': 'std'},\n", | |
" 'depth': ['min', 'max']})\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Summaries Back in Original" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>carat</th>\n", | |
" <th>cut</th>\n", | |
" <th>color</th>\n", | |
" <th>clarity</th>\n", | |
" <th>depth</th>\n", | |
" <th>table</th>\n", | |
" <th>price</th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>z</th>\n", | |
" <th>xy</th>\n", | |
" <th>xz</th>\n", | |
" <th>mean_xy</th>\n", | |
" <th>var_xy</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.23</td>\n", | |
" <td>Ideal</td>\n", | |
" <td>E</td>\n", | |
" <td>SI2</td>\n", | |
" <td>61.5</td>\n", | |
" <td>55.0</td>\n", | |
" <td>326</td>\n", | |
" <td>3.95</td>\n", | |
" <td>3.98</td>\n", | |
" <td>2.43</td>\n", | |
" <td>7.93</td>\n", | |
" <td>6.38</td>\n", | |
" <td>5.507451</td>\n", | |
" <td>4.537332</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0.21</td>\n", | |
" <td>Premium</td>\n", | |
" <td>E</td>\n", | |
" <td>SI1</td>\n", | |
" <td>59.8</td>\n", | |
" <td>61.0</td>\n", | |
" <td>326</td>\n", | |
" <td>3.89</td>\n", | |
" <td>3.84</td>\n", | |
" <td>2.31</td>\n", | |
" <td>7.73</td>\n", | |
" <td>6.20</td>\n", | |
" <td>5.973887</td>\n", | |
" <td>5.807050</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0.23</td>\n", | |
" <td>Good</td>\n", | |
" <td>E</td>\n", | |
" <td>VS1</td>\n", | |
" <td>56.9</td>\n", | |
" <td>65.0</td>\n", | |
" <td>327</td>\n", | |
" <td>4.05</td>\n", | |
" <td>4.07</td>\n", | |
" <td>2.31</td>\n", | |
" <td>8.12</td>\n", | |
" <td>6.36</td>\n", | |
" <td>5.838785</td>\n", | |
" <td>4.443224</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0.29</td>\n", | |
" <td>Premium</td>\n", | |
" <td>I</td>\n", | |
" <td>VS2</td>\n", | |
" <td>62.4</td>\n", | |
" <td>58.0</td>\n", | |
" <td>334</td>\n", | |
" <td>4.20</td>\n", | |
" <td>4.23</td>\n", | |
" <td>2.63</td>\n", | |
" <td>8.43</td>\n", | |
" <td>6.83</td>\n", | |
" <td>5.973887</td>\n", | |
" <td>5.807050</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>0.31</td>\n", | |
" <td>Good</td>\n", | |
" <td>J</td>\n", | |
" <td>SI2</td>\n", | |
" <td>63.3</td>\n", | |
" <td>58.0</td>\n", | |
" <td>335</td>\n", | |
" <td>4.34</td>\n", | |
" <td>4.35</td>\n", | |
" <td>2.75</td>\n", | |
" <td>8.69</td>\n", | |
" <td>7.09</td>\n", | |
" <td>5.838785</td>\n", | |
" <td>4.443224</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53936</th>\n", | |
" <td>0.72</td>\n", | |
" <td>Ideal</td>\n", | |
" <td>D</td>\n", | |
" <td>SI1</td>\n", | |
" <td>60.8</td>\n", | |
" <td>57.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>5.75</td>\n", | |
" <td>5.76</td>\n", | |
" <td>3.50</td>\n", | |
" <td>11.51</td>\n", | |
" <td>9.25</td>\n", | |
" <td>5.507451</td>\n", | |
" <td>4.537332</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53937</th>\n", | |
" <td>0.72</td>\n", | |
" <td>Good</td>\n", | |
" <td>D</td>\n", | |
" <td>SI1</td>\n", | |
" <td>63.1</td>\n", | |
" <td>55.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>5.69</td>\n", | |
" <td>5.75</td>\n", | |
" <td>3.61</td>\n", | |
" <td>11.44</td>\n", | |
" <td>9.30</td>\n", | |
" <td>5.838785</td>\n", | |
" <td>4.443224</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53938</th>\n", | |
" <td>0.70</td>\n", | |
" <td>Very Good</td>\n", | |
" <td>D</td>\n", | |
" <td>SI1</td>\n", | |
" <td>62.8</td>\n", | |
" <td>60.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>5.66</td>\n", | |
" <td>5.68</td>\n", | |
" <td>3.56</td>\n", | |
" <td>11.34</td>\n", | |
" <td>9.22</td>\n", | |
" <td>5.740696</td>\n", | |
" <td>4.854494</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53939</th>\n", | |
" <td>0.86</td>\n", | |
" <td>Premium</td>\n", | |
" <td>H</td>\n", | |
" <td>SI2</td>\n", | |
" <td>61.0</td>\n", | |
" <td>58.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>6.15</td>\n", | |
" <td>6.12</td>\n", | |
" <td>3.74</td>\n", | |
" <td>12.27</td>\n", | |
" <td>9.89</td>\n", | |
" <td>5.973887</td>\n", | |
" <td>5.807050</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>53940</th>\n", | |
" <td>0.75</td>\n", | |
" <td>Ideal</td>\n", | |
" <td>D</td>\n", | |
" <td>SI2</td>\n", | |
" <td>62.2</td>\n", | |
" <td>55.0</td>\n", | |
" <td>2757</td>\n", | |
" <td>5.83</td>\n", | |
" <td>5.87</td>\n", | |
" <td>3.64</td>\n", | |
" <td>11.70</td>\n", | |
" <td>9.47</td>\n", | |
" <td>5.507451</td>\n", | |
" <td>4.537332</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>53940 rows × 14 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" carat cut color clarity depth table price x y z \\\n", | |
"1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 \n", | |
"2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31 \n", | |
"3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31 \n", | |
"4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63 \n", | |
"5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75 \n", | |
"... ... ... ... ... ... ... ... ... ... ... \n", | |
"53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50 \n", | |
"53937 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61 \n", | |
"53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56 \n", | |
"53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74 \n", | |
"53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64 \n", | |
"\n", | |
" xy xz mean_xy var_xy \n", | |
"1 7.93 6.38 5.507451 4.537332 \n", | |
"2 7.73 6.20 5.973887 5.807050 \n", | |
"3 8.12 6.36 5.838785 4.443224 \n", | |
"4 8.43 6.83 5.973887 5.807050 \n", | |
"5 8.69 7.09 5.838785 4.443224 \n", | |
"... ... ... ... ... \n", | |
"53936 11.51 9.25 5.507451 4.537332 \n", | |
"53937 11.44 9.30 5.838785 4.443224 \n", | |
"53938 11.34 9.22 5.740696 4.854494 \n", | |
"53939 12.27 9.89 5.973887 5.807050 \n", | |
"53940 11.70 9.47 5.507451 4.537332 \n", | |
"\n", | |
"[53940 rows x 14 columns]" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(\n", | |
" df.assign(xy = lambda d: d.x + d.y, xz = lambda d: d.x + d.z)\n", | |
" .assign(mean_xy=lambda x: x.groupby('cut').x.transform('mean'),\n", | |
" var_xy=lambda x: x.groupby('cut').xy.transform('var'))\n", | |
"\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"So that's possbile, but maybe not that legible." | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment