Created
July 1, 2016 00:31
-
-
Save wlattner/4c34b9f0c6fe6dcd79983bc9fe6e70a2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"cases = pd.read_csv(\"https://raw.githubusercontent.com/rstudio/EDAWR/master/data-raw/cases.csv\")\n", | |
"storms = pd.read_csv(\"https://raw.githubusercontent.com/rstudio/EDAWR/master/data-raw/storms.csv\")\n", | |
"pollution = pd.read_csv(\"https://raw.githubusercontent.com/rstudio/EDAWR/master/data-raw/pollution.csv\")\n", | |
"tb = pd.read_csv(\"https://raw.githubusercontent.com/rstudio/EDAWR/master/data-raw/tb.csv\")\n", | |
"iris = pd.read_csv(\"./iris.csv\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Gather columns into rows." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"```r\n", | |
"> tidyr::gather(cases, \"year\", \"n\", 2:4)\n", | |
" country year n\n", | |
"1 FR 2011 7000\n", | |
"2 DE 2011 5800\n", | |
"3 US 2011 15000\n", | |
"4 FR 2012 6900\n", | |
"5 DE 2012 6000\n", | |
"6 US 2012 14000\n", | |
"7 FR 2013 7000\n", | |
"8 DE 2013 6200\n", | |
"9 US 2013 13000\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>country</th>\n", | |
" <th>year</th>\n", | |
" <th>n</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>FR</td>\n", | |
" <td>2011</td>\n", | |
" <td>7000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>DE</td>\n", | |
" <td>2011</td>\n", | |
" <td>5800</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>US</td>\n", | |
" <td>2011</td>\n", | |
" <td>15000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>FR</td>\n", | |
" <td>2012</td>\n", | |
" <td>6900</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>DE</td>\n", | |
" <td>2012</td>\n", | |
" <td>6000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>US</td>\n", | |
" <td>2012</td>\n", | |
" <td>14000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>FR</td>\n", | |
" <td>2013</td>\n", | |
" <td>7000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>DE</td>\n", | |
" <td>2013</td>\n", | |
" <td>6200</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>US</td>\n", | |
" <td>2013</td>\n", | |
" <td>13000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" country year n\n", | |
"0 FR 2011 7000\n", | |
"1 DE 2011 5800\n", | |
"2 US 2011 15000\n", | |
"3 FR 2012 6900\n", | |
"4 DE 2012 6000\n", | |
"5 US 2012 14000\n", | |
"6 FR 2013 7000\n", | |
"7 DE 2013 6200\n", | |
"8 US 2013 13000" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.melt(cases, id_vars=\"country\", var_name=\"year\", value_name=\"n\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Spread rows into columns." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"```r\n", | |
"> tidyr::spread(pollution, size, amount)\n", | |
" city large small\n", | |
"1 Beijing 121 56\n", | |
"2 London 22 16\n", | |
"3 New York 23 14\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>size</th>\n", | |
" <th>large</th>\n", | |
" <th>small</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>city</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Beijing</th>\n", | |
" <td>121</td>\n", | |
" <td>56</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>London</th>\n", | |
" <td>22</td>\n", | |
" <td>16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>New York</th>\n", | |
" <td>23</td>\n", | |
" <td>14</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"size large small\n", | |
"city \n", | |
"Beijing 121 56\n", | |
"London 22 16\n", | |
"New York 23 14" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pollution.pivot(index=\"city\", columns=\"size\", values=\"amount\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Separate one column into several." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> tidyr::separate(storms, date, c(\"y\", \"m\", \"d\"))\n", | |
"# A tibble: 6 x 6\n", | |
" storm wind pressure y m d\n", | |
"* <chr> <int> <int> <chr> <chr> <chr>\n", | |
"1 Alberto 110 1007 2000 08 03\n", | |
"2 Alex 45 1009 1998 07 27\n", | |
"3 Allison 65 1005 1995 06 03\n", | |
"4 Ana 40 1013 1997 06 30\n", | |
"5 Arlene 50 1010 1999 06 11\n", | |
"6 Arthur 45 1010 1996 06 17\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>storm</th>\n", | |
" <th>wind</th>\n", | |
" <th>pressure</th>\n", | |
" <th>d</th>\n", | |
" <th>m</th>\n", | |
" <th>y</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Alberto</td>\n", | |
" <td>110</td>\n", | |
" <td>1007</td>\n", | |
" <td>03</td>\n", | |
" <td>08</td>\n", | |
" <td>2000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Alex</td>\n", | |
" <td>45</td>\n", | |
" <td>1009</td>\n", | |
" <td>27</td>\n", | |
" <td>07</td>\n", | |
" <td>1998</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Allison</td>\n", | |
" <td>65</td>\n", | |
" <td>1005</td>\n", | |
" <td>03</td>\n", | |
" <td>06</td>\n", | |
" <td>1995</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Ana</td>\n", | |
" <td>40</td>\n", | |
" <td>1013</td>\n", | |
" <td>30</td>\n", | |
" <td>06</td>\n", | |
" <td>1997</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Arlene</td>\n", | |
" <td>50</td>\n", | |
" <td>1010</td>\n", | |
" <td>11</td>\n", | |
" <td>06</td>\n", | |
" <td>1999</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>Arthur</td>\n", | |
" <td>45</td>\n", | |
" <td>1010</td>\n", | |
" <td>17</td>\n", | |
" <td>06</td>\n", | |
" <td>1996</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" storm wind pressure d m y\n", | |
"0 Alberto 110 1007 03 08 2000\n", | |
"1 Alex 45 1009 27 07 1998\n", | |
"2 Allison 65 1005 03 06 1995\n", | |
"3 Ana 40 1013 30 06 1997\n", | |
"4 Arlene 50 1010 11 06 1999\n", | |
"5 Arthur 45 1010 17 06 1996" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Split every date string once (vectorised) instead of re-splitting it per column.\n", | |
"date_parts = storms.date.str.split(\"-\", expand=True)\n", | |
"storms.assign(\n", | |
"    y = date_parts[0],\n", | |
"    m = date_parts[1],\n", | |
"    d = date_parts[2]\n", | |
").drop(['date'], axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Extract rows that meet logical criteria" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::filter(iris, Sepal.Length > 7)\n", | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"1 7.1 3.0 5.9 2.1 virginica\n", | |
"2 7.6 3.0 6.6 2.1 virginica\n", | |
"3 7.3 2.9 6.3 1.8 virginica\n", | |
"4 7.2 3.6 6.1 2.5 virginica\n", | |
"5 7.7 3.8 6.7 2.2 virginica\n", | |
"6 7.7 2.6 6.9 2.3 virginica\n", | |
"7 7.7 2.8 6.7 2.0 virginica\n", | |
"8 7.2 3.2 6.0 1.8 virginica\n", | |
"9 7.2 3.0 5.8 1.6 virginica\n", | |
"10 7.4 2.8 6.1 1.9 virginica\n", | |
"11 7.9 3.8 6.4 2.0 virginica\n", | |
"12 7.7 3.0 6.1 2.3 virginica\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sepal.Length</th>\n", | |
" <th>Sepal.Width</th>\n", | |
" <th>Petal.Length</th>\n", | |
" <th>Petal.Width</th>\n", | |
" <th>Species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>102</th>\n", | |
" <td>7.1</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.9</td>\n", | |
" <td>2.1</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>105</th>\n", | |
" <td>7.6</td>\n", | |
" <td>3.0</td>\n", | |
" <td>6.6</td>\n", | |
" <td>2.1</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>107</th>\n", | |
" <td>7.3</td>\n", | |
" <td>2.9</td>\n", | |
" <td>6.3</td>\n", | |
" <td>1.8</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>109</th>\n", | |
" <td>7.2</td>\n", | |
" <td>3.6</td>\n", | |
" <td>6.1</td>\n", | |
" <td>2.5</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>117</th>\n", | |
" <td>7.7</td>\n", | |
" <td>3.8</td>\n", | |
" <td>6.7</td>\n", | |
" <td>2.2</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>118</th>\n", | |
" <td>7.7</td>\n", | |
" <td>2.6</td>\n", | |
" <td>6.9</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>122</th>\n", | |
" <td>7.7</td>\n", | |
" <td>2.8</td>\n", | |
" <td>6.7</td>\n", | |
" <td>2.0</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>125</th>\n", | |
" <td>7.2</td>\n", | |
" <td>3.2</td>\n", | |
" <td>6.0</td>\n", | |
" <td>1.8</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>129</th>\n", | |
" <td>7.2</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.8</td>\n", | |
" <td>1.6</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>130</th>\n", | |
" <td>7.4</td>\n", | |
" <td>2.8</td>\n", | |
" <td>6.1</td>\n", | |
" <td>1.9</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>131</th>\n", | |
" <td>7.9</td>\n", | |
" <td>3.8</td>\n", | |
" <td>6.4</td>\n", | |
" <td>2.0</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>135</th>\n", | |
" <td>7.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>6.1</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"102 7.1 3.0 5.9 2.1 virginica\n", | |
"105 7.6 3.0 6.6 2.1 virginica\n", | |
"107 7.3 2.9 6.3 1.8 virginica\n", | |
"109 7.2 3.6 6.1 2.5 virginica\n", | |
"117 7.7 3.8 6.7 2.2 virginica\n", | |
"118 7.7 2.6 6.9 2.3 virginica\n", | |
"122 7.7 2.8 6.7 2.0 virginica\n", | |
"125 7.2 3.2 6.0 1.8 virginica\n", | |
"129 7.2 3.0 5.8 1.6 virginica\n", | |
"130 7.4 2.8 6.1 1.9 virginica\n", | |
"131 7.9 3.8 6.4 2.0 virginica\n", | |
"135 7.7 3.0 6.1 2.3 virginica" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris[iris['Sepal.Length'] > 7]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Remove duplicate rows." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::distinct(iris)\n", | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"1 5.1 3.5 1.4 0.2 setosa\n", | |
"2 4.9 3.0 1.4 0.2 setosa\n", | |
"3 4.7 3.2 1.3 0.2 setosa\n", | |
"4 4.6 3.1 1.5 0.2 setosa\n", | |
"5 5.0 3.6 1.4 0.2 setosa\n", | |
"6 5.4 3.9 1.7 0.4 setosa\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sepal.Length</th>\n", | |
" <th>Sepal.Width</th>\n", | |
" <th>Petal.Length</th>\n", | |
" <th>Petal.Width</th>\n", | |
" <th>Species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"0 5.1 3.5 1.4 0.2 setosa\n", | |
"1 4.9 3.0 1.4 0.2 setosa\n", | |
"2 4.7 3.2 1.3 0.2 setosa\n", | |
"3 4.6 3.1 1.5 0.2 setosa\n", | |
"4 5.0 3.6 1.4 0.2 setosa" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.drop_duplicates().head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Randomly select fraction of rows." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::sample_frac(iris, 0.5, replace = TRUE)\n", | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"94 5.0 2.3 3.3 1.0 versicolor\n", | |
"24 5.1 3.3 1.7 0.5 setosa\n", | |
"4 4.6 3.1 1.5 0.2 setosa\n", | |
"139 6.0 3.0 4.8 1.8 virginica\n", | |
"82 5.5 2.4 3.7 1.0 versicolor\n", | |
"128 6.1 3.0 4.9 1.8 virginica\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sepal.Length</th>\n", | |
" <th>Sepal.Width</th>\n", | |
" <th>Petal.Length</th>\n", | |
" <th>Petal.Width</th>\n", | |
" <th>Species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>36</th>\n", | |
" <td>5.5</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>37</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.1</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>132</th>\n", | |
" <td>6.4</td>\n", | |
" <td>2.8</td>\n", | |
" <td>5.6</td>\n", | |
" <td>2.2</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>106</th>\n", | |
" <td>4.9</td>\n", | |
" <td>2.5</td>\n", | |
" <td>4.5</td>\n", | |
" <td>1.7</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>97</th>\n", | |
" <td>6.2</td>\n", | |
" <td>2.9</td>\n", | |
" <td>4.3</td>\n", | |
" <td>1.3</td>\n", | |
" <td>versicolor</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"36 5.5 3.5 1.3 0.2 setosa\n", | |
"37 4.9 3.6 1.4 0.1 setosa\n", | |
"132 6.4 2.8 5.6 2.2 virginica\n", | |
"106 4.9 2.5 4.5 1.7 virginica\n", | |
"97 6.2 2.9 4.3 1.3 versicolor" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.sample(frac=0.5, replace=True).head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Randomly select n rows." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::sample_n(iris, 10, replace = TRUE)\n", | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"13 4.8 3.0 1.4 0.1 setosa\n", | |
"135 6.1 2.6 5.6 1.4 virginica\n", | |
"69 6.2 2.2 4.5 1.5 versicolor\n", | |
"31 4.8 3.1 1.6 0.2 setosa\n", | |
"27 5.0 3.4 1.6 0.4 setosa\n", | |
"100 5.7 2.8 4.1 1.3 versicolor\n", | |
"86 6.0 3.4 4.5 1.6 versicolor\n", | |
"150 5.9 3.0 5.1 1.8 virginica\n", | |
"125 6.7 3.3 5.7 2.1 virginica\n", | |
"122 5.6 2.8 4.9 2.0 virginica\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sepal.Length</th>\n", | |
" <th>Sepal.Width</th>\n", | |
" <th>Petal.Length</th>\n", | |
" <th>Petal.Width</th>\n", | |
" <th>Species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>81</th>\n", | |
" <td>5.5</td>\n", | |
" <td>2.4</td>\n", | |
" <td>3.7</td>\n", | |
" <td>1.0</td>\n", | |
" <td>versicolor</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>107</th>\n", | |
" <td>7.3</td>\n", | |
" <td>2.9</td>\n", | |
" <td>6.3</td>\n", | |
" <td>1.8</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>143</th>\n", | |
" <td>6.8</td>\n", | |
" <td>3.2</td>\n", | |
" <td>5.9</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>127</th>\n", | |
" <td>6.1</td>\n", | |
" <td>3.0</td>\n", | |
" <td>4.9</td>\n", | |
" <td>1.8</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>77</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.7</td>\n", | |
" <td>versicolor</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>5.4</td>\n", | |
" <td>3.9</td>\n", | |
" <td>1.7</td>\n", | |
" <td>0.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>64</th>\n", | |
" <td>5.6</td>\n", | |
" <td>2.9</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.3</td>\n", | |
" <td>versicolor</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>4.8</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.1</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>55</th>\n", | |
" <td>5.7</td>\n", | |
" <td>2.8</td>\n", | |
" <td>4.5</td>\n", | |
" <td>1.3</td>\n", | |
" <td>versicolor</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31</th>\n", | |
" <td>5.4</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"81 5.5 2.4 3.7 1.0 versicolor\n", | |
"107 7.3 2.9 6.3 1.8 virginica\n", | |
"143 6.8 3.2 5.9 2.3 virginica\n", | |
"127 6.1 3.0 4.9 1.8 virginica\n", | |
"77 6.7 3.0 5.0 1.7 versicolor\n", | |
"5 5.4 3.9 1.7 0.4 setosa\n", | |
"64 5.6 2.9 3.6 1.3 versicolor\n", | |
"12 4.8 3.0 1.4 0.1 setosa\n", | |
"55 5.7 2.8 4.5 1.3 versicolor\n", | |
"31 5.4 3.4 1.5 0.4 setosa" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.sample(n=10, replace=True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Select rows by position" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::slice(iris, 10:15)\n", | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"1 4.9 3.1 1.5 0.1 setosa\n", | |
"2 5.4 3.7 1.5 0.2 setosa\n", | |
"3 4.8 3.4 1.6 0.2 setosa\n", | |
"4 4.8 3.0 1.4 0.1 setosa\n", | |
"5 4.3 3.0 1.1 0.1 setosa\n", | |
"6 5.8 4.0 1.2 0.2 setosa\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sepal.Length</th>\n", | |
" <th>Sepal.Width</th>\n", | |
" <th>Petal.Length</th>\n", | |
" <th>Petal.Width</th>\n", | |
" <th>Species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.1</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>5.4</td>\n", | |
" <td>3.7</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>4.8</td>\n", | |
" <td>3.4</td>\n", | |
" <td>1.6</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>4.8</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.1</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>4.3</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.1</td>\n", | |
" <td>0.1</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>5.8</td>\n", | |
" <td>4.0</td>\n", | |
" <td>1.2</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"9 4.9 3.1 1.5 0.1 setosa\n", | |
"10 5.4 3.7 1.5 0.2 setosa\n", | |
"11 4.8 3.4 1.6 0.2 setosa\n", | |
"12 4.8 3.0 1.4 0.1 setosa\n", | |
"13 4.3 3.0 1.1 0.1 setosa\n", | |
"14 5.8 4.0 1.2 0.2 setosa" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.iloc[9:15] # R's 10:15 is 1-based and end-inclusive; iloc is 0-based and end-exclusive" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Select and order by top n entries." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::top_n(storms, 2, date)\n", | |
"# A tibble: 2 x 4\n", | |
" storm wind pressure date\n", | |
" <chr> <int> <int> <date>\n", | |
"1 Alberto 110 1007 2000-08-03\n", | |
"2 Arlene 50 1010 1999-06-11\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>storm</th>\n", | |
" <th>wind</th>\n", | |
" <th>pressure</th>\n", | |
" <th>date</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Alberto</td>\n", | |
" <td>110</td>\n", | |
" <td>1007</td>\n", | |
" <td>2000-08-03</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Arlene</td>\n", | |
" <td>50</td>\n", | |
" <td>1010</td>\n", | |
" <td>1999-06-11</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" storm wind pressure date\n", | |
"0 Alberto 110 1007 2000-08-03\n", | |
"4 Arlene 50 1010 1999-06-11" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"storms.sort_values('date', ascending=False).head(n=2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Select columns by name." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::select(iris, Sepal.Width, Petal.Length, Species)\n", | |
" Sepal.Width Petal.Length Species\n", | |
"1 3.5 1.4 setosa\n", | |
"2 3.0 1.4 setosa\n", | |
"3 3.2 1.3 setosa\n", | |
"4 3.1 1.5 setosa\n", | |
"5 3.6 1.4 setosa\n", | |
"6 3.9 1.7 setosa\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sepal.Width</th>\n", | |
" <th>Petal.Length</th>\n", | |
" <th>Species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sepal.Width Petal.Length Species\n", | |
"0 3.5 1.4 setosa\n", | |
"1 3.0 1.4 setosa\n", | |
"2 3.2 1.3 setosa\n", | |
"3 3.1 1.5 setosa\n", | |
"4 3.6 1.4 setosa" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris[['Sepal.Width', 'Petal.Length', 'Species']].head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Summarize data into a single row of values." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::summarize(iris, avg = mean(Sepal.Length))\n", | |
" avg\n", | |
"1 5.843333\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5.8433333333333337" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris['Sepal.Length'].mean()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Apply a summary function to each column" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::summarize_each(iris, dplyr::funs(mean))\n", | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"1 5.843333 3.057333 3.758 1.199333 NA\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Sepal.Length 5.843333\n", | |
"Sepal.Width 3.057333\n", | |
"Petal.Length 3.758000\n", | |
"Petal.Width 1.199333\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 40, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.drop(['Species'], axis=1).apply(np.mean)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Count number of rows with each unique value of a variable." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::count(iris, Species)\n", | |
"# A tibble: 3 x 2\n", | |
" Species n\n", | |
" <fctr> <int>\n", | |
"1 setosa 50\n", | |
"2 versicolor 50\n", | |
"3 virginica 50\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Species\n", | |
"setosa 50\n", | |
"versicolor 50\n", | |
"virginica 50\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 42, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.groupby('Species').size()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Compute and append one or more new columns.\n", | |
"\n", | |
"```r\n", | |
"> dplyr::mutate(iris, sepal = Sepal.Length + Sepal.Width)\n", | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal\n", | |
"1 5.1 3.5 1.4 0.2 setosa 8.6\n", | |
"2 4.9 3.0 1.4 0.2 setosa 7.9\n", | |
"3 4.7 3.2 1.3 0.2 setosa 7.9\n", | |
"4 4.6 3.1 1.5 0.2 setosa 7.7\n", | |
"5 5.0 3.6 1.4 0.2 setosa 8.6\n", | |
"6 5.4 3.9 1.7 0.4 setosa 9.3\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sepal.Length</th>\n", | |
" <th>Sepal.Width</th>\n", | |
" <th>Petal.Length</th>\n", | |
" <th>Petal.Width</th>\n", | |
" <th>Species</th>\n", | |
" <th>sepal</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" <td>8.6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" <td>7.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" <td>7.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" <td>7.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" <td>8.6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species sepal\n", | |
"0 5.1 3.5 1.4 0.2 setosa 8.6\n", | |
"1 4.9 3.0 1.4 0.2 setosa 7.9\n", | |
"2 4.7 3.2 1.3 0.2 setosa 7.9\n", | |
"3 4.6 3.1 1.5 0.2 setosa 7.7\n", | |
"4 5.0 3.6 1.4 0.2 setosa 8.6" | |
] | |
}, | |
"execution_count": 43, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.assign(\n", | |
" sepal = iris['Sepal.Length'] + iris['Sepal.Width']\n", | |
").head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Apply window function to each column." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::mutate_each(iris, funs(min_rank))\n", | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"1 33 126 12 6 1\n", | |
"2 17 58 12 6 1\n", | |
"3 10 95 5 6 1\n", | |
"4 6 84 25 6 1\n", | |
"5 23 132 12 6 1\n", | |
"6 47 145 45 42 1\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sepal.Length</th>\n", | |
" <th>Sepal.Width</th>\n", | |
" <th>Petal.Length</th>\n", | |
" <th>Petal.Width</th>\n", | |
" <th>Species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>33.0</td>\n", | |
" <td>126.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>17.0</td>\n", | |
" <td>58.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>10.0</td>\n", | |
" <td>95.0</td>\n", | |
" <td>5.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>6.0</td>\n", | |
" <td>84.0</td>\n", | |
" <td>25.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>23.0</td>\n", | |
" <td>132.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>1.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", | |
"0 33.0 126.0 12.0 6.0 1.0\n", | |
"1 17.0 58.0 12.0 6.0 1.0\n", | |
"2 10.0 95.0 5.0 6.0 1.0\n", | |
"3 6.0 84.0 25.0 6.0 1.0\n", | |
"4 23.0 132.0 12.0 6.0 1.0" | |
] | |
}, | |
"execution_count": 49, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.apply(lambda x: x.rank(method=\"min\")).head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Compute one or more new columns. Drop original columns." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```r\n", | |
"> dplyr::transmute(iris, sepal = Sepal.Length + Sepal.Width)\n", | |
" sepal\n", | |
"1 8.6\n", | |
"2 7.9\n", | |
"3 7.9\n", | |
"4 7.7\n", | |
"5 8.6\n", | |
"6 9.3\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>8.6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>7.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>7.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>7.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>8.6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal\n", | |
"0 8.6\n", | |
"1 7.9\n", | |
"2 7.9\n", | |
"3 7.7\n", | |
"4 8.6" | |
] | |
}, | |
"execution_count": 56, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris.assign(\n", | |
" sepal = iris['Sepal.Length'] + iris['Sepal.Width']\n", | |
")[['sepal',]].head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment