Skip to content

Instantly share code, notes, and snippets.

@heronshoes
Last active April 11, 2022 09:47
Show Gist options
  • Save heronshoes/3d93af4cc7c55bd60185932c122c6b4e to your computer and use it in GitHub Desktop.
Save heronshoes/3d93af4cc7c55bd60185932c122c6b4e to your computer and use it in GitHub Desktop.

Rover summary method example

This is experimental code for, and a usage example of, Rover::DataFrame#summary in Ruby.

This method is not implemented in the official release.

Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "9ff0d57f-61f7-460f-a1d7-f2b3c3aef9fa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"ruby 3.1.1p18 (2022-02-18 revision 53f5fc4236) [x86_64-linux]\""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"RUBY_DESCRIPTION"
]
},
{
"cell_type": "markdown",
"id": "6da0684a-0ba8-478c-8144-5bf3216e6cde",
"metadata": {},
"source": [
"## Experimental `Rover::DataFrame#summary`\n",
"\n",
"This is an experimental `summary` method example; it is not implemented in the official release."
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "1a8fa611-365c-4944-8e8c-0528714b62cb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"0.3.0\""
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"require 'rover'\n",
"require './prepend_rover_summary'\n",
"Rover::VERSION"
]
},
{
"cell_type": "markdown",
"id": "65af414f-2a14-486a-9235-8bcb54b6427d",
"metadata": {},
"source": [
"### Penguins dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "3eded250-d8c7-414b-9bff-8b2e368cd954",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181.0</td><td>3750.0</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186.0</td><td>3800.0</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195.0</td><td>3250.0</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222.0</td><td>5750.0</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212.0</td><td>5200.0</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213.0</td><td>5400.0</td><td>male</td><td>2009</td></tr></table>"
],
"text/plain": [
"species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year\n",
" Adelie Torgersen 39.1 18.7 181.0 3750.0 male 2007\n",
" Adelie Torgersen 39.5 17.4 186.0 3800.0 female 2007\n",
" Adelie Torgersen 40.3 18.0 195.0 3250.0 female 2007\n",
" Adelie Torgersen NaN NaN NaN NaN 2007\n",
" Adelie Torgersen 36.7 19.3 193.0 3450.0 female 2007\n",
" ... ... ... ... ... ... ... ...\n",
" Gentoo Biscoe NaN NaN NaN NaN 2009\n",
" Gentoo Biscoe 46.8 14.3 215.0 4850.0 female 2009\n",
" Gentoo Biscoe 50.4 15.7 222.0 5750.0 male 2009\n",
" Gentoo Biscoe 45.2 14.8 212.0 5200.0 female 2009\n",
" Gentoo Biscoe 49.9 16.1 213.0 5400.0 male 2009"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"require 'datasets'\n",
"ds = Datasets::Penguins.new\n",
"penguins = Rover::DataFrame.new(ds.to_table.to_h)"
]
},
{
"cell_type": "markdown",
"id": "9b1a53c3-9c9e-486e-94d7-cd6c7b9cc0e7",
"metadata": {},
"source": [
"#### `summary`, Vector to row"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7807a49b-33e3-4033-a70c-7edb62b865a4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" [344,5] count mean std min 25% 50% 75% max\n",
" bill_length_mm 342 43.92193 5.459584 32.1 39.275 44.5 48.525 59.6\n",
" bill_depth_mm 342 17.15117 1.974793 13.1 15.6 17.3 18.7 21.5\n",
"flipper_length_mm 342 200.915205 14.061714 172.0 190.0 197.0 214.0 231.0\n",
" body_mass_g 342 4201.754386 801.954536 2700.0 3550.0 4050.0 4781.25 6300.0\n",
" year 344 2008.02907 0.818356 2007.0 2007.0 2008.0 2009.0 2009.0\n"
]
}
],
"source": [
"puts penguins.summary"
]
},
{
"cell_type": "markdown",
"id": "71814402-174a-45f9-abcd-d5a11ce02213",
"metadata": {},
"source": [
"#### `summary_T`, Vector to column"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "41937ac6-165a-49b1-8608-9bc9a3401bfd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[344,5] bill_length_mm bill_depth_mm flipper_length_mm body_mass_g year\n",
" count 342.0 342.0 342.0 342.0 344.0\n",
" mean 43.92193 17.15117 200.915205 4201.754386 2008.02907\n",
" std 5.459584 1.974793 14.061714 801.954536 0.818356\n",
" min 32.1 13.1 172.0 2700.0 2007.0\n",
" 25% 39.275 15.6 190.0 3550.0 2007.0\n",
" 50% 44.5 17.3 197.0 4050.0 2008.0\n",
" 75% 48.525 18.7 214.0 4781.25 2009.0\n",
" max 59.6 21.5 231.0 6300.0 2009.0\n"
]
}
],
"source": [
"puts penguins.summary_T"
]
},
{
"cell_type": "markdown",
"id": "43dbc54a-4939-4971-ac9d-a972e93afd05",
"metadata": {},
"source": [
"#### Python's `describe`"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "3fa47866-b819-45b7-8b5d-79050018434d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{:pycall=>\"1.4.1\", :pandas=>\"1.4.1\", :matplotlib=>\"3.5.1\", :seaborn=>\"0.11.2\"}"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"require 'pandas'\n",
"pd = Pandas\n",
"require 'matplotlib'\n",
"require 'matplotlib/iruby'\n",
"Matplotlib::IRuby.activate\n",
"plt = Matplotlib::pyplot\n",
"sns = PyCall.import_module('seaborn')\n",
"{pycall: PyCall::VERSION, pandas: pd.__version__, matplotlib: Matplotlib.__version__, seaborn: sns.__version__}"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "1469aa63-5527-4158-b4f3-5dcbbbab732a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" bill_length_mm bill_depth_mm flipper_length_mm body_mass_g\n",
"count 342.000000 342.000000 342.000000 342.000000\n",
"mean 43.921930 17.151170 200.915205 4201.754386\n",
"std 5.459584 1.974793 14.061714 801.954536\n",
"min 32.100000 13.100000 172.000000 2700.000000\n",
"25% 39.225000 15.600000 190.000000 3550.000000\n",
"50% 44.450000 17.300000 197.000000 4050.000000\n",
"75% 48.500000 18.700000 213.000000 4750.000000\n",
"max 59.600000 21.500000 231.000000 6300.000000\n"
]
}
],
"source": [
"penguins_pandas = sns.load_dataset('penguins')\n",
"puts penguins_pandas.describe"
]
},
{
"cell_type": "markdown",
"id": "54c8032d-2d41-4c65-8b62-628f53641782",
"metadata": {},
"source": [
"### anscombe dataset"
]
},
{
"cell_type": "markdown",
"id": "5203e6dc-c501-47c0-b5e5-56f04042ad9f",
"metadata": {},
"source": [
"#### Rover's (from R dataset)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "b4f084eb-a5d2-4ebc-964d-795a95d1dd9e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table><tr><th>x1</th><th>x2</th><th>x3</th><th>x4</th><th>y1</th><th>y2</th><th>y3</th><th>y4</th></tr><tr><td>10</td><td>10</td><td>10</td><td>8</td><td>8.04</td><td>9.14</td><td>7.46</td><td>6.58</td></tr><tr><td>8</td><td>8</td><td>8</td><td>8</td><td>6.95</td><td>8.14</td><td>6.77</td><td>5.76</td></tr><tr><td>13</td><td>13</td><td>13</td><td>8</td><td>7.58</td><td>8.74</td><td>12.74</td><td>7.71</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>12</td><td>12</td><td>12</td><td>8</td><td>10.84</td><td>9.13</td><td>8.15</td><td>5.56</td></tr><tr><td>7</td><td>7</td><td>7</td><td>8</td><td>4.82</td><td>7.26</td><td>6.42</td><td>7.91</td></tr><tr><td>5</td><td>5</td><td>5</td><td>8</td><td>5.68</td><td>4.74</td><td>5.73</td><td>6.89</td></tr></table>"
],
"text/plain": [
" x1 x2 x3 x4 y1 y2 y3 y4\n",
" 10 10 10 8 8.04 9.14 7.46 6.58\n",
" 8 8 8 8 6.95 8.14 6.77 5.76\n",
" 13 13 13 8 7.58 8.74 12.74 7.71\n",
" 9 9 9 8 8.81 8.77 7.11 8.84\n",
" 11 11 11 8 8.33 9.26 7.81 8.47\n",
" 14 14 14 8 9.96 8.1 8.84 7.04\n",
" 6 6 6 8 7.24 6.13 6.08 5.25\n",
" 4 4 4 19 4.26 3.1 5.39 12.5\n",
" 12 12 12 8 10.84 9.13 8.15 5.56\n",
" 7 7 7 8 4.82 7.26 6.42 7.91\n",
" 5 5 5 8 5.68 4.74 5.73 6.89"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_list = Datasets::RdatasetsList.new\n",
"ds = Datasets::Rdatasets.new('datasets', 'anscombe')\n",
"df = Rover::DataFrame.new(ds.to_table.to_h)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "62852fed-8b51-4dc9-8fc9-63e71652c07c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[11,8] count mean std min 25% 50% 75% max\n",
" x1 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" x2 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" x3 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" x4 11 9.0 3.316625 8.0 8.0 8.0 8.0 19.0\n",
" y1 11 7.500909 2.031568 4.26 6.315 7.58 8.57 10.84\n",
" y2 11 7.500909 2.031657 3.1 6.695 8.14 8.95 9.26\n",
" y3 11 7.5 2.030424 5.39 6.25 7.11 7.98 12.74\n",
" y4 11 7.500909 2.030579 5.25 6.17 7.04 8.19 12.5\n"
]
}
],
"source": [
"puts df.summary"
]
},
{
"cell_type": "markdown",
"id": "f5c6016f-ee37-43e5-bdfe-79e56d0482fd",
"metadata": {},
"source": [
"#### Rover's (from Seaborn dataset)\n",
"This should be good material for trying out group methods."
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "6cd7fe3f-9343-4e62-a946-b4148b5231d4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
":dataset = I\n",
"[11,2] count mean std min 25% 50% 75% max\n",
" x 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" y 11 7.500909 2.031568 4.26 6.315 7.58 8.57 10.84\n",
"\n",
":dataset = II\n",
"[11,2] count mean std min 25% 50% 75% max\n",
" x 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" y 11 7.500909 2.031657 3.1 6.695 8.14 8.95 9.26\n",
"\n",
":dataset = III\n",
"[11,2] count mean std min 25% 50% 75% max\n",
" x 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" y 11 7.5 2.030424 5.39 6.25 7.11 7.98 12.74\n",
"\n",
":dataset = IV\n",
"[11,2] count mean std min 25% 50% 75% max\n",
" x 11 9.0 3.316625 8.0 8.0 8.0 8.0 19.0\n",
" y 11 7.500909 2.030579 5.25 6.17 7.04 8.19 12.5\n",
"\n"
]
}
],
"source": [
"ds = Datasets::SeabornData.new(\"anscombe\")\n",
"df = Rover::DataFrame.new(ds.to_table.to_h)\n",
"\n",
"df[:dataset].uniq.each do |dataset|\n",
" puts \":dataset = #{dataset}\"\n",
" puts df[df[:dataset] == dataset].summary\n",
" puts\n",
"end; nil"
]
},
{
"cell_type": "markdown",
"id": "96bde9f0-d9fa-4759-ad98-95c0b6307298",
"metadata": {},
"source": [
"#### Python's"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "d65da6d4-80fc-48be-a32f-62d7dcf30ee1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>11.000000</td>\n",
" <td>11.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>9.000000</td>\n",
" <td>7.500909</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>3.316625</td>\n",
" <td>2.031568</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.000000</td>\n",
" <td>4.260000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>6.500000</td>\n",
" <td>6.315000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>9.000000</td>\n",
" <td>7.580000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>11.500000</td>\n",
" <td>8.570000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>14.000000</td>\n",
" <td>10.840000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x y\n",
"count 11.000000 11.000000\n",
"mean 9.000000 7.500909\n",
"std 3.316625 2.031568\n",
"min 4.000000 4.260000\n",
"25% 6.500000 6.315000\n",
"50% 9.000000 7.580000\n",
"75% 11.500000 8.570000\n",
"max 14.000000 10.840000"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sns.set_theme(style:'ticks')\n",
"py_df = sns.load_dataset('anscombe')\n",
"py_df[py_df[:dataset] == \"I\"].describe"
]
},
{
"cell_type": "markdown",
"id": "c2e0a173-760d-4f61-8959-e2da0e86a63c",
"metadata": {},
"source": [
"##### using group"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "316f4d6b-e8fe-483e-abdd-794de0166c6c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"8\" halign=\"left\">x</th>\n",
" <th colspan=\"8\" halign=\"left\">y</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>dataset</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>11.0</td>\n",
" <td>9.0</td>\n",
" <td>3.316625</td>\n",
" <td>4.0</td>\n",
" <td>6.5</td>\n",
" <td>9.0</td>\n",
" <td>11.5</td>\n",
" <td>14.0</td>\n",
" <td>11.0</td>\n",
" <td>7.500909</td>\n",
" <td>2.031568</td>\n",
" <td>4.26</td>\n",
" <td>6.315</td>\n",
" <td>7.58</td>\n",
" <td>8.57</td>\n",
" <td>10.84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>II</th>\n",
" <td>11.0</td>\n",
" <td>9.0</td>\n",
" <td>3.316625</td>\n",
" <td>4.0</td>\n",
" <td>6.5</td>\n",
" <td>9.0</td>\n",
" <td>11.5</td>\n",
" <td>14.0</td>\n",
" <td>11.0</td>\n",
" <td>7.500909</td>\n",
" <td>2.031657</td>\n",
" <td>3.10</td>\n",
" <td>6.695</td>\n",
" <td>8.14</td>\n",
" <td>8.95</td>\n",
" <td>9.26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>III</th>\n",
" <td>11.0</td>\n",
" <td>9.0</td>\n",
" <td>3.316625</td>\n",
" <td>4.0</td>\n",
" <td>6.5</td>\n",
" <td>9.0</td>\n",
" <td>11.5</td>\n",
" <td>14.0</td>\n",
" <td>11.0</td>\n",
" <td>7.500000</td>\n",
" <td>2.030424</td>\n",
" <td>5.39</td>\n",
" <td>6.250</td>\n",
" <td>7.11</td>\n",
" <td>7.98</td>\n",
" <td>12.74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>IV</th>\n",
" <td>11.0</td>\n",
" <td>9.0</td>\n",
" <td>3.316625</td>\n",
" <td>8.0</td>\n",
" <td>8.0</td>\n",
" <td>8.0</td>\n",
" <td>8.0</td>\n",
" <td>19.0</td>\n",
" <td>11.0</td>\n",
" <td>7.500909</td>\n",
" <td>2.030579</td>\n",
" <td>5.25</td>\n",
" <td>6.170</td>\n",
" <td>7.04</td>\n",
" <td>8.19</td>\n",
" <td>12.50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x ... y \n",
" count mean std min 25% 50% ... std min 25% 50% 75% max\n",
"dataset ... \n",
"I 11.0 9.0 3.316625 4.0 6.5 9.0 ... 2.031568 4.26 6.315 7.58 8.57 10.84\n",
"II 11.0 9.0 3.316625 4.0 6.5 9.0 ... 2.031657 3.10 6.695 8.14 8.95 9.26\n",
"III 11.0 9.0 3.316625 4.0 6.5 9.0 ... 2.030424 5.39 6.250 7.11 7.98 12.74\n",
"IV 11.0 9.0 3.316625 8.0 8.0 8.0 ... 2.030579 5.25 6.170 7.04 8.19 12.50\n",
"\n",
"[4 rows x 16 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"py_df.groupby(\"dataset\").describe"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "7cff8b91-b034-478b-96b6-6f24875b2b8b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.axisgrid.FacetGrid object at 0x7f987c779fd0>"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 600x600 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.lmplot(\n",
" data:py_df,\n",
" x:'x', y:'y', hue:'dataset',\n",
" col:'dataset', col_wrap:2,\n",
" ci:nil, palette:\"muted\", height:3,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "31c0101b-240e-42ad-9e68-f2141b6e76fc",
"metadata": {
"tags": []
},
"source": [
"### R dataset by Rover"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "03205c43-3a31-4d5e-9843-6918fadc008c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1) ability.cov, Ability and Intelligence Tests\n",
" [6,8] count mean std min 25% 50% 75% max\n",
"cov.general 6 20.105167 11.752029 5.991 9.706 22.698 28.436 33.52\n",
"cov.picture 6 7.458333 5.575117 1.782 5.19975 6.3455 7.078 18.137\n",
" cov.blocks 6 50.515833 50.063513 18.137 22.4255 32.475 46.44475 149.831\n",
" cov.maze 6 8.962 6.346548 1.782 5.0735 7.549 11.802 19.424\n",
"cov.reading 6 30.207333 25.357664 4.757 8.89075 26.0925 47.3105 66.762\n",
" cov.vocab 6 49.797833 47.904936 7.204 14.2315 40.227 62.75975 135.292\n",
" center 6 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n",
" n.obs 6 112.0 0.0 112.0 112.0 112.0 112.0 112.0\n",
"\n",
"(2) airmiles, Passenger Miles on Commercial US Airlines, 1937-1960\n",
"[24,2] count mean std min 25% 50% 75% max\n",
" time 24 1948.5 7.071068 1937 1942.75 1948.5 1954.25 1960\n",
" value 24 10527.833333 10033.32719 412 1580.0 6431.0 17531.5 30514\n",
"\n",
"(3) AirPassengers, Monthly Airline Passenger Numbers 1949-1960\n",
"[144,2] count mean std min 25% 50% 75% max\n",
" time 144 1954.958333 3.476109 1949.0 1951.979167 1954.958333 1957.9375 1960.91666666667\n",
" value 144 280.298611 119.966317 104.0 180.0 265.5 360.5 622.0\n",
"\n",
"(4) airquality, New York Air Quality Measurements\n",
"[153,4] count mean std min 25% 50% 75% max\n",
" Wind 153 9.957516 3.523001 1.7 7.4 9.7 11.5 20.7\n",
" Temp 153 77.882353 9.46527 56.0 72.0 79.0 85.0 97.0\n",
" Month 153 6.993464 1.416522 5.0 6.0 7.0 8.0 9.0\n",
" Day 153 15.803922 8.86452 1.0 8.0 16.0 23.0 31.0\n",
"\n",
"(5) anscombe, Anscombe's Quartet of 'Identical' Simple Linear Regressions\n",
"[11,8] count mean std min 25% 50% 75% max\n",
" x1 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" x2 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" x3 11 9.0 3.316625 4.0 6.5 9.0 11.5 14.0\n",
" x4 11 9.0 3.316625 8.0 8.0 8.0 8.0 19.0\n",
" y1 11 7.500909 2.031568 4.26 6.315 7.58 8.57 10.84\n",
" y2 11 7.500909 2.031657 3.1 6.695 8.14 8.95 9.26\n",
" y3 11 7.5 2.030424 5.39 6.25 7.11 7.98 12.74\n",
" y4 11 7.500909 2.030579 5.25 6.17 7.04 8.19 12.5\n",
"\n",
"(6) attenu, The Joyner-Boore Attenuation Data\n",
"[182,4] count mean std min 25% 50% 75% max\n",
" event 182 14.741758 6.852375 1.0 9.0 18.0 20.0 23.0\n",
" mag 182 6.084066 0.721431 5.0 5.3 6.1 6.6 7.7\n",
" dist 182 45.603297 62.170063 0.5 11.325 23.4 47.55 370.0\n",
" accel 182 0.15422 0.149001 0.003 0.04425 0.113 0.21925 0.81\n",
"\n",
"(7) attitude, The Chatterjee-Price Attitude Data\n",
" [30,7] count mean std min 25% 50% 75% max\n",
" rating 30 64.633333 12.172562 40 58.75 65.5 71.75 85\n",
"complaints 30 66.6 13.314757 37 58.5 65.0 77.0 90\n",
"privileges 30 53.133333 12.23543 30 45.0 51.5 62.5 83\n",
" learning 30 56.366667 11.737013 34 47.0 56.5 66.75 75\n",
" raises 30 64.633333 10.397226 43 58.25 63.5 71.0 88\n",
" critical 30 74.766667 9.894908 49 69.25 77.5 80.0 92\n",
" advance 30 42.933333 10.288706 25 35.0 41.0 47.75 72\n",
"\n",
"(8) austres, Quarterly Time Series of the Number of Australian Residents\n",
"[89,2] count mean std min 25% 50% 75% max\n",
" time 89 1982.25 6.459005 1971.25 1976.75 1982.25 1987.75 1993.25\n",
" value 89 15273.449438 1356.812524 13067.3 14110.1 15184.2 16398.9 17661.5\n",
"\n",
"(9) BJsales, Sales Data with Leading Indicator\n",
"[150,2] count mean std min 25% 50% 75% max\n",
" time 150 75.5 43.445368 1.0 38.25 75.5 112.75 150.0\n",
" value 150 229.978 21.479686 198.6 212.575 220.65 254.675 263.3\n",
"\n",
"(10) BOD, Biochemical Oxygen Demand\n",
" [6,2] count mean std min 25% 50% 75% max\n",
" Time 6 3.666667 2.160247 1.0 2.25 3.5 4.75 7.0\n",
"demand 6 14.833333 4.630623 8.3 11.625 15.8 18.25 19.8\n",
"\n",
"(11) cars, Speed and Stopping Distances of Cars\n",
"[50,2] count mean std min 25% 50% 75% max\n",
" speed 50 15.4 5.287644 4 12.0 15.0 19.0 25\n",
" dist 50 42.98 25.769377 2 26.0 36.0 56.0 120\n",
"\n",
"(12) ChickWeight, Weight versus age of chicks on different diets\n",
"[578,4] count mean std min 25% 50% 75% max\n",
" weight 578 121.818339 71.07196 35 63.0 103.0 163.75 373\n",
" Time 578 10.717993 6.7584 0 4.0 10.0 16.0 21\n",
" Chick 578 25.750865 14.568795 1 13.0 26.0 38.0 50\n",
" Diet 578 2.235294 1.162678 1 1.0 2.0 3.0 4\n",
"\n",
"(13) chickwts, Chicken Weights by Feed Type\n",
"[71,1] count mean std min 25% 50% 75% max\n",
"weight 71 261.309859 78.0737 108 204.5 258.0 323.5 423\n",
"\n",
"(14) CO2, Carbon Dioxide Uptake in Grass Plants\n",
"[84,2] count mean std min 25% 50% 75% max\n",
" conc 84 435.0 295.924119 95.0 175.0 350.0 675.0 1000.0\n",
"uptake 84 27.213095 10.814412 7.7 17.9 28.3 37.125 45.5\n",
"\n",
"(15) co2, Mauna Loa Atmospheric CO2 Concentration\n",
"[468,2] count mean std min 25% 50% 75% max\n",
" time 468 1978.458333 11.270352 1959.0 1968.729167 1978.458333 1988.1875 1997.91666667\n",
" value 468 337.053526 14.96622 313.18 323.53 335.17 350.255 366.84\n",
"\n",
"(16) crimtab, Student's 3000 Criminals Data\n",
"[924,3] count mean std min 25% 50% 75% max\n",
" Var1 924 11.45 1.212748 9.4 10.4 11.45 12.5 13.5\n",
" Var2 924 168.91 16.123221 142.24 154.94 168.91 182.88 195.58\n",
" Freq 924 3.246753 8.29179 0.0 0.0 0.0 1.0 58.0\n",
"\n",
"(17) discoveries, Yearly Numbers of Important Discoveries\n",
"[100,2] count mean std min 25% 50% 75% max\n",
" time 100 1909.5 29.011492 1860 1884.75 1909.5 1934.25 1959\n",
" value 100 3.1 2.254065 0 2.0 3.0 4.0 12\n",
"\n",
"(18) DNase, Elisa assay of DNase\n",
"[176,3] count mean std min 25% 50% 75% max\n",
" Run 176 6.0 3.1713 1.0 3.0 6.0 9.0 11.0\n",
" conc 176 3.106689 4.059865 0.04882812 0.341797 1.171875 3.90625 12.5\n",
"density 176 0.719159 0.595573 0.011 0.19775 0.5265 1.1705 2.003\n",
"\n",
"(19) esoph, Smoking, Alcohol and (O)esophageal Cancer\n",
" [88,2] count mean std min 25% 50% 75% max\n",
" ncases 88 2.272727 2.753169 0 0.0 1.0 4.0 17\n",
"ncontrols 88 8.806818 12.135119 0 1.0 4.0 10.0 60\n",
"\n",
"(20) euro, Conversion Rates of Euro Currencies\n",
"[11,1] count mean std min 25% 50% 75% max\n",
" dat 11 219.548228 573.614402 0.787564 4.07472 13.7603 103.36295 1936.27\n",
"\n",
"(21) EuStockMarkets, Daily Closing Prices of Major European Stock Indices, 1991-1998\n",
"[1860,4] count mean std min 25% 50% 75% max\n",
" DAX 1860 2530.656882 1084.79274 1402.34 1744.1025 2140.565 2722.3675 6186.09\n",
" SMI 1860 3376.22371 1663.026465 1587.4 2165.625 2796.35 3812.425 8412.0\n",
" CAC 1860 2227.828495 580.314198 1611.0 1875.15 1992.3 2274.35 4388.5\n",
" FTSE 1860 3565.643172 976.71554 2281.0 2843.15 3246.6 3993.575 6179.0\n",
"\n",
"(22) faithful, Old Faithful Geyser Data\n",
" [272,2] count mean std min 25% 50% 75% max\n",
"eruptions 272 3.487783 1.141371 1.6 2.16275 4.0 4.45425 5.1\n",
" waiting 272 70.897059 13.594974 43.0 58.0 76.0 82.0 96.0\n",
"\n",
"(23) Formaldehyde, Determination of Formaldehyde\n",
" [6,2] count mean std min 25% 50% 75% max\n",
" carb 6 0.516667 0.285774 0.1 0.35 0.55 0.675 0.9\n",
"optden 6 0.457833 0.250539 0.086 0.31325 0.492 0.604 0.782\n",
"\n",
"(24) freeny, Freeny's Revenue Data\n",
" [39,5] count mean std min 25% 50% 75% max\n",
" y 39 9.306304 0.315617 8.79137 9.0446 9.31378 9.590855 9.79424\n",
"lag.quarterly.revenue 39 9.280718 0.315468 8.79137 9.019585 9.28436 9.560515 9.77536\n",
" price.index 39 4.496182 0.133357 4.27789 4.391615 4.51018 4.604965 4.70997\n",
" income.level 39 6.038596 0.120443 5.8211 5.947985 6.06093 6.13912 6.2003\n",
" market.potential 39 13.066831 0.064504 12.9699 13.0066 13.0693 13.1244 13.1664\n",
"\n",
"(25) HairEyeColor, Hair and Eye Color of Statistics Students\n",
"[32,1] count mean std min 25% 50% 75% max\n",
" Freq 32 18.5 18.242099 2 7.0 10.0 29.25 66\n",
"\n",
"(26) Harman23.cor, Harman Example 2.3\n",
" [8,10] count mean std min 25% 50% 75% max\n",
" cov.height 8 0.633 0.271179 0.301 0.394 0.639 0.84925 1.0\n",
" cov.arm.span 8 0.618375 0.295601 0.277 0.3635 0.6205 0.85475 1.0\n",
" cov.forearm 8 0.596 0.303648 0.237 0.3385 0.5905 0.824 1.0\n",
" cov.lower.leg 8 0.617875 0.279309 0.327 0.356 0.6185 0.83425 1.0\n",
" cov.weight 8 0.59825 0.222184 0.376 0.422 0.551 0.738 1.0\n",
"cov.bitro.diameter 8 0.53675 0.245921 0.319 0.32825 0.4875 0.62775 1.0\n",
" cov.chest.girth 8 0.49925 0.26719 0.237 0.295 0.433 0.61975 1.0\n",
" cov.chest.width 8 0.5315 0.216977 0.345 0.37775 0.477 0.59 1.0\n",
" center 8 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n",
" n.obs 8 305.0 0.0 305.0 305.0 305.0 305.0 305.0\n",
"\n",
"(27) Harman74.cor, Harman Example 7.4\n",
" [24,26] count mean std min 25% 50% 75% max\n",
" cov.VisualPerception 24 0.355333 0.16806 0.116 0.2985 0.329 0.4055 1.0\n",
" cov.Cubes 24 0.239625 0.184311 0.005 0.14875 0.231 0.28675 1.0\n",
" cov.PaperFormBoard 24 0.262875 0.191116 -0.075 0.177 0.2485 0.31325 1.0\n",
" cov.Flags 24 0.293542 0.183721 0.066 0.18025 0.3135 0.336 1.0\n",
" cov.GeneralInformation 24 0.385083 0.200596 0.187 0.2425 0.3195 0.4365 1.0\n",
" cov.PargraphComprehension 24 0.382583 0.206195 0.095 0.26 0.318 0.4335 1.0\n",
" cov.SentenceCompletion 24 0.371208 0.218879 0.157 0.22525 0.309 0.4405 1.0\n",
" cov.WordClassification 24 0.38775 0.178238 0.157 0.27325 0.3595 0.4455 1.0\n",
" cov.WordMeaning 24 0.3815 0.21788 0.113 0.2465 0.28 0.48825 1.0\n",
" cov.Addition 24 0.27775 0.217806 -0.075 0.1585 0.2245 0.339 1.0\n",
" cov.Code 24 0.337042 0.177146 0.091 0.24825 0.311 0.37125 1.0\n",
" cov.CountingDots 24 0.286542 0.202825 0.095 0.14375 0.2305 0.35125 1.0\n",
"cov.StraightCurvedCapitals 24 0.357667 0.168907 0.139 0.275 0.325 0.39825 1.0\n",
" cov.WordRecognition 24 0.268458 0.178211 0.066 0.1815 0.2425 0.3025 1.0\n",
" cov.NumberRecognition 24 0.253292 0.17609 0.065 0.17025 0.235 0.26 1.0\n",
" cov.FigureRecognition 24 0.317042 0.165031 0.119 0.261 0.2935 0.345 1.0\n",
" cov.ObjectNumber 24 0.292042 0.174138 0.005 0.2045 0.2735 0.3245 1.0\n",
" cov.NumberFigure 24 0.325333 0.161609 0.159 0.25075 0.317 0.35175 1.0\n",
" cov.FigureWord 24 0.281417 0.170392 0.11 0.1915 0.2665 0.315 1.0\n",
" cov.Deduction 24 0.364792 0.165544 0.167 0.2695 0.352 0.429 1.0\n",
" cov.NumericalPuzzles 24 0.358583 0.159254 0.165 0.296 0.352 0.4005 1.0\n",
" cov.ProblemReasoning 24 0.362208 0.16369 0.16 0.26725 0.3495 0.40025 1.0\n",
" cov.SeriesCompletion 24 0.40325 0.154898 0.242 0.299 0.3825 0.45675 1.0\n",
" cov.ArithmeticProblems 24 0.384167 0.159249 0.165 0.2985 0.3815 0.42625 1.0\n",
" center 24 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n",
" n.obs 24 145.0 0.0 145.0 145.0 145.0 145.0 145.0\n",
"\n",
"(28) Indometh, Pharmacokinetics of Indomethacin\n",
" [66,3] count mean std min 25% 50% 75% max\n",
"Subject 66 3.5 1.720912 1.0 2.0 3.5 5.0 6.0\n",
" time 66 2.886364 2.464432 0.25 0.75 2.0 5.0 8.0\n",
" conc 66 0.591818 0.632584 0.05 0.11 0.34 0.8325 2.72\n",
"\n",
"(29) infert, Infertility after Spontaneous and Induced Abortion\n",
" [248,7] count mean std min 25% 50% 75% max\n",
" age 248 31.504032 5.251565 21 28.0 31.0 35.25 44\n",
" parity 248 2.092742 1.251504 1 1.0 2.0 3.0 6\n",
" induced 248 0.572581 0.738457 0 0.0 0.0 1.0 2\n",
" case 248 0.334677 0.472832 0 0.0 0.0 1.0 1\n",
" spontaneous 248 0.576613 0.732541 0 0.0 0.0 1.0 2\n",
" stratum 248 41.870968 23.968423 1 21.0 42.0 62.25 83\n",
"pooled.stratum 248 33.580645 17.2721 1 19.0 36.0 48.25 63\n",
"\n",
"(30) InsectSprays, Effectiveness of Insect Sprays\n",
"[72,1] count mean std min 25% 50% 75% max\n",
" count 72 9.5 7.203286 0 3.0 7.0 14.25 26\n",
"\n",
"(31) iris, Edgar Anderson's Iris Data\n",
" [150,4] count mean std min 25% 50% 75% max\n",
"Sepal.Length 150 5.843333 0.828066 4.3 5.1 5.8 6.4 7.9\n",
" Sepal.Width 150 3.057333 0.435866 2.0 2.8 3.0 3.3 4.4\n",
"Petal.Length 150 3.758 1.765298 1.0 1.6 4.35 5.1 6.9\n",
" Petal.Width 150 1.199333 0.762238 0.1 0.3 1.3 1.8 2.5\n",
"\n",
"(32) iris3, Edgar Anderson's Iris Data\n",
" [50,12] count mean std min 25% 50% 75% max\n",
" Sepal L..Setosa 50 5.006 0.35249 4.3 4.8 5.0 5.2 5.8\n",
" Sepal W..Setosa 50 3.428 0.379064 2.3 3.2 3.4 3.675 4.4\n",
" Petal L..Setosa 50 1.462 0.173664 1.0 1.4 1.5 1.575 1.9\n",
" Petal W..Setosa 50 0.246 0.105386 0.1 0.2 0.2 0.3 0.6\n",
"Sepal L..Versicolor 50 5.936 0.516171 4.9 5.6 5.9 6.3 7.0\n",
"Sepal W..Versicolor 50 2.77 0.313798 2.0 2.525 2.8 3.0 3.4\n",
"Petal L..Versicolor 50 4.26 0.469911 3.0 4.0 4.35 4.6 5.1\n",
"Petal W..Versicolor 50 1.326 0.197753 1.0 1.2 1.3 1.5 1.8\n",
" Sepal L..Virginica 50 6.588 0.63588 4.9 6.225 6.5 6.9 7.9\n",
" Sepal W..Virginica 50 2.974 0.322497 2.2 2.8 3.0 3.175 3.8\n",
" Petal L..Virginica 50 5.552 0.551895 4.5 5.1 5.55 5.875 6.9\n",
" Petal W..Virginica 50 2.026 0.27465 1.4 1.8 2.0 2.3 2.5\n",
"\n",
"(33) islands, Areas of the World's Major Landmasses\n",
"[48,1] count mean std min 25% 50% 75% max\n",
" dat 48 1252.729167 3371.145735 12 20.5 41.0 183.25 16988\n",
"\n",
"(34) JohnsonJohnson, Quarterly Earnings per Johnson & Johnson Share\n",
"[84,2] count mean std min 25% 50% 75% max\n",
" time 84 1970.375 6.098155 1960.0 1965.1875 1970.375 1975.5625 1980.75\n",
" value 84 4.799762 4.309991 0.44 1.2475 3.51 7.1325 16.2\n",
"\n",
"(35) LakeHuron, Level of Lake Huron 1875-1972\n",
"[98,2] count mean std min 25% 50% 75% max\n",
" time 98 1923.5 28.434134 1875.0 1899.25 1923.5 1947.75 1972.0\n",
" value 98 579.004082 1.318299 575.96 578.135 579.12 579.875 581.86\n",
"\n",
"(36) lh, Luteinizing Hormone in Blood Samples\n",
"[48,2] count mean std min 25% 50% 75% max\n",
" time 48 24.5 14.0 1.0 12.75 24.5 36.25 48.0\n",
" value 48 2.4 0.551593 1.4 2.0 2.3 2.75 3.5\n",
"\n",
"(37) LifeCycleSavings, Intercountry Life-Cycle Savings Data\n",
"[50,5] count mean std min 25% 50% 75% max\n",
" sr 50 9.671 4.480407 0.6 6.97 10.51 12.6175 21.1\n",
" pop15 50 35.0896 9.151727 21.44 26.215 32.575 44.065 47.64\n",
" pop75 50 2.293 1.290771 0.56 1.125 2.175 3.325 4.7\n",
" dpi 50 1106.7584 990.868889 88.94 288.2075 695.665 1795.6225 4001.89\n",
" ddpi 50 3.7576 2.869871 0.22 2.0025 3.0 4.4775 16.71\n",
"\n",
"(38) Loblolly, Growth of Loblolly pine trees\n",
"[84,3] count mean std min 25% 50% 75% max\n",
"height 84 32.364405 20.673605 3.46 10.4675 34.0 51.3625 64.1\n",
" age 84 13.0 7.899977 3.0 5.0 12.5 20.0 25.0\n",
" Seed 84 316.142857 9.877738 301.0 307.0 317.0 325.0 331.0\n",
"\n",
"(39) longley, Longley's Economic Regression Data\n",
" [16,7] count mean std min 25% 50% 75% max\n",
"GNP.deflator 16 101.68125 10.791553 83.0 94.525 100.6 111.25 116.9\n",
" GNP 16 387.698437 99.394938 234.289 317.881 381.427 454.0855 554.894\n",
" Unemployed 16 319.33125 93.446425 187.0 234.825 314.35 384.25 480.6\n",
"Armed.Forces 16 260.66875 69.59196 145.6 229.8 271.75 306.075 359.4\n",
" Population 16 117.424 6.956102 107.608 111.7885 116.8035 122.304 130.081\n",
" Year 16 1954.5 4.760952 1947.0 1950.75 1954.5 1958.25 1962.0\n",
" Employed 16 65.317 3.511968 60.171 62.7125 65.504 68.2905 70.551\n",
"\n",
"(40) lynx, Annual Canadian Lynx trappings 1821-1934\n",
"[114,2] count mean std min 25% 50% 75% max\n",
" time 114 1877.5 33.052988 1821 1849.25 1877.5 1905.75 1934\n",
" value 114 1538.017544 1585.843914 39 348.25 771.0 2566.75 6991\n",
"\n",
"(41) morley, Michelson Speed of Light Data\n",
"[100,3] count mean std min 25% 50% 75% max\n",
" Expt 100 3.0 1.421338 1 2.0 3.0 4.0 5\n",
" Run 100 10.5 5.795331 1 5.75 10.5 15.25 20\n",
" Speed 100 852.4 79.010548 620 807.5 850.0 892.5 1070\n",
"\n",
"(42) mtcars, Motor Trend Car Road Tests\n",
"[32,11] count mean std min 25% 50% 75% max\n",
" mpg 32 20.090625 6.026948 10.4 15.425 19.2 22.8 33.9\n",
" cyl 32 6.1875 1.785922 4.0 4.0 6.0 8.0 8.0\n",
" disp 32 230.721875 123.938694 71.1 120.825 196.3 326.0 472.0\n",
" hp 32 146.6875 68.562868 52.0 96.5 123.0 180.0 335.0\n",
" drat 32 3.596563 0.534679 2.76 3.08 3.695 3.92 4.93\n",
" wt 32 3.21725 0.978457 1.513 2.58125 3.325 3.61 5.424\n",
" qsec 32 17.84875 1.786943 14.5 16.8925 17.71 18.9 22.9\n",
" vs 32 0.4375 0.504016 0.0 0.0 0.0 1.0 1.0\n",
" am 32 0.40625 0.498991 0.0 0.0 0.0 1.0 1.0\n",
" gear 32 3.6875 0.737804 3.0 3.0 4.0 4.0 5.0\n",
" carb 32 2.8125 1.6152 1.0 2.0 2.0 4.0 8.0\n",
"\n",
"(43) nhtemp, Average Yearly Temperatures in New Haven\n",
"[60,2] count mean std min 25% 50% 75% max\n",
" time 60 1941.5 17.464249 1912.0 1926.75 1941.5 1956.25 1971.0\n",
" value 60 51.16 1.265608 47.9 50.575 51.2 51.9 54.6\n",
"\n",
"(44) Nile, Flow of the River Nile\n",
"[100,2] count mean std min 25% 50% 75% max\n",
" time 100 1920.5 29.011492 1871 1895.75 1920.5 1945.25 1970\n",
" value 100 919.35 169.227501 456 798.5 893.5 1032.5 1370\n",
"\n",
"(45) nottem, Average Monthly Temperatures at Nottingham, 1920-1939\n",
"[240,2] count mean std min 25% 50% 75% max\n",
" time 240 1929.958333 5.785518 1920.0 1924.979167 1929.958333 1934.9375 1939.91666666667\n",
" value 240 49.039583 8.572324 31.3 41.55 47.35 57.0 66.5\n",
"\n",
"(46) npk, Classical N, P, K Factorial Experiment\n",
"[24,5] count mean std min 25% 50% 75% max\n",
" block 24 3.5 1.744557 1.0 2.0 3.5 5.0 6.0\n",
" N 24 0.5 0.510754 0.0 0.0 0.5 1.0 1.0\n",
" P 24 0.5 0.510754 0.0 0.0 0.5 1.0 1.0\n",
" K 24 0.5 0.510754 0.0 0.0 0.5 1.0 1.0\n",
" yield 24 54.875 6.172749 44.2 49.725 55.65 58.625 69.5\n",
"\n",
"(47) occupationalStatus, Occupational Status of Fathers and their Sons\n",
" [64,3] count mean std min 25% 50% 75% max\n",
" origin 64 4.5 2.309401 1 2.75 4.5 6.25 8\n",
"destination 64 4.5 2.309401 1 2.75 4.5 6.25 8\n",
" Freq 64 54.65625 81.999462 0 12.0 25.5 65.25 554\n",
"\n",
"(48) Orange, Growth of Orange Trees\n",
" [35,3] count mean std min 25% 50% 75% max\n",
" Tree 35 3.0 1.43486 1 2.0 3.0 4.0 5\n",
" age 35 922.142857 491.864528 118 484.0 1004.0 1372.0 1582\n",
"circumference 35 115.857143 57.488179 30 65.5 115.0 161.5 214\n",
"\n",
"(49) OrchardSprays, Potency of Orchard Sprays\n",
" [64,3] count mean std min 25% 50% 75% max\n",
"decrease 64 45.421875 35.574561 2 12.75 41.0 72.0 130\n",
" rowpos 64 4.5 2.309401 1 2.75 4.5 6.25 8\n",
" colpos 64 4.5 2.309401 1 2.75 4.5 6.25 8\n",
"\n",
"(50) PlantGrowth, Results from an Experiment on Plant Growth\n",
"[30,1] count mean std min 25% 50% 75% max\n",
"weight 30 5.073 0.701192 3.59 4.55 5.155 5.53 6.31\n",
"\n",
"(51) precip, Annual Precipitation in US Cities\n",
"[70,1] count mean std min 25% 50% 75% max\n",
" dat 70 34.885714 13.70665 7.0 29.375 36.6 42.775 67.0\n",
"\n",
"(52) presidents, Quarterly Approval Ratings of US Presidents\n",
"[120,1] count mean std min 25% 50% 75% max\n",
" time 120 1959.875 8.696264 1945.0 1952.4375 1959.875 1967.3125 1974.75\n",
"\n",
"(53) pressure, Vapor Pressure of Mercury as a Function of Temperature\n",
" [19,2] count mean std min 25% 50% 75% max\n",
"temperature 19 180.0 112.546287 0.0 90.0 180.0 270.0 360.0\n",
" pressure 19 124.336705 224.62254 0.0002 0.18 8.8 126.5 806.0\n",
"\n",
"(54) Puromycin, Reaction Velocity of an Enzymatic Reaction\n",
"[23,2] count mean std min 25% 50% 75% max\n",
" conc 23 0.312174 0.36313 0.02 0.06 0.11 0.56 1.1\n",
" rate 23 126.826087 47.513302 47.0 91.5 124.0 158.5 207.0\n",
"\n",
"(55) quakes, Locations of Earthquakes off Fiji\n",
"[1000,5] count mean std min 25% 50% 75% max\n",
" lat 1000 -20.64275 5.028791 -38.59 -23.47 -20.3 -17.6375 -10.72\n",
" long 1000 179.46202 6.069497 165.67 179.62 181.41 183.2 188.13\n",
" depth 1000 311.371 215.535498 40.0 99.0 247.0 543.0 680.0\n",
" mag 1000 4.6204 0.402773 4.0 4.3 4.6 4.9 6.4\n",
"stations 1000 33.418 21.900386 10.0 18.0 27.0 42.0 132.0\n",
"\n",
"(56) randu, Random Numbers from Congruential Generator RANDU\n",
"[400,3] count mean std min 25% 50% 75% max\n",
" x 400 0.526429 0.285012 3.1e-05 0.300312 0.540788 0.778623 0.99985\n",
" y 400 0.486053 0.293718 0.000183 0.227744 0.483379 0.73992 0.999939\n",
" z 400 0.480955 0.279035 2.9e-05 0.25206 0.463328 0.71141 0.998243\n",
"\n",
"(57) rivers, Lengths of Major North American Rivers\n",
"[141,1] count mean std min 25% 50% 75% max\n",
" dat 141 591.184397 493.870842 135 310.0 425.0 680.0 3710\n",
"\n",
"(58) rock, Measurements on Petroleum Rock Samples\n",
"[48,4] count mean std min 25% 50% 75% max\n",
" area 48 7187.729167 2683.848862 1016.0 5305.25 7487.0 8869.5 12212.0\n",
" peri 48 2682.211938 1431.661164 308.642 1414.9075 2536.195 3989.5225 4864.22\n",
" shape 48 0.21811 0.083496 0.0903296 0.162262 0.198862 0.26267 0.464125\n",
" perm 48 415.45 437.818226 6.3 76.45 130.5 777.5 1300.0\n",
"\n",
"(59) Seatbelts, Road Casualties in Great Britain 1969-84\n",
" [192,8] count mean std min 25% 50% 75% max\n",
"DriversKilled 192 122.802083 25.379886 60.0 104.75 118.5 138.0 198.0\n",
" drivers 192 1670.307292 289.610958 1057.0 1461.75 1631.0 1850.75 2654.0\n",
" front 192 837.21875 175.098967 426.0 715.5 828.5 950.75 1299.0\n",
" rear 192 401.208333 83.10221 224.0 344.75 401.5 456.25 646.0\n",
" kms 192 14993.604167 2938.049207 7685.0 12685.0 14987.0 17202.5 21626.0\n",
" PetrolPrice 192 0.103624 0.012176 0.0811788933269884 0.092577 0.104477 0.114056 0.133027420877451\n",
" VanKilled 192 9.057292 3.636903 2.0 6.0 8.0 12.0 17.0\n",
" law 192 0.119792 0.325567 0.0 0.0 0.0 0.0 1.0\n",
"\n",
"(60) sleep, Student's Sleep Data\n",
"[20,3] count mean std min 25% 50% 75% max\n",
" extra 20 1.54 2.01792 -1.6 -0.025 0.95 3.4 5.5\n",
" group 20 1.5 0.512989 1.0 1.0 1.5 2.0 2.0\n",
" ID 20 5.5 2.946898 1.0 3.0 5.5 8.0 10.0\n",
"\n",
"(61) stackloss, Brownlee's Stack Loss Plant Data\n",
" [21,4] count mean std min 25% 50% 75% max\n",
" Air.Flow 21 60.428571 9.168268 50 56.0 58.0 62.0 80\n",
"Water.Temp 21 21.095238 3.160771 17 18.0 20.0 24.0 27\n",
"Acid.Conc. 21 86.285714 5.358571 72 82.0 87.0 89.0 93\n",
"stack.loss 21 17.52381 10.171623 7 11.0 15.0 19.0 42\n",
"\n",
"(62) sunspot.month, Monthly Sunspot Data, from 1749 to \"Present\"\n",
"[3177,2] count mean std min 25% 50% 75% max\n",
" time 3177 1881.333333 76.438769 1749.0 1815.166667 1881.333333 1947.5 2013.66666666667\n",
" value 3177 51.96481 44.125236 0.0 15.7 42.0 76.4 253.8\n",
"\n",
"(63) sunspot.year, Yearly Sunspot Data, 1700-1988\n",
"[289,2] count mean std min 25% 50% 75% max\n",
" time 289 1844.0 83.571327 1700.0 1772.0 1844.0 1916.0 1988.0\n",
" value 289 48.613495 39.474103 0.0 15.6 39.0 68.9 190.2\n",
"\n",
"(64) sunspots, Monthly Sunspot Numbers, 1749-1983\n",
"[2820,2] count mean std min 25% 50% 75% max\n",
" time 2820 1866.458333 67.850684 1749.0 1807.729167 1866.458333 1925.1875 1983.91666667\n",
" value 2820 51.265957 43.448971 0.0 15.7 42.0 74.925 253.8\n",
"\n",
"(65) swiss, Swiss Fertility and Socioeconomic Indicators (1888) Data\n",
" [47,6] count mean std min 25% 50% 75% max\n",
" Fertility 47 70.142553 12.491697 35.0 64.7 70.4 78.45 92.5\n",
" Agriculture 47 50.659574 22.711218 1.2 35.9 54.1 67.65 89.7\n",
" Examination 47 16.489362 7.977883 3.0 12.0 16.0 22.0 37.0\n",
" Education 47 10.978723 9.615407 1.0 6.0 8.0 12.0 53.0\n",
" Catholic 47 41.14383 41.70485 2.15 5.195 15.14 93.125 100.0\n",
"Infant.Mortality 47 19.942553 2.912697 10.8 18.15 20.0 21.7 26.6\n",
"\n",
"(66) Theoph, Pharmacokinetics of Theophylline\n",
"[132,5] count mean std min 25% 50% 75% max\n",
"Subject 132 6.5 3.465203 1.0 3.75 6.5 9.25 12.0\n",
" Wt 132 69.583333 9.133181 54.6 63.575 70.5 74.425 86.4\n",
" Dose 132 4.625833 0.718074 3.1 4.305 4.53 5.0375 5.86\n",
" Time 132 5.894621 6.925952 0.0 0.595 3.53 9.0 24.65\n",
" conc 132 4.960455 2.867319 0.0 2.8775 5.275 7.14 11.4\n",
"\n",
"(67) Titanic, Survival of passengers on the Titanic\n",
"[32,1] count mean std min 25% 50% 75% max\n",
" Freq 32 68.78125 135.995905 0 0.75 13.5 77.0 670\n",
"\n",
"(68) ToothGrowth, The Effect of Vitamin C on Tooth Growth in Guinea Pigs\n",
"[60,2] count mean std min 25% 50% 75% max\n",
" len 60 18.813333 7.649315 4.2 13.075 19.25 25.275 33.9\n",
" dose 60 1.166667 0.628872 0.5 0.5 1.0 2.0 2.0\n",
"\n",
"(69) treering, Yearly Treering Data, -6000-1979\n",
"[7980,2] count mean std min 25% 50% 75% max\n",
" time 7980 -2010.5 2303.771907 -6000.0 -4005.25 -2010.5 -15.75 1979.0\n",
" value 7980 0.996836 0.300358 0.0 0.837 1.034 1.197 1.908\n",
"\n",
"(70) trees, Diameter, Height and Volume for Black Cherry Trees\n",
"[31,3] count mean std min 25% 50% 75% max\n",
" Girth 31 13.248387 3.138139 8.3 11.05 12.9 15.25 20.6\n",
"Height 31 76.0 6.371813 63.0 72.0 76.0 80.0 87.0\n",
"Volume 31 30.170968 16.437846 10.2 19.4 24.2 37.3 77.0\n",
"\n",
"(71) UCBAdmissions, Student Admissions at UC Berkeley\n",
"[24,1] count mean std min 25% 50% 75% max\n",
" Freq 24 188.583333 140.063624 8 80.0 170.0 302.5 512\n",
"\n",
"(72) UKDriverDeaths, Road Casualties in Great Britain 1969-84\n",
"[192,2] count mean std min 25% 50% 75% max\n",
" time 192 1976.958333 4.630815 1969.0 1972.979167 1976.958333 1980.9375 1984.91666666667\n",
" value 192 1670.307292 289.610958 1057.0 1461.75 1631.0 1850.75 2654.0\n",
"\n",
"(73) UKgas, UK Quarterly Gas Consumption\n",
"[108,2] count mean std min 25% 50% 75% max\n",
" time 108 1973.375 7.83023 1960.0 1966.6875 1973.375 1980.0625 1986.75\n",
" value 108 337.630556 251.334776 84.8 153.3 220.9 469.9 1163.9\n",
"\n",
"(74) USAccDeaths, Accidental Deaths in the US 1973-1978\n",
"[72,2] count mean std min 25% 50% 75% max\n",
" time 72 1975.958333 1.744037 1973.0 1974.479167 1975.958333 1977.4375 1978.91666666667\n",
" value 72 8788.791667 957.752606 6892.0 8089.0 8728.5 9323.25 11317.0\n",
"\n",
"(75) USArrests, Violent Crime Rates by US State\n",
" [50,4] count mean std min 25% 50% 75% max\n",
" Murder 50 7.788 4.35551 0.8 4.075 7.25 11.25 17.4\n",
" Assault 50 170.76 83.337661 45.0 109.0 159.0 249.0 337.0\n",
"UrbanPop 50 65.54 14.474763 32.0 54.5 66.0 77.75 91.0\n",
" Rape 50 21.232 9.366385 7.3 15.075 20.1 26.175 46.0\n",
"\n",
"(76) USJudgeRatings, Lawyers' Ratings of State Judges in the US Superior Court\n",
"[43,12] count mean std min 25% 50% 75% max\n",
" CONT 43 7.437209 0.940877 5.7 6.85 7.3 7.9 10.6\n",
" INTG 43 8.02093 0.770145 5.9 7.55 8.1 8.55 9.2\n",
" DMNR 43 7.516279 1.143705 4.3 6.9 7.7 8.35 9.0\n",
" DILG 43 7.693023 0.900898 5.1 7.15 7.8 8.45 9.0\n",
" CFMG 43 7.47907 0.86011 5.4 7.0 7.6 8.05 8.7\n",
" DECI 43 7.565116 0.802936 5.7 7.1 7.7 8.15 8.8\n",
" PREP 43 7.467442 0.95337 4.8 6.9 7.7 8.2 9.1\n",
" FAMI 43 7.488372 0.948987 5.1 6.95 7.6 8.25 9.1\n",
" ORAL 43 7.293023 1.010044 4.7 6.85 7.5 8.0 8.9\n",
" WRIT 43 7.383721 0.961133 4.9 6.9 7.6 8.05 9.0\n",
" PHYS 43 7.934884 0.939575 4.7 7.7 8.1 8.5 9.1\n",
" RTEN 43 7.602326 1.100971 4.8 7.15 7.8 8.25 9.2\n",
"\n",
"(77) USPersonalExpenditure, Personal Expenditure Data\n",
"[5,5] count mean std min 25% 50% 75% max\n",
" 1940 5 7.5222 9.135526 0.341 1.04 3.53 10.5 22.2\n",
" 1945 5 13.7428 18.126113 0.974 1.98 5.76 15.5 44.5\n",
" 1950 5 20.512 24.459026 1.8 2.45 9.71 29.0 59.6\n",
" 1955 5 25.94 29.750597 2.6 3.4 14.0 36.5 73.2\n",
" 1960 5 32.628 34.761213 3.64 5.4 21.1 46.2 86.8\n",
"\n",
"(78) uspop, Populations Recorded by the US Census\n",
"[19,2] count mean std min 25% 50% 75% max\n",
" time 19 1880.0 56.273143 1790.0 1835.0 1880.0 1925.0 1970.0\n",
" value 19 69.769474 63.207036 3.93 15.0 50.2 114.25 203.2\n",
"\n",
"(79) VADeaths, Death Rates in Virginia (1940)\n",
" [5,4] count mean std min 25% 50% 75% max\n",
" Rural Male 5 32.74 21.596134 11.7 18.1 26.9 41.0 66.0\n",
"Rural Female 5 25.18 18.424223 8.7 11.7 20.3 30.9 54.3\n",
" Urban Male 5 40.48 22.582449 15.4 24.3 37.0 54.6 71.1\n",
"Urban Female 5 25.28 17.063323 8.4 13.6 19.3 35.1 50.0\n",
"\n",
"(80) volcano, Topographic Information on Auckland's Maunga Whau Volcano\n",
"[87,61] count mean std min 25% 50% 75% max\n",
" V1 87 110.586207 6.902227 97 106.5 111.0 115.0 124\n",
" V2 87 111.827586 7.565538 97 107.5 113.0 116.0 128\n",
" V3 87 112.954023 8.203669 97 108.0 114.0 117.0 131\n",
" V4 87 114.114943 8.735686 98 108.5 115.0 118.0 134\n",
" V5 87 115.126437 9.295916 98 109.0 116.0 119.0 136\n",
" ... ... ... ... ... ... ... ... ...\n",
" V57 87 107.367816 8.405584 94 101.5 108.0 111.0 124\n",
" V58 87 105.827586 6.844123 94 100.5 106.0 110.0 119\n",
" V59 87 104.632184 5.775077 94 100.0 106.0 108.0 116\n",
" V60 87 103.804598 5.209203 94 100.0 105.0 107.0 113\n",
" V61 87 103.16092 4.874885 94 100.0 104.0 107.0 110\n",
"\n",
"(81) warpbreaks, The Number of Breaks in Yarn during Weaving\n",
"[54,1] count mean std min 25% 50% 75% max\n",
"breaks 54 28.148148 13.198638 10 18.25 26.0 34.0 70\n",
"\n",
"(82) women, Average Heights and Weights for American Women\n",
"[15,2] count mean std min 25% 50% 75% max\n",
"height 15 65.0 4.472136 58 61.5 65.0 68.5 72\n",
"weight 15 136.733333 15.498694 115 124.5 135.0 148.0 164\n",
"\n",
"(83) WorldPhones, The World's Telephones\n",
" [7,7] count mean std min 25% 50% 75% max\n",
" N.Amer 7 66747.571429 11277.462508 45939 62572.0 68484.0 73917.5 79831\n",
" Europe 7 34343.428571 7195.616857 21574 31250.0 35218.0 38969.5 43173\n",
" Asia 7 6229.285714 2124.214578 2876 4969.0 6662.0 7538.0 9053\n",
" S.Amer 7 2772.285714 496.687599 1815 2631.5 2845.0 3072.5 3338\n",
" Oceania 7 2625.0 523.063094 1646 2446.0 2691.0 2961.0 3224\n",
" Africa 7 1484.0 647.706981 89 1478.5 1663.0 1837.0 2005\n",
"Mid.Amer 7 841.714286 176.124685 555 753.0 836.0 959.5 1076\n",
"\n",
"(84) WWWusage, Internet Usage per Minute\n",
"[100,2] count mean std min 25% 50% 75% max\n",
" time 100 50.5 29.011492 1 25.75 50.5 75.25 100\n",
" value 100 137.08 39.999414 83 99.0 138.5 167.5 228\n",
"\n"
]
}
],
"source": [
"dataset_list = Datasets::RdatasetsList.new\n",
"\n",
"package = 'datasets'\n",
"\n",
"dataset_list.filter(package: package).each.with_index(1) do |ds, i|\n",
" puts \"(#{i}) #{ds.dataset}, #{ds.title}\"\n",
" dataset = Datasets::Rdatasets.new(package, ds.dataset)\n",
" df = Rover::DataFrame.new(dataset.to_table.to_h)\n",
" puts df.summary\n",
" puts\n",
"end; nil"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab2ec2f9-2d38-433e-97b9-3f848cb450f3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "27886f8d-2006-4bd6-9605-f53a5ee43a00",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a917886b-5b72-4b3b-b5d3-cd1f58629ddf",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Ruby 3.1.1",
"language": "ruby",
"name": "ruby"
},
"language_info": {
"file_extension": ".rb",
"mimetype": "application/x-ruby",
"name": "ruby",
"version": "3.1.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
module My
  # Statistic overrides meant to be prepended to Rover::Vector.
  # - Every method casts the underlying data to Float64 (Numo::DFloat) first,
  #   so Int type data is reduced as Float
  # - NaNs are ignored using (nan: true) option in Numo
  module RoverVectorPrepender
    # Mean of the values, NaNs ignored
    def mean
      @data.cast_to(Numo::DFloat).mean(nan: true)
    end

    # Median of the values, NaNs ignored
    def median
      @data.cast_to(Numo::DFloat).median(nan: true)
    end

    # Standard deviation (Numo's stddev), NaNs ignored
    def std
      @data.cast_to(Numo::DFloat).stddev(nan: true)
    end

    # Variance of the values, NaNs ignored
    def var
      @data.cast_to(Numo::DFloat).var(nan: true)
    end
  end

  # Summary methods meant to be prepended to Rover::DataFrame.
  module RoverDataFramePrepender
    # Show statistical summary of self
    # - Returns DataFrame with one row per numeric column and
    #   one column per statistic (count, mean, std, min, 25%, 50%, 75%, max)
    # - Make stats for numeric columns only
    # - 1st column header indicates [n of rows, n of numeric columns]
    # - Int type columns are cast to Float64 in mean, std, var (by Vector)
    # - NaNs are ignored using (nan: true) option in Numo
    # - counts also show non-NaN counts
    # - digits: number of decimal places kept in every statistic except count
    #   (Integer min/max are returned unchanged by Integer#round)
    def summary(digits: 6)
      num_keys = summary_numeric_keys
      header = { summary_header(num_keys) => num_keys }
      Rover::DataFrame.new(header.merge(summary_statistics(num_keys, digits)))
    end

    # This method may be abandoned
    # - My first implementation
    # - Transposed layout: one row per statistic, one column per numeric column
    # - Counts should be Int but are cast to Float
    #   (presumably because each column also holds the Float statistics)
    # - digits: same meaning as in #summary
    def summary_T(digits: 6)
      num_keys = summary_numeric_keys
      # use key of 1st column to show n_rows and n_of_numeric_columns
      key0 = summary_header(num_keys)
      rows = summary_statistics(num_keys, digits).map do |label, values|
        { key0 => label }.merge(num_keys.zip(values).to_h)
      end
      Rover::DataFrame.new(rows)
    end

    private

    # Keys of the columns whose Vector reports numeric?
    def summary_numeric_keys
      keys.select { |key| self[key].numeric? }
    end

    # Header key of the 1st column: :"[n of rows,n of numeric columns]"
    def summary_header(num_keys)
      :"[#{shape.first},#{num_keys.size}]"
    end

    # Ordered Hash of statistic label => Array of values (one per key in num_keys).
    # Each column Vector is looked up once and reused for all statistics.
    def summary_statistics(num_keys, digits)
      vectors = num_keys.map { |key| self[key] }
      {
        "count" => vectors.map { |v| v.missing.to_numo.count_false },
        "mean" => vectors.map { |v| v.mean.round(digits) },
        "std" => vectors.map { |v| v.std.round(digits) },
        # min/max are rounded like the other statistics so the table stays uniform
        "min" => vectors.map { |v| v.min.round(digits) },
        "25%" => vectors.map { |v| v.percentile(25).round(digits) },
        "50%" => vectors.map { |v| v.percentile(50).round(digits) },
        "75%" => vectors.map { |v| v.percentile(75).round(digits) },
        "max" => vectors.map { |v| v.max.round(digits) }
      }
    end
  end
end
# Activate the overrides: prepend each module to its Rover class so the
# module's methods are found ahead of the class's own definitions.
Rover::Vector.prepend(My::RoverVectorPrepender)
Rover::DataFrame.prepend(My::RoverDataFramePrepender)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment