Skip to content

Instantly share code, notes, and snippets.

@Tdual
Created May 2, 2018 08:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Tdual/55e7efc217424c16b09e03ccc2728538 to your computer and use it in GitHub Desktop.
Save Tdual/55e7efc217424c16b09e03ccc2728538 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"import pandas as pd\n",
"from collections import OrderedDict\n",
"\n",
"iris = datasets.load_iris()\n",
"\n",
"df = pd.concat([pd.DataFrame(iris.data,columns=iris.feature_names),pd.DataFrame(iris.target,columns=[\"species\"])],axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal length (cm)</th>\n",
" <th>sepal width (cm)</th>\n",
" <th>petal length (cm)</th>\n",
" <th>petal width (cm)</th>\n",
" <th>species</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" <td>1.3</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.6</td>\n",
" <td>3.1</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.0</td>\n",
" <td>3.6</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n",
"0 5.1 3.5 1.4 0.2 \n",
"1 4.9 3.0 1.4 0.2 \n",
"2 4.7 3.2 1.3 0.2 \n",
"3 4.6 3.1 1.5 0.2 \n",
"4 5.0 3.6 1.4 0.2 \n",
"\n",
" species \n",
"0 0 \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal length (cm)</th>\n",
" <th>sepal width (cm)</th>\n",
" <th>petal length (cm)</th>\n",
" <th>petal width (cm)</th>\n",
" <th>species</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>150.000000</td>\n",
" <td>150.000000</td>\n",
" <td>150.000000</td>\n",
" <td>150.000000</td>\n",
" <td>150.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>5.843333</td>\n",
" <td>3.054000</td>\n",
" <td>3.758667</td>\n",
" <td>1.198667</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.828066</td>\n",
" <td>0.433594</td>\n",
" <td>1.764420</td>\n",
" <td>0.763161</td>\n",
" <td>0.819232</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.300000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.100000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>5.100000</td>\n",
" <td>2.800000</td>\n",
" <td>1.600000</td>\n",
" <td>0.300000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>5.800000</td>\n",
" <td>3.000000</td>\n",
" <td>4.350000</td>\n",
" <td>1.300000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>6.400000</td>\n",
" <td>3.300000</td>\n",
" <td>5.100000</td>\n",
" <td>1.800000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>7.900000</td>\n",
" <td>4.400000</td>\n",
" <td>6.900000</td>\n",
" <td>2.500000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal length (cm) sepal width (cm) petal length (cm) \\\n",
"count 150.000000 150.000000 150.000000 \n",
"mean 5.843333 3.054000 3.758667 \n",
"std 0.828066 0.433594 1.764420 \n",
"min 4.300000 2.000000 1.000000 \n",
"25% 5.100000 2.800000 1.600000 \n",
"50% 5.800000 3.000000 4.350000 \n",
"75% 6.400000 3.300000 5.100000 \n",
"max 7.900000 4.400000 6.900000 \n",
"\n",
" petal width (cm) species \n",
"count 150.000000 150.000000 \n",
"mean 1.198667 1.000000 \n",
"std 0.763161 0.819232 \n",
"min 0.100000 0.000000 \n",
"25% 0.300000 0.000000 \n",
"50% 1.300000 1.000000 \n",
"75% 1.800000 2.000000 \n",
"max 2.500000 2.000000 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>species</th>\n",
" <th>sepal length (cm)</th>\n",
" <th>sepal width (cm)</th>\n",
" <th>petal length (cm)</th>\n",
" <th>petal width (cm)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>5.006</td>\n",
" <td>3.418</td>\n",
" <td>1.464</td>\n",
" <td>0.244</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>5.936</td>\n",
" <td>2.770</td>\n",
" <td>4.260</td>\n",
" <td>1.326</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>6.588</td>\n",
" <td>2.974</td>\n",
" <td>5.552</td>\n",
" <td>2.026</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" species sepal length (cm) sepal width (cm) petal length (cm) \\\n",
"0 0 5.006 3.418 1.464 \n",
"1 1 5.936 2.770 4.260 \n",
"2 2 6.588 2.974 5.552 \n",
"\n",
" petal width (cm) \n",
"0 0.244 \n",
"1 1.326 \n",
"2 2.026 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_groupby = df.groupby(\"species\",as_index=False)\n",
"df_groupby.mean()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>species</th>\n",
" <th>sepal length (cm)</th>\n",
" <th colspan=\"2\" halign=\"left\">sepal width (cm)</th>\n",
" <th colspan=\"2\" halign=\"left\">petal length (cm)</th>\n",
" <th colspan=\"3\" halign=\"left\">petal width (cm)</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>count</th>\n",
" <th>max</th>\n",
" <th>min</th>\n",
" <th>sum</th>\n",
" <th>var</th>\n",
" <th>std</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>5.006</td>\n",
" <td>3.418</td>\n",
" <td>50</td>\n",
" <td>1.9</td>\n",
" <td>1.0</td>\n",
" <td>12.2</td>\n",
" <td>0.011494</td>\n",
" <td>0.107210</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>5.936</td>\n",
" <td>2.770</td>\n",
" <td>50</td>\n",
" <td>5.1</td>\n",
" <td>3.0</td>\n",
" <td>66.3</td>\n",
" <td>0.039106</td>\n",
" <td>0.197753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>6.588</td>\n",
" <td>2.974</td>\n",
" <td>50</td>\n",
" <td>6.9</td>\n",
" <td>4.5</td>\n",
" <td>101.3</td>\n",
" <td>0.075433</td>\n",
" <td>0.274650</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" species sepal length (cm) sepal width (cm) petal length (cm) \\\n",
" mean mean count max min \n",
"0 0 5.006 3.418 50 1.9 1.0 \n",
"1 1 5.936 2.770 50 5.1 3.0 \n",
"2 2 6.588 2.974 50 6.9 4.5 \n",
"\n",
" petal width (cm) \n",
" sum var std \n",
"0 12.2 0.011494 0.107210 \n",
"1 66.3 0.039106 0.197753 \n",
"2 101.3 0.075433 0.274650 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_groupby.agg({\"sepal length (cm)\": \"mean\",\n",
" \"sepal width (cm)\": [\"mean\",\"count\"],\n",
" \"petal length (cm)\": [\"max\",\"min\"],\n",
" \"petal width (cm)\": [\"sum\",\"var\",\"std\"]})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Environment (conda_py3.6)",
"language": "python",
"name": "conda_py3.6"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment