Skip to content

Instantly share code, notes, and snippets.

@edouardklein
Created October 30, 2022 15:27
Show Gist options
  • Save edouardklein/80a308102aec0f83babcfb499d73fa3f to your computer and use it in GitHub Desktop.
Save edouardklein/80a308102aec0f83babcfb499d73fa3f to your computer and use it in GitHub Desktop.
Chihuahua
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b9bf282a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Holder_id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>District</th>\n",
" <th>City_quarter</th>\n",
" <th>Primary_breed</th>\n",
" <th>RASSE1_MISCHLING</th>\n",
" <th>Secondary_breed</th>\n",
" <th>RASSE2_MISCHLING</th>\n",
" <th>Breed_type</th>\n",
" <th>Birth_year</th>\n",
" <th>Dog_gender</th>\n",
" <th>Color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>126</td>\n",
" <td>61-70</td>\n",
" <td>m</td>\n",
" <td>9.0</td>\n",
" <td>92.0</td>\n",
" <td>Welsh Terrier</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>K</td>\n",
" <td>2011</td>\n",
" <td>w</td>\n",
" <td>schwarz/braun</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>171</td>\n",
" <td>61-70</td>\n",
" <td>m</td>\n",
" <td>3.0</td>\n",
" <td>31.0</td>\n",
" <td>Berner Sennenhund</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2009</td>\n",
" <td>m</td>\n",
" <td>tricolor</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>574</td>\n",
" <td>61-70</td>\n",
" <td>w</td>\n",
" <td>2.0</td>\n",
" <td>23.0</td>\n",
" <td>Mischling gross</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2013</td>\n",
" <td>w</td>\n",
" <td>schwarz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>695</td>\n",
" <td>41-50</td>\n",
" <td>m</td>\n",
" <td>6.0</td>\n",
" <td>63.0</td>\n",
" <td>Labrador Retriever</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2012</td>\n",
" <td>w</td>\n",
" <td>braun</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>893</td>\n",
" <td>61-70</td>\n",
" <td>w</td>\n",
" <td>7.0</td>\n",
" <td>71.0</td>\n",
" <td>Mittelschnauzer</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2010</td>\n",
" <td>w</td>\n",
" <td>schwarz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7150</th>\n",
" <td>135718</td>\n",
" <td>31-40</td>\n",
" <td>m</td>\n",
" <td>9.0</td>\n",
" <td>91.0</td>\n",
" <td>Mischling gross</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2015</td>\n",
" <td>w</td>\n",
" <td>golden</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7151</th>\n",
" <td>135725</td>\n",
" <td>31-40</td>\n",
" <td>w</td>\n",
" <td>7.0</td>\n",
" <td>74.0</td>\n",
" <td>Boxer</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2016</td>\n",
" <td>w</td>\n",
" <td>gelb/weiss</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7152</th>\n",
" <td>135726</td>\n",
" <td>11-20</td>\n",
" <td>w</td>\n",
" <td>11.0</td>\n",
" <td>111.0</td>\n",
" <td>Pinscher</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>K</td>\n",
" <td>2016</td>\n",
" <td>w</td>\n",
" <td>schwarz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7153</th>\n",
" <td>135728</td>\n",
" <td>31-40</td>\n",
" <td>w</td>\n",
" <td>7.0</td>\n",
" <td>72.0</td>\n",
" <td>Whippet</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2016</td>\n",
" <td>w</td>\n",
" <td>vierfarbig</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7154</th>\n",
" <td>135731</td>\n",
" <td>21-30</td>\n",
" <td>m</td>\n",
" <td>9.0</td>\n",
" <td>91.0</td>\n",
" <td>Cocker Spaniel</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>K</td>\n",
" <td>2016</td>\n",
" <td>m</td>\n",
" <td>schwarz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7155 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" Holder_id Age Gender District City_quarter Primary_breed \\\n",
"0 126 61-70 m 9.0 92.0 Welsh Terrier \n",
"1 171 61-70 m 3.0 31.0 Berner Sennenhund \n",
"2 574 61-70 w 2.0 23.0 Mischling gross \n",
"3 695 41-50 m 6.0 63.0 Labrador Retriever \n",
"4 893 61-70 w 7.0 71.0 Mittelschnauzer \n",
"... ... ... ... ... ... ... \n",
"7150 135718 31-40 m 9.0 91.0 Mischling gross \n",
"7151 135725 31-40 w 7.0 74.0 Boxer \n",
"7152 135726 11-20 w 11.0 111.0 Pinscher \n",
"7153 135728 31-40 w 7.0 72.0 Whippet \n",
"7154 135731 21-30 m 9.0 91.0 Cocker Spaniel \n",
"\n",
" RASSE1_MISCHLING Secondary_breed RASSE2_MISCHLING Breed_type \\\n",
"0 NaN NaN NaN K \n",
"1 NaN NaN NaN I \n",
"2 NaN NaN NaN I \n",
"3 NaN NaN NaN I \n",
"4 NaN NaN NaN I \n",
"... ... ... ... ... \n",
"7150 NaN NaN NaN I \n",
"7151 NaN NaN NaN I \n",
"7152 NaN NaN NaN K \n",
"7153 NaN NaN NaN I \n",
"7154 NaN NaN NaN K \n",
"\n",
" Birth_year Dog_gender Color \n",
"0 2011 w schwarz/braun \n",
"1 2009 m tricolor \n",
"2 2013 w schwarz \n",
"3 2012 w braun \n",
"4 2010 w schwarz \n",
"... ... ... ... \n",
"7150 2015 w golden \n",
"7151 2016 w gelb/weiss \n",
"7152 2016 w schwarz \n",
"7153 2016 w vierfarbig \n",
"7154 2016 m schwarz \n",
"\n",
"[7155 rows x 13 columns]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"data = pd.read_csv('20170308hundehalter.csv')\n",
"\n",
"data = data.rename(columns={'ALTER': 'Age','GESCHLECHT':'Gender',\n",
" 'STADTKREIS': 'District', 'STADTQUARTIER' : 'City_quarter',\t'RASSE1': 'Primary_breed',\n",
" 'RASSE2':'Secondary_breed','GEBURTSJAHR_HUND': 'Birth_year',\n",
" 'GESCHLECHT_HUND': 'Dog_gender','HUNDEFARBE':'Color',\n",
" 'RASSENTYP':'Breed_type', 'HALTER_ID':'Holder_id'\n",
" })\n",
" \n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "850c2e69",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>Holder_id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>District</th>\n",
" <th>City_quarter</th>\n",
" <th>Primary_breed</th>\n",
" <th>RASSE1_MISCHLING</th>\n",
" <th>Secondary_breed</th>\n",
" <th>RASSE2_MISCHLING</th>\n",
" <th>Breed_type</th>\n",
" <th>Birth_year</th>\n",
" <th>Dog_gender</th>\n",
" <th>Color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>126</td>\n",
" <td>61-70</td>\n",
" <td>m</td>\n",
" <td>9.0</td>\n",
" <td>92.0</td>\n",
" <td>Welsh Terrier</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>K</td>\n",
" <td>2011</td>\n",
" <td>w</td>\n",
" <td>schwarz/braun</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>171</td>\n",
" <td>61-70</td>\n",
" <td>m</td>\n",
" <td>3.0</td>\n",
" <td>31.0</td>\n",
" <td>Berner Sennenhund</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2009</td>\n",
" <td>m</td>\n",
" <td>tricolor</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>574</td>\n",
" <td>61-70</td>\n",
" <td>w</td>\n",
" <td>2.0</td>\n",
" <td>23.0</td>\n",
" <td>Mischling gross</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2013</td>\n",
" <td>w</td>\n",
" <td>schwarz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>695</td>\n",
" <td>41-50</td>\n",
" <td>m</td>\n",
" <td>6.0</td>\n",
" <td>63.0</td>\n",
" <td>Labrador Retriever</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2012</td>\n",
" <td>w</td>\n",
" <td>braun</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>893</td>\n",
" <td>61-70</td>\n",
" <td>w</td>\n",
" <td>7.0</td>\n",
" <td>71.0</td>\n",
" <td>Mittelschnauzer</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2010</td>\n",
" <td>w</td>\n",
" <td>schwarz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7150</th>\n",
" <td>7150</td>\n",
" <td>135718</td>\n",
" <td>31-40</td>\n",
" <td>m</td>\n",
" <td>9.0</td>\n",
" <td>91.0</td>\n",
" <td>Mischling gross</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2015</td>\n",
" <td>w</td>\n",
" <td>golden</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7151</th>\n",
" <td>7151</td>\n",
" <td>135725</td>\n",
" <td>31-40</td>\n",
" <td>w</td>\n",
" <td>7.0</td>\n",
" <td>74.0</td>\n",
" <td>Boxer</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2016</td>\n",
" <td>w</td>\n",
" <td>gelb/weiss</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7152</th>\n",
" <td>7152</td>\n",
" <td>135726</td>\n",
" <td>11-20</td>\n",
" <td>w</td>\n",
" <td>11.0</td>\n",
" <td>111.0</td>\n",
" <td>Pinscher</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>K</td>\n",
" <td>2016</td>\n",
" <td>w</td>\n",
" <td>schwarz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7153</th>\n",
" <td>7153</td>\n",
" <td>135728</td>\n",
" <td>31-40</td>\n",
" <td>w</td>\n",
" <td>7.0</td>\n",
" <td>72.0</td>\n",
" <td>Whippet</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2016</td>\n",
" <td>w</td>\n",
" <td>vierfarbig</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7154</th>\n",
" <td>7154</td>\n",
" <td>135731</td>\n",
" <td>21-30</td>\n",
" <td>m</td>\n",
" <td>9.0</td>\n",
" <td>91.0</td>\n",
" <td>Cocker Spaniel</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>K</td>\n",
" <td>2016</td>\n",
" <td>m</td>\n",
" <td>schwarz</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7155 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" index Holder_id Age Gender District City_quarter \\\n",
"0 0 126 61-70 m 9.0 92.0 \n",
"1 1 171 61-70 m 3.0 31.0 \n",
"2 2 574 61-70 w 2.0 23.0 \n",
"3 3 695 41-50 m 6.0 63.0 \n",
"4 4 893 61-70 w 7.0 71.0 \n",
"... ... ... ... ... ... ... \n",
"7150 7150 135718 31-40 m 9.0 91.0 \n",
"7151 7151 135725 31-40 w 7.0 74.0 \n",
"7152 7152 135726 11-20 w 11.0 111.0 \n",
"7153 7153 135728 31-40 w 7.0 72.0 \n",
"7154 7154 135731 21-30 m 9.0 91.0 \n",
"\n",
" Primary_breed RASSE1_MISCHLING Secondary_breed RASSE2_MISCHLING \\\n",
"0 Welsh Terrier NaN NaN NaN \n",
"1 Berner Sennenhund NaN NaN NaN \n",
"2 Mischling gross NaN NaN NaN \n",
"3 Labrador Retriever NaN NaN NaN \n",
"4 Mittelschnauzer NaN NaN NaN \n",
"... ... ... ... ... \n",
"7150 Mischling gross NaN NaN NaN \n",
"7151 Boxer NaN NaN NaN \n",
"7152 Pinscher NaN NaN NaN \n",
"7153 Whippet NaN NaN NaN \n",
"7154 Cocker Spaniel NaN NaN NaN \n",
"\n",
" Breed_type Birth_year Dog_gender Color \n",
"0 K 2011 w schwarz/braun \n",
"1 I 2009 m tricolor \n",
"2 I 2013 w schwarz \n",
"3 I 2012 w braun \n",
"4 I 2010 w schwarz \n",
"... ... ... ... ... \n",
"7150 I 2015 w golden \n",
"7151 I 2016 w gelb/weiss \n",
"7152 K 2016 w schwarz \n",
"7153 I 2016 w vierfarbig \n",
"7154 K 2016 m schwarz \n",
"\n",
"[7155 rows x 14 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
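],
"source": [
"# We need a column whose value is unique per row; the index will do.\n",
"# reset_index() returns a new DataFrame with the old index as an 'index' column; `data` itself is unchanged.\n",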
"data.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7becef94",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>District</th>\n",
" <th>1.0</th>\n",
" <th>2.0</th>\n",
" <th>3.0</th>\n",
" <th>4.0</th>\n",
" <th>5.0</th>\n",
" <th>6.0</th>\n",
" <th>7.0</th>\n",
" <th>8.0</th>\n",
" <th>9.0</th>\n",
" <th>10.0</th>\n",
" <th>11.0</th>\n",
" <th>12.0</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Primary_breed</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Affenpinscher</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Afghane</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Afghanischer Windhund</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Airedale Terrier</th>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Akita Inu</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Zwergpinscher</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>11</td>\n",
" <td>4</td>\n",
" <td>14</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Zwergpudel</th>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>15</td>\n",
" <td>8</td>\n",
" <td>15</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Zwergrauhaardackel</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Zwergschnauzer</th>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Zwergspitz</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>4</td>\n",
" <td>10</td>\n",
" <td>4</td>\n",
" <td>24</td>\n",
" <td>9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>304 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
"District 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 \\\n",
"Primary_breed \n",
"Affenpinscher 0 0 1 0 0 0 0 0 0 \n",
"Afghane 0 0 1 1 0 1 1 0 1 \n",
"Afghanischer Windhund 0 0 0 0 0 0 2 1 0 \n",
"Airedale Terrier 0 4 2 0 0 1 4 1 3 \n",
"Akita Inu 0 0 0 0 0 1 1 0 2 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"Zwergpinscher 0 3 7 3 3 4 6 1 11 \n",
"Zwergpudel 0 7 4 1 1 4 15 8 15 \n",
"Zwergrauhaardackel 1 0 0 1 0 2 2 2 0 \n",
"Zwergschnauzer 0 4 2 0 3 9 9 1 2 \n",
"Zwergspitz 3 3 8 2 7 8 6 4 10 \n",
"\n",
"District 10.0 11.0 12.0 \n",
"Primary_breed \n",
"Affenpinscher 0 0 0 \n",
"Afghane 0 1 0 \n",
"Afghanischer Windhund 0 0 0 \n",
"Airedale Terrier 1 0 1 \n",
"Akita Inu 0 1 0 \n",
"... ... ... ... \n",
"Zwergpinscher 4 14 6 \n",
"Zwergpudel 6 8 5 \n",
"Zwergrauhaardackel 0 1 0 \n",
"Zwergschnauzer 1 5 1 \n",
"Zwergspitz 4 24 9 \n",
"\n",
"[304 rows x 12 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count how many dogs of each breed there are per district\n",
"pt = data.reset_index().pivot_table(columns=\"District\", index=\"Primary_breed\",\n",
" values=\"index\", aggfunc=\"count\", fill_value=0)\n",
"pt"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "61e65d6c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Holder_id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>District</th>\n",
" <th>City_quarter</th>\n",
" <th>Primary_breed</th>\n",
" <th>RASSE1_MISCHLING</th>\n",
" <th>Secondary_breed</th>\n",
" <th>RASSE2_MISCHLING</th>\n",
" <th>Breed_type</th>\n",
" <th>Birth_year</th>\n",
" <th>Dog_gender</th>\n",
" <th>Color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>84917</td>\n",
" <td>51-60</td>\n",
" <td>w</td>\n",
" <td>2.0</td>\n",
" <td>21.0</td>\n",
" <td>Airedale Terrier</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2012</td>\n",
" <td>w</td>\n",
" <td>black/tan</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2743</th>\n",
" <td>98087</td>\n",
" <td>81-90</td>\n",
" <td>m</td>\n",
" <td>2.0</td>\n",
" <td>24.0</td>\n",
" <td>Airedale Terrier</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2005</td>\n",
" <td>w</td>\n",
" <td>black/tan</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3407</th>\n",
" <td>103892</td>\n",
" <td>61-70</td>\n",
" <td>w</td>\n",
" <td>2.0</td>\n",
" <td>23.0</td>\n",
" <td>Airedale Terrier</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2010</td>\n",
" <td>m</td>\n",
" <td>black/tan</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6687</th>\n",
" <td>132871</td>\n",
" <td>51-60</td>\n",
" <td>m</td>\n",
" <td>2.0</td>\n",
" <td>21.0</td>\n",
" <td>Airedale Terrier</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>I</td>\n",
" <td>2006</td>\n",
" <td>m</td>\n",
" <td>black/tan</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Holder_id Age Gender District City_quarter Primary_breed \\\n",
"1022 84917 51-60 w 2.0 21.0 Airedale Terrier \n",
"2743 98087 81-90 m 2.0 24.0 Airedale Terrier \n",
"3407 103892 61-70 w 2.0 23.0 Airedale Terrier \n",
"6687 132871 51-60 m 2.0 21.0 Airedale Terrier \n",
"\n",
" RASSE1_MISCHLING Secondary_breed RASSE2_MISCHLING Breed_type \\\n",
"1022 NaN NaN NaN I \n",
"2743 NaN NaN NaN I \n",
"3407 NaN NaN NaN I \n",
"6687 NaN NaN NaN I \n",
"\n",
" Birth_year Dog_gender Color \n",
"1022 2012 w black/tan \n",
"2743 2005 w black/tan \n",
"3407 2010 m black/tan \n",
"6687 2006 m black/tan "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: the pivot table reports 4 Airedale Terriers in district 2, so this query should return 4 rows\n",
"data.query(\"Primary_breed == 'Airedale Terrier' and District == 2\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "044c8932",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"District\n",
"1.0 Chihuahua\n",
"2.0 Mischling klein\n",
"3.0 Mischling klein\n",
"4.0 Chihuahua\n",
"5.0 Chihuahua\n",
"6.0 Mischling klein\n",
"7.0 Mischling klein\n",
"8.0 Mischling klein\n",
"9.0 Mischling klein\n",
"10.0 Mischling klein\n",
"11.0 Chihuahua\n",
"12.0 Chihuahua\n",
"dtype: object"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Find the breed with the highest count in each district (idxmax works column by column)\n",
"pt.idxmax()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2672c9f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment