Skip to content

Instantly share code, notes, and snippets.

@h5li
Created January 25, 2019 23:24
Show Gist options
  • Save h5li/568522da8f1c0a52ec91c6fb33d68da5 to your computer and use it in GitHub Desktop.
Save h5li/568522da8f1c0a52ec91c6fb33d68da5 to your computer and use it in GitHub Desktop.
Kaggle Session Winter 2019 No.1
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Intro to Pandas Solution Notebook 🥑 \n",
"Hey y'all, this notebook will walk you through the Kaggle session we had in week 3. We go over basic data selection and basic math/conditional operators."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Importing all the Python libraries that we need\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Date</th>\n",
" <th>AveragePrice</th>\n",
" <th>Total Volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>Total Bags</th>\n",
" <th>Small Bags</th>\n",
" <th>Large Bags</th>\n",
" <th>XLarge Bags</th>\n",
" <th>type</th>\n",
" <th>year</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>2015-12-27</td>\n",
" <td>1.33</td>\n",
" <td>64236.62</td>\n",
" <td>1036.74</td>\n",
" <td>54454.85</td>\n",
" <td>48.16</td>\n",
" <td>8696.87</td>\n",
" <td>8603.62</td>\n",
" <td>93.25</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2015-12-20</td>\n",
" <td>1.35</td>\n",
" <td>54876.98</td>\n",
" <td>674.28</td>\n",
" <td>44638.81</td>\n",
" <td>58.33</td>\n",
" <td>9505.56</td>\n",
" <td>9408.07</td>\n",
" <td>97.49</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>2015-12-13</td>\n",
" <td>0.93</td>\n",
" <td>118220.22</td>\n",
" <td>794.70</td>\n",
" <td>109149.67</td>\n",
" <td>130.50</td>\n",
" <td>8145.35</td>\n",
" <td>8042.21</td>\n",
" <td>103.14</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>2015-12-06</td>\n",
" <td>1.08</td>\n",
" <td>78992.15</td>\n",
" <td>1132.00</td>\n",
" <td>71976.41</td>\n",
" <td>72.58</td>\n",
" <td>5811.16</td>\n",
" <td>5677.40</td>\n",
" <td>133.76</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>2015-11-29</td>\n",
" <td>1.28</td>\n",
" <td>51039.60</td>\n",
" <td>941.48</td>\n",
" <td>43838.39</td>\n",
" <td>75.78</td>\n",
" <td>6183.95</td>\n",
" <td>5986.26</td>\n",
" <td>197.69</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Date AveragePrice Total Volume 4046 4225 \\\n",
"0 0 2015-12-27 1.33 64236.62 1036.74 54454.85 \n",
"1 1 2015-12-20 1.35 54876.98 674.28 44638.81 \n",
"2 2 2015-12-13 0.93 118220.22 794.70 109149.67 \n",
"3 3 2015-12-06 1.08 78992.15 1132.00 71976.41 \n",
"4 4 2015-11-29 1.28 51039.60 941.48 43838.39 \n",
"\n",
" 4770 Total Bags Small Bags Large Bags XLarge Bags type \\\n",
"0 48.16 8696.87 8603.62 93.25 0.0 conventional \n",
"1 58.33 9505.56 9408.07 97.49 0.0 conventional \n",
"2 130.50 8145.35 8042.21 103.14 0.0 conventional \n",
"3 72.58 5811.16 5677.40 133.76 0.0 conventional \n",
"4 75.78 6183.95 5986.26 197.69 0.0 conventional \n",
"\n",
" year region \n",
"0 2015 Albany \n",
"1 2015 Albany \n",
"2 2015 Albany \n",
"3 2015 Albany \n",
"4 2015 Albany "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reading the dataset from a csv into a dataframe\n",
"df = pd.read_csv(\"avocado.csv\")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>AveragePrice</th>\n",
" <th>Total Volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>Total Bags</th>\n",
" <th>Small Bags</th>\n",
" <th>Large Bags</th>\n",
" <th>XLarge Bags</th>\n",
" <th>type</th>\n",
" <th>year</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-12-27</td>\n",
" <td>1.33</td>\n",
" <td>64236.62</td>\n",
" <td>1036.74</td>\n",
" <td>54454.85</td>\n",
" <td>48.16</td>\n",
" <td>8696.87</td>\n",
" <td>8603.62</td>\n",
" <td>93.25</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-12-20</td>\n",
" <td>1.35</td>\n",
" <td>54876.98</td>\n",
" <td>674.28</td>\n",
" <td>44638.81</td>\n",
" <td>58.33</td>\n",
" <td>9505.56</td>\n",
" <td>9408.07</td>\n",
" <td>97.49</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-12-13</td>\n",
" <td>0.93</td>\n",
" <td>118220.22</td>\n",
" <td>794.70</td>\n",
" <td>109149.67</td>\n",
" <td>130.50</td>\n",
" <td>8145.35</td>\n",
" <td>8042.21</td>\n",
" <td>103.14</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date AveragePrice Total Volume 4046 4225 4770 \\\n",
"0 2015-12-27 1.33 64236.62 1036.74 54454.85 48.16 \n",
"1 2015-12-20 1.35 54876.98 674.28 44638.81 58.33 \n",
"2 2015-12-13 0.93 118220.22 794.70 109149.67 130.50 \n",
"\n",
" Total Bags Small Bags Large Bags XLarge Bags type year region \n",
"0 8696.87 8603.62 93.25 0.0 conventional 2015 Albany \n",
"1 9505.56 9408.07 97.49 0.0 conventional 2015 Albany \n",
"2 8145.35 8042.21 103.14 0.0 conventional 2015 Albany "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Now we need to clean up our dataset a little bit; in particular, get rid of the useless \"Unnamed: 0\" column\n",
"df = df.drop('Unnamed: 0', axis=1)\n",
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Average Price</th>\n",
" <th>Total Volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>Total Bags</th>\n",
" <th>Small Bags</th>\n",
" <th>Large Bags</th>\n",
" <th>XLarge Bags</th>\n",
" <th>type</th>\n",
" <th>year</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-12-27</td>\n",
" <td>1.33</td>\n",
" <td>64236.62</td>\n",
" <td>1036.74</td>\n",
" <td>54454.85</td>\n",
" <td>48.16</td>\n",
" <td>8696.87</td>\n",
" <td>8603.62</td>\n",
" <td>93.25</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-12-20</td>\n",
" <td>1.35</td>\n",
" <td>54876.98</td>\n",
" <td>674.28</td>\n",
" <td>44638.81</td>\n",
" <td>58.33</td>\n",
" <td>9505.56</td>\n",
" <td>9408.07</td>\n",
" <td>97.49</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-12-13</td>\n",
" <td>0.93</td>\n",
" <td>118220.22</td>\n",
" <td>794.70</td>\n",
" <td>109149.67</td>\n",
" <td>130.50</td>\n",
" <td>8145.35</td>\n",
" <td>8042.21</td>\n",
" <td>103.14</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Average Price Total Volume 4046 4225 4770 \\\n",
"0 2015-12-27 1.33 64236.62 1036.74 54454.85 48.16 \n",
"1 2015-12-20 1.35 54876.98 674.28 44638.81 58.33 \n",
"2 2015-12-13 0.93 118220.22 794.70 109149.67 130.50 \n",
"\n",
" Total Bags Small Bags Large Bags XLarge Bags type year region \n",
"0 8696.87 8603.62 93.25 0.0 conventional 2015 Albany \n",
"1 9505.56 9408.07 97.49 0.0 conventional 2015 Albany \n",
"2 8145.35 8042.21 103.14 0.0 conventional 2015 Albany "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Here's how you rename a column\n",
"df = df.rename(columns={\"AveragePrice\": \"Average Price\"})\n",
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Albany\n",
"1 Albany\n",
"2 Albany\n",
"3 Albany\n",
"4 Albany\n",
"Name: region, dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Here are three different ways to select one column from a dataframe. Uncomment the different \n",
"# selection functions to see that they do the exact same thing.\n",
"df['region'].head()\n",
"#df.iloc[:, -1].head()\n",
"#df.region.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.93"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How to select one entry in the 3rd row and 2nd column\n",
"df.iloc[2, 1]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2015-12-27\n",
"1 2015-12-20\n",
"2 2015-12-13\n",
"3 2015-12-06\n",
"4 2015-11-29\n",
"Name: Date, dtype: object"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How to use the loc function to get one column\n",
"df.loc[:, 'Date'].head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Average Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-12-27</td>\n",
" <td>1.33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-12-20</td>\n",
" <td>1.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-12-13</td>\n",
" <td>0.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-12-06</td>\n",
" <td>1.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-11-29</td>\n",
" <td>1.28</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Average Price\n",
"0 2015-12-27 1.33\n",
"1 2015-12-20 1.35\n",
"2 2015-12-13 0.93\n",
"3 2015-12-06 1.08\n",
"4 2015-11-29 1.28"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How to select multiple columns from a dataframe\n",
"df[['Date','Average Price']].head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Average Price</th>\n",
" <th>Total Volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>Total Bags</th>\n",
" <th>Small Bags</th>\n",
" <th>Large Bags</th>\n",
" <th>XLarge Bags</th>\n",
" <th>type</th>\n",
" <th>year</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-12-27</td>\n",
" <td>1.33</td>\n",
" <td>64236.62</td>\n",
" <td>1036.74</td>\n",
" <td>54454.85</td>\n",
" <td>48.16</td>\n",
" <td>8696.87</td>\n",
" <td>8603.62</td>\n",
" <td>93.25</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-12-20</td>\n",
" <td>1.35</td>\n",
" <td>54876.98</td>\n",
" <td>674.28</td>\n",
" <td>44638.81</td>\n",
" <td>58.33</td>\n",
" <td>9505.56</td>\n",
" <td>9408.07</td>\n",
" <td>97.49</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-12-13</td>\n",
" <td>0.93</td>\n",
" <td>118220.22</td>\n",
" <td>794.70</td>\n",
" <td>109149.67</td>\n",
" <td>130.50</td>\n",
" <td>8145.35</td>\n",
" <td>8042.21</td>\n",
" <td>103.14</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-12-06</td>\n",
" <td>1.08</td>\n",
" <td>78992.15</td>\n",
" <td>1132.00</td>\n",
" <td>71976.41</td>\n",
" <td>72.58</td>\n",
" <td>5811.16</td>\n",
" <td>5677.40</td>\n",
" <td>133.76</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-11-29</td>\n",
" <td>1.28</td>\n",
" <td>51039.60</td>\n",
" <td>941.48</td>\n",
" <td>43838.39</td>\n",
" <td>75.78</td>\n",
" <td>6183.95</td>\n",
" <td>5986.26</td>\n",
" <td>197.69</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Average Price Total Volume 4046 4225 4770 \\\n",
"0 2015-12-27 1.33 64236.62 1036.74 54454.85 48.16 \n",
"1 2015-12-20 1.35 54876.98 674.28 44638.81 58.33 \n",
"2 2015-12-13 0.93 118220.22 794.70 109149.67 130.50 \n",
"3 2015-12-06 1.08 78992.15 1132.00 71976.41 72.58 \n",
"4 2015-11-29 1.28 51039.60 941.48 43838.39 75.78 \n",
"\n",
" Total Bags Small Bags Large Bags XLarge Bags type year region \n",
"0 8696.87 8603.62 93.25 0.0 conventional 2015 Albany \n",
"1 9505.56 9408.07 97.49 0.0 conventional 2015 Albany \n",
"2 8145.35 8042.21 103.14 0.0 conventional 2015 Albany \n",
"3 5811.16 5677.40 133.76 0.0 conventional 2015 Albany \n",
"4 6183.95 5986.26 197.69 0.0 conventional 2015 Albany "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Using conditionals, we can output all rows where the average price is greater than 0.5\n",
"specific_choice = df[df['Average Price'] > 0.5]\n",
"specific_choice.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Average Price</th>\n",
" <th>Total Volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>Total Bags</th>\n",
" <th>Small Bags</th>\n",
" <th>Large Bags</th>\n",
" <th>XLarge Bags</th>\n",
" <th>type</th>\n",
" <th>year</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8478</th>\n",
" <td>2018-03-25</td>\n",
" <td>1.57</td>\n",
" <td>149396.50</td>\n",
" <td>16361.69</td>\n",
" <td>109045.03</td>\n",
" <td>65.45</td>\n",
" <td>23924.33</td>\n",
" <td>19273.80</td>\n",
" <td>4270.53</td>\n",
" <td>380.00</td>\n",
" <td>conventional</td>\n",
" <td>2018</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8479</th>\n",
" <td>2018-03-18</td>\n",
" <td>1.35</td>\n",
" <td>105304.65</td>\n",
" <td>13234.86</td>\n",
" <td>61037.58</td>\n",
" <td>55.00</td>\n",
" <td>30977.21</td>\n",
" <td>26755.90</td>\n",
" <td>3721.31</td>\n",
" <td>500.00</td>\n",
" <td>conventional</td>\n",
" <td>2018</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8480</th>\n",
" <td>2018-03-11</td>\n",
" <td>1.12</td>\n",
" <td>144648.75</td>\n",
" <td>15823.35</td>\n",
" <td>110950.68</td>\n",
" <td>70.00</td>\n",
" <td>17804.72</td>\n",
" <td>14480.52</td>\n",
" <td>3033.09</td>\n",
" <td>291.11</td>\n",
" <td>conventional</td>\n",
" <td>2018</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8481</th>\n",
" <td>2018-03-04</td>\n",
" <td>1.08</td>\n",
" <td>139520.60</td>\n",
" <td>12002.12</td>\n",
" <td>105069.57</td>\n",
" <td>95.62</td>\n",
" <td>22353.29</td>\n",
" <td>16128.51</td>\n",
" <td>5941.45</td>\n",
" <td>283.33</td>\n",
" <td>conventional</td>\n",
" <td>2018</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8482</th>\n",
" <td>2018-02-25</td>\n",
" <td>1.28</td>\n",
" <td>104278.89</td>\n",
" <td>10368.77</td>\n",
" <td>59723.32</td>\n",
" <td>48.00</td>\n",
" <td>34138.80</td>\n",
" <td>30126.31</td>\n",
" <td>3702.49</td>\n",
" <td>310.00</td>\n",
" <td>conventional</td>\n",
" <td>2018</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Average Price Total Volume 4046 4225 4770 \\\n",
"8478 2018-03-25 1.57 149396.50 16361.69 109045.03 65.45 \n",
"8479 2018-03-18 1.35 105304.65 13234.86 61037.58 55.00 \n",
"8480 2018-03-11 1.12 144648.75 15823.35 110950.68 70.00 \n",
"8481 2018-03-04 1.08 139520.60 12002.12 105069.57 95.62 \n",
"8482 2018-02-25 1.28 104278.89 10368.77 59723.32 48.00 \n",
"\n",
" Total Bags Small Bags Large Bags XLarge Bags type year \\\n",
"8478 23924.33 19273.80 4270.53 380.00 conventional 2018 \n",
"8479 30977.21 26755.90 3721.31 500.00 conventional 2018 \n",
"8480 17804.72 14480.52 3033.09 291.11 conventional 2018 \n",
"8481 22353.29 16128.51 5941.45 283.33 conventional 2018 \n",
"8482 34138.80 30126.31 3702.49 310.00 conventional 2018 \n",
"\n",
" region \n",
"8478 Albany \n",
"8479 Albany \n",
"8480 Albany \n",
"8481 Albany \n",
"8482 Albany "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We also can combine conditionals using careful parenthesis. Here we are outputting all rows where average price is \n",
"# greater than 0.5 and year is greater than 2017\n",
"specific_choice = df[(df['Average Price'] > 0.5) & (df['year'] > 2017)]\n",
"specific_choice.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Average Price</th>\n",
" <th>Total Volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>Total Bags</th>\n",
" <th>Small Bags</th>\n",
" <th>Large Bags</th>\n",
" <th>XLarge Bags</th>\n",
" <th>type</th>\n",
" <th>year</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7791</th>\n",
" <td>2017-12-17</td>\n",
" <td>0.73</td>\n",
" <td>719091.14</td>\n",
" <td>147099.59</td>\n",
" <td>439224.74</td>\n",
" <td>17641.76</td>\n",
" <td>115125.05</td>\n",
" <td>108179.86</td>\n",
" <td>4196.30</td>\n",
" <td>2748.89</td>\n",
" <td>conventional</td>\n",
" <td>2017</td>\n",
" <td>SanDiego</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7835</th>\n",
" <td>2017-02-12</td>\n",
" <td>0.70</td>\n",
" <td>645130.97</td>\n",
" <td>203848.19</td>\n",
" <td>199351.67</td>\n",
" <td>13290.00</td>\n",
" <td>228641.11</td>\n",
" <td>220708.30</td>\n",
" <td>7652.81</td>\n",
" <td>280.00</td>\n",
" <td>conventional</td>\n",
" <td>2017</td>\n",
" <td>SanDiego</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7836</th>\n",
" <td>2017-02-05</td>\n",
" <td>0.63</td>\n",
" <td>892740.87</td>\n",
" <td>260418.91</td>\n",
" <td>290223.93</td>\n",
" <td>20313.95</td>\n",
" <td>321784.08</td>\n",
" <td>318872.23</td>\n",
" <td>2886.85</td>\n",
" <td>25.00</td>\n",
" <td>conventional</td>\n",
" <td>2017</td>\n",
" <td>SanDiego</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7837</th>\n",
" <td>2017-01-29</td>\n",
" <td>0.65</td>\n",
" <td>776310.02</td>\n",
" <td>282233.20</td>\n",
" <td>253233.56</td>\n",
" <td>14380.96</td>\n",
" <td>226462.30</td>\n",
" <td>209999.44</td>\n",
" <td>13407.30</td>\n",
" <td>3055.56</td>\n",
" <td>conventional</td>\n",
" <td>2017</td>\n",
" <td>SanDiego</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7838</th>\n",
" <td>2017-01-22</td>\n",
" <td>0.74</td>\n",
" <td>695873.15</td>\n",
" <td>313593.77</td>\n",
" <td>174728.80</td>\n",
" <td>15139.20</td>\n",
" <td>192411.38</td>\n",
" <td>179361.27</td>\n",
" <td>13050.11</td>\n",
" <td>0.00</td>\n",
" <td>conventional</td>\n",
" <td>2017</td>\n",
" <td>SanDiego</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Average Price Total Volume 4046 4225 4770 \\\n",
"7791 2017-12-17 0.73 719091.14 147099.59 439224.74 17641.76 \n",
"7835 2017-02-12 0.70 645130.97 203848.19 199351.67 13290.00 \n",
"7836 2017-02-05 0.63 892740.87 260418.91 290223.93 20313.95 \n",
"7837 2017-01-29 0.65 776310.02 282233.20 253233.56 14380.96 \n",
"7838 2017-01-22 0.74 695873.15 313593.77 174728.80 15139.20 \n",
"\n",
" Total Bags Small Bags Large Bags XLarge Bags type year \\\n",
"7791 115125.05 108179.86 4196.30 2748.89 conventional 2017 \n",
"7835 228641.11 220708.30 7652.81 280.00 conventional 2017 \n",
"7836 321784.08 318872.23 2886.85 25.00 conventional 2017 \n",
"7837 226462.30 209999.44 13407.30 3055.56 conventional 2017 \n",
"7838 192411.38 179361.27 13050.11 0.00 conventional 2017 \n",
"\n",
" region \n",
"7791 SanDiego \n",
"7835 SanDiego \n",
"7836 SanDiego \n",
"7837 SanDiego \n",
"7838 SanDiego "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# More combinations of conditionals!\n",
"exercise = df[(df['Average Price'] < 0.75) & (df['region'] == 'SanDiego') & (df['year'] == 2017)]\n",
"exercise.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(89791919.69, 15523402593.400002)\n"
]
},
{
"data": {
"text/plain": [
"0.0057842936913957465"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Here's how you would get the total volume proportion of san diego avocados: \n",
"sandiego = df[df['region'] == 'SanDiego']\n",
"sandiego_total = sandiego['Total Volume'].sum()\n",
"total = df['Total Volume'].sum()\n",
"\n",
"print(sandiego_total, total)\n",
"\n",
"prop_sandiego = sandiego_total / total\n",
"prop_sandiego"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.020012349060428782"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# and here is how you would get the total volume proportion of both atlanta and denver avocados\n",
"atlanta_denver = df[(df['region'] == 'Atlanta') | (df['region'] == 'Denver')]\n",
"ad_total = atlanta_denver['Total Bags'].sum()\n",
"total = df['Total Bags'].sum()\n",
"\n",
"prop_total_bags = ad_total / total\n",
"prop_total_bags"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Average Price</th>\n",
" <th>Total Volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>Total Bags</th>\n",
" <th>Small Bags</th>\n",
" <th>Large Bags</th>\n",
" <th>XLarge Bags</th>\n",
" <th>type</th>\n",
" <th>year</th>\n",
" <th>region</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-12-27</td>\n",
" <td>1.33</td>\n",
" <td>-786407.393009</td>\n",
" <td>1036.74</td>\n",
" <td>54454.85</td>\n",
" <td>48.16</td>\n",
" <td>8696.87</td>\n",
" <td>8603.62</td>\n",
" <td>93.25</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-12-20</td>\n",
" <td>1.35</td>\n",
" <td>-795767.033009</td>\n",
" <td>674.28</td>\n",
" <td>44638.81</td>\n",
" <td>58.33</td>\n",
" <td>9505.56</td>\n",
" <td>9408.07</td>\n",
" <td>97.49</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-12-13</td>\n",
" <td>0.93</td>\n",
" <td>-732423.793009</td>\n",
" <td>794.70</td>\n",
" <td>109149.67</td>\n",
" <td>130.50</td>\n",
" <td>8145.35</td>\n",
" <td>8042.21</td>\n",
" <td>103.14</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-12-06</td>\n",
" <td>1.08</td>\n",
" <td>-771651.863009</td>\n",
" <td>1132.00</td>\n",
" <td>71976.41</td>\n",
" <td>72.58</td>\n",
" <td>5811.16</td>\n",
" <td>5677.40</td>\n",
" <td>133.76</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-11-29</td>\n",
" <td>1.28</td>\n",
" <td>-799604.413009</td>\n",
" <td>941.48</td>\n",
" <td>43838.39</td>\n",
" <td>75.78</td>\n",
" <td>6183.95</td>\n",
" <td>5986.26</td>\n",
" <td>197.69</td>\n",
" <td>0.0</td>\n",
" <td>conventional</td>\n",
" <td>2015</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Average Price Total Volume 4046 4225 4770 \\\n",
"0 2015-12-27 1.33 -786407.393009 1036.74 54454.85 48.16 \n",
"1 2015-12-20 1.35 -795767.033009 674.28 44638.81 58.33 \n",
"2 2015-12-13 0.93 -732423.793009 794.70 109149.67 130.50 \n",
"3 2015-12-06 1.08 -771651.863009 1132.00 71976.41 72.58 \n",
"4 2015-11-29 1.28 -799604.413009 941.48 43838.39 75.78 \n",
"\n",
" Total Bags Small Bags Large Bags XLarge Bags type year region \n",
"0 8696.87 8603.62 93.25 0.0 conventional 2015 Albany \n",
"1 9505.56 9408.07 97.49 0.0 conventional 2015 Albany \n",
"2 8145.35 8042.21 103.14 0.0 conventional 2015 Albany \n",
"3 5811.16 5677.40 133.76 0.0 conventional 2015 Albany \n",
"4 6183.95 5986.26 197.69 0.0 conventional 2015 Albany "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Here is how to subtract the mean of the total volume column from every entry in the column.\n",
"mean = df['Total Volume'].mean() # Get the mean of the column\n",
"subtract_mean = lambda x: x - mean # Create a lambda function called subtract_mean which subtracts the mean from x\n",
"# Use the apply function from pandas to apply the subtract_mean function to every entry in Total Volume\n",
"df['Total Volume'] = df['Total Volume'].apply(subtract_mean) \n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment