Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wonksknowsuchin/479e8013e3a3fa65c3aaf2e312a9d043 to your computer and use it in GitHub Desktop.
Save wonksknowsuchin/479e8013e3a3fa65c3aaf2e312a9d043 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "respiratory-doctor",
"metadata": {},
"source": [
"# Import pandas Library"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "hairy-oxide",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "mental-interval",
"metadata": {},
"source": [
"# Load dataset as pandas dataframe"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "developmental-recommendation",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100% Bran</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>130</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>6</td>\n",
" <td>280</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.33</td>\n",
" <td>68.402973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100% Natural Bran</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>120</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>2.0</td>\n",
" <td>8.0</td>\n",
" <td>8</td>\n",
" <td>135</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>33.983679</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>All-Bran</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>260</td>\n",
" <td>9.0</td>\n",
" <td>7.0</td>\n",
" <td>5</td>\n",
" <td>320</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.33</td>\n",
" <td>59.425505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>All-Bran with Extra Fiber</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>14.0</td>\n",
" <td>8.0</td>\n",
" <td>0</td>\n",
" <td>330</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.50</td>\n",
" <td>93.704912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Almond Delight</td>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>200</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
" <td>8</td>\n",
" <td>-1</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.75</td>\n",
" <td>34.384843</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber \\\n",
"0 100% Bran N C 70 4 1 130 10.0 \n",
"1 100% Natural Bran Q C 120 3 5 15 2.0 \n",
"2 All-Bran K C 70 4 1 260 9.0 \n",
"3 All-Bran with Extra Fiber K C 50 4 0 140 14.0 \n",
"4 Almond Delight R C 110 2 2 200 1.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight cups rating \n",
"0 5.0 6 280 25 3 1.0 0.33 68.402973 \n",
"1 8.0 8 135 0 3 1.0 1.00 33.983679 \n",
"2 7.0 5 320 25 3 1.0 0.33 59.425505 \n",
"3 8.0 0 330 25 3 1.0 0.50 93.704912 \n",
"4 14.0 8 -1 25 3 1.0 0.75 34.384843 "
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df=pd.read_csv('cereal.csv')\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "closing-outreach",
"metadata": {},
"source": [
"# Select particular columns using column names in a dataframe method"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "latest-bosnia",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>calories</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100% Bran</td>\n",
" <td>70</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100% Natural Bran</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>All-Bran</td>\n",
" <td>70</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>All-Bran with Extra Fiber</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Almond Delight</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>Triples</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>Trix</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>Wheat Chex</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>Wheaties</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>Wheaties Honey Gold</td>\n",
" <td>110</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>77 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" name calories\n",
"0 100% Bran 70\n",
"1 100% Natural Bran 120\n",
"2 All-Bran 70\n",
"3 All-Bran with Extra Fiber 50\n",
"4 Almond Delight 110\n",
".. ... ...\n",
"72 Triples 110\n",
"73 Trix 110\n",
"74 Wheat Chex 100\n",
"75 Wheaties 100\n",
"76 Wheaties Honey Gold 110\n",
"\n",
"[77 rows x 2 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['name', 'calories']]"
]
},
{
"cell_type": "markdown",
"id": "actual-restaurant",
"metadata": {},
"source": [
"# Slicing using index to filter rows"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "stopped-jewelry",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Apple Cinnamon Cheerios</td>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>180</td>\n",
" <td>1.5</td>\n",
" <td>10.5</td>\n",
" <td>10</td>\n",
" <td>70</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.00</td>\n",
" <td>0.75</td>\n",
" <td>29.509541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Apple Jacks</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>125</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>14</td>\n",
" <td>30</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.00</td>\n",
" <td>1.00</td>\n",
" <td>33.174094</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Basic 4</td>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>130</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>210</td>\n",
" <td>2.0</td>\n",
" <td>18.0</td>\n",
" <td>8</td>\n",
" <td>100</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.33</td>\n",
" <td>0.75</td>\n",
" <td>37.038562</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Bran Chex</td>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>200</td>\n",
" <td>4.0</td>\n",
" <td>15.0</td>\n",
" <td>6</td>\n",
" <td>125</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.00</td>\n",
" <td>0.67</td>\n",
" <td>49.120253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Bran Flakes</td>\n",
" <td>P</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>210</td>\n",
" <td>5.0</td>\n",
" <td>13.0</td>\n",
" <td>5</td>\n",
" <td>190</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>0.67</td>\n",
" <td>53.313813</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber \\\n",
"5 Apple Cinnamon Cheerios G C 110 2 2 180 1.5 \n",
"6 Apple Jacks K C 110 2 0 125 1.0 \n",
"7 Basic 4 G C 130 3 2 210 2.0 \n",
"8 Bran Chex R C 90 2 1 200 4.0 \n",
"9 Bran Flakes P C 90 3 0 210 5.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight cups rating \n",
"5 10.5 10 70 25 1 1.00 0.75 29.509541 \n",
"6 11.0 14 30 25 2 1.00 1.00 33.174094 \n",
"7 18.0 8 100 25 3 1.33 0.75 37.038562 \n",
"8 15.0 6 125 25 1 1.00 0.67 49.120253 \n",
"9 13.0 5 190 25 3 1.00 0.67 53.313813 "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[5:10]"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "dying-purchase",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>100% Bran</th>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>130</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>6</td>\n",
" <td>280</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>0.33</td>\n",
" <td>68.402973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100% Natural Bran</th>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>120</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>2.0</td>\n",
" <td>8.0</td>\n",
" <td>8</td>\n",
" <td>135</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>1.00</td>\n",
" <td>33.983679</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All-Bran</th>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>260</td>\n",
" <td>9.0</td>\n",
" <td>7.0</td>\n",
" <td>5</td>\n",
" <td>320</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>0.33</td>\n",
" <td>59.425505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All-Bran with Extra Fiber</th>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>14.0</td>\n",
" <td>8.0</td>\n",
" <td>0</td>\n",
" <td>330</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>0.50</td>\n",
" <td>93.704912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Almond Delight</th>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>200</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
" <td>8</td>\n",
" <td>-1</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>0.75</td>\n",
" <td>34.384843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Apple Cinnamon Cheerios</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>180</td>\n",
" <td>1.5</td>\n",
" <td>10.5</td>\n",
" <td>10</td>\n",
" <td>70</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.00</td>\n",
" <td>0.75</td>\n",
" <td>29.509541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Apple Jacks</th>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>125</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>14</td>\n",
" <td>30</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.00</td>\n",
" <td>1.00</td>\n",
" <td>33.174094</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Basic 4</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>130</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>210</td>\n",
" <td>2.0</td>\n",
" <td>18.0</td>\n",
" <td>8</td>\n",
" <td>100</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.33</td>\n",
" <td>0.75</td>\n",
" <td>37.038562</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bran Chex</th>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>200</td>\n",
" <td>4.0</td>\n",
" <td>15.0</td>\n",
" <td>6</td>\n",
" <td>125</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.00</td>\n",
" <td>0.67</td>\n",
" <td>49.120253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bran Flakes</th>\n",
" <td>P</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>210</td>\n",
" <td>5.0</td>\n",
" <td>13.0</td>\n",
" <td>5</td>\n",
" <td>190</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>0.67</td>\n",
" <td>53.313813</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mfr type calories protein fat sodium fiber \\\n",
"name \n",
"100% Bran N C 70 4 1 130 10.0 \n",
"100% Natural Bran Q C 120 3 5 15 2.0 \n",
"All-Bran K C 70 4 1 260 9.0 \n",
"All-Bran with Extra Fiber K C 50 4 0 140 14.0 \n",
"Almond Delight R C 110 2 2 200 1.0 \n",
"Apple Cinnamon Cheerios G C 110 2 2 180 1.5 \n",
"Apple Jacks K C 110 2 0 125 1.0 \n",
"Basic 4 G C 130 3 2 210 2.0 \n",
"Bran Chex R C 90 2 1 200 4.0 \n",
"Bran Flakes P C 90 3 0 210 5.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight \\\n",
"name \n",
"100% Bran 5.0 6 280 25 3 1.00 \n",
"100% Natural Bran 8.0 8 135 0 3 1.00 \n",
"All-Bran 7.0 5 320 25 3 1.00 \n",
"All-Bran with Extra Fiber 8.0 0 330 25 3 1.00 \n",
"Almond Delight 14.0 8 -1 25 3 1.00 \n",
"Apple Cinnamon Cheerios 10.5 10 70 25 1 1.00 \n",
"Apple Jacks 11.0 14 30 25 2 1.00 \n",
"Basic 4 18.0 8 100 25 3 1.33 \n",
"Bran Chex 15.0 6 125 25 1 1.00 \n",
"Bran Flakes 13.0 5 190 25 3 1.00 \n",
"\n",
" cups rating \n",
"name \n",
"100% Bran 0.33 68.402973 \n",
"100% Natural Bran 1.00 33.983679 \n",
"All-Bran 0.33 59.425505 \n",
"All-Bran with Extra Fiber 0.50 93.704912 \n",
"Almond Delight 0.75 34.384843 \n",
"Apple Cinnamon Cheerios 0.75 29.509541 \n",
"Apple Jacks 1.00 33.174094 \n",
"Basic 4 0.75 37.038562 \n",
"Bran Chex 0.67 49.120253 \n",
"Bran Flakes 0.67 53.313813 "
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[:10]"
]
},
{
"cell_type": "markdown",
"id": "obvious-divide",
"metadata": {},
"source": [
"# Filter rows and columns using iloc() function"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "quality-beaver",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100% Bran</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100% Natural Bran</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>120</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>All-Bran</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>All-Bran with Extra Fiber</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Almond Delight</td>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Apple Cinnamon Cheerios</td>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Apple Jacks</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Basic 4</td>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>130</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein\n",
"0 100% Bran N C 70 4\n",
"1 100% Natural Bran Q C 120 3\n",
"2 All-Bran K C 70 4\n",
"3 All-Bran with Extra Fiber K C 50 4\n",
"4 Almond Delight R C 110 2\n",
"5 Apple Cinnamon Cheerios G C 110 2\n",
"6 Apple Jacks K C 110 2\n",
"7 Basic 4 G C 130 3"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[ 0:8, 0:5 ]"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "precious-saturn",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>100% Bran</th>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100% Natural Bran</th>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>120</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All-Bran</th>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All-Bran with Extra Fiber</th>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Almond Delight</th>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Apple Cinnamon Cheerios</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Apple Jacks</th>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Basic 4</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>130</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mfr type calories protein fat\n",
"name \n",
"100% Bran N C 70 4 1\n",
"100% Natural Bran Q C 120 3 5\n",
"All-Bran K C 70 4 1\n",
"All-Bran with Extra Fiber K C 50 4 0\n",
"Almond Delight R C 110 2 2\n",
"Apple Cinnamon Cheerios G C 110 2 2\n",
"Apple Jacks K C 110 2 0\n",
"Basic 4 G C 130 3 2"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[ :8, 0:5 ]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "portuguese-exploration",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>220</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>12</td>\n",
" <td>35</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>0.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>290</td>\n",
" <td>2.0</td>\n",
" <td>17.0</td>\n",
" <td>1</td>\n",
" <td>105</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>1.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>210</td>\n",
" <td>0.0</td>\n",
" <td>13.0</td>\n",
" <td>9</td>\n",
" <td>45</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>0.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>140</td>\n",
" <td>2.0</td>\n",
" <td>13.0</td>\n",
" <td>7</td>\n",
" <td>105</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>180</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>13</td>\n",
" <td>55</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>280</td>\n",
" <td>0.0</td>\n",
" <td>22.0</td>\n",
" <td>3</td>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>290</td>\n",
" <td>1.0</td>\n",
" <td>21.0</td>\n",
" <td>2</td>\n",
" <td>35</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>90</td>\n",
" <td>1.0</td>\n",
" <td>13.0</td>\n",
" <td>12</td>\n",
" <td>20</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>180</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>13</td>\n",
" <td>65</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>140</td>\n",
" <td>4.0</td>\n",
" <td>10.0</td>\n",
" <td>7</td>\n",
" <td>160</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sodium fiber carbo sugars potass vitamins shelf weight cups\n",
"10 220 0.0 12.0 12 35 25 2 1.0 0.75\n",
"11 290 2.0 17.0 1 105 25 1 1.0 1.25\n",
"12 210 0.0 13.0 9 45 25 2 1.0 0.75\n",
"13 140 2.0 13.0 7 105 25 3 1.0 0.50\n",
"14 180 0.0 12.0 13 55 25 2 1.0 1.00\n",
"15 280 0.0 22.0 3 25 25 1 1.0 1.00\n",
"16 290 1.0 21.0 2 35 25 1 1.0 1.00\n",
"17 90 1.0 13.0 12 20 25 2 1.0 1.00\n",
"18 180 0.0 12.0 13 65 25 2 1.0 1.00\n",
"19 140 4.0 10.0 7 160 25 3 1.0 0.50"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[10:20, -10:-1]"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "caring-radius",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Total Raisin Bran</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>140</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>190</td>\n",
" <td>4.0</td>\n",
" <td>15.0</td>\n",
" <td>14</td>\n",
" <td>230</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Total Whole Grain</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>200</td>\n",
" <td>3.0</td>\n",
" <td>16.0</td>\n",
" <td>3</td>\n",
" <td>110</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Triples</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>250</td>\n",
" <td>0.0</td>\n",
" <td>21.0</td>\n",
" <td>3</td>\n",
" <td>60</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Trix</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>140</td>\n",
" <td>0.0</td>\n",
" <td>13.0</td>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wheat Chex</th>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>230</td>\n",
" <td>3.0</td>\n",
" <td>17.0</td>\n",
" <td>3</td>\n",
" <td>115</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wheaties</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>200</td>\n",
" <td>3.0</td>\n",
" <td>17.0</td>\n",
" <td>3</td>\n",
" <td>110</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wheaties Honey Gold</th>\n",
" <td>G</td>\n",
" <td>C</td>\n",
" <td>110</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>200</td>\n",
" <td>1.0</td>\n",
" <td>16.0</td>\n",
" <td>8</td>\n",
" <td>60</td>\n",
" <td>25</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mfr type calories protein fat sodium fiber carbo \\\n",
"name \n",
"Total Raisin Bran G C 140 3 1 190 4.0 15.0 \n",
"Total Whole Grain G C 100 3 1 200 3.0 16.0 \n",
"Triples G C 110 2 1 250 0.0 21.0 \n",
"Trix G C 110 1 1 140 0.0 13.0 \n",
"Wheat Chex R C 100 3 1 230 3.0 17.0 \n",
"Wheaties G C 100 3 1 200 3.0 17.0 \n",
"Wheaties Honey Gold G C 110 2 1 200 1.0 16.0 \n",
"\n",
" sugars potass vitamins \n",
"name \n",
"Total Raisin Bran 14 230 100 \n",
"Total Whole Grain 3 110 100 \n",
"Triples 3 60 25 \n",
"Trix 12 25 25 \n",
"Wheat Chex 3 115 25 \n",
"Wheaties 3 110 25 \n",
"Wheaties Honey Gold 8 60 25 "
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[70:, :11]"
]
},
{
"cell_type": "markdown",
"id": "swiss-turkish",
"metadata": {},
"source": [
"# Filter rows and columns using loc() method(slicing using labels)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "missing-college",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100% Bran</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100% Natural Bran</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>All-Bran</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>All-Bran with Extra Fiber</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Almond Delight</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Apple Cinnamon Cheerios</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Apple Jacks</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Basic 4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name\n",
"0 100% Bran\n",
"1 100% Natural Bran\n",
"2 All-Bran\n",
"3 All-Bran with Extra Fiber\n",
"4 Almond Delight\n",
"5 Apple Cinnamon Cheerios\n",
"6 Apple Jacks\n",
"7 Basic 4"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[0:7, ['name']]"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "atmospheric-dictionary",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100% Natural Bran</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Apple Cinnamon Cheerios</td>\n",
" <td>C</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name type\n",
"1 100% Natural Bran C\n",
"5 Apple Cinnamon Cheerios C"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[[1,5], ['name', 'type']]"
]
},
{
"cell_type": "markdown",
"id": "eastern-ending",
"metadata": {},
"source": [
"# Filter using relational conditions"
]
},
{
"cell_type": "markdown",
"id": "presidential-scottish",
"metadata": {},
"source": [
"# Method 1: Dataframe method using single condition"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "external-harvest",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Bran Chex</td>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>200</td>\n",
" <td>4.0</td>\n",
" <td>15.0</td>\n",
" <td>6</td>\n",
" <td>125</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>49.120253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Bran Flakes</td>\n",
" <td>P</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>210</td>\n",
" <td>5.0</td>\n",
" <td>13.0</td>\n",
" <td>5</td>\n",
" <td>190</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>53.313813</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Nutri-grain Wheat</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>170</td>\n",
" <td>3.0</td>\n",
" <td>18.0</td>\n",
" <td>2</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>59.642837</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>Raisin Squares</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>15.0</td>\n",
" <td>6</td>\n",
" <td>110</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.50</td>\n",
" <td>55.333142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>Shredded Wheat 'n'Bran</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4.0</td>\n",
" <td>19.0</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>74.472949</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>Shredded Wheat spoon size</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3.0</td>\n",
" <td>20.0</td>\n",
" <td>0</td>\n",
" <td>120</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>72.801787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Strawberry Fruit Wheats</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>5</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>59.363993</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber \\\n",
"8 Bran Chex R C 90 2 1 200 4.0 \n",
"9 Bran Flakes P C 90 3 0 210 5.0 \n",
"50 Nutri-grain Wheat K C 90 3 0 170 3.0 \n",
"60 Raisin Squares K C 90 2 0 0 2.0 \n",
"64 Shredded Wheat 'n'Bran N C 90 3 0 0 4.0 \n",
"65 Shredded Wheat spoon size N C 90 3 0 0 3.0 \n",
"68 Strawberry Fruit Wheats N C 90 2 0 15 3.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight cups rating \n",
"8 15.0 6 125 25 1 1.0 0.67 49.120253 \n",
"9 13.0 5 190 25 3 1.0 0.67 53.313813 \n",
"50 18.0 2 90 25 3 1.0 1.00 59.642837 \n",
"60 15.0 6 110 25 3 1.0 0.50 55.333142 \n",
"64 19.0 0 140 0 1 1.0 0.67 74.472949 \n",
"65 20.0 0 120 0 1 1.0 0.67 72.801787 \n",
"68 15.0 5 90 25 2 1.0 1.00 59.363993 "
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['calories']==90]"
]
},
{
"cell_type": "markdown",
"id": "otherwise-statistics",
"metadata": {},
"source": [
"# Display specific columns based on single condition"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "cosmetic-reflection",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>rating</th>\n",
" <th>calories</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Bran Chex</td>\n",
" <td>49.120253</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Bran Flakes</td>\n",
" <td>53.313813</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Nutri-grain Wheat</td>\n",
" <td>59.642837</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>Raisin Squares</td>\n",
" <td>55.333142</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>Shredded Wheat 'n'Bran</td>\n",
" <td>74.472949</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>Shredded Wheat spoon size</td>\n",
" <td>72.801787</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Strawberry Fruit Wheats</td>\n",
" <td>59.363993</td>\n",
" <td>90</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name rating calories\n",
"8 Bran Chex 49.120253 90\n",
"9 Bran Flakes 53.313813 90\n",
"50 Nutri-grain Wheat 59.642837 90\n",
"60 Raisin Squares 55.333142 90\n",
"64 Shredded Wheat 'n'Bran 74.472949 90\n",
"65 Shredded Wheat spoon size 72.801787 90\n",
"68 Strawberry Fruit Wheats 59.363993 90"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['name', 'rating', 'calories']][df['calories']==90]"
]
},
{
"cell_type": "markdown",
"id": "ready-neighbor",
"metadata": {},
"source": [
"# Dataframe method using multiple conditions"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "automated-sound",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>rating</th>\n",
" <th>calories</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Nutri-grain Wheat</td>\n",
" <td>59.642837</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>Raisin Squares</td>\n",
" <td>55.333142</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>Shredded Wheat spoon size</td>\n",
" <td>72.801787</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Strawberry Fruit Wheats</td>\n",
" <td>59.363993</td>\n",
" <td>90</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name rating calories\n",
"50 Nutri-grain Wheat 59.642837 90\n",
"60 Raisin Squares 55.333142 90\n",
"65 Shredded Wheat spoon size 72.801787 90\n",
"68 Strawberry Fruit Wheats 59.363993 90"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['name', 'rating', 'calories']][(df['calories']==90) & (df['fiber']<4)]"
]
},
{
"cell_type": "markdown",
"id": "lucky-technical",
"metadata": {},
"source": [
"# Method 2: Using loc() function"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "annual-while",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Bran Chex</td>\n",
" <td>R</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>200</td>\n",
" <td>4.0</td>\n",
" <td>15.0</td>\n",
" <td>6</td>\n",
" <td>125</td>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>49.120253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Bran Flakes</td>\n",
" <td>P</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>210</td>\n",
" <td>5.0</td>\n",
" <td>13.0</td>\n",
" <td>5</td>\n",
" <td>190</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>53.313813</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Nutri-grain Wheat</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>170</td>\n",
" <td>3.0</td>\n",
" <td>18.0</td>\n",
" <td>2</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>59.642837</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>Raisin Squares</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>15.0</td>\n",
" <td>6</td>\n",
" <td>110</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.50</td>\n",
" <td>55.333142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>Shredded Wheat 'n'Bran</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4.0</td>\n",
" <td>19.0</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>74.472949</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>Shredded Wheat spoon size</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3.0</td>\n",
" <td>20.0</td>\n",
" <td>0</td>\n",
" <td>120</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>72.801787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Strawberry Fruit Wheats</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>5</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>59.363993</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber \\\n",
"8 Bran Chex R C 90 2 1 200 4.0 \n",
"9 Bran Flakes P C 90 3 0 210 5.0 \n",
"50 Nutri-grain Wheat K C 90 3 0 170 3.0 \n",
"60 Raisin Squares K C 90 2 0 0 2.0 \n",
"64 Shredded Wheat 'n'Bran N C 90 3 0 0 4.0 \n",
"65 Shredded Wheat spoon size N C 90 3 0 0 3.0 \n",
"68 Strawberry Fruit Wheats N C 90 2 0 15 3.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight cups rating \n",
"8 15.0 6 125 25 1 1.0 0.67 49.120253 \n",
"9 13.0 5 190 25 3 1.0 0.67 53.313813 \n",
"50 18.0 2 90 25 3 1.0 1.00 59.642837 \n",
"60 15.0 6 110 25 3 1.0 0.50 55.333142 \n",
"64 19.0 0 140 0 1 1.0 0.67 74.472949 \n",
"65 20.0 0 120 0 1 1.0 0.67 72.801787 \n",
"68 15.0 5 90 25 2 1.0 1.00 59.363993 "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[df['calories']==90]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "faced-haven",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Nutri-grain Wheat</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>170</td>\n",
" <td>3.0</td>\n",
" <td>18.0</td>\n",
" <td>2</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>59.642837</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>Raisin Squares</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2.0</td>\n",
" <td>15.0</td>\n",
" <td>6</td>\n",
" <td>110</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.50</td>\n",
" <td>55.333142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>Shredded Wheat spoon size</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3.0</td>\n",
" <td>20.0</td>\n",
" <td>0</td>\n",
" <td>120</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.67</td>\n",
" <td>72.801787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Strawberry Fruit Wheats</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>5</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>59.363993</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber \\\n",
"50 Nutri-grain Wheat K C 90 3 0 170 3.0 \n",
"60 Raisin Squares K C 90 2 0 0 2.0 \n",
"65 Shredded Wheat spoon size N C 90 3 0 0 3.0 \n",
"68 Strawberry Fruit Wheats N C 90 2 0 15 3.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight cups rating \n",
"50 18.0 2 90 25 3 1.0 1.00 59.642837 \n",
"60 15.0 6 110 25 3 1.0 0.50 55.333142 \n",
"65 20.0 0 120 0 1 1.0 0.67 72.801787 \n",
"68 15.0 5 90 25 2 1.0 1.00 59.363993 "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[(df['calories']==90) & (df['fiber']<4)]"
]
},
{
"cell_type": "markdown",
"id": "herbal-sunglasses",
"metadata": {},
"source": [
"# Displaying specific columns using loc() function"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "based-fountain",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>fiber</th>\n",
" <th>calories</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Nutri-grain Wheat</td>\n",
" <td>3.0</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>Raisin Squares</td>\n",
" <td>2.0</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>Shredded Wheat spoon size</td>\n",
" <td>3.0</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Strawberry Fruit Wheats</td>\n",
" <td>3.0</td>\n",
" <td>90</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name fiber calories\n",
"50 Nutri-grain Wheat 3.0 90\n",
"60 Raisin Squares 2.0 90\n",
"65 Shredded Wheat spoon size 3.0 90\n",
"68 Strawberry Fruit Wheats 3.0 90"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[(df['calories']==90) & (df['fiber']<4),['name', 'fiber', 'calories']]"
]
},
{
"cell_type": "markdown",
"id": "communist-butler",
"metadata": {},
"source": [
"# Selecting rows using query function"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "norwegian-nicaragua",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>All-Bran with Extra Fiber</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>14.0</td>\n",
" <td>8.0</td>\n",
" <td>0</td>\n",
" <td>330</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.5</td>\n",
" <td>93.704912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Puffed Rice</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>13.0</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.5</td>\n",
" <td>1.0</td>\n",
" <td>60.756112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Puffed Wheat</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.5</td>\n",
" <td>1.0</td>\n",
" <td>63.005645</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber \\\n",
"3 All-Bran with Extra Fiber K C 50 4 0 140 14.0 \n",
"54 Puffed Rice Q C 50 1 0 0 0.0 \n",
"55 Puffed Wheat Q C 50 2 0 0 1.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight cups rating \n",
"3 8.0 0 330 25 3 1.0 0.5 93.704912 \n",
"54 13.0 0 15 0 3 0.5 1.0 60.756112 \n",
"55 10.0 0 50 0 3 0.5 1.0 63.005645 "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('calories<60')"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "occupied-syracuse",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>All-Bran</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>260</td>\n",
" <td>9.0</td>\n",
" <td>7.0</td>\n",
" <td>5</td>\n",
" <td>320</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.33</td>\n",
" <td>59.425505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>All-Bran with Extra Fiber</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>14.0</td>\n",
" <td>8.0</td>\n",
" <td>0</td>\n",
" <td>330</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.50</td>\n",
" <td>93.704912</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber \\\n",
"2 All-Bran K C 70 4 1 260 9.0 \n",
"3 All-Bran with Extra Fiber K C 50 4 0 140 14.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight cups rating \n",
"2 7.0 5 320 25 3 1.0 0.33 59.425505 \n",
"3 8.0 0 330 25 3 1.0 0.50 93.704912 "
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('calories<80 & mfr==\"K\" ')"
]
},
{
"cell_type": "markdown",
"id": "simple-poetry",
"metadata": {},
"source": [
"# Filter single item from the dataset "
]
},
{
"cell_type": "markdown",
"id": "quantitative-crime",
"metadata": {},
"source": [
"# Method 1: Using iat method"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "surrounded-climate",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Apple Cinnamon Cheerios'"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iat[5, 0]"
]
},
{
"cell_type": "markdown",
"id": "binding-shirt",
"metadata": {},
"source": [
"# Method 2: Using at method"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "deluxe-administration",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Apple Cinnamon Cheerios'"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.at[5, 'name']"
]
},
{
"cell_type": "markdown",
"id": "severe-shift",
"metadata": {},
"source": [
"# Select dataframe rows based on a list membership"
]
},
{
"cell_type": "markdown",
"id": "psychological-slovakia",
"metadata": {},
"source": [
"# Method 1: Dataframe Method"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "applied-assembly",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" <tr>\n",
" <th>name</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>100% Bran</th>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>130</td>\n",
" <td>10.0</td>\n",
" <td>5.0</td>\n",
" <td>6</td>\n",
" <td>280</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.33</td>\n",
" <td>68.402973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All-Bran</th>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>70</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>260</td>\n",
" <td>9.0</td>\n",
" <td>7.0</td>\n",
" <td>5</td>\n",
" <td>320</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.33</td>\n",
" <td>59.425505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All-Bran with Extra Fiber</th>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>14.0</td>\n",
" <td>8.0</td>\n",
" <td>0</td>\n",
" <td>330</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.50</td>\n",
" <td>93.704912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Puffed Rice</th>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>13.0</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.5</td>\n",
" <td>1.00</td>\n",
" <td>60.756112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Puffed Wheat</th>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.5</td>\n",
" <td>1.00</td>\n",
" <td>63.005645</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mfr type calories protein fat sodium fiber \\\n",
"name \n",
"100% Bran N C 70 4 1 130 10.0 \n",
"All-Bran K C 70 4 1 260 9.0 \n",
"All-Bran with Extra Fiber K C 50 4 0 140 14.0 \n",
"Puffed Rice Q C 50 1 0 0 0.0 \n",
"Puffed Wheat Q C 50 2 0 0 1.0 \n",
"\n",
" carbo sugars potass vitamins shelf weight \\\n",
"name \n",
"100% Bran 5.0 6 280 25 3 1.0 \n",
"All-Bran 7.0 5 320 25 3 1.0 \n",
"All-Bran with Extra Fiber 8.0 0 330 25 3 1.0 \n",
"Puffed Rice 13.0 0 15 0 3 0.5 \n",
"Puffed Wheat 10.0 0 50 0 3 0.5 \n",
"\n",
" cups rating \n",
"name \n",
"100% Bran 0.33 68.402973 \n",
"All-Bran 0.33 59.425505 \n",
"All-Bran with Extra Fiber 0.50 93.704912 \n",
"Puffed Rice 1.00 60.756112 \n",
"Puffed Wheat 1.00 63.005645 "
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['calories'].isin([50,70])]"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "peaceful-pavilion",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Puffed Rice</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>13.0</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.5</td>\n",
" <td>1.0</td>\n",
" <td>60.756112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Puffed Wheat</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.5</td>\n",
" <td>1.0</td>\n",
" <td>63.005645</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber carbo \\\n",
"54 Puffed Rice Q C 50 1 0 0 0.0 13.0 \n",
"55 Puffed Wheat Q C 50 2 0 0 1.0 10.0 \n",
"\n",
" sugars potass vitamins shelf weight cups rating \n",
"54 0 15 0 3 0.5 1.0 60.756112 \n",
"55 0 50 0 3 0.5 1.0 63.005645 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['calories'].isin([50,70]) & df['fiber'].isin([1, 0])]"
]
},
{
"cell_type": "markdown",
"id": "italian-brighton",
"metadata": {},
"source": [
"# Method 2: Using loc() function"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "improving-innocent",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Puffed Rice</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>13.0</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.5</td>\n",
" <td>1.0</td>\n",
" <td>60.756112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Puffed Wheat</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.5</td>\n",
" <td>1.0</td>\n",
" <td>63.005645</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber carbo \\\n",
"54 Puffed Rice Q C 50 1 0 0 0.0 13.0 \n",
"55 Puffed Wheat Q C 50 2 0 0 1.0 10.0 \n",
"\n",
" sugars potass vitamins shelf weight cups rating \n",
"54 0 15 0 3 0.5 1.0 60.756112 \n",
"55 0 50 0 3 0.5 1.0 63.005645 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[df['calories'].isin([50,70]) & df['fiber'].isin([1, 0])]"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "important-controversy",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>fiber</th>\n",
" <th>calories</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Puffed Rice</td>\n",
" <td>0.0</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Puffed Wheat</td>\n",
" <td>1.0</td>\n",
" <td>50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name fiber calories\n",
"54 Puffed Rice 0.0 50\n",
"55 Puffed Wheat 1.0 50"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[(df['calories'].isin([50,70]) & df['fiber'].isin([1, 0])), ['name', 'fiber', 'calories']]"
]
},
{
"cell_type": "markdown",
"id": "handy-bobby",
"metadata": {},
"source": [
"# Filter dataframe rows using regular expression (Regex)"
]
},
{
"cell_type": "markdown",
"id": "warming-council",
"metadata": {},
"source": [
"# Method 1: Dataframe method"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "essential-chemistry",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Nutri-grain Wheat</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>170</td>\n",
" <td>3.0</td>\n",
" <td>18.0</td>\n",
" <td>2</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>1.0</td>\n",
" <td>59.642837</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Puffed Wheat</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.50</td>\n",
" <td>1.0</td>\n",
" <td>63.005645</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>Shredded Wheat</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>80</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3.0</td>\n",
" <td>16.0</td>\n",
" <td>0</td>\n",
" <td>95</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.83</td>\n",
" <td>1.0</td>\n",
" <td>68.235885</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber carbo \\\n",
"50 Nutri-grain Wheat K C 90 3 0 170 3.0 18.0 \n",
"55 Puffed Wheat Q C 50 2 0 0 1.0 10.0 \n",
"63 Shredded Wheat N C 80 2 0 0 3.0 16.0 \n",
"\n",
" sugars potass vitamins shelf weight cups rating \n",
"50 2 90 25 3 1.00 1.0 59.642837 \n",
"55 0 50 0 3 0.50 1.0 63.005645 \n",
"63 0 95 0 1 0.83 1.0 68.235885 "
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['name'].str.contains('Wheat$')]"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "headed-instrumentation",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>Post Nat. Raisin Bran</td>\n",
" <td>C</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>Product 19</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Puffed Rice</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Puffed Wheat</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name type calories\n",
"52 Post Nat. Raisin Bran C 120\n",
"53 Product 19 C 100\n",
"54 Puffed Rice C 50\n",
"55 Puffed Wheat C 50"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['name', 'type', 'calories']][df['name'].str.startswith('P')]"
]
},
{
"cell_type": "markdown",
"id": "constitutional-equipment",
"metadata": {},
"source": [
"# Method 2: Using loc() function"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "fatal-seller",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>mfr</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" <th>protein</th>\n",
" <th>fat</th>\n",
" <th>sodium</th>\n",
" <th>fiber</th>\n",
" <th>carbo</th>\n",
" <th>sugars</th>\n",
" <th>potass</th>\n",
" <th>vitamins</th>\n",
" <th>shelf</th>\n",
" <th>weight</th>\n",
" <th>cups</th>\n",
" <th>rating</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Nutri-grain Wheat</td>\n",
" <td>K</td>\n",
" <td>C</td>\n",
" <td>90</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>170</td>\n",
" <td>3.0</td>\n",
" <td>18.0</td>\n",
" <td>2</td>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>1.0</td>\n",
" <td>59.642837</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Puffed Wheat</td>\n",
" <td>Q</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.50</td>\n",
" <td>1.0</td>\n",
" <td>63.005645</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>Shredded Wheat</td>\n",
" <td>N</td>\n",
" <td>C</td>\n",
" <td>80</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3.0</td>\n",
" <td>16.0</td>\n",
" <td>0</td>\n",
" <td>95</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.83</td>\n",
" <td>1.0</td>\n",
" <td>68.235885</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name mfr type calories protein fat sodium fiber carbo \\\n",
"50 Nutri-grain Wheat K C 90 3 0 170 3.0 18.0 \n",
"55 Puffed Wheat Q C 50 2 0 0 1.0 10.0 \n",
"63 Shredded Wheat N C 80 2 0 0 3.0 16.0 \n",
"\n",
" sugars potass vitamins shelf weight cups rating \n",
"50 2 90 25 3 1.00 1.0 59.642837 \n",
"55 0 50 0 3 0.50 1.0 63.005645 \n",
"63 0 95 0 1 0.83 1.0 68.235885 "
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[df['name'].str.contains('Wheat$')]"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "missing-teaching",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>type</th>\n",
" <th>calories</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>Post Nat. Raisin Bran</td>\n",
" <td>C</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>Product 19</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Puffed Rice</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Puffed Wheat</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name type calories\n",
"52 Post Nat. Raisin Bran C 120\n",
"53 Product 19 C 100\n",
"54 Puffed Rice C 50\n",
"55 Puffed Wheat C 50"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[df['name'].str.startswith('P'), ['name', 'type', 'calories']]"
]
},
{
"cell_type": "markdown",
"id": "going-bikini",
"metadata": {},
"source": [
"# Filter data on the basis of not null or null/nan values"
]
},
{
"cell_type": "markdown",
"id": "behind-tower",
"metadata": {},
"source": [
"# Load another dataset containing null values"
]
},
{
"cell_type": "code",
"execution_count": 116,
"id": "greatest-virgin",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>color</th>\n",
" <th>director_name</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>duration</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>actor_2_name</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>gross</th>\n",
" <th>genres</th>\n",
" <th>...</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>language</th>\n",
" <th>country</th>\n",
" <th>content_rating</th>\n",
" <th>budget</th>\n",
" <th>title_year</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>aspect_ratio</th>\n",
" <th>movie_facebook_likes</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Color</td>\n",
" <td>James Cameron</td>\n",
" <td>723.0</td>\n",
" <td>178.0</td>\n",
" <td>0.0</td>\n",
" <td>855.0</td>\n",
" <td>Joel David Moore</td>\n",
" <td>1000.0</td>\n",
" <td>760505847.0</td>\n",
" <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
" <td>...</td>\n",
" <td>3054.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>237000000.0</td>\n",
" <td>2009.0</td>\n",
" <td>936.0</td>\n",
" <td>7.9</td>\n",
" <td>1.78</td>\n",
" <td>33000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Color</td>\n",
" <td>Gore Verbinski</td>\n",
" <td>302.0</td>\n",
" <td>169.0</td>\n",
" <td>563.0</td>\n",
" <td>1000.0</td>\n",
" <td>Orlando Bloom</td>\n",
" <td>40000.0</td>\n",
" <td>309404152.0</td>\n",
" <td>Action|Adventure|Fantasy</td>\n",
" <td>...</td>\n",
" <td>1238.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>300000000.0</td>\n",
" <td>2007.0</td>\n",
" <td>5000.0</td>\n",
" <td>7.1</td>\n",
" <td>2.35</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Color</td>\n",
" <td>Sam Mendes</td>\n",
" <td>602.0</td>\n",
" <td>148.0</td>\n",
" <td>0.0</td>\n",
" <td>161.0</td>\n",
" <td>Rory Kinnear</td>\n",
" <td>11000.0</td>\n",
" <td>200074175.0</td>\n",
" <td>Action|Adventure|Thriller</td>\n",
" <td>...</td>\n",
" <td>994.0</td>\n",
" <td>English</td>\n",
" <td>UK</td>\n",
" <td>PG-13</td>\n",
" <td>245000000.0</td>\n",
" <td>2015.0</td>\n",
" <td>393.0</td>\n",
" <td>6.8</td>\n",
" <td>2.35</td>\n",
" <td>85000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Color</td>\n",
" <td>Christopher Nolan</td>\n",
" <td>813.0</td>\n",
" <td>164.0</td>\n",
" <td>22000.0</td>\n",
" <td>23000.0</td>\n",
" <td>Christian Bale</td>\n",
" <td>27000.0</td>\n",
" <td>448130642.0</td>\n",
" <td>Action|Thriller</td>\n",
" <td>...</td>\n",
" <td>2701.0</td>\n",
" <td>English</td>\n",
" <td>USA</td>\n",
" <td>PG-13</td>\n",
" <td>250000000.0</td>\n",
" <td>2012.0</td>\n",
" <td>23000.0</td>\n",
" <td>8.5</td>\n",
" <td>2.35</td>\n",
" <td>164000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>Doug Walker</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>131.0</td>\n",
" <td>NaN</td>\n",
" <td>Rob Walker</td>\n",
" <td>131.0</td>\n",
" <td>NaN</td>\n",
" <td>Documentary</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>7.1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" color director_name num_critic_for_reviews duration \\\n",
"0 Color James Cameron 723.0 178.0 \n",
"1 Color Gore Verbinski 302.0 169.0 \n",
"2 Color Sam Mendes 602.0 148.0 \n",
"3 Color Christopher Nolan 813.0 164.0 \n",
"4 NaN Doug Walker NaN NaN \n",
"\n",
" director_facebook_likes actor_3_facebook_likes actor_2_name \\\n",
"0 0.0 855.0 Joel David Moore \n",
"1 563.0 1000.0 Orlando Bloom \n",
"2 0.0 161.0 Rory Kinnear \n",
"3 22000.0 23000.0 Christian Bale \n",
"4 131.0 NaN Rob Walker \n",
"\n",
" actor_1_facebook_likes gross genres ... \\\n",
"0 1000.0 760505847.0 Action|Adventure|Fantasy|Sci-Fi ... \n",
"1 40000.0 309404152.0 Action|Adventure|Fantasy ... \n",
"2 11000.0 200074175.0 Action|Adventure|Thriller ... \n",
"3 27000.0 448130642.0 Action|Thriller ... \n",
"4 131.0 NaN Documentary ... \n",
"\n",
" num_user_for_reviews language country content_rating budget \\\n",
"0 3054.0 English USA PG-13 237000000.0 \n",
"1 1238.0 English USA PG-13 300000000.0 \n",
"2 994.0 English UK PG-13 245000000.0 \n",
"3 2701.0 English USA PG-13 250000000.0 \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" title_year actor_2_facebook_likes imdb_score aspect_ratio \\\n",
"0 2009.0 936.0 7.9 1.78 \n",
"1 2007.0 5000.0 7.1 2.35 \n",
"2 2015.0 393.0 6.8 2.35 \n",
"3 2012.0 23000.0 8.5 2.35 \n",
"4 NaN 12.0 7.1 NaN \n",
"\n",
" movie_facebook_likes \n",
"0 33000 \n",
"1 0 \n",
"2 85000 \n",
"3 164000 \n",
"4 0 \n",
"\n",
"[5 rows x 28 columns]"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 = pd.read_csv('movie_metadata.csv')\n",
"df1.head()"
]
},
{
"cell_type": "markdown",
"id": "bright-korea",
"metadata": {},
"source": [
"# Find null values of complete dataset"
]
},
{
"cell_type": "code",
"execution_count": 120,
"id": "secure-secret",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>color</th>\n",
" <th>director_name</th>\n",
" <th>num_critic_for_reviews</th>\n",
" <th>duration</th>\n",
" <th>director_facebook_likes</th>\n",
" <th>actor_3_facebook_likes</th>\n",
" <th>actor_2_name</th>\n",
" <th>actor_1_facebook_likes</th>\n",
" <th>gross</th>\n",
" <th>genres</th>\n",
" <th>...</th>\n",
" <th>num_user_for_reviews</th>\n",
" <th>language</th>\n",
" <th>country</th>\n",
" <th>content_rating</th>\n",
" <th>budget</th>\n",
" <th>title_year</th>\n",
" <th>actor_2_facebook_likes</th>\n",
" <th>imdb_score</th>\n",
" <th>aspect_ratio</th>\n",
" <th>movie_facebook_likes</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5038</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5039</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5040</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5041</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5042</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5043 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" color director_name num_critic_for_reviews duration \\\n",
"0 False False False False \n",
"1 False False False False \n",
"2 False False False False \n",
"3 False False False False \n",
"4 True False True True \n",
"... ... ... ... ... \n",
"5038 False False False False \n",
"5039 False True False False \n",
"5040 False False False False \n",
"5041 False False False False \n",
"5042 False False False False \n",
"\n",
" director_facebook_likes actor_3_facebook_likes actor_2_name \\\n",
"0 False False False \n",
"1 False False False \n",
"2 False False False \n",
"3 False False False \n",
"4 False True False \n",
"... ... ... ... \n",
"5038 False False False \n",
"5039 True False False \n",
"5040 False False False \n",
"5041 False False False \n",
"5042 False False False \n",
"\n",
" actor_1_facebook_likes gross genres ... num_user_for_reviews \\\n",
"0 False False False ... False \n",
"1 False False False ... False \n",
"2 False False False ... False \n",
"3 False False False ... False \n",
"4 False True False ... True \n",
"... ... ... ... ... ... \n",
"5038 False True False ... False \n",
"5039 False True False ... False \n",
"5040 False True False ... False \n",
"5041 False False False ... False \n",
"5042 False False False ... False \n",
"\n",
" language country content_rating budget title_year \\\n",
"0 False False False False False \n",
"1 False False False False False \n",
"2 False False False False False \n",
"3 False False False False False \n",
"4 True True True True True \n",
"... ... ... ... ... ... \n",
"5038 False False True True False \n",
"5039 False False False True True \n",
"5040 False False True False False \n",
"5041 False False False True False \n",
"5042 False False False False False \n",
"\n",
" actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes \n",
"0 False False False False \n",
"1 False False False False \n",
"2 False False False False \n",
"3 False False False False \n",
"4 False False True False \n",
"... ... ... ... ... \n",
"5038 False False True False \n",
"5039 False False False False \n",
"5040 False False True False \n",
"5041 False False False False \n",
"5042 False False False False \n",
"\n",
"[5043 rows x 28 columns]"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.isnull()\n",
"# this finds the null values in the dataset and returns a boolean dataframe"
]
},
{
"cell_type": "markdown",
"id": "turkish-snake",
"metadata": {},
"source": [
"# Find null values of specific columns"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "vulnerable-rainbow",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"0 False\n",
"1 False\n",
"2 False\n",
"3 False\n",
"4 True\n",
" ... \n",
"5038 False\n",
"5039 False\n",
"5040 False\n",
"5041 False\n",
"5042 False\n",
"Name: num_critic_for_reviews, Length: 5043, dtype: bool"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1['num_critic_for_reviews'].isnull()"
]
},
{
"cell_type": "markdown",
"id": "impressive-stage",
"metadata": {},
"source": [
"# Count the total number of null values in each column"
]
},
{
"cell_type": "code",
"execution_count": 100,
"id": "fabulous-concept",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"color 19\n",
"director_name 104\n",
"num_critic_for_reviews 50\n",
"duration 15\n",
"director_facebook_likes 104\n",
"actor_3_facebook_likes 23\n",
"actor_2_name 13\n",
"actor_1_facebook_likes 7\n",
"gross 884\n",
"genres 0\n",
"actor_1_name 7\n",
"movie_title 0\n",
"num_voted_users 0\n",
"cast_total_facebook_likes 0\n",
"actor_3_name 23\n",
"facenumber_in_poster 13\n",
"plot_keywords 153\n",
"movie_imdb_link 0\n",
"num_user_for_reviews 21\n",
"language 12\n",
"country 5\n",
"content_rating 303\n",
"budget 492\n",
"title_year 108\n",
"actor_2_facebook_likes 13\n",
"imdb_score 0\n",
"aspect_ratio 329\n",
"movie_facebook_likes 0\n",
"dtype: int64"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.isna().sum()"
]
},
{
"cell_type": "markdown",
"id": "municipal-craps",
"metadata": {},
"source": [
"# Delete all the null values from the dataset"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "recreational-chase",
"metadata": {},
"outputs": [],
"source": [
"df1.dropna(inplace=True)"
]
},
{
"cell_type": "markdown",
"id": "bright-helping",
"metadata": {},
"source": [
"# Validate if all the null values are deleted"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "surprised-central",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"color 0\n",
"director_name 0\n",
"num_critic_for_reviews 0\n",
"duration 0\n",
"director_facebook_likes 0\n",
"actor_3_facebook_likes 0\n",
"actor_2_name 0\n",
"actor_1_facebook_likes 0\n",
"gross 0\n",
"genres 0\n",
"actor_1_name 0\n",
"movie_title 0\n",
"num_voted_users 0\n",
"cast_total_facebook_likes 0\n",
"actor_3_name 0\n",
"facenumber_in_poster 0\n",
"plot_keywords 0\n",
"movie_imdb_link 0\n",
"num_user_for_reviews 0\n",
"language 0\n",
"country 0\n",
"content_rating 0\n",
"budget 0\n",
"title_year 0\n",
"actor_2_facebook_likes 0\n",
"imdb_score 0\n",
"aspect_ratio 0\n",
"movie_facebook_likes 0\n",
"dtype: int64"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.isna().sum()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment