-
-
Save jbencook/2dca3df136f1f296b56751ae1bfd0e28 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>region</th>\n", | |
" <th>revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1999-01-06</td>\n", | |
" <td>APAC</td>\n", | |
" <td>135</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>1999-01-18</td>\n", | |
" <td>APAC</td>\n", | |
" <td>147</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>1999-01-24</td>\n", | |
" <td>APAC</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>1999-03-20</td>\n", | |
" <td>APAC</td>\n", | |
" <td>108</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date region revenue\n", | |
"3 1999-01-06 APAC 135\n", | |
"9 1999-01-18 APAC 147\n", | |
"11 1999-01-24 APAC 100\n", | |
"24 1999-03-20 APAC 108" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"df = pd.read_csv(\"https://jbencook.com/data/dummy-sales.csv\")\n", | |
"df.query(\"region == 'APAC' and revenue < 300\")\n", | |
"\n", | |
"# Expected result\n", | |
"# date region revenue\n", | |
"# 3 1999-01-06 APAC 135\n", | |
"# 9 1999-01-18 APAC 147\n", | |
"# 11 1999-01-24 APAC 100\n", | |
"# 24 1999-03-20 APAC 108" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>region</th>\n", | |
" <th>revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999-01-02</td>\n", | |
" <td>APAC</td>\n", | |
" <td>928</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1999-01-16</td>\n", | |
" <td>APAC</td>\n", | |
" <td>970</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>1999-02-16</td>\n", | |
" <td>EMEA</td>\n", | |
" <td>918</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>1999-03-23</td>\n", | |
" <td>AMER</td>\n", | |
" <td>972</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>1999-03-24</td>\n", | |
" <td>AMER</td>\n", | |
" <td>956</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>1999-03-24</td>\n", | |
" <td>EMEA</td>\n", | |
" <td>954</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>1999-03-26</td>\n", | |
" <td>AMER</td>\n", | |
" <td>994</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date region revenue\n", | |
"0 1999-01-02 APAC 928\n", | |
"8 1999-01-16 APAC 970\n", | |
"19 1999-02-16 EMEA 918\n", | |
"25 1999-03-23 AMER 972\n", | |
"26 1999-03-24 AMER 956\n", | |
"27 1999-03-24 EMEA 954\n", | |
"29 1999-03-26 AMER 994" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"avg_revenue = df.revenue.mean()\n", | |
"std_revenue = df.revenue.std()\n", | |
"df.query(\"revenue > @avg_revenue + @std_revenue\")\n", | |
"\n", | |
"# Expected result\n", | |
"# date region revenue\n", | |
"# 0 1999-01-02 APAC 928\n", | |
"# 8 1999-01-16 APAC 970\n", | |
"# 19 1999-02-16 EMEA 918\n", | |
"# 25 1999-03-23 AMER 972\n", | |
"# 26 1999-03-24 AMER 956\n", | |
"# 27 1999-03-24 EMEA 954\n", | |
"# 29 1999-03-26 AMER 994" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>region</th>\n", | |
" <th>revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999-01-02</td>\n", | |
" <td>APAC</td>\n", | |
" <td>928</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1999-01-16</td>\n", | |
" <td>APAC</td>\n", | |
" <td>970</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>1999-02-16</td>\n", | |
" <td>EMEA</td>\n", | |
" <td>918</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>1999-03-23</td>\n", | |
" <td>AMER</td>\n", | |
" <td>972</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td>1999-03-24</td>\n", | |
" <td>AMER</td>\n", | |
" <td>956</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td>1999-03-24</td>\n", | |
" <td>EMEA</td>\n", | |
" <td>954</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td>1999-03-26</td>\n", | |
" <td>AMER</td>\n", | |
" <td>994</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date region revenue\n", | |
"0 1999-01-02 APAC 928\n", | |
"8 1999-01-16 APAC 970\n", | |
"19 1999-02-16 EMEA 918\n", | |
"25 1999-03-23 AMER 972\n", | |
"26 1999-03-24 AMER 956\n", | |
"27 1999-03-24 EMEA 954\n", | |
"29 1999-03-26 AMER 994" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.query(\"revenue > revenue.mean() + revenue.std()\")\n", | |
"\n", | |
"# Expected result\n", | |
"# date region revenue\n", | |
"# 0 1999-01-02 APAC 928\n", | |
"# 8 1999-01-16 APAC 970\n", | |
"# 19 1999-02-16 EMEA 918\n", | |
"# 25 1999-03-23 AMER 972\n", | |
"# 26 1999-03-24 AMER 956\n", | |
"# 27 1999-03-24 EMEA 954\n", | |
"# 29 1999-03-26 AMER 994" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>region</th>\n", | |
" <th>revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999-01-02</td>\n", | |
" <td>APAC</td>\n", | |
" <td>928</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1999-01-03</td>\n", | |
" <td>AMER</td>\n", | |
" <td>526</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1999-01-04</td>\n", | |
" <td>EMEA</td>\n", | |
" <td>497</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date region revenue\n", | |
"0 1999-01-02 APAC 928\n", | |
"1 1999-01-03 AMER 526\n", | |
"2 1999-01-04 EMEA 497" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"valid_dates = [\"1999-01-02\", \"1999-01-03\", \"1999-01-04\"]\n", | |
"df.query(\"date in @valid_dates\")\n", | |
"\n", | |
"# Expected result\n", | |
"# date region revenue\n", | |
"# 0 1999-01-02 APAC 928\n", | |
"# 1 1999-01-03 AMER 526\n", | |
"# 2 1999-01-04 EMEA 497" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>region</th>\n", | |
" <th>revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1999-01-06</td>\n", | |
" <td>APAC</td>\n", | |
" <td>135</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>1999-01-18</td>\n", | |
" <td>APAC</td>\n", | |
" <td>147</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>1999-01-24</td>\n", | |
" <td>APAC</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td>1999-03-20</td>\n", | |
" <td>APAC</td>\n", | |
" <td>108</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date region revenue\n", | |
"3 1999-01-06 APAC 135\n", | |
"9 1999-01-18 APAC 147\n", | |
"11 1999-01-24 APAC 100\n", | |
"24 1999-03-20 APAC 108" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df[(df.region == \"APAC\") & (df.revenue < 300)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment