Skip to content

Instantly share code, notes, and snippets.

@dthboyd
Forked from heyrudder/caseWhen2.ipynb
Created August 4, 2018 01:31
Show Gist options
  • Save dthboyd/837497be16a725a1ab93e3f0acf4bec6 to your computer and use it in GitHub Desktop.
Save dthboyd/837497be16a725a1ab93e3f0acf4bec6 to your computer and use it in GitHub Desktop.
How to do an IF / CASE statement in Pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"i = 0\n",
"while i < 10:\n",
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n",
" i+=1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['colAB'] = 'A'\n",
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n",
"df.loc[df.col8 > 75, 'colAB'] = None"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['caseWhen'] = 'X'\n",
"#set the column caseWhen to colAB when col1 > 75 and col2 < 75\n",
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n",
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n",
"#set the column caseWhen to 'C' when col6 > 20 and colAB = 'A'\n",
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n",
"#the '~' returns the opposite, e.g. where col4 is not > 70\n",
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n",
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#this block creates new boolean columns indicating that the value is greater than the mean of the column\n",
"#the new column is called 'oldCol_GTxx' where xx is the mean of the column\n",
"for col in df.columns[:5]:\n",
" colMean = df[col].mean()\n",
" newCol = col + '_GT' + str(int(colMean))\n",
" df[newCol] = df[col] > colMean"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"C 472\n",
"E 228\n",
"B 124\n",
"A 84\n",
"X 83\n",
"Name: caseWhen, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.caseWhen.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col5</th>\n",
" <th>col6</th>\n",
" <th>col7</th>\n",
" <th>col8</th>\n",
" <th>col9</th>\n",
" <th>colAB</th>\n",
" <th>caseWhen</th>\n",
" <th>col0_GT50</th>\n",
" <th>col1_GT48</th>\n",
" <th>col2_GT51</th>\n",
" <th>col3_GT49</th>\n",
" <th>col4_GT49</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>43</td>\n",
" <td>75</td>\n",
" <td>78</td>\n",
" <td>91</td>\n",
" <td>37</td>\n",
" <td>68</td>\n",
" <td>51</td>\n",
" <td>62</td>\n",
" <td>14</td>\n",
" <td>98</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>59</td>\n",
" <td>73</td>\n",
" <td>32</td>\n",
" <td>3</td>\n",
" <td>95</td>\n",
" <td>38</td>\n",
" <td>31</td>\n",
" <td>66</td>\n",
" <td>82</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>99</td>\n",
" <td>47</td>\n",
" <td>82</td>\n",
" <td>61</td>\n",
" <td>49</td>\n",
" <td>67</td>\n",
" <td>50</td>\n",
" <td>48</td>\n",
" <td>19</td>\n",
" <td>22</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>39</td>\n",
" <td>92</td>\n",
" <td>49</td>\n",
" <td>89</td>\n",
" <td>9</td>\n",
" <td>35</td>\n",
" <td>51</td>\n",
" <td>17</td>\n",
" <td>45</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20</td>\n",
" <td>45</td>\n",
" <td>54</td>\n",
" <td>40</td>\n",
" <td>78</td>\n",
" <td>56</td>\n",
" <td>53</td>\n",
" <td>94</td>\n",
" <td>45</td>\n",
" <td>2</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>13</td>\n",
" <td>17</td>\n",
" <td>59</td>\n",
" <td>72</td>\n",
" <td>5</td>\n",
" <td>63</td>\n",
" <td>92</td>\n",
" <td>53</td>\n",
" <td>95</td>\n",
" <td>1</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>88</td>\n",
" <td>21</td>\n",
" <td>32</td>\n",
" <td>86</td>\n",
" <td>3</td>\n",
" <td>56</td>\n",
" <td>84</td>\n",
" <td>30</td>\n",
" <td>19</td>\n",
" <td>42</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>40</td>\n",
" <td>81</td>\n",
" <td>84</td>\n",
" <td>21</td>\n",
" <td>65</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>18</td>\n",
" <td>25</td>\n",
" <td>22</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>48</td>\n",
" <td>78</td>\n",
" <td>84</td>\n",
" <td>73</td>\n",
" <td>75</td>\n",
" <td>39</td>\n",
" <td>86</td>\n",
" <td>23</td>\n",
" <td>52</td>\n",
" <td>20</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>30</td>\n",
" <td>24</td>\n",
" <td>7</td>\n",
" <td>81</td>\n",
" <td>20</td>\n",
" <td>35</td>\n",
" <td>88</td>\n",
" <td>86</td>\n",
" <td>8</td>\n",
" <td>46</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n",
"0 43 75 78 91 37 68 51 62 14 98 A C \n",
"1 77 59 73 32 3 95 38 31 66 82 B B \n",
"2 99 47 82 61 49 67 50 48 19 22 B X \n",
"3 5 39 92 49 89 9 35 51 17 45 A C \n",
"4 20 45 54 40 78 56 53 94 45 2 A C \n",
"5 13 17 59 72 5 63 92 53 95 1 None E \n",
"6 88 21 32 86 3 56 84 30 19 42 B X \n",
"7 40 81 84 21 65 30 30 18 25 22 A C \n",
"8 48 78 84 73 75 39 86 23 52 20 A C \n",
"9 30 24 7 81 20 35 88 86 8 46 A C \n",
"\n",
" col0_GT50 col1_GT48 col2_GT51 col3_GT49 col4_GT49 \n",
"0 False True True True False \n",
"1 True True True False False \n",
"2 True False True True False \n",
"3 False False True False True \n",
"4 False False True False True \n",
"5 False False True True False \n",
"6 True False False True False \n",
"7 False True True False True \n",
"8 False True True True True \n",
"9 False False False True False "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment