Skip to content

Instantly share code, notes, and snippets.

@mritchie712
Created February 16, 2017 14:55
Show Gist options
  • Save mritchie712/7c854bbd58feee1a9c054c7270852903 to your computer and use it in GitHub Desktop.
Save mritchie712/7c854bbd58feee1a9c054c7270852903 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"i = 0\n",
"while i < 10:\n",
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n",
" i+=1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['colAB'] = 'A'\n",
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n",
"df.loc[df.col8 > 75, 'colAB'] = None"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['caseWhen'] = 'X'\n",
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n",
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n",
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n",
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n",
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"for col in df.columns[:5]:\n",
" colMean = df[col].mean()\n",
" newCol = col + '_GT' + str(int(colMean))\n",
" df[newCol] = df[col] > colMean"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"C 445\n",
"E 248\n",
"B 126\n",
"X 89\n",
"A 76\n",
"Name: caseWhen, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.caseWhen.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col5</th>\n",
" <th>col6</th>\n",
" <th>col7</th>\n",
" <th>col8</th>\n",
" <th>col9</th>\n",
" <th>colAB</th>\n",
" <th>caseWhen</th>\n",
" <th>col0_GT49</th>\n",
" <th>col1_GT50</th>\n",
" <th>col2_GT50</th>\n",
" <th>col3_GT49</th>\n",
" <th>col4_GT50</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>83</td>\n",
" <td>38</td>\n",
" <td>47</td>\n",
" <td>55</td>\n",
" <td>44</td>\n",
" <td>14</td>\n",
" <td>6</td>\n",
" <td>76</td>\n",
" <td>66</td>\n",
" <td>77</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>42</td>\n",
" <td>36</td>\n",
" <td>39</td>\n",
" <td>33</td>\n",
" <td>98</td>\n",
" <td>58</td>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>A</td>\n",
" <td>X</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>51</td>\n",
" <td>76</td>\n",
" <td>1</td>\n",
" <td>93</td>\n",
" <td>66</td>\n",
" <td>20</td>\n",
" <td>75</td>\n",
" <td>29</td>\n",
" <td>16</td>\n",
" <td>11</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>99</td>\n",
" <td>50</td>\n",
" <td>45</td>\n",
" <td>8</td>\n",
" <td>82</td>\n",
" <td>87</td>\n",
" <td>1</td>\n",
" <td>98</td>\n",
" <td>47</td>\n",
" <td>1</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>16</td>\n",
" <td>92</td>\n",
" <td>93</td>\n",
" <td>28</td>\n",
" <td>55</td>\n",
" <td>39</td>\n",
" <td>67</td>\n",
" <td>27</td>\n",
" <td>77</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>53</td>\n",
" <td>42</td>\n",
" <td>89</td>\n",
" <td>56</td>\n",
" <td>91</td>\n",
" <td>31</td>\n",
" <td>55</td>\n",
" <td>40</td>\n",
" <td>90</td>\n",
" <td>50</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>81</td>\n",
" <td>94</td>\n",
" <td>95</td>\n",
" <td>36</td>\n",
" <td>27</td>\n",
" <td>84</td>\n",
" <td>12</td>\n",
" <td>57</td>\n",
" <td>21</td>\n",
" <td>59</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>53</td>\n",
" <td>85</td>\n",
" <td>28</td>\n",
" <td>28</td>\n",
" <td>1</td>\n",
" <td>42</td>\n",
" <td>74</td>\n",
" <td>16</td>\n",
" <td>91</td>\n",
" <td>82</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>19</td>\n",
" <td>27</td>\n",
" <td>45</td>\n",
" <td>86</td>\n",
" <td>79</td>\n",
" <td>65</td>\n",
" <td>92</td>\n",
" <td>28</td>\n",
" <td>96</td>\n",
" <td>26</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>45</td>\n",
" <td>17</td>\n",
" <td>34</td>\n",
" <td>67</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>69</td>\n",
" <td>83</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n",
"0 83 38 47 55 44 14 6 76 66 77 B B \n",
"1 42 36 39 33 98 58 14 29 3 2 A X \n",
"2 51 76 1 93 66 20 75 29 16 11 A C \n",
"3 99 50 45 8 82 87 1 98 47 1 B X \n",
"4 90 16 92 93 28 55 39 67 27 77 B B \n",
"5 53 42 89 56 91 31 55 40 90 50 None E \n",
"6 81 94 95 36 27 84 12 57 21 59 B B \n",
"7 53 85 28 28 1 42 74 16 91 82 None E \n",
"8 19 27 45 86 79 65 92 28 96 26 None E \n",
"9 90 25 45 17 34 67 7 3 69 83 B B \n",
"\n",
" col0_GT49 col1_GT50 col2_GT50 col3_GT49 col4_GT50 \n",
"0 True False False True False \n",
"1 False False False False True \n",
"2 True True False True True \n",
"3 True False False False True \n",
"4 True False True True False \n",
"5 True False True True True \n",
"6 True True True False False \n",
"7 True True False False False \n",
"8 False False False True True \n",
"9 True False False False False "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment