Skip to content

Instantly share code, notes, and snippets.

@mritchie712
Created February 16, 2017 14:55
Show Gist options
  • Save mritchie712/7c854bbd58feee1a9c054c7270852903 to your computer and use it in GitHub Desktop.
Save mritchie712/7c854bbd58feee1a9c054c7270852903 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"i = 0\n",
"while i < 10:\n",
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n",
" i+=1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['colAB'] = 'A'\n",
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n",
"df.loc[df.col8 > 75, 'colAB'] = None"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['caseWhen'] = 'X'\n",
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n",
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n",
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n",
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n",
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"for col in df.columns[:5]:\n",
" colMean = df[col].mean()\n",
" newCol = col + '_GT' + str(int(colMean))\n",
" df[newCol] = df[col] > colMean"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"C 445\n",
"E 248\n",
"B 126\n",
"X 89\n",
"A 76\n",
"Name: caseWhen, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.caseWhen.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col5</th>\n",
" <th>col6</th>\n",
" <th>col7</th>\n",
" <th>col8</th>\n",
" <th>col9</th>\n",
" <th>colAB</th>\n",
" <th>caseWhen</th>\n",
" <th>col0_GT49</th>\n",
" <th>col1_GT50</th>\n",
" <th>col2_GT50</th>\n",
" <th>col3_GT49</th>\n",
" <th>col4_GT50</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>83</td>\n",
" <td>38</td>\n",
" <td>47</td>\n",
" <td>55</td>\n",
" <td>44</td>\n",
" <td>14</td>\n",
" <td>6</td>\n",
" <td>76</td>\n",
" <td>66</td>\n",
" <td>77</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>42</td>\n",
" <td>36</td>\n",
" <td>39</td>\n",
" <td>33</td>\n",
" <td>98</td>\n",
" <td>58</td>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>A</td>\n",
" <td>X</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>51</td>\n",
" <td>76</td>\n",
" <td>1</td>\n",
" <td>93</td>\n",
" <td>66</td>\n",
" <td>20</td>\n",
" <td>75</td>\n",
" <td>29</td>\n",
" <td>16</td>\n",
" <td>11</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>99</td>\n",
" <td>50</td>\n",
" <td>45</td>\n",
" <td>8</td>\n",
" <td>82</td>\n",
" <td>87</td>\n",
" <td>1</td>\n",
" <td>98</td>\n",
" <td>47</td>\n",
" <td>1</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>16</td>\n",
" <td>92</td>\n",
" <td>93</td>\n",
" <td>28</td>\n",
" <td>55</td>\n",
" <td>39</td>\n",
" <td>67</td>\n",
" <td>27</td>\n",
" <td>77</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>53</td>\n",
" <td>42</td>\n",
" <td>89</td>\n",
" <td>56</td>\n",
" <td>91</td>\n",
" <td>31</td>\n",
" <td>55</td>\n",
" <td>40</td>\n",
" <td>90</td>\n",
" <td>50</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>81</td>\n",
" <td>94</td>\n",
" <td>95</td>\n",
" <td>36</td>\n",
" <td>27</td>\n",
" <td>84</td>\n",
" <td>12</td>\n",
" <td>57</td>\n",
" <td>21</td>\n",
" <td>59</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>53</td>\n",
" <td>85</td>\n",
" <td>28</td>\n",
" <td>28</td>\n",
" <td>1</td>\n",
" <td>42</td>\n",
" <td>74</td>\n",
" <td>16</td>\n",
" <td>91</td>\n",
" <td>82</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>19</td>\n",
" <td>27</td>\n",
" <td>45</td>\n",
" <td>86</td>\n",
" <td>79</td>\n",
" <td>65</td>\n",
" <td>92</td>\n",
" <td>28</td>\n",
" <td>96</td>\n",
" <td>26</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>45</td>\n",
" <td>17</td>\n",
" <td>34</td>\n",
" <td>67</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>69</td>\n",
" <td>83</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n",
"0 83 38 47 55 44 14 6 76 66 77 B B \n",
"1 42 36 39 33 98 58 14 29 3 2 A X \n",
"2 51 76 1 93 66 20 75 29 16 11 A C \n",
"3 99 50 45 8 82 87 1 98 47 1 B X \n",
"4 90 16 92 93 28 55 39 67 27 77 B B \n",
"5 53 42 89 56 91 31 55 40 90 50 None E \n",
"6 81 94 95 36 27 84 12 57 21 59 B B \n",
"7 53 85 28 28 1 42 74 16 91 82 None E \n",
"8 19 27 45 86 79 65 92 28 96 26 None E \n",
"9 90 25 45 17 34 67 7 3 69 83 B B \n",
"\n",
" col0_GT49 col1_GT50 col2_GT50 col3_GT49 col4_GT50 \n",
"0 True False False True False \n",
"1 False False False False True \n",
"2 True True False True True \n",
"3 True False False False True \n",
"4 True False True True False \n",
"5 True False True True True \n",
"6 True True True False False \n",
"7 True True False False False \n",
"8 False False False True True \n",
"9 True False False False False "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"i = 0\n",
"while i < 10:\n",
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n",
" i+=1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['colAB'] = 'A'\n",
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n",
"df.loc[df.col8 > 75, 'colAB'] = None"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['caseWhen'] = 'X'\n",
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n",
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n",
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n",
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n",
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"for col in df.columns[:5]:\n",
" colMean = df[col].mean()\n",
" newCol = col + '_GT' + str(int(colMean))\n",
" df[newCol] = df[col] > colMean"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"C 445\n",
"E 248\n",
"B 126\n",
"X 89\n",
"A 76\n",
"Name: caseWhen, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.caseWhen.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>col0</th>\n",
" <th>col1</th>\n",
" <th>col2</th>\n",
" <th>col3</th>\n",
" <th>col4</th>\n",
" <th>col5</th>\n",
" <th>col6</th>\n",
" <th>col7</th>\n",
" <th>col8</th>\n",
" <th>col9</th>\n",
" <th>colAB</th>\n",
" <th>caseWhen</th>\n",
" <th>col0_GT49</th>\n",
" <th>col1_GT50</th>\n",
" <th>col2_GT50</th>\n",
" <th>col3_GT49</th>\n",
" <th>col4_GT50</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>83</td>\n",
" <td>38</td>\n",
" <td>47</td>\n",
" <td>55</td>\n",
" <td>44</td>\n",
" <td>14</td>\n",
" <td>6</td>\n",
" <td>76</td>\n",
" <td>66</td>\n",
" <td>77</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>42</td>\n",
" <td>36</td>\n",
" <td>39</td>\n",
" <td>33</td>\n",
" <td>98</td>\n",
" <td>58</td>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>A</td>\n",
" <td>X</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>51</td>\n",
" <td>76</td>\n",
" <td>1</td>\n",
" <td>93</td>\n",
" <td>66</td>\n",
" <td>20</td>\n",
" <td>75</td>\n",
" <td>29</td>\n",
" <td>16</td>\n",
" <td>11</td>\n",
" <td>A</td>\n",
" <td>C</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>99</td>\n",
" <td>50</td>\n",
" <td>45</td>\n",
" <td>8</td>\n",
" <td>82</td>\n",
" <td>87</td>\n",
" <td>1</td>\n",
" <td>98</td>\n",
" <td>47</td>\n",
" <td>1</td>\n",
" <td>B</td>\n",
" <td>X</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>16</td>\n",
" <td>92</td>\n",
" <td>93</td>\n",
" <td>28</td>\n",
" <td>55</td>\n",
" <td>39</td>\n",
" <td>67</td>\n",
" <td>27</td>\n",
" <td>77</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>53</td>\n",
" <td>42</td>\n",
" <td>89</td>\n",
" <td>56</td>\n",
" <td>91</td>\n",
" <td>31</td>\n",
" <td>55</td>\n",
" <td>40</td>\n",
" <td>90</td>\n",
" <td>50</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>81</td>\n",
" <td>94</td>\n",
" <td>95</td>\n",
" <td>36</td>\n",
" <td>27</td>\n",
" <td>84</td>\n",
" <td>12</td>\n",
" <td>57</td>\n",
" <td>21</td>\n",
" <td>59</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>53</td>\n",
" <td>85</td>\n",
" <td>28</td>\n",
" <td>28</td>\n",
" <td>1</td>\n",
" <td>42</td>\n",
" <td>74</td>\n",
" <td>16</td>\n",
" <td>91</td>\n",
" <td>82</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>19</td>\n",
" <td>27</td>\n",
" <td>45</td>\n",
" <td>86</td>\n",
" <td>79</td>\n",
" <td>65</td>\n",
" <td>92</td>\n",
" <td>28</td>\n",
" <td>96</td>\n",
" <td>26</td>\n",
" <td>None</td>\n",
" <td>E</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>90</td>\n",
" <td>25</td>\n",
" <td>45</td>\n",
" <td>17</td>\n",
" <td>34</td>\n",
" <td>67</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>69</td>\n",
" <td>83</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n",
"0 83 38 47 55 44 14 6 76 66 77 B B \n",
"1 42 36 39 33 98 58 14 29 3 2 A X \n",
"2 51 76 1 93 66 20 75 29 16 11 A C \n",
"3 99 50 45 8 82 87 1 98 47 1 B X \n",
"4 90 16 92 93 28 55 39 67 27 77 B B \n",
"5 53 42 89 56 91 31 55 40 90 50 None E \n",
"6 81 94 95 36 27 84 12 57 21 59 B B \n",
"7 53 85 28 28 1 42 74 16 91 82 None E \n",
"8 19 27 45 86 79 65 92 28 96 26 None E \n",
"9 90 25 45 17 34 67 7 3 69 83 B B \n",
"\n",
" col0_GT49 col1_GT50 col2_GT50 col3_GT49 col4_GT50 \n",
"0 True False False True False \n",
"1 False False False False True \n",
"2 True True False True True \n",
"3 True False False False True \n",
"4 True False True True False \n",
"5 True False True True True \n",
"6 True True True False False \n",
"7 True True False False False \n",
"8 False False False True True \n",
"9 True False False False False "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment