Created
December 23, 2014 02:34
-
-
Save jackparmer/8d3853fe78e86110d064 to your computer and use it in GitHub Desktop.
messing with pandas in ipython notebooks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "messing with pandas", | |
"signature": "sha256:8de7fd267cc75a71ca8db02685df82aabbd9201f8bcecd52b3870c02fb8813b9" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "import plotly as py\nimport pandas as pd\nimport numpy as np\n\nfrom datetime import datetime #use datetime library functions\nfrom datetime import time\nfrom datetime import date\nfrom datetime import timedelta", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 37 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "#import of doubleclick data report\ndf = pd.read_csv('/Users/jack/Desktop/doubleclickdata.csv', parse_dates=True) \n\n# Remove rows that don't have data in Creative column\ndf = df[df['Creative'].notnull()] \n\n# parse_dates \ndf['Date'] = pd.to_datetime(df['Date'])\n\ndf.set_index(['Date'], inplace=True)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 79 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# confirm index is datetimes\nlist( df.index )[0:4]", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 81, | |
"text": "[Timestamp('2014-06-16 00:00:00', tz=None),\n Timestamp('2014-06-16 00:00:00', tz=None),\n Timestamp('2014-06-16 00:00:00', tz=None),\n Timestamp('2014-06-17 00:00:00', tz=None)]" | |
} | |
], | |
"prompt_number": 81 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "df.head()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Line Item</th>\n <th>Line Item ID</th>\n <th>Line Item Status</th>\n <th>Line Item Integration Code</th>\n <th>Targeted Data Providers</th>\n <th>Creative</th>\n <th>Creative ID</th>\n <th>DFA Placement ID</th>\n <th>Creative Status</th>\n <th>Creative Source</th>\n <th>Creative Integration Code</th>\n <th>Impressions</th>\n <th>Clicks</th>\n <th>Total Conversions</th>\n <th>Post-Click Conversions</th>\n <th>Post-View Conversions</th>\n <th>Click Rate (CTR)</th>\n </tr>\n <tr>\n <th>Date</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2014-06-16</th>\n <td> mbaMission - Site Retargeting</td>\n <td> 1839989</td>\n <td> Active</td>\n <td>NaN</td>\n <td>NaN</td>\n <td> mbaM-Site2-160x600</td>\n <td> 3999035</td>\n <td> 109499360</td>\n <td> Active</td>\n <td> DFA</td>\n <td>NaN</td>\n <td> 5125</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0.00%</td>\n </tr>\n <tr>\n <th>2014-06-16</th>\n <td> mbaMission - Site Retargeting</td>\n <td> 1839989</td>\n <td> Active</td>\n <td>NaN</td>\n <td>NaN</td>\n <td> mbaM-Site2-300x250</td>\n <td> 3999036</td>\n <td> 109503585</td>\n <td> Active</td>\n <td> DFA</td>\n <td>NaN</td>\n <td> 8043</td>\n <td> 1</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0.01%</td>\n </tr>\n <tr>\n <th>2014-06-16</th>\n <td> mbaMission - Site Retargeting</td>\n <td> 1839989</td>\n <td> Active</td>\n <td>NaN</td>\n <td>NaN</td>\n <td> mbaM-Site2-728x90</td>\n <td> 3999037</td>\n <td> 109501465</td>\n <td> Active</td>\n <td> DFA</td>\n <td>NaN</td>\n <td> 9157</td>\n <td> 1</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0.01%</td>\n </tr>\n <tr>\n <th>2014-06-17</th>\n <td> mbaMission - Site Retargeting</td>\n <td> 1839989</td>\n <td> Active</td>\n <td>NaN</td>\n <td>NaN</td>\n <td> mbaM-Site2-160x600</td>\n <td> 3999035</td>\n <td> 109499360</td>\n <td> Active</td>\n <td> DFA</td>\n <td>NaN</td>\n <td> 17255</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0.00%</td>\n </tr>\n <tr>\n <th>2014-06-17</th>\n <td> mbaMission - Site Retargeting</td>\n <td> 1839989</td>\n <td> Active</td>\n <td>NaN</td>\n <td>NaN</td>\n <td> mbaM-Site2-300x250</td>\n <td> 3999036</td>\n <td> 109503585</td>\n <td> Active</td>\n <td> DFA</td>\n <td>NaN</td>\n <td> 29887</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0.00%</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 85, | |
"text": " Line Item Line Item ID Line Item Status \\\nDate \n2014-06-16 mbaMission - Site Retargeting 1839989 Active \n2014-06-16 mbaMission - Site Retargeting 1839989 Active \n2014-06-16 mbaMission - Site Retargeting 1839989 Active \n2014-06-17 mbaMission - Site Retargeting 1839989 Active \n2014-06-17 mbaMission - Site Retargeting 1839989 Active \n\n Line Item Integration Code Targeted Data Providers \\\nDate \n2014-06-16 NaN NaN \n2014-06-16 NaN NaN \n2014-06-16 NaN NaN \n2014-06-17 NaN NaN \n2014-06-17 NaN NaN \n\n Creative Creative ID DFA Placement ID Creative Status \\\nDate \n2014-06-16 mbaM-Site2-160x600 3999035 109499360 Active \n2014-06-16 mbaM-Site2-300x250 3999036 109503585 Active \n2014-06-16 mbaM-Site2-728x90 3999037 109501465 Active \n2014-06-17 mbaM-Site2-160x600 3999035 109499360 Active \n2014-06-17 mbaM-Site2-300x250 3999036 109503585 Active \n\n Creative Source Creative Integration Code Impressions Clicks \\\nDate \n2014-06-16 DFA NaN 5125 0 \n2014-06-16 DFA NaN 8043 1 \n2014-06-16 DFA NaN 9157 1 \n2014-06-17 DFA NaN 17255 0 \n2014-06-17 DFA NaN 29887 0 \n\n Total Conversions Post-Click Conversions Post-View Conversions \\\nDate \n2014-06-16 0 0 0 \n2014-06-16 0 0 0 \n2014-06-16 0 0 0 \n2014-06-17 0 0 0 \n2014-06-17 0 0 0 \n\n Click Rate (CTR) \nDate \n2014-06-16 0.00% \n2014-06-16 0.01% \n2014-06-16 0.01% \n2014-06-17 0.00% \n2014-06-17 0.00% " | |
} | |
], | |
"prompt_number": 85 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Create a subset of the dataframe. Don't have to include 'Date', since its an index \nad_data = df[['Creative','Impressions','Clicks', 'Total Conversions', 'Post-Click Conversions']]\nad_data.head()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Creative</th>\n <th>Impressions</th>\n <th>Clicks</th>\n <th>Total Conversions</th>\n <th>Post-Click Conversions</th>\n </tr>\n <tr>\n <th>Date</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2014-06-16</th>\n <td> mbaM-Site2-160x600</td>\n <td> 5125</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>2014-06-16</th>\n <td> mbaM-Site2-300x250</td>\n <td> 8043</td>\n <td> 1</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>2014-06-16</th>\n <td> mbaM-Site2-728x90</td>\n <td> 9157</td>\n <td> 1</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>2014-06-17</th>\n <td> mbaM-Site2-160x600</td>\n <td> 17255</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>2014-06-17</th>\n <td> mbaM-Site2-300x250</td>\n <td> 29887</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 105, | |
"text": " Creative Impressions Clicks Total Conversions \\\nDate \n2014-06-16 mbaM-Site2-160x600 5125 0 0 \n2014-06-16 mbaM-Site2-300x250 8043 1 0 \n2014-06-16 mbaM-Site2-728x90 9157 1 0 \n2014-06-17 mbaM-Site2-160x600 17255 0 0 \n2014-06-17 mbaM-Site2-300x250 29887 0 0 \n\n Post-Click Conversions \nDate \n2014-06-16 0 \n2014-06-16 0 \n2014-06-16 0 \n2014-06-17 0 \n2014-06-17 0 " | |
} | |
], | |
"prompt_number": 105 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "list( ad_data.index )[-10:-1]", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 146, | |
"text": "[Timestamp('2014-11-24 00:00:00', tz=None),\n Timestamp('2014-11-24 00:00:00', tz=None),\n Timestamp('2014-11-24 00:00:00', tz=None),\n Timestamp('2014-11-24 00:00:00', tz=None),\n Timestamp('2014-11-25 00:00:00', tz=None),\n Timestamp('2014-11-25 00:00:00', tz=None),\n Timestamp('2014-11-25 00:00:00', tz=None),\n Timestamp('2014-11-25 00:00:00', tz=None),\n Timestamp('2014-11-25 00:00:00', tz=None)]" | |
} | |
], | |
"prompt_number": 146 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "last_measurement_date = ad_data.index[-1]\noffset = (today.weekday()-2) % 7\nprevious_week = last_measurement_date - timedelta(days=offset)\nprint previous_week, type(previous_week), last_measurement_date, type(last_measurement_date)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "2014-11-20 00:00:00 <type 'datetime.datetime'> 2014-11-25 00:00:00 <class 'pandas.tslib.Timestamp'>\n" | |
} | |
], | |
"prompt_number": 147 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# see this SO for slicing pandas df's with datetime indices\n# http://stackoverflow.com/questions/9788299/convenient-slicing-of-dataframes-with-datetime-indexes-in-pandas\n\ndf2 = ad_data.ix[previous_week:last_measurement_date]\ndf2.head()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Creative</th>\n <th>Impressions</th>\n <th>Clicks</th>\n <th>Total Conversions</th>\n <th>Post-Click Conversions</th>\n </tr>\n <tr>\n <th>Date</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2014-11-20</th>\n <td> mbaM-Site2-300x250</td>\n <td> 0</td>\n <td> 0</td>\n <td> 1</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>2014-11-20</th>\n <td> mbaM-Site2-728x90</td>\n <td> 0</td>\n <td> 0</td>\n <td> 1</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>2014-11-20</th>\n <td> mbaM-Nov-Services-Prep-early-160x600</td>\n <td> 1199</td>\n <td> 1</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>2014-11-20</th>\n <td> mbaM-Nov-Services-Prep-early-300x250</td>\n <td> 3518</td>\n <td> 0</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>2014-11-20</th>\n <td> mbaM-Nov-Services-Prep-early-728x90</td>\n <td> 2343</td>\n <td> 2</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 148, | |
"text": " Creative Impressions Clicks \\\nDate \n2014-11-20 mbaM-Site2-300x250 0 0 \n2014-11-20 mbaM-Site2-728x90 0 0 \n2014-11-20 mbaM-Nov-Services-Prep-early-160x600 1199 1 \n2014-11-20 mbaM-Nov-Services-Prep-early-300x250 3518 0 \n2014-11-20 mbaM-Nov-Services-Prep-early-728x90 2343 2 \n\n Total Conversions Post-Click Conversions \nDate \n2014-11-20 1 0 \n2014-11-20 1 0 \n2014-11-20 0 0 \n2014-11-20 0 0 \n2014-11-20 0 0 " | |
} | |
], | |
"prompt_number": 148 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "creative = ad_data.groupby('Creative')\ncreative.sum()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Impressions</th>\n <th>Clicks</th>\n <th>Total Conversions</th>\n <th>Post-Click Conversions</th>\n </tr>\n <tr>\n <th>Creative</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>mbaM-Nov-Services-Prep-early-160x600</th>\n <td> 8430</td>\n <td> 3</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Nov-Services-Prep-early-300x250</th>\n <td> 24613</td>\n <td> 11</td>\n <td> 1</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Nov-Services-Prep-early-728x90</th>\n <td> 16395</td>\n <td> 4</td>\n <td> 0</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Nov-Services-Right-Around-Corner-160x600</th>\n <td> 8305</td>\n <td> 2</td>\n <td> 2</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Nov-Services-Right-Around-Corner-300x250</th>\n <td> 24115</td>\n <td> 11</td>\n <td> 1</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Nov-Services-Right-Around-Corner-728x90</th>\n <td> 19570</td>\n <td> 9</td>\n <td> 2</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Services-Site3-160x600-get-a-head-start</th>\n <td> 39858</td>\n <td> 13</td>\n <td> 1</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Services-Site3-160x600-its-not-too-late</th>\n <td> 16557</td>\n <td> 10</td>\n <td> 4</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Services-Site3-300x250-get-a-head-start</th>\n <td> 127232</td>\n <td> 50</td>\n <td> 18</td>\n <td> 1</td>\n </tr>\n <tr>\n <th>mbaM-Services-Site3-300x250-its-not-too-late</th>\n <td> 56636</td>\n <td> 25</td>\n <td> 8</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Services-Site3-728x90-get-a-head-start</th>\n <td> 86941</td>\n <td> 39</td>\n <td> 11</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Services-Site3-728x90-its-not-too-late</th>\n <td> 39902</td>\n <td> 13</td>\n <td> 6</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Site2-160x600</th>\n <td> 294599</td>\n <td> 736</td>\n <td> 46</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Site2-300x250</th>\n <td> 766177</td>\n <td> 794</td>\n <td> 127</td>\n <td> 0</td>\n </tr>\n <tr>\n <th>mbaM-Site2-728x90</th>\n <td> 612543</td>\n <td> 758</td>\n <td> 101</td>\n <td> 1</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 149, | |
"text": " Impressions Clicks \\\nCreative \nmbaM-Nov-Services-Prep-early-160x600 8430 3 \nmbaM-Nov-Services-Prep-early-300x250 24613 11 \nmbaM-Nov-Services-Prep-early-728x90 16395 4 \nmbaM-Nov-Services-Right-Around-Corner-160x600 8305 2 \nmbaM-Nov-Services-Right-Around-Corner-300x250 24115 11 \nmbaM-Nov-Services-Right-Around-Corner-728x90 19570 9 \nmbaM-Services-Site3-160x600-get-a-head-start 39858 13 \nmbaM-Services-Site3-160x600-its-not-too-late 16557 10 \nmbaM-Services-Site3-300x250-get-a-head-start 127232 50 \nmbaM-Services-Site3-300x250-its-not-too-late 56636 25 \nmbaM-Services-Site3-728x90-get-a-head-start 86941 39 \nmbaM-Services-Site3-728x90-its-not-too-late 39902 13 \nmbaM-Site2-160x600 294599 736 \nmbaM-Site2-300x250 766177 794 \nmbaM-Site2-728x90 612543 758 \n\n Total Conversions \\\nCreative \nmbaM-Nov-Services-Prep-early-160x600 0 \nmbaM-Nov-Services-Prep-early-300x250 1 \nmbaM-Nov-Services-Prep-early-728x90 0 \nmbaM-Nov-Services-Right-Around-Corner-160x600 2 \nmbaM-Nov-Services-Right-Around-Corner-300x250 1 \nmbaM-Nov-Services-Right-Around-Corner-728x90 2 \nmbaM-Services-Site3-160x600-get-a-head-start 1 \nmbaM-Services-Site3-160x600-its-not-too-late 4 \nmbaM-Services-Site3-300x250-get-a-head-start 18 \nmbaM-Services-Site3-300x250-its-not-too-late 8 \nmbaM-Services-Site3-728x90-get-a-head-start 11 \nmbaM-Services-Site3-728x90-its-not-too-late 6 \nmbaM-Site2-160x600 46 \nmbaM-Site2-300x250 127 \nmbaM-Site2-728x90 101 \n\n Post-Click Conversions \nCreative \nmbaM-Nov-Services-Prep-early-160x600 0 \nmbaM-Nov-Services-Prep-early-300x250 0 \nmbaM-Nov-Services-Prep-early-728x90 0 \nmbaM-Nov-Services-Right-Around-Corner-160x600 0 \nmbaM-Nov-Services-Right-Around-Corner-300x250 0 \nmbaM-Nov-Services-Right-Around-Corner-728x90 0 \nmbaM-Services-Site3-160x600-get-a-head-start 0 \nmbaM-Services-Site3-160x600-its-not-too-late 0 \nmbaM-Services-Site3-300x250-get-a-head-start 1 \nmbaM-Services-Site3-300x250-its-not-too-late 0 \nmbaM-Services-Site3-728x90-get-a-head-start 0 \nmbaM-Services-Site3-728x90-its-not-too-late 0 \nmbaM-Site2-160x600 0 \nmbaM-Site2-300x250 0 \nmbaM-Site2-728x90 1 " | |
} | |
], | |
"prompt_number": 149 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment