Created
February 24, 2016 01:01
-
-
Save francoiseprovencher/9f2706b883b55612256f to your computer and use it in GitHub Desktop.
Notebook for "How to Measure your E-Commerce Business" at Confoo 2016
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# How to measure your e-commerce business\n", | |
"### Confoo presentation - Françoise Provencher - Feb 24th 2016" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# For fancy looking graphs\n", | |
"from matplotlib import pyplot as plt \n", | |
"plt.style.use('ggplot')\n", | |
"%matplotlib inline\n", | |
"\n", | |
"# For dealing elegantly with unicode and with integer division\n", | |
"from __future__ import unicode_literals\n", | |
"from __future__ import division\n", | |
"\n", | |
"# Pandas is our favorite library for data exploration\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Q1 - What’s my top 2 popular t-shirt colors?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"I sell lots of different t-shirt colors, but to much choice is a bad thing so I want to re-focus my collection to only the best-seller colors. Which are those best-sellers?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Let's read the data and store it into a pandas dataframe\n", | |
"orders = pd.read_csv('data/orders_export.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Name</th>\n", | |
" <th>Email</th>\n", | |
" <th>Financial Status</th>\n", | |
" <th>Paid at</th>\n", | |
" <th>Fulfillment Status</th>\n", | |
" <th>Fulfilled at</th>\n", | |
" <th>Accepts Marketing</th>\n", | |
" <th>Currency</th>\n", | |
" <th>Subtotal</th>\n", | |
" <th>Shipping</th>\n", | |
" <th>...</th>\n", | |
" <th>Tax 1 Name</th>\n", | |
" <th>Tax 1 Value</th>\n", | |
" <th>Tax 2 Name</th>\n", | |
" <th>Tax 2 Value</th>\n", | |
" <th>Tax 3 Name</th>\n", | |
" <th>Tax 3 Value</th>\n", | |
" <th>Tax 4 Name</th>\n", | |
" <th>Tax 4 Value</th>\n", | |
" <th>Tax 5 Name</th>\n", | |
" <th>Tax 5 Value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>#1017</td>\n", | |
" <td>stanley.Kubrick@gmail.com</td>\n", | |
" <td>paid</td>\n", | |
" <td>2016-01-29 16:06:01 -0500</td>\n", | |
" <td>fulfilled</td>\n", | |
" <td>2016-02-03 02:54:26 -0500</td>\n", | |
" <td>no</td>\n", | |
" <td>CAD</td>\n", | |
" <td>35</td>\n", | |
" <td>5</td>\n", | |
" <td>...</td>\n", | |
" <td>GST 5%</td>\n", | |
" <td>1.75</td>\n", | |
" <td>QST 9.975%</td>\n", | |
" <td>3.49</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>#1016</td>\n", | |
" <td>alfred.Hitchcock@gmail.com</td>\n", | |
" <td>paid</td>\n", | |
" <td>2016-01-29 15:31:15 -0500</td>\n", | |
" <td>fulfilled</td>\n", | |
" <td>2016-02-02 15:58:06 -0500</td>\n", | |
" <td>no</td>\n", | |
" <td>CAD</td>\n", | |
" <td>40</td>\n", | |
" <td>5</td>\n", | |
" <td>...</td>\n", | |
" <td>GST 5%</td>\n", | |
" <td>1.75</td>\n", | |
" <td>QST 9.975%</td>\n", | |
" <td>3.49</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>#1015</td>\n", | |
" <td>alejandro.Inaritu@gmail.com</td>\n", | |
" <td>paid</td>\n", | |
" <td>2016-01-29 14:55:48 -0500</td>\n", | |
" <td>fulfilled</td>\n", | |
" <td>2016-02-01 18:03:12 -0500</td>\n", | |
" <td>no</td>\n", | |
" <td>CAD</td>\n", | |
" <td>40</td>\n", | |
" <td>5</td>\n", | |
" <td>...</td>\n", | |
" <td>GST 5%</td>\n", | |
" <td>1.75</td>\n", | |
" <td>QST 9.975%</td>\n", | |
" <td>3.49</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>#1014</td>\n", | |
" <td>ginger.Rogers@gmail.com</td>\n", | |
" <td>paid</td>\n", | |
" <td>NaN</td>\n", | |
" <td>fulfilled</td>\n", | |
" <td>2016-01-19 13:21:23 -0500</td>\n", | |
" <td>no</td>\n", | |
" <td>CAD</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>GST 5%</td>\n", | |
" <td>1.75</td>\n", | |
" <td>QST 9.975%</td>\n", | |
" <td>3.49</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>#1013</td>\n", | |
" <td>takashi.Miike@gmail.com</td>\n", | |
" <td>paid</td>\n", | |
" <td>2015-12-17 15:54:02 -0500</td>\n", | |
" <td>fulfilled</td>\n", | |
" <td>2016-01-07 15:21:15 -0500</td>\n", | |
" <td>no</td>\n", | |
" <td>CAD</td>\n", | |
" <td>35</td>\n", | |
" <td>5</td>\n", | |
" <td>...</td>\n", | |
" <td>GST 5%</td>\n", | |
" <td>1.75</td>\n", | |
" <td>QST 9.975%</td>\n", | |
" <td>3.49</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 66 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Name Email Financial Status \\\n", | |
"0 #1017 stanley.Kubrick@gmail.com paid \n", | |
"1 #1016 alfred.Hitchcock@gmail.com paid \n", | |
"2 #1015 alejandro.Inaritu@gmail.com paid \n", | |
"3 #1014 ginger.Rogers@gmail.com paid \n", | |
"4 #1013 takashi.Miike@gmail.com paid \n", | |
"\n", | |
" Paid at Fulfillment Status Fulfilled at \\\n", | |
"0 2016-01-29 16:06:01 -0500 fulfilled 2016-02-03 02:54:26 -0500 \n", | |
"1 2016-01-29 15:31:15 -0500 fulfilled 2016-02-02 15:58:06 -0500 \n", | |
"2 2016-01-29 14:55:48 -0500 fulfilled 2016-02-01 18:03:12 -0500 \n", | |
"3 NaN fulfilled 2016-01-19 13:21:23 -0500 \n", | |
"4 2015-12-17 15:54:02 -0500 fulfilled 2016-01-07 15:21:15 -0500 \n", | |
"\n", | |
" Accepts Marketing Currency Subtotal Shipping ... Tax 1 Name \\\n", | |
"0 no CAD 35 5 ... GST 5% \n", | |
"1 no CAD 40 5 ... GST 5% \n", | |
"2 no CAD 40 5 ... GST 5% \n", | |
"3 no CAD 0 0 ... GST 5% \n", | |
"4 no CAD 35 5 ... GST 5% \n", | |
"\n", | |
" Tax 1 Value Tax 2 Name Tax 2 Value Tax 3 Name Tax 3 Value Tax 4 Name \\\n", | |
"0 1.75 QST 9.975% 3.49 NaN NaN NaN \n", | |
"1 1.75 QST 9.975% 3.49 NaN NaN NaN \n", | |
"2 1.75 QST 9.975% 3.49 NaN NaN NaN \n", | |
"3 1.75 QST 9.975% 3.49 NaN NaN NaN \n", | |
"4 1.75 QST 9.975% 3.49 NaN NaN NaN \n", | |
"\n", | |
" Tax 4 Value Tax 5 Name Tax 5 Value \n", | |
"0 NaN NaN NaN \n", | |
"1 NaN NaN NaN \n", | |
"2 NaN NaN NaN \n", | |
"3 NaN NaN NaN \n", | |
"4 NaN NaN NaN \n", | |
"\n", | |
"[5 rows x 66 columns]" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Let'S look at the first 5 rows of the dataframe to have an idea what it looks like\n", | |
"orders.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index([u'Name', u'Email', u'Financial Status', u'Paid at',\n", | |
" u'Fulfillment Status', u'Fulfilled at', u'Accepts Marketing',\n", | |
" u'Currency', u'Subtotal', u'Shipping', u'Taxes', u'Total',\n", | |
" u'Discount Code', u'Discount Amount', u'Shipping Method', u'Created at',\n", | |
" u'Lineitem quantity', u'Lineitem name', u'Lineitem price',\n", | |
" u'Lineitem compare at price', u'Lineitem sku',\n", | |
" u'Lineitem requires shipping', u'Lineitem taxable',\n", | |
" u'Lineitem fulfillment status', u'Billing Name', u'Billing Street',\n", | |
" u'Billing Address1', u'Billing Address2', u'Billing Company',\n", | |
" u'Billing City', u'Billing Zip', u'Billing Province',\n", | |
" u'Billing Country', u'Billing Phone', u'Shipping Name',\n", | |
" u'Shipping Street', u'Shipping Address1', u'Shipping Address2',\n", | |
" u'Shipping Company', u'Shipping City', u'Shipping Zip',\n", | |
" u'Shipping Province', u'Shipping Country', u'Shipping Phone', u'Notes',\n", | |
" u'Note Attributes', u'Cancelled at', u'Payment Method',\n", | |
" u'Payment Reference', u'Refunded Amount', u'Vendor', u'Id', u'Tags',\n", | |
" u'Risk Level', u'Source', u'Lineitem discount', u'Tax 1 Name',\n", | |
" u'Tax 1 Value', u'Tax 2 Name', u'Tax 2 Value', u'Tax 3 Name',\n", | |
" u'Tax 3 Value', u'Tax 4 Name', u'Tax 4 Value', u'Tax 5 Name',\n", | |
" u'Tax 5 Value'],\n", | |
" dtype='object')" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Let's look at all the columns\n", | |
"orders.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 p is for momentum - 6-12 months\n", | |
"1 Reciprocate - L / Eggplant / Men\n", | |
"2 Above Average - XL / Sea foam / Men\n", | |
"3 Test\n", | |
"4 Shine on you crazy diamond - Women / Tri-Cranb...\n", | |
"5 Ailleurs - Men / Truffle / S\n", | |
"6 Special request\n", | |
"7 Special request\n", | |
"8 Ailleurs - Men / Eggplant / M\n", | |
"9 Shine on you crazy diamond - M / Slate / Men\n", | |
"10 Ailleurs - Women / Eggplant / M\n", | |
"11 Shine on you crazy diamond - M / Cream / Men\n", | |
"12 Shine on you crazy diamond - M / Slate / Men\n", | |
"13 Quark & Laser - Medium / Sea Foam / Men\n", | |
"14 Shine on you crazy diamond - M / Sea foam / Men\n", | |
"15 Shine on you crazy diamond - S / Mint / Women\n", | |
"16 Reciprocate - S / Eggplant / Women\n", | |
"17 Mainstream - Large / Asphalt\n", | |
"18 Above Average - M / Sea foam / Men\n", | |
"19 Quark & Laser - Medium / Cream\n", | |
"20 Shine on you crazy diamond Bravais lattice - S...\n", | |
"21 Standing on the shoulders of giants - L / Crea...\n", | |
"Name: Lineitem name, dtype: object" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Let's look at the data of the column 'Lineitem name'\n", | |
"orders['Lineitem name']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### At this point, I know I want\n", | |
"\n", | |
" **1 - A function that takes in the text of `Lineitem name` and returns a list of variants (if any)**\n", | |
" \n", | |
" **2 - A function that takes in a list of variants and returns the color (if there is one)**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Let's define the first function\n", | |
"def get_variants(lineitem):\n", | |
" try:\n", | |
" return [variant.strip() \n", | |
" for variant \n", | |
" in lineitem.split('-', 1)[1].split('/')]\n", | |
" except:\n", | |
" return []" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 [6-12 months]\n", | |
"1 [L, Eggplant, Men]\n", | |
"2 [XL, Sea foam, Men]\n", | |
"3 []\n", | |
"4 [Women, Tri-Cranberry, S]\n", | |
"5 [Men, Truffle, S]\n", | |
"6 []\n", | |
"7 []\n", | |
"8 [Men, Eggplant, M]\n", | |
"9 [M, Slate, Men]\n", | |
"10 [Women, Eggplant, M]\n", | |
"11 [M, Cream, Men]\n", | |
"12 [M, Slate, Men]\n", | |
"13 [Medium, Sea Foam, Men]\n", | |
"14 [M, Sea foam, Men]\n", | |
"15 [S, Mint, Women]\n", | |
"16 [S, Eggplant, Women]\n", | |
"17 [Large, Asphalt]\n", | |
"18 [M, Sea foam, Men]\n", | |
"19 [Medium, Cream]\n", | |
"20 [S, Sea foam, Men]\n", | |
"21 [L, Cream, Women]\n", | |
"Name: Lineitem name, dtype: object" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Let's apply (map) it to every lineitem and make sure it works as expected\n", | |
"orders['Lineitem name'].map(get_variants)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Let's define the 2nd function\n", | |
"def get_color(variants):\n", | |
" sizes_and_models = ['6-12 months', 'Small', \n", | |
" 'Medium', 'Large', 'S', \n", | |
" 'M', 'L', 'XL', '2XL', \n", | |
" 'Men', 'Women']\n", | |
" color = [variant.lower() \n", | |
" for variant in variants\n", | |
" if variant not in sizes_and_models]\n", | |
" try:\n", | |
" return color[0]\n", | |
" except:\n", | |
" return" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 None\n", | |
"1 eggplant\n", | |
"2 sea foam\n", | |
"3 None\n", | |
"4 tri-cranberry\n", | |
"5 truffle\n", | |
"6 None\n", | |
"7 None\n", | |
"8 eggplant\n", | |
"9 slate\n", | |
"10 eggplant\n", | |
"11 cream\n", | |
"12 slate\n", | |
"13 sea foam\n", | |
"14 sea foam\n", | |
"15 mint\n", | |
"16 eggplant\n", | |
"17 asphalt\n", | |
"18 sea foam\n", | |
"19 cream\n", | |
"20 sea foam\n", | |
"21 cream\n", | |
"Name: Lineitem name, dtype: object" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Let's apply (map) it to every variant list and make sure it works as expected\n", | |
"orders['Lineitem name'].map(get_variants)\\\n", | |
" .map(get_color)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"sea foam 5\n", | |
"eggplant 4\n", | |
"cream 3\n", | |
"slate 2\n", | |
"mint 1\n", | |
"truffle 1\n", | |
"asphalt 1\n", | |
"tri-cranberry 1\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Let's count these up to get he most popular colors\n", | |
"orders['Lineitem name'].map(get_variants)\\\n", | |
" .map(get_color)\\\n", | |
" .value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.axes._subplots.AxesSubplot at 0x10b2611d0>" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAE5CAYAAAC5wmq6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGsBJREFUeJzt3XmUZWV57/FvdzUlg4VKgnZaMSpGxRFQCRGN5YSzXhPr\nMSbGOEQjmivGIWhuDG1yjaBiHBL1CkrEiNeHeBXRoKixiIpRmSICuq6KA7Z95RqQRpm6qfyx9+k+\nXV3Dbqlz9lP6/axVq2vvs+vsZ1X1+Z33vPvd77tmbm4OSVJda/suQJK0NINakoozqCWpOINakooz\nqCWpOINakopb1+WgiPgOcDWwDbgxMw8bZVGSpB26tqjngOnMPKSPkI6I6XGf8+dhnSvLOleWda6c\ncde4O10fa0ZWxfKmezz37pjuu4COpvsuoKPpvgvoaLrvAjqa7ruAjqb7LqCD6XGebHda1J+OiHMj\n4nmjLEiStLOuQX1EZh4CPBZ4UUQ8ZIQ1SZKGrNnduT4i4ljgmsw8od2eZuhjQGYeu4L1SdIvjYh4\nzdDmbGbOQoegjoi9gYnM3BIR+wBnAa/JzLMW+ZG5TZs2dSpq4luXcsNxx3Q6tqvJVx7PtgMPWtHn\n7GpqaootW7b0cu7dYZ0ryzpX1mqocxQ1btiwARa5FthleN7tgA9HxOD49y8R0pKkFbZsUGfmZcDB\nY6hFkrQA70yUpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIM\nakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkq\nzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqzqCWpOIMakkqbl2X\ngyJiAjgXuDwznzjakiRJw7q2qI8GLgHmRliLJGkBywZ1RNwBeBxwErBm5BVJknbSpUX9d8ArgJtG\nXIskaQFLBnVEPAH4UWZegK1pSerFchcTHwQ8KSIeB+wJ7BsRp2TmMwcHRMQ0MD3YzkympqY6nfz6\niU7XMnfLxMQ69u54/q5u3LyJm67YvPxxa9cweVO3bvy1+69nj/Ubbm5pP5fJycnOf6M+WefKss6V\nM6oaI2Lj0OZsZs4CrJmb6xYsEfFQ4OUdRn3Mbdq0qdNzTnzrUm447phOx3Y1+crj2XbgQSv6nKul\nzq6mpqbYsmVLL+feHda5sqxz5Yyixg0bNsAiPRe7O47aUR+SNGad+x4y82zg7BHWIklagHcmSlJx\nBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUk\nFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQ\nS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1Jx65Y7ICL2BM4GbgFMAqdn5qtG\nXZgkqbFsizozrwMelpkHA/cFHhYRDx55ZZIkoGPXR2b+rP12EpgA/nNkFUmSdrJs1wdARKwFzgcO\nBN6RmZeMtCpJ0nZdW9Q3tV0fdwB+OyKmR1qVJGm7Ti3qgcz8SUR8HHgAMAvQhvb00DFMTU11er7r\nJ3br9J1MTKxj747n72q11Hnj5k3cdMXm5Y9bu4bJm+Y6Pefa/dezx/oNN7e0n8vk5GTn/0t9ss6V\ntRrqHFWNEbFxaHM2M2eh26iPXwW2ZuZVEbEX8CjgNYPH2yeaHfqRY7ds2dKpqIltWzsdtzu2bdtK\n1/N3tWrq3Hw5Nxx3zIo+5+Qrj+e6ffp50UxNTa3472gUrHNlrYY6R1Hj1NQUmblxoce6NBV/DXhv\n20+9FnhfZn5mBeuTJC1h2aDOzIuAQ8dQiyRpAd6ZKEnFGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxB\nLUnFGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxBLUnF\nGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxBLUnFGdSSVJxBLUnFGdSS\nVJxBLUnFGdSSVJxBLUnFGdSSVNy65Q6IiAOAU4DbAnPAuzLzraMuTJLU6NKivhH4s8y8F3A48KKI\nOGi0ZUmSBpYN6szcnJkXtt9fA1wKbBh1YZKkxm71UUfEnYBDgC+NpBpJ0i46B3VE3BL4Z+DotmUt\nSRqDZS8mAkTEHsCHgH/KzI/Me2wamB5sZyZTU1OdTn79RKfT75aJiXXs3fH8XVnnytZ54+ZN3HTF\n5uWPW7uGyZvmlj1u7f7r2WN9f71xk5OTnf/P98k6V86oaoyIjUObs5k5C91GfawB3g1ckplvnv94\n+0SzQ7uO3bJlS6eiJrZt7XTc7ti2bStdz9+Vda5wnZsv54bjjlmx55t85fFct09/L+ypqakV/x2N\ngnWunFHUODU1RWZuXOixLk2wI4BnAF+NiAvafa/KzE+sUH2SpCUsG9SZ+Xm8MUaSemMAS1JxBrUk\nFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQ\nS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1Jx\nBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFWdQS1JxBrUkFbduuQMi4j3A44EfZeZ9Rl+S\nJGlYlxb1ycBjRl2IJGlhywZ1Zn4OuHIMtUiSFmAftSQVZ1BLUnHLXkxcTkRMA9OD7cxkamqq089e\nP3GzT7+LiYl17N3x/F1ZZ+06R1HjjZs3cdMVm7sdu3YNkzfNLXvc2v3Xs8f6DTe3tJ3PbZ03t7Sd\nz92xzq41wu7VGREbhzZnM3MWViCo2yeaHdp17JYtWzr97MS2rTf39LvYtm0rXc/flXXWrnMkNW6+\nnBuOO2ZFn3Pylcdz3T4r+4Zinb84dU5NTZGZGxd6bNmuj4j4AHAOcLeI+H5EPHt3C5Uk/fyWbVFn\n5tPHUYgkaWFeTJSk4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak\n4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxq\nSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4tYtd0BE\nPAZ4MzABnJSZx4+8KknSdku2qCNiAvh74DHAPYGnR8RB4yhMktRYruvjMOCbmfmdzLwR+N/Ak0df\nliRpYLmgvj3w/aHty9t9kqQxWS6o58ZShSRpUctdTPwBcMDQ9gE0rertImIamB5sZyYbNmzodvYN\nG+Ah53Y7tk/WubJWQ52roUawzpXWc50RsXFoczYzZwGYm5tb9GtmZmbdzMzMt2ZmZu40MzMzOTMz\nc+HMzMxBS/3MKL5mZmY2jvuc1tn/l3VaZ9Wvcde4ZNdHZm4F/hT4JHAJ8MHMvHQ07yWSpIUsO446\nM88EzhxDLZKkBayWOxNn+y6go9m+C+hotu8COprtu4COZvsuoKPZvgvoaLbvAjqYHefJ1szNObBD\nkipbLS1qSfqlZVBLUnEGtSQVVzaoI+I2EXG/iDh08NV3TfNFxF267OtbRKyLiCdHxNER8bL266V9\n17VaRcTRXfZVERF7913DUiJilxk5F9r3y2zZ4Xl9iIi/AZ4FfBu4aeihh/VS0OI+BBwyb99pwP17\nqGUpZwDXAhex8++zlIjYB3gpcMfMfF5E/AZw98z8WM+lzfcs4C3z9j17gX29iogHAScBU8ABEXEw\n8PzMfGG/le3iSOCYefset8C+3kTEm4B3Z+bFfZy/ZFADTwMOzMwb+i5kIe1Ur/cEbhURvwOsoZkX\nZV9gzz5rW8TtM/O+fRfRwcnAecCD2u1NwD8DJYI6Ip4O/D5w54g4Y+ihKeDH/VS1pDfTTFF8OkBm\nXhgRD+23pB0i4ijghcCBEXHR0ENTwBf6qWpRlwLviog9gPcAH8jMn4zr5FW7Pi4GbtN3EUu4G/BE\n4Fbtv09o/z0UeF6PdS3mrIh4dN9FdHBguzDFDQCZ+dOe65nvHOAE4OvAG9vvTwBeBpT8/Wbm9+bt\n2tpLIQs7leZ181F2vIaeCNw/M/+gz8Lmy8wTM/MI4JnAnYCLIuLUiBjLp/yqLeq/BS6IiK8B17f7\n5jLzST3WtF1mng6cHhEPysxz+q6ng3OAD0fEWuDGdt9cZu7bY00LuT4i9hpsRMSB7Pj79y4zvwt8\nFzi871o6+l5EHAEQEZPAi2lahlVMAFcDL2LeTJ0RsV9m/mcvVS2iXUjlHsBBwBXAfwAvjYgXZObT\nRnnuqkF9CnAc8DV29KlWvDPnmxHxP2jeYQe/y7nMfE5/JS3oTTTh8rXMLNtHDWwEPgHcISJOBY6g\n6Q8uJSJ+l+b/5+1our2g5hvfUTT95renmQnzLJpQrOJ8ln5d33lchSwnIv6OprX/r8BrM/PL7UPH\nR8Q3Rn3+qkF9TWa+te8iOjgd+DfgU9R+Q/kecHHxkCYzz4qI89nRYj06M6/os6ZFvB54QvUJytrf\n3e/3XcdiMvNOfdfQRUSsAa4E7rdId9xvjrqGqkH9uYh4HU3f1faPvpl5fn8lLWivzCxzZXoJlwGf\njYgzaft/aVqAb+qxpl1ExGcy8xEMXTwc2lfJ5sohHRFvW+Lhucx88diK6SgibgP8BkMX4zPz3/qr\naBeRmX+90AOZedWoT141qA+laZnO7wusNjzvYxHx+Mz8eN+FLOOy9muy/RqMUimh7ZfeG9g/IvYb\nemhfai79dm5EfBD4CDu/8f2fHmsattjM96X+7gMR8Tya/vMDgAtoXvdfBB7eZ10DmTkXEedFxGFD\nXR5jVTKoM3O67xo6egnwFxFxA4Uv0mXmxr5rWMafAEcDG2iG5w1sAf6+l4qWdiuacelHzttfJagf\nmZl/GBEvycw3911MB0cDDwS+mJkPi4h7AK/ruab5DgeeERHfBQbdH3PjGvZadva8iHgCzVjl4Y9C\nC3700NIi4rbAn9P8PgejKuYys0SLZSAiXrxKrk2UFhGXAI+kuTA7Pf/xgqMpzs3MB0TEhcDhmXld\nRFySmffsuzbY3kf9EJprPTvJzO+Mo4aSLeqI+F80gfJw4ERgBvhSr0UtYhX0rQG8H/ggzVjVP6EZ\nSVHuIl1mvjUi7s2ub9Cn9FfVDhFxTGYev0gfcKW+33cCnwHuws6fUKDp+qg2zcH329fRR4BPRcSV\nwHf6LWkXb8/Me/d18pJBDTwoM+8TEV/NzNdExAk0rYNSqvetDfmVzDypbbGeDZwdEeVWGm0X9nwo\ncC/g48Bjgc/TDNes4JL23/Mo2Nc75GPtm947MvOovotZTmY+pf12Y0TM0lybKPN6t496cde2//4s\nIm5Pc3vu+h7rWcxq6FuDHRe8NrddSpuoeefnU4H7Aedn5rMj4nY0nwZKyMzBbeMXA3/BzuPnAd47\n7poWMZhv5u59F9JVezPJ7Wjm91lD83rfpauhR732UVcN6jPaj0JvYMeg+BP7LWlB12XmtRFBROyZ\nmV+PiIovjtdGxK1pbnV+G02L5c/6LWlB12bmtojYGhG3An5E82mlmvcDL2fnG7IqmWhvxLpbO0vi\nmqHHKg7L/O/AsTR/721DD92nn4oW1OsUASWDOjP/pv32QxHxMWDPcU6AshtWQ9/acEvwKha4uFTI\nV9rf54k0Q8x+SnP7ezVXZOZH+y5iCb8H/DeaW7SnhvaXHJ5HM3rq7plZcWIroLloGBEPAe6amSdH\nxP7ALcd1/pKjPtp5CY4CfrvdNQu8MzNvXPSHehYR07R9a9Vm/Wtb+W8H1mfmvSLivsCTMvN/9lza\noiLizsC+mfkffdcyX0QcSTPD46epOY4agIh4bGae2Xcdy4mIzwJHFn99b6TtTsrMu7VdstlO1DRy\nJVvUwDtoavsHmlbAH7b7/rjPogbm3ZQx8NX231sCpYY/0bRQX0EzGgCaeak/AJQI6oi4Pwu39NZE\nxKEF70j9I5r+33Xs3PVRKqiBwyLisKHtOagzzDUiXtZ++21gtv30XPXO2afQzD1/HkBm/iAippb+\nkZVTNagfOK+T/jMR8dVFjx6/pSaTqTj8ae/M/FJEANuvYldqvZzA0h/Jq92R+gDgHplZ7+Pozn7K\njt/rXjTDMy9Z/PCxm6Kp73vA9yl652zr+sy8afAaahe5GJuqQb01Iu6amd+E7dNdlplHd7VMJjPk\nioi462AjIp4K/LDHenYyuBM1mlfBJzLz6oj4K5oWTIlW/zzn0Iz17mW1j64y843D2xHxBpoZ9EqY\nf8dsewF5LjOv7qeiJZ3W3t9x64h4PvAcmtVzxqJqUL8C+NeIuKzdvhPNUkeltHcs/Q7wYJqPwJ/P\nzA/3W9WC/hR4F3D3iNhEM+9HqYnZW6/OzIyIB9OMRX8jTd/6yGcn202/BVzY/v8cni+9+io6+1Bw\n7pSIeCDNqin7tttXAc/NzDJj/TPzDe21iS00C4e8OjM/Na7zlwrqiJjJzNNoguRu7BgH+o3MvK6/\nyhb1duBAmv7eNcALIuJRldaka8enHpWZj4iIWwJri7ZYYMfQrCcAJ2bmx6JZP7Oax/RdQBfRLLwx\n6EJYC9wWKNE/Pc97gBdm5ucA2jfq9wDV3vguoulCmmu/H5tSQU1zE8FpwIcy8xCaFRQqexhwz8E8\nzxHxj9TqA6Qdl/zgiFiTmdf0Xc8yfhAR7wIeBRwXEXtScLm4cc3vsAIez44x1FuB/1d0ZMXWQUgD\nZObnI6JMVydARPwx8FfAZ9tdb4uIv87Md4/j/NWC+scR8Sl2XTwUCi3FNeSbwB3ZMXb6ju2+ai6k\nWTrsNOBn7b5yw8mAoGmtviEzr4qIX6PpBtNuioh1wCcz8x5919LB2W3/7wfa7ae1+w6FMvPQ/zlw\nyGCsd0T8Cs10Eb+UQf04mrmo30fTP7nTHVW9VLS0fYFLI+LLNPUdRnPTxhnUemO5Bc1t+MNzkMxR\nbDhZu3rGh4a2f0ihi56rSWZujYhvRMSvZ7PWY2UH0/x/PLbdHoz6OLjdrjDq5/8Dw59Ir2n3jUWp\noG5vFPn3iDgiM3/Udz0d/NUSj1V6Y5mgWdbqStg+DvyEfkvSGOwHXNw2JIbnp6jSgABqzz8/NNb7\nm8CXIuIj7faT2XHvxMiVCuqBVRLSZOZs3zV0dN9BSEMzH3FEHNJnQRqLW7BzPzU06z2WEhFHAyfT\njKg4iWZY5qsy85O9FtYYjPX+Fs2NOYMG2OmMsTFWMqhXi4jYssDunwBfAV6Wmd8ec0mLWRMR+w0m\njG9b1BM916TR26Od1na7aJY9q+a5mfmWiHg0zaeAZ9J0f/Ye1FVWRzKob5630NxRNbgI8ns0w/Uu\noBleNN1PWbs4AfhiRCRN62oGeG2/JWlUIuIo4IXAgRExPIxsCvhCP1UtadDifzzwvsz82uAOwCra\n+XJezs5T245tlaSSQd2+6z+XZgL5wUofc5n5nP6qWtCT5t3k8K6IuDAzj4mIV/VW1TyZeUpEnEdz\nMXEOeEpmlhpGqBV1KnAmcBxwDDuCcEvRGerOi4izaKZeeGVE7Eu96WNPo5lv6CR2jPf/pe/6eB9w\nKc0csK8BntFuV/OziHgazR8RmonvBzfmVLqYSGZeTPFbnrUy2imBf0LzCW81eA7NCI9JmnlU9qfO\nIgwDN2bmO/o6edWgvmtmPjUinpyZ742IU2mWZKrmD2i6P/6h3f53mlUg9qK5bVvS8p5Ls6TdHWjG\n/A+WtKu00PEZEfEimiGtg2kDxrZQcNWgHkx1+JOIuA+wmeZdtpTM/BbN7c4LqfjGIlW0Gpa0exbN\np+SXz9t/53GcvGpQn9iOTPhL4KM0czy/ut+SdhXNatTzuziuBr6Smaf3UJK0GpVf0q7vGTNLrvCy\nWkTEiTQTR51Gc8Hmd2kmlNoP+HZmvqTH8qRVISI+TNNPfTTwCOBKYF1mPq7XwuaJiHvTTG87GOBA\nZp4yjnOXbFFHxHqa4WO3z8zHRMQ9gd8a1wQou+G+wBGZuRUgIt5O0+XxYMY8u5a0WmXmU9pvN0bE\nLO2Sdv1VtKt2Ka6H0oxE+zjwWJrX+liCutzMZK1/pJngfEO7/X+puWr2rdl5gctbAvu1wV1xWlap\ntMyczcyPVlt3lGZE1yOBH2bms4H70bz+x6JqUP9qZn6QdrxiOzVjqWkPW68HLoiIk9spTi8A3tAu\n0/PpXiuTtJKuzcxtNKtP3Qr4EXDAuE5esusDuKadRhCAiDicZlxoNScDN9K09jfSzP61vp0Fzuk5\npV8cX4mI29AsFH0uzSRX54zr5FVb1C8DzgDuEhHn0NwA8+J+S1rQYJmovdpRHlvYMaZa0i+Adsm9\n4zLzysx8J3Ak8EdtF8hYlAzqzDyPpuP+COD5NKuoVFzt5Tcz80XAtbB98Pse/ZYkaQT+ZfBNZl42\n7jwqGdTtatR7ZebXgKcAHxys9lDMDe2ahABExP7Um6NA0s2QmXM085Ec1lcNVfuoh1ejfgTNai/v\npFlBpZK3AR8GbhsRf0tzZfgv+y1J0ggcTjM9xHfZeRGGsSzAW7JFzQKrUVOwSyEz/4lmdrLXAZuA\nJ2dm9luVpBF4NM0Uxg8Hnth+jW2lnKot6lWxGjVAZl5KzZn9JK2c9cAlmXk1QDsV60HsWNh6pEqG\nH81q1J8EjszMq4Db4HA3Sf15JzsvbvvTdt9YONeHJC2jXRDk4Hn7vjquPuqqXR+SVMllEfFimlVe\n1gBH0Sx2OxZVuz4kqZIX0NzX8QPgcppRIM8f18nt+pCk4mxRS9JuiIjzx31Og1qSds+a5Q9ZWQa1\nJO2ef1n+kJVlH7UkFefwPElaRER8ITOPiIhr2HUh67nM3HccddiilqTi7KOWpCVExLqI+HqfNRjU\nkrSEdrHqb0TEr/dVg33UkrS8/YCLI+LL7Dwf9VimOjWoJWl5twAez85jqF8/rpMb1JK0vD0y8+zh\nHRGx17hOblBL0iIi4ijghcCBEXHR0ENTwBfGVYdBLUmLOxU4EziOZtm9QdfHlsz88biKcBy1JBXn\n8DxJKs6glqTiDGpJKs6glqTiDGpJKu6/APiTWlMvGOdKAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10b0c91d0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# We can even visualize the counts as a bar graph\n", | |
"orders['Lineitem name'].map(get_variants)\\\n", | |
" .map(get_color)\\\n", | |
" .value_counts()\\\n", | |
" .plot(kind=\"bar\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"## Q2 -Do people drop out of the checkout funnel at the shipping page?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The main reason why people abandon they online carts is because shipping fees are too high. Other shipping turn-off include not offering shipping to the shopper's country. A good number to diagnose those kinds of problems is the drop-off rate during checkout at the step where customers calculate the shipping rates.\n", | |
"\n", | |
"This is actually easier to do in Google Analytics, but I want to show how to parse unruly CSV and a simple formula for the drop-off rate." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# These are the steps I care about for the purchase funnel \n", | |
"funnel_steps = ['Add to cart',\n", | |
" 'Contact info',\n", | |
" 'Shipping info',\n", | |
" 'Payment info',\n", | |
" 'Place an order']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Let's parse the unruly CSV we downloaded from Google Analytics. \n", | |
"# We don't care about most numbers in there, I just want to get the essential\n", | |
"funnel_data = dict()\n", | |
"with open('data/Goal_Funnel.csv') as f:\n", | |
" for line in f:\n", | |
" cells = line.split(',')\n", | |
" try:\n", | |
" if cells[3] in funnel_steps:\n", | |
" funnel_data[cells[3]] = int(cells[4].strip())\n", | |
" except:\n", | |
" pass\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{u'Add to cart': 100,\n", | |
" u'Contact info': 40,\n", | |
" u'Payment info': 13,\n", | |
" u'Place an order': 12,\n", | |
" u'Shipping info': 35}" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# This dictionary contains the number of people who reached each page\n", | |
"funnel_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"62.8571428571 %\n" | |
] | |
} | |
], | |
"source": [ | |
"# Let's compute the abandonment rate, or drop-off rate, at the shipping step\n", | |
"shipping_abandonment_rate = \\\n", | |
" (funnel_data['Shipping info'] - funnel_data['Payment info'])\\\n", | |
" / funnel_data['Shipping info'] \\\n", | |
" * 100.\n", | |
" \n", | |
"print shipping_abandonment_rate, \"%\"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"That is enormous! Next step would be to figure out how I can offer free shipping." | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment