Last active
September 5, 2017 02:27
-
-
Save iaindooley/8818465 to your computer and use it in GitHub Desktop.
Originally published along with a blog post explaining cohort analysis for eCommerce here: http://www.decalmarketing.com/page/The_simplest_explanation_of_cohort_analysis_for_determining_lifetime_customer_value_in_an_eCommerce_business
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**** | |
* This script presents an "algorithmic" way of doing a cohort analysis | |
* on your eCommerce customer database. | |
* | |
* This can be useful if you want to automate a lifetime value calculation | |
* based on some data available from a CSV file, database or API. | |
* | |
* The specifics of how your eCommerce system stores this information is | |
* abstracted out to a couple of objects with methods such as | |
* "firstSuccessfulOrder". | |
* | |
* For your reference, 2 abstract class definitions appear at the end | |
* of this file that describe the methods and return values required | |
* by the Person and Order class. | |
* | |
* Also, where variables represent these "abstraction points", I have noted | |
* it in the source code. That way, you can relatively simply implement | |
* this for your own eCommerce system by just creating objects that | |
* implement these methods. The bulk of the work here is done in | |
* "plain old procedural" PHP. Even though in a lot of cases I find it | |
* much simpler to work with objects (because keeping track of lots of | |
* arrays and indexes and whatnot can be a strain on the brain!) I wanted | |
* to just have this laid out as "purely" as possible so you can easily see | |
* the "algorithm" behind it. | |
* | |
* This code was first published on the Decal Marketing blog at: | |
* | |
* http://www.decalmarketing.com/blog/ | |
* | |
* and written by Iain Dooley in February 2014. | |
* | |
* You're welcome to re-use or redistribute this code however you see fit | |
* however I'd appreciate it if you could leave this message in-tact | |
* at the top. Any questions, please don't hesitate to email me: | |
* | |
* iain@workingsoftware.com.au | |
* | |
* Enjoy! | |
*/ | |
//First, place each customer into a cohort based on month of first purchase | |
$cohorts = array(); | |
//Here, we assume that we have an array of Person objects that has | |
//been generated by some method specific to your eCommerce system. | |
//Each Person object will have some methods, which have been noted | |
//in the comments below. In order to re-use the code below *verbatim* | |
//all you should need to do is implement the Person class and Order | |
//class as described below, then put them into an array called | |
//$people anywhere above the following line of code. | |
array_filter($people,function($person) use(&$cohorts) | |
{ | |
//The Person object needs to implement a method: | |
// | |
//Person::firstSuccessfulOrder() | |
// | |
//This method should return an object of class Order | |
//which has methods as described below | |
if(is_object($person->firstSuccessfulOrder())) | |
{ | |
//The return value of firstSuccessfulOrder | |
//is an object which has the method orderDate() on it, | |
//for example you might return an object of class | |
//Order which has a method orderDate(). The orderDate() | |
//method needs to return a date/time string in a format | |
//that can be correctly parsed by php.net/strtotime | |
// | |
//NB: we use a unix timestamp for the purchase month | |
//in the array index so we can sort based on | |
//array key | |
$purchase_month = strtotime(date('Y-m-01',strtotime($person->firstSuccessfulOrder()->orderDate()))); | |
if(!isset($cohorts[$purchase_month])) | |
$cohorts[$purchase_month] = array(); | |
$cohorts[$purchase_month][] = $person; | |
} | |
}); | |
//Get into descending order | |
krsort($cohorts); | |
//Now for each cohort create an array with one element per month since the | |
//month of first purchase, and find the average purchase in that month for | |
//the given cohort | |
$avg_month_purchase_by_cohort = array(); | |
$max_months = 0; | |
array_walk($cohorts,function($cohort,$purchase_month) use(&$max_months,&$avg_month_purchase_by_cohort) | |
{ | |
//Start by finding the number of months since first purchase | |
$first_purchase_month_start = date('Y-m-01 00:00:00',$purchase_month); | |
$date_format = 'Y-m-d H:i:s'; | |
$diff = date_diff(date_create_from_format($date_format, $first_purchase_month_start), date_create()); | |
//The date_diff months value here resets as the year ticks over, so need to | |
//add years * 12 to the month value in order to work for more than 1 year ago | |
$months = $diff->m + ($diff->y * 12); | |
//Now define our array to hold averages per month, noting | |
//that our months will start from 0 | |
$current_row = range(0,$months); | |
//If this is the oldest cohort, record that this is the maximum number of months we have | |
//history for. Later we will use this to "pad out" incomplete cohorts with averages | |
//based on past purchase data | |
if(count($current_row) > $max_months) | |
$max_months = count($current_row); | |
array_walk($current_row,function(&$average,$index) use($cohort) | |
{ | |
$current_total = 0; | |
foreach($cohort as $person) | |
//Find total purchases in this month number. | |
//The Person class will need to implement the | |
//method totalPurchasesInMonth() such that it | |
//accepts the number of months since first purchase | |
//as a sort of "offset" | |
$current_total += $person->totalPurchasesInMonth($index); | |
//Average is the total purchases in month $index | |
//divided by number of people in this cohort | |
$average = $current_total/count($cohort); | |
}); | |
//We're building a "matrix" where each row represents a cohort | |
//and each column represents the number of months since first purchase. | |
//Each "cell" is therefore the average amount spent by people in that | |
//cohort in the given month number. | |
$avg_month_purchase_by_cohort[date('Y-m',$purchase_month)] = $current_row; | |
}); | |
//Now we have a "matrix" but the right hand end is "jagged", that is, for those | |
//who first purchased 2 months ago, we don't have a value for any months | |
//beyond the 2nd month, so fill up any "empty" cells with the average for | |
//that month number across all cohorts for which we have data in that month number | |
array_walk($avg_month_purchase_by_cohort,function(&$months,$cohort) use($max_months,&$avg_month_purchase_by_cohort) | |
{ | |
//If there are months for which we have data that aren't recorded for this | |
//cohort | |
if(count($months) < $max_months) | |
{ | |
//Get a range of months for which we don't have data | |
$range = range(count($months),$max_months-1); | |
//Create a copy of our array so we can loop over it below | |
//without modifying the array pointer | |
$averages_orig = $avg_month_purchase_by_cohort; | |
//Now loop over all cohorts for each month number for which | |
//we don't have data, and enter in the average we have for | |
//all cohorts that do have sales data for this month number | |
array_filter($range,function($index) use($averages_orig,&$avg_month_purchase_by_cohort,$cohort) | |
{ | |
$cohort_totals = array(); | |
array_walk($averages_orig,function($months) use(&$cohort_totals,$index) | |
{ | |
if(isset($months[$index])) | |
$cohort_totals[] = $months[$index]; | |
}); | |
if(count($cohort_totals)) | |
$cohort_average_for_month = array_sum($cohort_totals)/count($cohort_totals); | |
else | |
$cohort_average_for_month = 0; | |
//This is the bit where we set the value for future months with | |
//the average based on past months | |
$avg_month_purchase_by_cohort[$cohort][$index] = $cohort_average_for_month; | |
}); | |
} | |
}); | |
//Format everything into a CSV file so we can easily open it in a spreadsheet application | |
//to make a pretty graph or sum totals etc. | |
$header = array('Cohort'); | |
array_walk(range(1,$max_months),function($i) use(&$header) | |
{ | |
$header[] = 'Month '.$i; | |
}); | |
$header[] = '# in Cohort'; | |
$out = array(); | |
$out[] = '"'.implode('","',$header).'"'; | |
array_walk($avg_month_purchase_by_cohort,function($months,$cohort) use(&$out,$cohorts) | |
{ | |
array_unshift($months,$cohort); | |
$months[] = count($cohorts[strtotime($cohort.'-01')]); | |
$out[] = '"'.implode('","',$months).'"'; | |
}); | |
print implode(PHP_EOL,$out); | |
/*** | |
* Below are interfaces for Person and Order, describing what | |
* methods need to be implemented, what arguments they accept | |
* and what the return values are | |
*/ | |
interface Person | |
{ | |
/** | |
* Returns an object of the class Order that represents | |
* the first time this person successfully completed an | |
* order | |
*/ | |
public function firstSuccessfulOrder(); | |
/** | |
* Accepts the "month since first purchase" as an | |
* argument and returns total amount purchased in | |
* that month by this Person. For example, if they | |
* first purchased in January, and the parameter | |
* passed in is 0, you would return the total | |
* spent in January. If the parameter was 1, | |
* you would return the total spent in February. | |
*/ | |
public function totalPurchasesInMonth($index); | |
} | |
interface Order | |
{ | |
/** | |
* Return a string that can be parsed by | |
* php.net/strtotime representing the date | |
* and time of this order | |
*/ | |
public function orderDate(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment