Skip to content

Instantly share code, notes, and snippets.

@JulianNorton
Last active July 12, 2018 16:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JulianNorton/2d49d0529500db56ba262fce0fb1acc3 to your computer and use it in GitHub Desktop.
Save JulianNorton/2d49d0529500db56ba262fce0fb1acc3 to your computer and use it in GitHub Desktop.
retention analysis by cohort
import csv
import sys

# data_alpha receives the raw rows of data2.csv (one list of strings per row,
# exactly as csv.reader yields them).  data_beta and org_first_seen start out
# empty; nothing in this loading step fills them.
data_alpha, data_beta, org_first_seen = [], [], []

# The csv module expects a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so choose the open() arguments for the interpreter
# that is actually running.
if sys.version_info[0] < 3:
    _csv_open_args = {'mode': 'rb'}
else:
    _csv_open_args = {'mode': 'r', 'newline': ''}

# The file is opened exactly once and inside `with`, so the handle is closed
# even if reading fails (a second, never-closed open() of the same file used
# to sit here and was not read by anything).
with open('data2.csv', **_csv_open_args) as csvfile_A:
    data_alpha.extend(csv.reader(csvfile_A))
def parse_data(data):
    """Reduce one raw CSV row to ``[order_month, order_status, org_id]``.

    Args:
        data: An indexable row of strings (e.g. one row from ``csv.reader``).
            Columns 1, 3 and 4 are read; they are treated as the ordered
            date, the order status and the org ID respectively.

    Returns:
        A new three-element list:
          * ``[0]`` - the first seven characters of the date column after
            leading whitespace is removed (``yyyy-mm`` when the column starts
            with a ``yyyy-mm-...`` date);
          * ``[1]`` - the status column, lower-cased, with surrounding
            whitespace removed;
          * ``[2]`` - the org ID column, unchanged.

    Raises:
        IndexError: If the row has fewer than five columns.
    """
    ordered_date, order_status, org_id = data[1], data[3], data[4]
    # Strip whitespace instead of blindly dropping the first character: the
    # result is the same for fields that start with a single space, and a
    # field without that space no longer loses a real character.
    order_month = ordered_date.lstrip()[:7]
    normalized_status = order_status.strip().lower()
    return [order_month, normalized_status, org_id]
def generate_clean_data(raw_data, clean_data):
    """Run ``parse_data`` over every raw row and return the results.

    Args:
        raw_data: Iterable of raw rows; each one is handed to ``parse_data``.
        clean_data: Accepted for the existing call signature but never read;
            whatever is passed in is ignored and left unmodified.

    Returns:
        A newly built list holding one parsed row per input row, in input
        order.
    """
    return [parse_data(row) for row in raw_data]
def add_to_cohort(data, existing_cohort, cohort_date):
    """Tell whether a parsed row's org belongs in (and is new to) a cohort.

    Args:
        data: Parsed row indexed as ``[order_date, order_status, org_id]``.
        existing_cohort: Collection of org IDs already in the cohort; it is
            only tested for membership, never modified.
        cohort_date: The order date a row must carry to count for the cohort.

    Returns:
        ``True`` when the row's date equals ``cohort_date``, its status is
        exactly ``'completed'`` and its org ID is not yet in
        ``existing_cohort``; ``False`` otherwise.
    """
    order_date, order_status, org_id = data[0], data[1], data[2]
    return bool(
        order_date == cohort_date
        and order_status == 'completed'
        and org_id not in existing_cohort
    )
# Replace data_beta with the parsed form of every raw row in data_alpha.
data_beta = generate_clean_data(raw_data=data_alpha, clean_data=data_beta)
def generate_cohort(data_beta, cohort, date):
    """Append to ``cohort`` the org ID of every row accepted for ``date``.

    A row is accepted when ``add_to_cohort`` returns a true value for it; the
    org ID is taken from index 2 of the row.

    Args:
        data_beta: Iterable of parsed rows
            (``[order_date, order_status, org_id]``).
        cohort: List of org IDs, extended in place.  IDs already present are
            not added again, and the order of first appearance is kept.
        date: Cohort date passed through to ``add_to_cohort``
            (e.g. ``'2018-04'``).

    Returns:
        None. The result is the in-place growth of ``cohort``.
    """
    # Set mirror of `cohort` so each "already a member?" check is O(1) rather
    # than a scan of the growing list.  Org IDs must therefore be hashable
    # (they are strings when the rows come from csv.reader).
    seen = set(cohort)
    for item in data_beta:
        if add_to_cohort(item, seen, date):
            org_id = item[2]
            cohort.append(org_id)
            seen.add(org_id)
# One list of org IDs per monthly cohort; generate_cohort fills each in place.
april_cohort = []
may_cohort = []
june_cohort = []

_monthly_cohorts = (
    (april_cohort, '2018-04'),
    (may_cohort, '2018-05'),
    (june_cohort, '2018-06'),
)

# Build all three cohorts first ...
for _cohort, _month in _monthly_cohorts:
    generate_cohort(data_beta, _cohort, _month)

# ... then print their sizes in April, May, June order.
for _cohort, _month in _monthly_cohorts:
    print(len(_cohort))
# Count the April-cohort orgs that also appear in the May cohort.  The May
# IDs are put in a set once so each lookup is O(1).
may_orgs = set(may_cohort)
returning_orgs_april_may = sum(1 for org in april_cohort if org in may_orgs)

print('Retention is')
if april_cohort:
    # The division has to happen inside the print call: written as
    # `print(a) / b`, Python 3 prints `a` and then fails dividing None by b.
    print(float(returning_orgs_april_may) / len(april_cohort))
else:
    # The ratio is undefined for an empty April cohort; report that instead
    # of crashing with ZeroDivisionError.
    print('n/a (April cohort is empty)')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment