Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created July 14, 2021 15:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/f696204e7c3ff9cf5440071b6522d37c to your computer and use it in GitHub Desktop.
Save codecademydev/f696204e7c3ff9cf5440071b6522d37c to your computer and use it in GitHub Desktop.
Codecademy export
import codecademylib
import pandas as pd
# Load the four funnel-stage tables in funnel order. parse_dates=[1] asks
# pandas to parse the column at position 1 of each file as datetimes.
visits, cart, checkout, purchase = [
    pd.read_csv(csv_name, parse_dates=[1])
    for csv_name in ('visits.csv', 'cart.csv', 'checkout.csv', 'purchase.csv')
]
# print(visits.head(), cart.head(), checkout.head(), purchase.head())
# Calculate percentage of not carting
# vi_cart = pd.merge(visits, cart, how='left')
# null_cart = vi_cart[vi_cart.cart_time.isnull()]
# notcart_percentage = (float(len(null_cart))/len(vi_cart))*100
# print(notcart_percentage)
# # print(vi_cart)
# # Calculate percentage of not checking out
# cart_checkout = pd.merge(cart, checkout, how='left')
# null_checkout = cart_checkout[cart_checkout.checkout_time.isnull()]
# notcheck_percentage = (float(len(null_checkout))/len(cart_checkout))*100
# print(notcheck_percentage)
# # Calculate percentage of not buying
# check_purchase = pd.merge(checkout, purchase, how='left')
# null_purchase = check_purchase[check_purchase.purchase_time.isnull()]
# not_buy_percentage = (float(len(null_purchase))/len(check_purchase))*100
# print(not_buy_percentage)
# Build one row per user covering every funnel stage.
# Each merge is a left join so users who dropped out at a later stage are kept
# with null timestamps for the stages they never reached. No `on=` is given,
# so pandas joins on the columns the two frames share (presumably just
# `user_id` -- verify against the CSV headers).
all_data = (
    visits
    .merge(cart, how='left')
    .merge(checkout, how='left')
    .merge(purchase, how='left')
    .reset_index(drop=True)
)
# Keep only the first row seen for each user.
all_data.drop_duplicates(subset='user_id', inplace=True)
# DataFrame.info() prints its summary itself and returns None; wrapping it in
# print() only added a stray "None" line to the output.
all_data.info()
# Calculate the drop-off percentage at each funnel step.
#
# Each step is a plain boolean filter on `all_data`. The earlier version
# left-merged one slice of `all_data` with another slice of the same frame;
# since both sides share every column, that join returned exactly the left
# slice's rows, so the filters below give the same counts without the merge.
# float() keeps the division a true division under Python 2 as well.

# Visitors who never added anything to a cart, as a share of all visitors.
null_cart = all_data[all_data.cart_time.isnull()]
null_cart_percentage = (float(len(null_cart)) / len(all_data)) * 100
print(null_cart_percentage)

# Users who carted but never checked out, as a share of users who carted.
cart_checkout = all_data[all_data.cart_time.notnull()]
null_checkout = cart_checkout[cart_checkout.checkout_time.isnull()]
null_checkout_percentage = (float(len(null_checkout)) / float(len(cart_checkout))) * 100
print(null_checkout_percentage)

# Users who checked out but never purchased, as a share of users who checked out.
check_purchase = all_data[all_data.checkout_time.notnull()]
null_purchase = check_purchase[check_purchase.purchase_time.isnull()]
null_purchase_percentage = (float(len(null_purchase)) / float(len(check_purchase))) * 100
print(null_purchase_percentage)
# Elapsed time from visit to purchase for each user, stored as a new column.
# Rows with a null purchase_time yield a null difference.
all_data['time_to_purchase'] = all_data['purchase_time'].sub(all_data['visit_time'])

# Average time to complete a purchase.
avg_time_to_purchase = all_data['time_to_purchase'].mean()
print(avg_time_to_purchase)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment