karamanbk

## g1_monthly_revenue.py
#converting the type of Invoice Date Field from string to datetime.
tx_data['InvoiceDate'] = pd.to_datetime(tx_data['InvoiceDate'])

#creating YearMonth field for the ease of reporting and visualization
tx_data['InvoiceYearMonth'] = tx_data['InvoiceDate'].map(lambda date: 100*date.year + date.month)

#calculate Revenue for each row and create a new dataframe with YearMonth - Revenue columns
tx_data['Revenue'] = tx_data['UnitPrice'] * tx_data['Quantity']
tx_revenue = tx_data.groupby(['InvoiceYearMonth'])['Revenue'].sum().reset_index()
tx_revenue

## g1_load_data.py
# import libraries
from datetime import datetime, timedelta
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from __future__ import division

import plotly.plotly as py

## g1_visual_revenue.py
#X and Y axis inputs for Plotly graph. We use Scatter for line graphs
plot_data = [
    go.Scatter(
        x=tx_revenue['InvoiceYearMonth'],
        y=tx_revenue['Revenue'],
    )
]

plot_layout = go.Layout(
        xaxis={"type": "category"},

## g1_m_revenue_growth.py
#using pct_change() function to see monthly percentage change
tx_revenue['MonthlyGrowth'] = tx_revenue['Revenue'].pct_change()

#showing first 5 rows
tx_revenue.head()

#visualization - line graph
plot_data = [
    go.Scatter(
        x=tx_revenue.query("InvoiceYearMonth < 201112")['InvoiceYearMonth'],

## g1_m_active_users.py
#creating a new dataframe with UK customers only
tx_uk = tx_data.query("Country=='United Kingdom'").reset_index(drop=True)

#creating monthly active customers dataframe by counting unique Customer IDs
tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index()

#print the dataframe
tx_monthly_active

#plotting the output

## g1_m_order.py
#create a new dataframe for no. of order by using quantity field
tx_monthly_sales = tx_uk.groupby('InvoiceYearMonth')['Quantity'].sum().reset_index()

#print the dataframe
tx_monthly_sales

#plot
plot_data = [
    go.Bar(
        x=tx_monthly_sales['InvoiceYearMonth'],

## g1_m_avg_rev.py
# create a new dataframe for average revenue by taking the mean of it
tx_monthly_order_avg = tx_uk.groupby('InvoiceYearMonth')['Revenue'].mean().reset_index()

#print the dataframe
tx_monthly_order_avg

#plot the bar chart
plot_data = [
    go.Bar(
        x=tx_monthly_order_avg['InvoiceYearMonth'],

## g1_new_user.py
#create a dataframe contaning CustomerID and first purchase date
tx_min_purchase = tx_uk.groupby('CustomerID').InvoiceDate.min().reset_index()
tx_min_purchase.columns = ['CustomerID','MinPurchaseDate']
tx_min_purchase['MinPurchaseYearMonth'] = tx_min_purchase['MinPurchaseDate'].map(lambda date: 100*date.year + date.month)

#merge first purchase date column to our main dataframe (tx_uk)
tx_uk = pd.merge(tx_uk, tx_min_purchase, on='CustomerID')

tx_uk.head()

## g1_new_user_ratio.py
#create a dataframe that shows new user ratio - we also need to drop NA values (first month new user ratio is 0)
tx_user_ratio = tx_uk.query("UserType == 'New'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()/tx_uk.query("UserType == 'Existing'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()
tx_user_ratio = tx_user_ratio.reset_index()
tx_user_ratio = tx_user_ratio.dropna()

#print the dafaframe
tx_user_ratio

#plot the result

## g1_m_retention.py
#identify which users are active by looking at their revenue per month
tx_user_purchase = tx_uk.groupby(['CustomerID','InvoiceYearMonth'])['Revenue'].sum().reset_index()

#create retention matrix with crosstab
tx_retention = pd.crosstab(tx_user_purchase['CustomerID'], tx_user_purchase['InvoiceYearMonth']).reset_index()

tx_retention.head()

#create an array of dictionary which keeps Retained & Total User count for each month
months = tx_retention.columns[2:]
	#converting the type of Invoice Date Field from string to datetime.
	tx_data['InvoiceDate'] = pd.to_datetime(tx_data['InvoiceDate'])

	#creating YearMonth field for the ease of reporting and visualization
	tx_data['InvoiceYearMonth'] = tx_data['InvoiceDate'].map(lambda date: 100*date.year + date.month)

	#calculate Revenue for each row and create a new dataframe with YearMonth - Revenue columns
	tx_data['Revenue'] = tx_data['UnitPrice'] * tx_data['Quantity']
	tx_revenue = tx_data.groupby(['InvoiceYearMonth'])['Revenue'].sum().reset_index()
	tx_revenue
	# import libraries
	from datetime import datetime, timedelta
	import pandas as pd
	%matplotlib inline
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division

	import plotly.plotly as py
	#X and Y axis inputs for Plotly graph. We use Scatter for line graphs
	plot_data = [
	go.Scatter(
	x=tx_revenue['InvoiceYearMonth'],
	y=tx_revenue['Revenue'],
	)
	]

	plot_layout = go.Layout(
	xaxis={"type": "category"},
	#using pct_change() function to see monthly percentage change
	tx_revenue['MonthlyGrowth'] = tx_revenue['Revenue'].pct_change()

	#showing first 5 rows
	tx_revenue.head()

	#visualization - line graph
	plot_data = [
	go.Scatter(
	x=tx_revenue.query("InvoiceYearMonth < 201112")['InvoiceYearMonth'],
	#creating a new dataframe with UK customers only
	tx_uk = tx_data.query("Country=='United Kingdom'").reset_index(drop=True)

	#creating monthly active customers dataframe by counting unique Customer IDs
	tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index()

	#print the dataframe
	tx_monthly_active

	#plotting the output
	#create a new dataframe for no. of order by using quantity field
	tx_monthly_sales = tx_uk.groupby('InvoiceYearMonth')['Quantity'].sum().reset_index()

	#print the dataframe
	tx_monthly_sales

	#plot
	plot_data = [
	go.Bar(
	x=tx_monthly_sales['InvoiceYearMonth'],
	# create a new dataframe for average revenue by taking the mean of it
	tx_monthly_order_avg = tx_uk.groupby('InvoiceYearMonth')['Revenue'].mean().reset_index()

	#print the dataframe
	tx_monthly_order_avg

	#plot the bar chart
	plot_data = [
	go.Bar(
	x=tx_monthly_order_avg['InvoiceYearMonth'],
	#create a dataframe contaning CustomerID and first purchase date
	tx_min_purchase = tx_uk.groupby('CustomerID').InvoiceDate.min().reset_index()
	tx_min_purchase.columns = ['CustomerID','MinPurchaseDate']
	tx_min_purchase['MinPurchaseYearMonth'] = tx_min_purchase['MinPurchaseDate'].map(lambda date: 100*date.year + date.month)

	#merge first purchase date column to our main dataframe (tx_uk)
	tx_uk = pd.merge(tx_uk, tx_min_purchase, on='CustomerID')

	tx_uk.head()
	#create a dataframe that shows new user ratio - we also need to drop NA values (first month new user ratio is 0)
	tx_user_ratio = tx_uk.query("UserType == 'New'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()/tx_uk.query("UserType == 'Existing'").groupby(['InvoiceYearMonth'])['CustomerID'].nunique()
	tx_user_ratio = tx_user_ratio.reset_index()
	tx_user_ratio = tx_user_ratio.dropna()

	#print the dafaframe
	tx_user_ratio

	#plot the result
	#identify which users are active by looking at their revenue per month
	tx_user_purchase = tx_uk.groupby(['CustomerID','InvoiceYearMonth'])['Revenue'].sum().reset_index()

	#create retention matrix with crosstab
	tx_retention = pd.crosstab(tx_user_purchase['CustomerID'], tx_user_purchase['InvoiceYearMonth']).reset_index()

	tx_retention.head()

	#create an array of dictionary which keeps Retained & Total User count for each month
	months = tx_retention.columns[2:]