# AloyASen/Customer_Prediction.py

## Customer_Prediction.py
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# This is a simulated test run on a pruned customer dataset; customer analytics is one branch of modelling an e-commerce business.
# The dataset analysed here is https://www.kaggle.com/pankajjsh06/ibm-watson-marketing-customer-value-data

# In[1]:


# `get_ipython` is only defined inside IPython/Jupyter. When this file is run
# with a plain Python interpreter the inline-backend magic is skipped instead
# of aborting the script with a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import matplotlib.pyplot as plt
import pandas as pd


# In[ ]:


# the minimal requirements have been added to the repository, I guess -- let's see what is in store


# In[2]:


from pathlib import Path

# The CSV is looked up in a `data` directory relative to the working directory.
root = Path('data')
df = pd.read_csv(root.joinpath('marketingCustomer.csv'))


# In[8]:


# The dataset is now held in the pandas DataFrame `df`.
# Next: check how many rows and columns it has.


# In[9]:


df.shape


# In[10]:


# Preview the first rows (displayed by the notebook; the value is discarded in a script).
df.head()


# In[ ]:


# Analytics on engaged customers.
#
# Goal: understand how different customers behave and react to different
# marketing strategies.
#
# --- starting with the overall engagement rates


# In[11]:


# Non-null counts of every column, split by the value of 'Response'.
df.groupby('Response').count()


# In[13]:


# Only the 'Customer' column is needed to count customers per response.
df.groupby('Response')['Customer'].count()


# In[ ]:


# Visualize the same counts in a bar plot.


# In[16]:


ax = df.groupby('Response')['Customer'].count().plot.bar(
    color='orchid',
    grid=True,
    figsize=(10, 7),
    title='Marketing Engagement',
)


# In[19]:


# Fraction of engaged and non-engaged customers among all rows.
df.groupby('Response')['Customer'].count() / len(df)


# In[23]:


# Chapter 2
#
# Engagement rates by the offer type presented to the customer.


# In[25]:


# Customers whose 'Response' is 'Yes', counted per renewal offer type.
byOfferTypeDF = (
    df[df['Response'] == 'Yes']
    .groupby('Renew Offer Type')['Customer']
    .count()
)


# In[28]:


# Divide by the number of customers per offer type to turn counts into rates.
erateByOfferType = byOfferTypeDF / df.groupby('Renew Offer Type')['Customer'].count()


# In[30]:


ax = (erateByOfferType * 100).plot.bar(
    figsize=(7, 7),
    color='dodgerblue',
    grid=True,
)
ax.set_ylabel('Engagement Rate %')
plt.show()


# In[ ]:


# Chapter 3
#
# Classification by offer type: how customers with different attributes
# respond to different marketing messages.


# In[10]:


# 'Yes' responders per (offer type, vehicle class), divided by the number of
# customers that share the same offer type.
byOfferTypeDFrame = (
    df[df['Response'] == 'Yes']
    .groupby(['Renew Offer Type', 'Vehicle Class'])['Customer']
    .count()
    / df.groupby('Renew Offer Type')['Customer'].count()
)


# In[11]:


byOfferTypeDFrame


# In[ ]:


# Make the previous output easier to read: unstack() pivots the inner index
# level (vehicle class) into columns, giving one row per offer type.


# In[12]:


# Combinations with no 'Yes' responder come out as NaN after the pivot; show them as 0.
byOfferTypeDFrame = byOfferTypeDFrame.unstack().fillna(0)

# Display the pivoted table.
byOfferTypeDFrame


# In[13]:


ax = (byOfferTypeDFrame * 100).plot.bar(figsize=(10, 7), grid=True)


# In[ ]:


# Engagement rates per sales channel.


# In[14]:


# 'Yes' responders per sales channel over all customers of that channel.
bySalesChannelDFrame = (
    df[df['Response'] == 'Yes'].groupby('Sales Channel')['Customer'].count()
    / df.groupby('Sales Channel')['Customer'].count()
)
bySalesChannelDFrame


# In[15]:


ax = (bySalesChannelDFrame * 100).plot.bar(
    figsize=(7, 7),
    color='palegreen',
    grid=True,
)
ax.set_ylabel('Engagement rate %')
plt.show()


# In[ ]:


# Author's observation from the previous chart: agents work better in terms of
# getting responses from customers.
#
# Break the result down further with one more customer attribute.


# In[17]:


# 'Yes' responders per (sales channel, vehicle size), divided by the number of
# customers reached through the same sales channel.
bySalesChannelDFrame = (
    df[df['Response'] == 'Yes']
    .groupby(['Sales Channel', 'Vehicle Size'])['Customer']
    .count()
    / df.groupby('Sales Channel')['Customer'].count()
)

# Pivot vehicle size into columns for a more readable table; missing combinations become 0.
bySalesChannelDFrame = bySalesChannelDFrame.unstack().fillna(0)
bySalesChannelDFrame


# In[18]:


ax = (bySalesChannelDFrame * 100).plot.bar(figsize=(10, 7), grid=True)
ax.set_ylabel('Engagement rate %')
plt.show()


# In[ ]:


# Author's observation: customers with medium-size vehicles respond best across
# all sales channels, whereas the other customers differ only slightly in their
# engagement rates across the different sales channels.


# In[ ]:


# Chapter 6
#
# Engagement rates by months since policy inception.


# In[4]:


# Per value of 'Months Since Policy Inception': 'Yes' responses and all responses.
yes_per_month = (
    df[df['Response'] == 'Yes']
    .groupby('Months Since Policy Inception')['Response']
    .count()
)
all_per_month = df.groupby('Months Since Policy Inception')['Response'].count()

# Percentage of 'Yes' responses per month value.
byMonthsSinceInceptionDF = yes_per_month / all_per_month * 100


# In[5]:


# Month values without any 'Yes' response are NaN after the division; treat them as 0 %.
byMonthsSinceInceptionDF = byMonthsSinceInceptionDF.fillna(0)

byMonthsSinceInceptionDF


# In[6]:


# The series was already NaN-filled above, so it can be plotted directly.
ax = byMonthsSinceInceptionDF.plot.line(
    figsize=(10, 7),
    title='Engagement rates by months since inception',
    grid=True,
    color='skyblue',
)

ax.set_xlabel('Months since policy inception')
ax.set_ylabel('Engagement rates in %')

plt.show()


# In[ ]:


# Chapter 7
#
# Customer segmentation by customer lifetime value (CLV) and months since
# policy inception.


# In[7]:


df['Customer Lifetime Value'].describe()


# In[8]:


# 'High' when the value is strictly above the column median, otherwise 'Low'.
# The median is computed once up front (the previous lambda recomputed it for
# every row) and the comparison is done on the whole column at once.
clv_median = df['Customer Lifetime Value'].median()
df['CLV Segment'] = df['Customer Lifetime Value'].gt(clv_median).map(
    {True: 'High', False: 'Low'})


# In[ ]:


# Do the same thing for months since policy inception.


# In[11]:


df['Months Since Policy Inception'].describe()


# In[12]:


policy_age_median = df['Months Since Policy Inception'].median()
df['Policy Age Segment'] = df['Months Since Policy Inception'].gt(policy_age_median).map(
    {True: 'High', False: 'Low'})
df.head()


# In[19]:


# One scatter layer per (CLV segment, policy-age segment) pair, in drawing
# order. Only the last layer carries the grid and figure-size options.
segment_layers = (
    ('High', 'High', 'red', {}),
    ('Low', 'High', 'blue', {}),
    ('High', 'Low', 'orange', {}),
    ('Low', 'Low', 'green', {'grid': True, 'figsize': (10, 7)}),
)

ax = None  # the first scatter call creates the axes; later calls draw onto them
for clv_level, age_level, marker_color, layer_options in segment_layers:
    in_segment = (df['CLV Segment'] == clv_level) & (df['Policy Age Segment'] == age_level)
    ax = df[in_segment].plot.scatter(
        ax=ax,
        x='Months Since Policy Inception',
        y='Customer Lifetime Value',
        logy=True,
        color=marker_color,
        **layer_options
    )

ax.set_ylabel('CLV (in log scale)')
ax.set_xlabel('Months Since Policy Inception')
ax.set_title('Segments by CLV and Policy Age')
plt.show()


# In[20]:


# 'Yes' responders per (CLV segment, policy-age segment) over all customers
# in the same pair of segments.
engagementRatesBySegmentDF = (
    df[df['Response'] == 'Yes']
    .groupby(['CLV Segment', 'Policy Age Segment'])['Customer']
    .count()
    / df.groupby(['CLV Segment', 'Policy Age Segment'])['Customer'].count()
)

engagementRatesBySegmentDF


# In[22]:


# Pivot the policy-age segment into columns and plot the rates as percentages.
ax = (engagementRatesBySegmentDF.unstack() * 100.0).plot.bar(figsize=(10, 7), grid=True)
ax.set_ylabel('Engagement Rate (%)')
ax.set_title('Engagement Rates by Customer Segments')
plt.show()


# In[ ]:


# Thank you -- this is the end of this tutorial.
	# ---- Second copy of the exported notebook ----
# NOTE(review): everything from here on repeats the analysis found earlier in
# this file. In this copy every line used to start with a tab; indented
# top-level statements are rejected by Python ("unexpected indent"), so the
# code is de-indented here to make the module parse. Consider deleting this
# duplicate altogether.

# In[ ]:


# This is a simulated test run on a pruned customer dataset; customer analytics
# is one branch of modelling an e-commerce business.
# Dataset: https://www.kaggle.com/pankajjsh06/ibm-watson-marketing-customer-value-data

# In[1]:


# `get_ipython` only exists inside IPython/Jupyter; skip the magic in plain Python.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import matplotlib.pyplot as plt
import pandas as pd


# In[ ]:


# the minimal requirements have been added to the repository, I guess -- let's see what is in store


# In[2]:


from pathlib import Path
root = Path('data')
df = pd.read_csv(root / 'marketingCustomer.csv')


# In[8]:


# The dataset is now held in the pandas DataFrame `df`.
# Next: find the size of the dataset.


# In[9]:


df.shape


# In[10]:


df.head()


# In[ ]:


# Analytics on engaged customers.
#
# Understand how different customers behave and react to different marketing strategies.
#
# --- starting with the overall engagement rates


# In[11]:


df.groupby('Response').count()


# In[13]:


# Only the 'Customer' column is needed to count customers per response.
df.groupby('Response')['Customer'].count()


# In[ ]:


# Visualize this in a bar plot.


# In[16]:


ax = df.groupby('Response')['Customer'].count().plot(
    kind='bar', color='orchid', grid=True, figsize=(10, 7), title='Marketing Engagement')


# In[19]:


# Fraction of engaged and non-engaged customers among all rows.
df.groupby('Response')['Customer'].count() / df.shape[0]


# In[23]:


# Chapter 2
#
# Engagement rates by the offer type presented to the customer.


# In[25]:


byOfferTypeDF = df.loc[df['Response'] == 'Yes'].groupby('Renew Offer Type')['Customer'].count()


# In[28]:


erateByOfferType = byOfferTypeDF / df.groupby('Renew Offer Type')['Customer'].count()


# In[30]:


ax = (erateByOfferType * 100).plot(kind='bar', figsize=(7, 7), color='dodgerblue', grid=True)

ax.set_ylabel('Engagement Rate %')
plt.show()


# In[ ]:


# Chapter 3
#
# Classification by offer type: how customers with different attributes
# respond to different marketing messages.


# In[10]:


# 'Yes' responders per (offer type, vehicle class), divided by the number of
# customers that share the same offer type.
byOfferTypeDFrame = (
    df.loc[df['Response'] == 'Yes']
    .groupby(['Renew Offer Type', 'Vehicle Class'])['Customer']
    .count()
    / df.groupby('Renew Offer Type')['Customer'].count()
)


# In[11]:


byOfferTypeDFrame


# In[ ]:


# Make the previous output easier to read: unstack() pivots the inner index
# level into columns.


# In[12]:


byOfferTypeDFrame = byOfferTypeDFrame.unstack().fillna(0)

# Display the pivoted table.
byOfferTypeDFrame


# In[13]:


ax = (byOfferTypeDFrame * 100).plot(kind='bar', figsize=(10, 7), grid=True)


# In[ ]:


# Engagement rates per sales channel.


# In[14]:


bySalesChannelDFrame = (
    df.loc[df['Response'] == 'Yes'].groupby('Sales Channel')['Customer'].count()
    / df.groupby('Sales Channel')['Customer'].count()
)
bySalesChannelDFrame


# In[15]:


ax = (bySalesChannelDFrame * 100).plot(
    kind='bar',
    figsize=(7, 7),
    color='palegreen',
    grid=True)
ax.set_ylabel('Engagement rate %')
plt.show()


# In[ ]:


# Author's observation: agents work better in terms of getting responses from customers.
#
# Break the results down further with more customer attributes.


# In[17]:


bySalesChannelDFrame = (
    df.loc[df['Response'] == 'Yes']
    .groupby(['Sales Channel', 'Vehicle Size'])['Customer']
    .count()
    / df.groupby('Sales Channel')['Customer'].count()
)
# Unstack the data into a more readable format.
bySalesChannelDFrame = bySalesChannelDFrame.unstack().fillna(0)
bySalesChannelDFrame


# In[18]:


ax = (bySalesChannelDFrame * 100).plot(
    kind='bar',
    figsize=(10, 7),
    grid=True)
ax.set_ylabel('Engagement rate %')
plt.show()


# In[ ]:


# Author's observation: customers with medium-size vehicles respond best across
# all sales channels, whereas the other customers differ only slightly in their
# engagement rates across the different sales channels.


# In[ ]:


# Chapter 6
#
# Engagement rates by months since policy inception.


# In[4]:


# Percentage of 'Yes' responses per value of 'Months Since Policy Inception'.
byMonthsSinceInceptionDF = (
    df.loc[df['Response'] == 'Yes']
    .groupby(by='Months Since Policy Inception')['Response']
    .count()
    / df.groupby(by='Months Since Policy Inception')['Response'].count()
    * 100
)


# In[5]:


# Month values without any 'Yes' response are NaN after the division; treat them as 0 %.
byMonthsSinceInceptionDF = byMonthsSinceInceptionDF.fillna(0)

byMonthsSinceInceptionDF


# In[6]:


ax = byMonthsSinceInceptionDF.fillna(0).plot(
    figsize=(10, 7),
    title='Engagement rates by months since inception',
    grid=True,
    color='skyblue')

ax.set_xlabel('Months since policy inception')
ax.set_ylabel('Engagement rates in %')

plt.show()


# In[ ]:


# Chapter 7
#
# Customer segmentation by customer lifetime value and months since inception.


# In[7]:


df['Customer Lifetime Value'].describe()


# In[8]:


# 'High' when strictly above the column median, otherwise 'Low'. The median is
# computed once instead of once per row.
clv_median = df['Customer Lifetime Value'].median()
df['CLV Segment'] = df['Customer Lifetime Value'].gt(clv_median).map(
    {True: 'High', False: 'Low'})


# In[ ]:


# Do the same thing for months since policy inception.


# In[11]:


df['Months Since Policy Inception'].describe()


# In[12]:


policy_age_median = df['Months Since Policy Inception'].median()
df['Policy Age Segment'] = df['Months Since Policy Inception'].gt(policy_age_median).map(
    {True: 'High', False: 'Low'})
df.head()


# In[19]:


# The first call creates the axes; the remaining segments are drawn onto them.
ax = df.loc[
    (df['CLV Segment'] == 'High') & (df['Policy Age Segment'] == 'High')
].plot.scatter(
    x='Months Since Policy Inception',
    y='Customer Lifetime Value',
    logy=True,
    color='red')

df.loc[
    (df['CLV Segment'] == 'Low') & (df['Policy Age Segment'] == 'High')
].plot.scatter(
    ax=ax,
    x='Months Since Policy Inception',
    y='Customer Lifetime Value',
    logy=True,
    color='blue')

df.loc[
    (df['CLV Segment'] == 'High') & (df['Policy Age Segment'] == 'Low')
].plot.scatter(
    ax=ax,
    x='Months Since Policy Inception',
    y='Customer Lifetime Value',
    logy=True,
    color='orange')

df.loc[
    (df['CLV Segment'] == 'Low') & (df['Policy Age Segment'] == 'Low')
].plot.scatter(
    ax=ax,
    x='Months Since Policy Inception',
    y='Customer Lifetime Value',
    logy=True,
    color='green',
    grid=True,
    figsize=(10, 7))

ax.set_ylabel('CLV (in log scale)')
ax.set_xlabel('Months Since Policy Inception')
ax.set_title('Segments by CLV and Policy Age')
plt.show()


# In[20]:


# 'Yes' responders per (CLV segment, policy-age segment) over all customers in
# the same pair of segments.
engagementRatesBySegmentDF = (
    df.loc[df['Response'] == 'Yes']
    .groupby(['CLV Segment', 'Policy Age Segment'])['Customer']
    .count()
    / df.groupby(['CLV Segment', 'Policy Age Segment'])['Customer'].count()
)

engagementRatesBySegmentDF


# In[22]:


ax = (engagementRatesBySegmentDF.unstack() * 100.0).plot(kind='bar', figsize=(10, 7), grid=True)
ax.set_ylabel('Engagement Rate (%)')
ax.set_title('Engagement Rates by Customer Segments')
plt.show()


# In[ ]:


# Thank you -- this is the end of this tutorial.