Skip to content

Instantly share code, notes, and snippets.

View abhijeet-talaulikar's full-sized avatar

Abhijeet Talaulikar abhijeet-talaulikar

View GitHub Profile
!git clone https://github.com/tctianchi/pyvenn.git
%matplotlib inline
from pyvenn import venn
# Get aspect labels: the five aspects with the largest summed dominant probability.
# Each row contributes its highest probability to the aspect (column) in which
# that maximum occurs. `DataFrame.lookup` was deprecated in pandas 1.2 and
# removed in 2.0; the value found at a row's `idxmax` column is that row's
# maximum, so `max(axis=1)` yields the same numbers on every pandas version.
aspects = (
    pd.DataFrame({
        "aspect": proba_df.idxmax(axis=1),
        "max_proba": proba_df.max(axis=1),
    })
    .groupby("aspect")
    .sum()
    .reset_index()
    .sort_values("max_proba", ascending=False)
    .head(5)
    .aspect.to_list()
)
# Inclusion probabilities, as returned by the fitted model for `matrix`
probabilities = gmm.predict_proba(matrix)

# Same numbers as a DataFrame whose columns are named after the values of `aspect_labels`
proba_df = pd.DataFrame(data=probabilities, columns=aspect_labels.values())
# Get dominant aspects
# The frame passed to the polar line chart has one row per aspect: every sample
# is assigned to the column holding its highest probability, and those highest
# probabilities are summed per aspect.
# NOTE(review): `DataFrame.lookup` was removed in pandas 2.0; `proba_df.max(axis=1)`
# should give the same values there — confirm the pinned pandas version.
# NOTE(review): the remaining arguments of this call are not visible in this excerpt.
fig = px.line_polar(
pd.DataFrame({
"aspect": proba_df.idxmax(axis=1),
"max_proba": proba_df.lookup(proba_df.index, proba_df.idxmax(axis=1))
}).groupby("aspect").sum().reset_index(),
import plotly.express as px
# Best model
# Fit a 7-component Gaussian mixture on `matrix`.
# NOTE(review): no `random_state` is passed, so the fit may differ between runs — confirm that is acceptable.
gmm = GaussianMixture(n_components=7)
gmm.fit(matrix)
# Get aspect labels and give names
# `labels` holds the component predicted for each row of `matrix`.
labels = gmm.predict(matrix)
# NOTE(review): the mapping literal below is cut off in this excerpt.
aspect_labels = {
from sklearn.manifold import TSNE
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import GridSearchCV
# Stack the 'ada_embedding' entries of `review_data` into one array
# (assumes every entry has the same length — TODO confirm).
matrix = np.array(review_data['ada_embedding'].to_list())
# Grid search to find best n_components - number of clusters
# Accumulators, presumably for the candidate component counts and their AIC/BIC scores.
components, aic, bic = [], [], []
# Candidate values 3 through 10 inclusive; the loop body is not visible in this excerpt.
for i in range(3,11):
import matplotlib.pyplot as plt
import matplotlib
# Embedding entries of `clean_data` as a plain Python list
matrix = clean_data['ada_embedding'].to_list()

# Create a t-SNE model and transform the data
tsne = TSNE(
    n_components=2,
    perplexity=15,
    random_state=42,
    init='random',
    learning_rate=200,
)
vis_dims = tsne.fit_transform(matrix)

# First coordinate of every projected point
x = [point[0] for point in vis_dims]
import numpy as np
import pandas as pd
import openai
import os

# The API key is read from the OPENAI_API_KEY environment variable so the
# secret never has to be pasted into the source. When the variable is unset
# this falls back to the original empty-string placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Load data
data = pd.read_csv("complaints.csv")
# Operates on a table `df` that has 'Null' and 'Activation' among both its row
# and column labels; presumably a Markov transition matrix with those two as
# absorbing states — confirm against the caller.
# NOTE(review): the body below has lost its indentation, and the end of the
# function (including any return statement) is not visible in this excerpt.
def expected_steps(df):
# Q: `df` with the 'Null' and 'Activation' columns and rows removed
Q = df.drop(
['Null', 'Activation'], axis=1).drop(['Null', 'Activation'], axis=0)
# Identity matrix sized to Q's column count
I = np.identity(Q.shape[1])
# N is the matrix inverse of (I - Q)
N = np.linalg.inv(I - Q.to_numpy())
# Row sums of N: one value per remaining row of Q
t = np.sum(N, axis=1)
# Scatter of 'Click Activation Rate' against 'Activation Rate', one point per row of df_scatter.
plt.figure(figsize=(10,5))
sns.scatterplot(data=df_scatter, x='Click Activation Rate', y='Activation Rate', s=200, color='#2653de')

# Label every point with its channel name, nudged slightly to the right of the marker.
# Walking the three columns in lockstep avoids `column[line]` label lookups, which
# only work while the frame still has its default 0..n-1 index.
for click_rate, activation_rate, channel in zip(
    df_scatter['Click Activation Rate'],
    df_scatter['Activation Rate'],
    df_scatter['Channel'],
):
    plt.text(
        click_rate + 0.001,
        activation_rate,
        channel,
        horizontalalignment='left',
        size='medium',
        color='black',
        weight='semibold',
    )
# Start from a copy so df_multi itself is left untouched
df_scatter = df_multi.copy()

# Coverage: number of distinct customer_id values per channel in campaign_data
customers_per_channel = campaign_data.groupby('channel')['customer_id'].nunique().to_dict()
df_scatter['Coverage'] = df_scatter['Channel'].map(customers_per_channel)

# Total Clicks: how often each value occurs as the second-to-last entry of a journey path
penultimate_step_counts = journeys['path'].apply(lambda steps: steps[-2]).value_counts().to_dict()
df_scatter['Total Clicks'] = df_scatter['Channel'].map(penultimate_step_counts)
# One row per key of `attributions`, tagged with the 'Journey' attribution style
# and carrying the matching value as its activation figure.
df_multi = pd.DataFrame(
    data={
        'Channel': attributions.keys(),
        'Attribution style': 'Journey',
        'Activations': attributions.values(),
    }
)
# One row per key of `attributions`, tagged with the 'First touchpoint' attribution style.
df_first = pd.DataFrame(
    data={
        'Channel': attributions.keys(),
        'Attribution style': 'First touchpoint',
    }
)