Skip to content

Instantly share code, notes, and snippets.

@karamanbk
Created May 3, 2019 19:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karamanbk/0f299d59d9b0163cd08dafec7cad613c to your computer and use it in GitHub Desktop.
Save karamanbk/0f299d59d9b0163cd08dafec7cad613c to your computer and use it in GitHub Desktop.
# Split the users into 4 groups based on their 'Recency' value and store the
# resulting group label of every row in a new 'RecencyCluster' column.
kmeans = KMeans(n_clusters=4).fit(tx_user[['Recency']])
tx_user['RecencyCluster'] = kmeans.predict(tx_user[['Recency']])
#function for ordering cluster numbers
def order_cluster(cluster_field_name: str, target_field_name: str,
                  df: "pd.DataFrame", ascending: bool) -> "pd.DataFrame":
    """Renumber cluster labels so they follow the order of a target's mean.

    The mean of ``target_field_name`` is computed per cluster, the clusters
    are sorted by that mean, and every cluster label is replaced by its
    0-based position in the sorted order.

    Args:
        cluster_field_name: Column holding the current cluster labels.
        target_field_name: Column whose per-cluster mean defines the order.
        df: Input frame; it is not modified.
        ascending: If true, the cluster with the smallest mean becomes 0;
            otherwise the cluster with the largest mean becomes 0.

    Returns:
        A new frame (built by ``pd.merge``, so with a fresh index) in which
        ``cluster_field_name`` holds the reordered labels and is the last
        column.
    """
    # Pick a scratch column name that cannot clash with a column of ``df``.
    # A fixed name such as 'index' would get suffixed by the merge when the
    # frame already has that column, and the drop/rename below would then
    # silently lose the cluster column.
    rank_col = 'index'
    while rank_col in df.columns:
        rank_col = '_' + rank_col

    # One row per cluster, ordered by the mean of the target column.
    df_new = df.groupby(cluster_field_name)[target_field_name].mean().reset_index()
    df_new = df_new.sort_values(by=target_field_name, ascending=ascending).reset_index(drop=True)
    # After the reset, the row position is the new cluster label.
    df_new[rank_col] = df_new.index

    # Attach the new label to every row, then swap it in for the old one.
    df_final = pd.merge(df, df_new[[cluster_field_name, rank_col]], on=cluster_field_name)
    df_final = df_final.drop([cluster_field_name], axis=1)
    df_final = df_final.rename(columns={rank_col: cluster_field_name})
    return df_final
# Renumber the recency clusters by descending mean 'Recency': label 0 becomes
# the cluster with the highest mean and the largest label the lowest mean.
tx_user = order_cluster(
    cluster_field_name='RecencyCluster',
    target_field_name='Recency',
    df=tx_user,
    ascending=False,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment