Skip to content

Instantly share code, notes, and snippets.

@toandaominh1997
Created August 6, 2021 02:28
Show Gist options
  • Save toandaominh1997/a05c80bf907beed85fcfeb5e39be9025 to your computer and use it in GitHub Desktop.
Save toandaominh1997/a05c80bf907beed85fcfeb5e39be9025 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/toandm2/devtools/anaconda3/lib/python3.8/site-packages/google/auth/_default.py:70: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. We recommend you rerun `gcloud auth application-default login` and make sure a quota project is added. Or you can use service accounts instead. For more information about service accounts, see https://cloud.google.com/docs/authentication/\n",
" warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n",
"/home/toandm2/devtools/anaconda3/lib/python3.8/site-packages/google/auth/_default.py:70: UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a \"quota exceeded\" or \"API not enabled\" error. We recommend you rerun `gcloud auth application-default login` and make sure a quota project is added. Or you can use service accounts instead. For more information about service accounts, see https://cloud.google.com/docs/authentication/\n",
" warnings.warn(_CLOUD_SDK_CREDENTIALS_WARNING)\n"
]
}
],
"source": [
"from cloudservice import CloudService\n",
"import pandas as pd \n",
"import numpy as np\n",
"\n",
"from sklearn.pipeline import Pipeline\n",
"from pipelineservice.compose import columnTransformer\n",
"from pipelineservice.impute import simpleImputer\n",
"from pipelineservice.preprocessing import robustScaler, quantileTransformer\n",
"from sklearn.preprocessing import MinMaxScaler, Normalizer\n",
"import category_encoders as ce\n",
"from sklearn.cluster import KMeans, SpectralClustering, AgglomerativeClustering, Birch\n",
"from sklearn.mixture import GaussianMixture\n",
"from sklearn.metrics import silhouette_score\n",
"\n",
"from tqdm.auto import tqdm, trange\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"# Notebook-wide display settings.\n",
"pd.set_option('display.max_columns', 500)\n",
"pd.set_option('display.float_format', lambda x: '%.3f' % x)\n",
"sns.set_style('whitegrid')\n",
"\n",
"# Local cache of the query result; delete this file to force a fresh BigQuery pull.\n",
"TIERING_CSV = 'tiering.csv'\n",
"\n",
"# Feature table left-joined with the GMV and points tables on user_id.\n",
"query_string = \"\"\"\n",
"select \n",
"mkt.*,\n",
"gmv.* except(user_id),\n",
"points.* except(user_id),\n",
"from `vinid-data-science-prod.P17S_REPORT.CUSTOMER_TIERING_FEATURES_TEMP` mkt\n",
"left join `vinid-data-science-prod.P17S_REPORT.CUSTOMER_TIERING_GMV_TEMP` gmv on mkt.user_id = gmv.user_id\n",
"left join `vinid-data-science-prod.P17S_REPORT.CUSTOMER_TIERING_POINTS_TEMP` points on mkt.user_id = points.user_id\n",
"\"\"\"\n",
"\n",
"cloud = CloudService(project = 'vinid-data-science-prod')\n",
"\n",
"try:\n",
"    # Fast path: reuse the cached extract when it is already on disk.\n",
"    df = pd.read_csv(TIERING_CSV)\n",
"except FileNotFoundError:\n",
"    # First run on this machine: query once, then cache so Restart & Run All stays cheap.\n",
"    df = cloud.read_gbq(query_string)\n",
"    df.to_csv(TIERING_CSV, index = False)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>churned_or_not_T30</th>\n",
" <th>churned_or_not_T60</th>\n",
" <th>churned_or_not_T90</th>\n",
" <th>churn_magnitude</th>\n",
" <th>used_vinpay_or_not</th>\n",
" <th>from_card_to_app_or_not</th>\n",
" <th>promotion_sensitive_score</th>\n",
" <th>three_month_gmv</th>\n",
" <th>six_month_gmv</th>\n",
" <th>one_year_gmv</th>\n",
" <th>three_month_total_txn</th>\n",
" <th>six_month_total_txn</th>\n",
" <th>one_year_total_txn</th>\n",
" <th>three_month_vinmart_gmv</th>\n",
" <th>three_month_vinmart_total_txn</th>\n",
" <th>three_month_mm_gmv</th>\n",
" <th>three_month_mm_total_txn</th>\n",
" <th>three_month_bill_gmv</th>\n",
" <th>three_month_bill_total_txn</th>\n",
" <th>three_month_ticket_gmv</th>\n",
" <th>three_month_ticket_total_txn</th>\n",
" <th>three_month_vietlot_gmv</th>\n",
" <th>three_month_vietlot_total_txn</th>\n",
" <th>three_month_vinhome_gmv</th>\n",
" <th>three_month_vinhome_total_txn</th>\n",
" <th>three_month_giup_viec_gmv</th>\n",
" <th>three_month_giup_viec_total_txn</th>\n",
" <th>three_month_insurance_gmv</th>\n",
" <th>three_month_insurance_total_txn</th>\n",
" <th>three_month_donation_gmv</th>\n",
" <th>three_month_donation_total_txn</th>\n",
" <th>three_month_pay_gmv</th>\n",
" <th>three_month_pay_total_txn</th>\n",
" <th>three_month_top_up_gmv</th>\n",
" <th>three_month_top_up_txn</th>\n",
" <th>three_month_other_PNLs_gmv</th>\n",
" <th>six_month_vinmart_gmv</th>\n",
" <th>six_month_vinmart_total_txn</th>\n",
" <th>six_month_mm_gmv</th>\n",
" <th>six_month_mm_total_txn</th>\n",
" <th>six_month_bill_gmv</th>\n",
" <th>six_month_bill_total_txn</th>\n",
" <th>six_month_ticket_gmv</th>\n",
" <th>six_month_ticket_total_txn</th>\n",
" <th>six_month_vietlot_gmv</th>\n",
" <th>six_month_vietlot_total_txn</th>\n",
" <th>six_month_vinhome_gmv</th>\n",
" <th>six_month_vinhome_total_txn</th>\n",
" <th>six_month_giup_viec_gmv</th>\n",
" <th>six_month_giup_viec_total_txn</th>\n",
" <th>six_month_insurance_gmv</th>\n",
" <th>six_month_insurance_total_txn</th>\n",
" <th>six_month_donation_gmv</th>\n",
" <th>six_month_donation_total_txn</th>\n",
" <th>six_month_pay_gmv</th>\n",
" <th>six_month_pay_total_txn</th>\n",
" <th>six_month_top_up_gmv</th>\n",
" <th>six_month_top_up_txn</th>\n",
" <th>six_month_other_PNLs_gmv</th>\n",
" <th>one_year_vinmart_gmv</th>\n",
" <th>one_year_vinmart_total_txn</th>\n",
" <th>one_year_mm_gmv</th>\n",
" <th>one_year_mm_total_txn</th>\n",
" <th>one_year_bill_gmv</th>\n",
" <th>one_year_bill_total_txn</th>\n",
" <th>one_year_ticket_gmv</th>\n",
" <th>one_year_ticket_total_txn</th>\n",
" <th>one_year_vietlot_gmv</th>\n",
" <th>one_year_vietlot_total_txn</th>\n",
" <th>one_year_vinhome_gmv</th>\n",
" <th>one_year_vinhome_total_txn</th>\n",
" <th>one_year_giup_viec_gmv</th>\n",
" <th>one_year_giup_viec_total_txn</th>\n",
" <th>one_year_insurance_gmv</th>\n",
" <th>one_year_insurance_total_txn</th>\n",
" <th>one_year_donation_gmv</th>\n",
" <th>one_year_donation_total_txn</th>\n",
" <th>one_year_pay_gmv</th>\n",
" <th>one_year_pay_total_txn</th>\n",
" <th>one_year_top_up_gmv</th>\n",
" <th>one_year_top_up_txn</th>\n",
" <th>one_year_other_PNLs_gmv</th>\n",
" <th>one_year_other_PNLs_txn</th>\n",
" <th>three_month_point</th>\n",
" <th>six_month_point</th>\n",
" <th>one_year_point</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10852107</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.572</td>\n",
" <td>317400.000</td>\n",
" <td>317400.000</td>\n",
" <td>317400.000</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>317400.000</td>\n",
" <td>4.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>4.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>4.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>300.000</td>\n",
" <td>300.000</td>\n",
" <td>300.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10690027</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.370</td>\n",
" <td>0.000</td>\n",
" <td>323900.000</td>\n",
" <td>323900.000</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>2.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>2.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>300.000</td>\n",
" <td>300.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10469664</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.729</td>\n",
" <td>296100.000</td>\n",
" <td>296100.000</td>\n",
" <td>473809.000</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>296100.000</td>\n",
" <td>5.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>5.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>7.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>300.000</td>\n",
" <td>300.000</td>\n",
" <td>500.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10688202</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.702</td>\n",
" <td>793700.000</td>\n",
" <td>995900.000</td>\n",
" <td>995900.000</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>793700.000</td>\n",
" <td>7.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>9.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>9.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>700.000</td>\n",
" <td>900.000</td>\n",
" <td>900.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10543784</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.781</td>\n",
" <td>0.000</td>\n",
" <td>495600.000</td>\n",
" <td>740300.000</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>2.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>4.000</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>nan</td>\n",
" <td>500.000</td>\n",
" <td>700.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id churned_or_not_T30 churned_or_not_T60 churned_or_not_T90 \\\n",
"0 10852107 1 1 1 \n",
"1 10690027 1 1 1 \n",
"2 10469664 1 1 1 \n",
"3 10688202 1 1 1 \n",
"4 10543784 1 1 1 \n",
"\n",
" churn_magnitude used_vinpay_or_not from_card_to_app_or_not \\\n",
"0 nan 0 0 \n",
"1 nan 0 0 \n",
"2 nan 0 0 \n",
"3 nan 0 0 \n",
"4 nan 0 0 \n",
"\n",
" promotion_sensitive_score three_month_gmv six_month_gmv one_year_gmv \\\n",
"0 0.572 317400.000 317400.000 317400.000 \n",
"1 0.370 0.000 323900.000 323900.000 \n",
"2 0.729 296100.000 296100.000 473809.000 \n",
"3 0.702 793700.000 995900.000 995900.000 \n",
"4 0.781 0.000 495600.000 740300.000 \n",
"\n",
" three_month_total_txn six_month_total_txn one_year_total_txn \\\n",
"0 4 4 4 \n",
"1 0 2 2 \n",
"2 5 5 7 \n",
"3 7 9 9 \n",
"4 0 2 4 \n",
"\n",
" three_month_vinmart_gmv three_month_vinmart_total_txn three_month_mm_gmv \\\n",
"0 317400.000 4.000 nan \n",
"1 nan nan nan \n",
"2 296100.000 5.000 nan \n",
"3 793700.000 7.000 nan \n",
"4 nan nan nan \n",
"\n",
" three_month_mm_total_txn three_month_bill_gmv three_month_bill_total_txn \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" three_month_ticket_gmv three_month_ticket_total_txn \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" three_month_vietlot_gmv three_month_vietlot_total_txn \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" three_month_vinhome_gmv three_month_vinhome_total_txn \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" three_month_giup_viec_gmv three_month_giup_viec_total_txn \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" three_month_insurance_gmv three_month_insurance_total_txn \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" three_month_donation_gmv three_month_donation_total_txn \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" three_month_pay_gmv three_month_pay_total_txn three_month_top_up_gmv \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" three_month_top_up_txn three_month_other_PNLs_gmv six_month_vinmart_gmv \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" six_month_vinmart_total_txn six_month_mm_gmv six_month_mm_total_txn \\\n",
"0 4.000 nan nan \n",
"1 2.000 nan nan \n",
"2 5.000 nan nan \n",
"3 9.000 nan nan \n",
"4 2.000 nan nan \n",
"\n",
" six_month_bill_gmv six_month_bill_total_txn six_month_ticket_gmv \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" six_month_ticket_total_txn six_month_vietlot_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" six_month_vietlot_total_txn six_month_vinhome_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" six_month_vinhome_total_txn six_month_giup_viec_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" six_month_giup_viec_total_txn six_month_insurance_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" six_month_insurance_total_txn six_month_donation_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" six_month_donation_total_txn six_month_pay_gmv six_month_pay_total_txn \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" six_month_top_up_gmv six_month_top_up_txn six_month_other_PNLs_gmv \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" one_year_vinmart_gmv one_year_vinmart_total_txn one_year_mm_gmv \\\n",
"0 nan 4.000 nan \n",
"1 nan 2.000 nan \n",
"2 nan 7.000 nan \n",
"3 nan 9.000 nan \n",
"4 nan 4.000 nan \n",
"\n",
" one_year_mm_total_txn one_year_bill_gmv one_year_bill_total_txn \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" one_year_ticket_gmv one_year_ticket_total_txn one_year_vietlot_gmv \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" one_year_vietlot_total_txn one_year_vinhome_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" one_year_vinhome_total_txn one_year_giup_viec_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" one_year_giup_viec_total_txn one_year_insurance_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" one_year_insurance_total_txn one_year_donation_gmv \\\n",
"0 nan nan \n",
"1 nan nan \n",
"2 nan nan \n",
"3 nan nan \n",
"4 nan nan \n",
"\n",
" one_year_donation_total_txn one_year_pay_gmv one_year_pay_total_txn \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" one_year_top_up_gmv one_year_top_up_txn one_year_other_PNLs_gmv \\\n",
"0 nan nan nan \n",
"1 nan nan nan \n",
"2 nan nan nan \n",
"3 nan nan nan \n",
"4 nan nan nan \n",
"\n",
" one_year_other_PNLs_txn three_month_point six_month_point one_year_point \n",
"0 nan 300.000 300.000 300.000 \n",
"1 nan nan 300.000 300.000 \n",
"2 nan 300.000 300.000 500.000 \n",
"3 nan 700.000 900.000 900.000 \n",
"4 nan nan 500.000 700.000 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['one_year_point'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# utils\n",
"def _with_input_labels(values, X):\n",
"    \"\"\"Return ``values`` as a DataFrame labelled like ``X`` when ``X`` is a DataFrame.\n",
"\n",
"    Any other input (e.g. a NumPy array) has no column/index labels to restore,\n",
"    so ``values`` is returned unchanged instead of raising AttributeError.\n",
"    \"\"\"\n",
"    if isinstance(X, pd.DataFrame):\n",
"        return pd.DataFrame(values, columns = X.columns, index = X.index)\n",
"    return values\n",
"\n",
"\n",
"class minMaxScaler(MinMaxScaler):\n",
"    \"\"\"MinMaxScaler whose ``transform`` keeps the input DataFrame's columns and index.\"\"\"\n",
"\n",
"    def transform(self, X):\n",
"        \"\"\"Scale ``X`` with the parent transform, then restore ``X``'s labels.\"\"\"\n",
"        return _with_input_labels(super().transform(X), X)\n",
"\n",
"\n",
"class normalizer(Normalizer):\n",
"    \"\"\"Normalizer whose ``transform`` keeps the input DataFrame's columns and index.\"\"\"\n",
"\n",
"    def transform(self, X):\n",
"        \"\"\"Normalize ``X`` with the parent transform, then restore ``X``'s labels.\"\"\"\n",
"        return _with_input_labels(super().transform(X), X)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clean Data"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>churned_or_not_T30</th>\n",
" <th>three_month_total_txn</th>\n",
" <th>six_month_total_txn</th>\n",
" <th>one_year_total_txn</th>\n",
" <th>three_month_gmv</th>\n",
" <th>six_month_gmv</th>\n",
" <th>one_year_gmv</th>\n",
" <th>three_month_point</th>\n",
" <th>six_month_point</th>\n",
" <th>one_year_point</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" <td>4169637.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.710</td>\n",
" <td>9.757</td>\n",
" <td>19.834</td>\n",
" <td>49.164</td>\n",
" <td>1034186.927</td>\n",
" <td>2204088.369</td>\n",
" <td>4783914.052</td>\n",
" <td>1217.495</td>\n",
" <td>2577.613</td>\n",
" <td>11246.178</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.454</td>\n",
" <td>18.648</td>\n",
" <td>35.203</td>\n",
" <td>79.599</td>\n",
" <td>3982159.123</td>\n",
" <td>7628660.591</td>\n",
" <td>20005299.622</td>\n",
" <td>7638.170</td>\n",
" <td>13185.772</td>\n",
" <td>55654.869</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>1.000</td>\n",
" <td>-384500.000</td>\n",
" <td>-1470900.000</td>\n",
" <td>1.000</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>2.000</td>\n",
" <td>7.000</td>\n",
" <td>0.000</td>\n",
" <td>51200.000</td>\n",
" <td>548200.000</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>1000.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.000</td>\n",
" <td>3.000</td>\n",
" <td>7.000</td>\n",
" <td>22.000</td>\n",
" <td>207672.000</td>\n",
" <td>649001.000</td>\n",
" <td>1766582.000</td>\n",
" <td>200.000</td>\n",
" <td>600.000</td>\n",
" <td>3300.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.000</td>\n",
" <td>12.000</td>\n",
" <td>24.000</td>\n",
" <td>59.000</td>\n",
" <td>1042732.000</td>\n",
" <td>2297509.000</td>\n",
" <td>5042100.000</td>\n",
" <td>1000.000</td>\n",
" <td>2300.000</td>\n",
" <td>9500.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000</td>\n",
" <td>1701.000</td>\n",
" <td>4952.000</td>\n",
" <td>16267.000</td>\n",
" <td>1957198802.000</td>\n",
" <td>4383585710.000</td>\n",
" <td>16162986677.000</td>\n",
" <td>2500500.000</td>\n",
" <td>7142000.000</td>\n",
" <td>31792000.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" churned_or_not_T30 three_month_total_txn six_month_total_txn \\\n",
"count 4169637.000 4169637.000 4169637.000 \n",
"mean 0.710 9.757 19.834 \n",
"std 0.454 18.648 35.203 \n",
"min 0.000 0.000 0.000 \n",
"25% 0.000 0.000 2.000 \n",
"50% 1.000 3.000 7.000 \n",
"75% 1.000 12.000 24.000 \n",
"max 1.000 1701.000 4952.000 \n",
"\n",
" one_year_total_txn three_month_gmv six_month_gmv one_year_gmv \\\n",
"count 4169637.000 4169637.000 4169637.000 4169637.000 \n",
"mean 49.164 1034186.927 2204088.369 4783914.052 \n",
"std 79.599 3982159.123 7628660.591 20005299.622 \n",
"min 1.000 -384500.000 -1470900.000 1.000 \n",
"25% 7.000 0.000 51200.000 548200.000 \n",
"50% 22.000 207672.000 649001.000 1766582.000 \n",
"75% 59.000 1042732.000 2297509.000 5042100.000 \n",
"max 16267.000 1957198802.000 4383585710.000 16162986677.000 \n",
"\n",
" three_month_point six_month_point one_year_point \n",
"count 4169637.000 4169637.000 4169637.000 \n",
"mean 1217.495 2577.613 11246.178 \n",
"std 7638.170 13185.772 55654.869 \n",
"min 0.000 0.000 0.000 \n",
"25% 0.000 0.000 1000.000 \n",
"50% 200.000 600.000 3300.000 \n",
"75% 1000.000 2300.000 9500.000 \n",
"max 2500500.000 7142000.000 31792000.000 "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# clean data\n",
"\n",
"# Modelling features: the 30-day churn flag plus transaction count, GMV and\n",
"# points over the three-month, six-month and one-year windows.\n",
"columns = [\n",
"    'churned_or_not_T30',\n",
"    'three_month_total_txn', 'six_month_total_txn', 'one_year_total_txn',\n",
"    'three_month_gmv', 'six_month_gmv', 'one_year_gmv',\n",
"    'three_month_point', 'six_month_point', 'one_year_point',\n",
"]\n",
"\n",
"# Missing values become 0 first, so rows without a recorded one-year GMV are\n",
"# dropped together with the zero and negative ones.\n",
"data = (\n",
"    df[columns]\n",
"    .fillna(0)\n",
"    .loc[lambda frame: frame['one_year_gmv'] > 0]\n",
")\n",
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.loc[data['one_year_point'] > 26000].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.loc[data['one_year_gmv'] > 20000000].shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## GMV Focus"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# `data` only carries the modelling columns, so the two flag columns used as\n",
"# hue are pulled back from `df`; the filtered rows keep df's index labels, so\n",
"# an index join lines them up.\n",
"hue_columns = ['from_card_to_app_or_not', 'used_vinpay_or_not']\n",
"missing_hue_columns = [name for name in hue_columns if name not in data.columns]\n",
"gmv_plot_data = data.join(df[missing_hue_columns])\n",
"\n",
"for hue_column in hue_columns:\n",
"    grid = sns.displot(data = gmv_plot_data, x = 'one_year_gmv', hue = hue_column, kind = 'hist', height = 6, aspect = 2, bins = 50)\n",
"    # Clip the count axis on every figure; plt.ylim would only touch the last one drawn.\n",
"    grid.set(ylim = (0, 100))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sns.displot(data = data, x = 'one_year_point', kde = True, kind = 'hist', height = 6, aspect = 2, color = 'r')\n",
"# plt.ylim(0, 100)\n",
"# plt.title(\"Earning Points Feature Distribution\", fontsize = 20, fontweight=\"bold\", fontfamily='serif')\n",
"# plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building Model"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/toandm2/devtools/anaconda3/lib/python3.8/site-packages/sklearn/cluster/_kmeans.py:792: FutureWarning: 'n_jobs' was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n",
" warnings.warn(\"'n_jobs' was deprecated in version 0.23 and will be\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cluster: ['bronze' 'gold' 'silver' 'platinum']\n"
]
}
],
"source": [
"# Fixed seed so the tier assignment is reproducible across kernel restarts.\n",
"SEED = 42\n",
"\n",
"# Tier names, ordered from the lowest to the highest mean one-year GMV.\n",
"cluster = ['bronze', 'silver', 'gold', 'platinum']\n",
"\n",
"category_columns = [\n",
"    'Gender'\n",
"]\n",
"\n",
"numeric_columns = [\n",
"    'churned_or_not_T30',\n",
"    'three_month_total_txn',\n",
"    'six_month_total_txn',\n",
"    'one_year_total_txn',\n",
"    'three_month_gmv',\n",
"    'six_month_gmv',\n",
"    'one_year_gmv',\n",
"    'three_month_point',\n",
"    'six_month_point',\n",
"    'one_year_point',\n",
"]\n",
"\n",
"# Categorical branch is defined but not wired into `preprocess` below.\n",
"category_preprocess = Pipeline(steps=[\n",
"    ('imputer', simpleImputer(strategy='most_frequent')),\n",
"    ('encoder', ce.OneHotEncoder(return_df=True, use_cat_names=True)),\n",
"])\n",
"numeric_preprocess = Pipeline(steps = [\n",
"    ('imputer', simpleImputer(strategy='constant', fill_value=0)),\n",
"    ('normalize_robust', robustScaler()),\n",
"    ('normalize', minMaxScaler()),\n",
"])\n",
"\n",
"preprocess = columnTransformer(transformers=[\n",
"    ('numeric', numeric_preprocess, numeric_columns),\n",
"])\n",
"\n",
"# Candidate models, one cluster per tier name. KMeans is built without\n",
"# `n_jobs`: the argument was deprecated in scikit-learn 0.23 and removed in 1.0.\n",
"estimators = [\n",
"    KMeans(n_clusters = len(cluster), max_iter = 500, random_state = SEED),\n",
"    Birch(n_clusters = len(cluster)),\n",
"    GaussianMixture(n_components = len(cluster), random_state = SEED),\n",
"]\n",
"pipe = Pipeline(steps = [\n",
"    ('preprocess', preprocess),\n",
"    ('estimator', estimators[0])\n",
"])\n",
"\n",
"# Optional model comparison (disabled):\n",
"# data_transform = pipe[:-1].fit_transform(data)\n",
"# results = []\n",
"# print('FIT MODEL')\n",
"# for estimator in tqdm(estimators):\n",
"#     pipe.set_params(estimator = estimator)\n",
"#     labels = pipe[-1].fit_predict(data_transform)\n",
"#     score = silhouette_score(data_transform, labels)\n",
"#     print(score, estimator.__class__.__name__)\n",
"#     results.append({'model': estimator.__class__.__name__, 'silhouette_score': score})\n",
"# pd.DataFrame.from_dict(results).style.background_gradient()\n",
"\n",
"# Reset to KMeans in case the comparison loop above was run and swapped the estimator.\n",
"pipe.set_params(estimator = estimators[0])\n",
"labels = pipe.fit_predict(data)\n",
"data['cluster'] = labels\n",
"\n",
"# Raw cluster ids are arbitrary: rank them by mean one-year GMV and rename\n",
"# them to the tier names, lowest mean first.\n",
"gmv_rank = data.groupby('cluster')['one_year_gmv'].mean().sort_values().index\n",
"m_cluster = dict(zip(gmv_rank, cluster))\n",
"data['cluster'] = data['cluster'].map(m_cluster)\n",
"\n",
"print('Cluster: ', data['cluster'].unique())"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\" >\n",
"#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col0,#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col1,#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col2,#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col3,#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col4,#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col5,#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col6,#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col7,#T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col8{\n",
" background-color: #fff7fb;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col0,#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col1,#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col2{\n",
" background-color: #eae6f1;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col3,#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col4,#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col7{\n",
" background-color: #e8e4f0;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col5{\n",
" background-color: #ebe6f2;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col6{\n",
" background-color: #e7e3f0;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col8{\n",
" background-color: #e9e5f1;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col0{\n",
" background-color: #a1bbda;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col1{\n",
" background-color: #a2bcda;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col2{\n",
" background-color: #a4bcda;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col3{\n",
" background-color: #9ab8d8;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col4{\n",
" background-color: #9ebad9;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col5{\n",
" background-color: #a8bedc;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col6{\n",
" background-color: #93b5d6;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col7{\n",
" background-color: #99b8d8;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col8{\n",
" background-color: #9cb9d9;\n",
" color: #000000;\n",
" }#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col0,#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col1,#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col2,#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col3,#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col4,#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col5,#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col6,#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col7,#T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col8{\n",
" background-color: #023858;\n",
" color: #f1f1f1;\n",
" }</style><table id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >three_month_total_txn</th> <th class=\"col_heading level0 col1\" >six_month_total_txn</th> <th class=\"col_heading level0 col2\" >one_year_total_txn</th> <th class=\"col_heading level0 col3\" >three_month_gmv</th> <th class=\"col_heading level0 col4\" >six_month_gmv</th> <th class=\"col_heading level0 col5\" >one_year_gmv</th> <th class=\"col_heading level0 col6\" >three_month_point</th> <th class=\"col_heading level0 col7\" >six_month_point</th> <th class=\"col_heading level0 col8\" >one_year_point</th> </tr> <tr> <th class=\"index_name level0\" >cluster</th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> </tr></thead><tbody>\n",
" <tr>\n",
" <th id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9level0_row0\" class=\"row_heading level0 row0\" >bronze</th>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col0\" class=\"data row0 col0\" >2.701729</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col1\" class=\"data row0 col1\" >6.700923</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col2\" class=\"data row0 col2\" >21.956505</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col3\" class=\"data row0 col3\" >320713.389746</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col4\" class=\"data row0 col4\" >823023.603292</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col5\" class=\"data row0 col5\" >2155472.730108</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col6\" class=\"data row0 col6\" >369.044780</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col7\" class=\"data row0 col7\" >946.690385</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row0_col8\" class=\"data row0 col8\" >5230.532339</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9level0_row1\" class=\"row_heading level0 row1\" >silver</th>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col0\" class=\"data row1 col0\" >22.464050</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col1\" class=\"data row1 col1\" >43.734160</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col2\" class=\"data row1 col2\" >98.941192</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col3\" class=\"data row1 col3\" >2364793.850729</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col4\" class=\"data row1 col4\" >4798737.273323</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col5\" class=\"data row1 col5\" >9631410.476191</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col6\" class=\"data row1 col6\" >2784.201536</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col7\" class=\"data row1 col7\" >5597.072994</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row1_col8\" class=\"data row1 col8\" >22185.260670</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9level0_row2\" class=\"row_heading level0 row2\" >gold</th>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col0\" class=\"data row2 col0\" >60.316530</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col1\" class=\"data row2 col1\" >112.902515</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col2\" class=\"data row2 col2\" >240.704286</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col3\" class=\"data row2 col3\" >6055547.286043</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col4\" class=\"data row2 col4\" >11827199.851573</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col5\" class=\"data row2 col5\" >23038915.954751</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col6\" class=\"data row2 col6\" >7314.702364</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col7\" class=\"data row2 col7\" >14167.331196</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row2_col8\" class=\"data row2 col8\" >54350.761006</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9level0_row3\" class=\"row_heading level0 row3\" >platinum</th>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col0\" class=\"data row3 col0\" >151.306444</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col1\" class=\"data row3 col1\" >283.695739</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col2\" class=\"data row3 col2\" >597.311837</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col3\" class=\"data row3 col3\" >14551871.508601</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col4\" class=\"data row3 col4\" >28525250.009315</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col5\" class=\"data row3 col5\" >58497706.650147</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col6\" class=\"data row3 col6\" >16776.764328</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col7\" class=\"data row3 col7\" >33307.845530</td>\n",
" <td id=\"T_947b6d04_c765_11eb_8050_db1bb70185f9row3_col8\" class=\"data row3 col8\" >128436.034317</td>\n",
" </tr>\n",
" </tbody></table>"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x7f97cfb6ee80>"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby('cluster').mean().sort_values(by = ['one_year_gmv']).loc[:, numeric_columns].style.background_gradient()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\" >\n",
"#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col0,#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col1,#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col2,#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col3,#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col4,#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col5,#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col6,#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col7,#T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col8{\n",
" background-color: #fff7fb;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col0{\n",
" background-color: #e5e1ef;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col1{\n",
" background-color: #e7e3f0;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col2{\n",
" background-color: #e8e4f0;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col3{\n",
" background-color: #e1dfed;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col4,#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col7{\n",
" background-color: #e3e0ee;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col5,#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col8{\n",
" background-color: #e4e1ef;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col6{\n",
" background-color: #e0deed;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col0{\n",
" background-color: #96b6d7;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col1{\n",
" background-color: #99b8d8;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col2{\n",
" background-color: #9ebad9;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col3{\n",
" background-color: #80aed2;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col4{\n",
" background-color: #84b0d3;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col5{\n",
" background-color: #88b1d4;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col6{\n",
" background-color: #7dacd1;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col7{\n",
" background-color: #81aed2;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col8{\n",
" background-color: #86b0d3;\n",
" color: #000000;\n",
" }#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col0,#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col1,#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col2,#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col3,#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col4,#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col5,#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col6,#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col7,#T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col8{\n",
" background-color: #023858;\n",
" color: #f1f1f1;\n",
" }</style><table id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >three_month_total_txn</th> <th class=\"col_heading level0 col1\" >six_month_total_txn</th> <th class=\"col_heading level0 col2\" >one_year_total_txn</th> <th class=\"col_heading level0 col3\" >three_month_gmv</th> <th class=\"col_heading level0 col4\" >six_month_gmv</th> <th class=\"col_heading level0 col5\" >one_year_gmv</th> <th class=\"col_heading level0 col6\" >three_month_point</th> <th class=\"col_heading level0 col7\" >six_month_point</th> <th class=\"col_heading level0 col8\" >one_year_point</th> </tr> <tr> <th class=\"index_name level0\" >cluster</th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> </tr></thead><tbody>\n",
" <tr>\n",
" <th id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9level0_row0\" class=\"row_heading level0 row0\" >bronze</th>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col0\" class=\"data row0 col0\" >0</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col1\" class=\"data row0 col1\" >4</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col2\" class=\"data row0 col2\" >13</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col3\" class=\"data row0 col3\" >0.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col4\" class=\"data row0 col4\" >306250.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col5\" class=\"data row0 col5\" >1067744.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col6\" class=\"data row0 col6\" >0.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col7\" class=\"data row0 col7\" >300.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row0_col8\" class=\"data row0 col8\" >2000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9level0_row1\" class=\"row_heading level0 row1\" >silver</th>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col0\" class=\"data row1 col0\" >21</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col1\" class=\"data row1 col1\" >40</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col2\" class=\"data row1 col2\" >88</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col3\" class=\"data row1 col3\" >1709988.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col4\" class=\"data row1 col4\" >3470049.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col5\" class=\"data row1 col5\" >7017044.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col6\" class=\"data row1 col6\" >1700.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col7\" class=\"data row1 col7\" >3400.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row1_col8\" class=\"data row1 col8\" >12400.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9level0_row2\" class=\"row_heading level0 row2\" >gold</th>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col0\" class=\"data row2 col0\" >56</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col1\" class=\"data row2 col1\" >105</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col2\" class=\"data row2 col2\" >224</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col3\" class=\"data row2 col3\" >4633638.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col4\" class=\"data row2 col4\" >9040240.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col5\" class=\"data row2 col5\" >17637989.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col6\" class=\"data row2 col6\" >4600.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col7\" class=\"data row2 col7\" >8948.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row2_col8\" class=\"data row2 col8\" >31300.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9level0_row3\" class=\"row_heading level0 row3\" >platinum</th>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col0\" class=\"data row3 col0\" >134</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col1\" class=\"data row3 col1\" >252</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col2\" class=\"data row3 col2\" >544</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col3\" class=\"data row3 col3\" >9829252.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col4\" class=\"data row3 col4\" >19403568.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col5\" class=\"data row3 col5\" >37672558.500000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col6\" class=\"data row3 col6\" >9600.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col7\" class=\"data row3 col7\" >18900.000000</td>\n",
" <td id=\"T_94d1f502_c765_11eb_8050_db1bb70185f9row3_col8\" class=\"data row3 col8\" >66565.000000</td>\n",
" </tr>\n",
" </tbody></table>"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x7f97cfb8e040>"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby('cluster').median().sort_values(by = ['one_year_gmv']).loc[:, numeric_columns].style.background_gradient()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data['cluster'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.loc[data['one_year_gmv'] > 92313835.221980].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipe[:-1].fit_transform(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Finding Optimal Number of Cluster"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Elbow "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from yellowbrick.cluster import KElbowVisualizer\n",
"\n",
"visualizer = KElbowVisualizer(pipe[-1], k=(2,9), metric = 'distortion')\n",
"\n",
"visualizer.fit(pipe[:-1].fit_transform(data))\n",
"visualizer.show() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from yellowbrick.cluster import SilhouetteVisualizer\n",
"pipe.set_params(estimator__n_clusters = 4)\n",
"visualizer = SilhouetteVisualizer(pipe[-1], colors='yellowbrick')\n",
"visualizer.fit(pipe[:-1].fit_transform(data))\n",
"visualizer.show() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from yellowbrick.cluster import InterclusterDistance\n",
"\n",
"pipe.set_params(estimator__n_clusters = 4)\n",
"visualizer = InterclusterDistance(pipe[-1], colors='yellowbrick')\n",
"visualizer.fit(pipe[:-1].fit_transform(data))\n",
"visualizer.show() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.loc[data['one_year_point'] > 7900.0].shape"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1508122.0, 4676472.0, 14285082.5, 2900.0, 8000.0, 25400.0)"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_cluster_bronze = data.loc[data.cluster == 'bronze']\n",
"data_cluster_silver = data.loc[data.cluster == 'silver']\n",
"data_cluster_gold = data.loc[data.cluster == 'gold']\n",
"data_cluster_platinum = data.loc[data.cluster == 'platinum']\n",
"\n",
"def intersect(A, B, column = 'one_year_gmv'):\n",
" if B[column].min() >= A[column].max():\n",
" a2b = (B[column].min() - A[column].max()) / 2.0\n",
" return a2b\n",
" \n",
" a2b = A.loc[A[column] > B[column].min(), column].tolist()\n",
" a2b += B.loc[B[column] < A[column].max(), column].tolist()\n",
" return np.median(a2b)\n",
"xx_gmv = intersect(data_cluster_bronze, data_cluster_silver)\n",
"yy_gmv = intersect(data_cluster_silver, data_cluster_gold)\n",
"zz_gmv = intersect(data_cluster_gold, data_cluster_platinum)\n",
"\n",
"\n",
"xx_point = intersect(data_cluster_bronze, data_cluster_silver, column = 'one_year_point')\n",
"yy_point = intersect(data_cluster_silver, data_cluster_gold, column = 'one_year_point')\n",
"zz_point = intersect(data_cluster_gold, data_cluster_platinum, column = 'one_year_point')\n",
"\n",
"xx_gmv, yy_gmv, zz_gmv, xx_point, yy_point, zz_point"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.loc[data['one_year_gmv'] > 18972695.0].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.loc[(8337037.0<data['one_year_gmv'])&(data['one_year_gmv'] < 18972695.0)].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data.loc[(1618057.0<data['one_year_gmv'])&(data['one_year_gmv'] < 8337037.0)].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from yellowbrick.features import PCA, Manifold\n",
"\n",
"\n",
"visualizer = PCA(scale=True)\n",
"visualizer = Manifold(manifold=\"tsne\")\n",
"\n",
"visualizer.fit_transform(data_transform, labels)\n",
"visualizer.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment