def label_customer(customer_id, logins, prediction_date, churn_days, return_trans=False,
                   lead_time=21, prediction_window=35, cutoff_time="2016-05-11"):
    """
    Make the churn label for a single customer at one cutoff time.

    A log row is flagged as a churn when the whole number of days between its
    'cutof' and 'max_time' values exceeds ``churn_days``; the churn is dated
    ``max_time + churn_days`` days. The label is positive when such a churn
    date falls inside the prediction window
    ``[cutoff_time + lead_time, cutoff_time + lead_time + prediction_window)``.

    Params
    --------
        customer_id (str): unique id for the customer
        logins (dataframe): logs for this customer only. The columns read are
            'actor_account_id', 'max_time' and 'cutof'; 'time' is read as well
            when a churn exists but falls outside the prediction window.
            NOTE(review): 'cutof' looks like a misspelling of 'cutoff' - it is
            kept because it is the column name the data is expected to carry;
            confirm against the caller. The frame is copied, never modified.
        prediction_date (str): either "MS" or "SMS". It is only validated;
            it does not influence the cutoff time used here.
        churn_days (int): number of whole days between 'max_time' and 'cutof'
            that must be exceeded for a row to count as a churn.
        return_trans (boolean): whether to also return the annotated copy of
            the logs. Defaults to False.
        lead_time (int): days between the cutoff time and the start of the
            prediction window. Defaults to 21 (3 weeks).
        prediction_window (int): length of the prediction window in days.
            Defaults to 35 (5 weeks).
        cutoff_time (str or timestamp): the cutoff time to label. Defaults to
            2016-05-11, the value that used to be hard-coded as "05-11-2016".

    Return
    --------
        label_times (dataframe): one row with 'actor_account_id', 'cutoff_time',
            'label' (1 = churn inside the window, 0 = no churn, NaN = unknown),
            'days_to_churn' and 'churn_date'.
        When ``return_trans`` is True a tuple ``(label_times, logs)`` is returned
        instead. On that path, if at least one log row is a churn, label_times
        omits 'actor_account_id' and additionally carries the
        'prediction_window_start' / 'prediction_window_end' columns; if no row
        is a churn it has the same five columns as above.

    Raises
    --------
        AssertionError: if prediction_date is not 'MS'/'SMS' or if logins does
            not contain exactly the one account id ``customer_id``.
    """
    assert prediction_date in ('MS', 'SMS'), "Prediction day must be either 'MS' or 'SMS'"
    # Comparing the whole array of unique ids with a list has an ambiguous truth
    # value as soon as more than one id is present, so check length and value.
    account_ids = logins['actor_account_id'].unique()
    assert len(account_ids) == 1 and account_ids[0] == customer_id, \
        "Transactions must be for only customer"

    # Don't modify original
    logs = logins.copy()

    # Whole days between the last activity and the cutoff recorded in the logs.
    # `.dt.days` replaces `.astype('timedelta64[D]')`, which pandas >= 2.0 rejects.
    logs['difference_days'] = logs['cutof'] - logs['max_time']
    logs['churn'] = logs['difference_days'].dt.days > churn_days
    logs['last_day_active'] = logs['max_time']

    # A churn is dated `churn_days` after the last active day; NaT for non-churn rows
    logs['churn_date'] = (logs['last_day_active']
                          + pd.Timedelta(days=churn_days)).where(logs['churn'])

    # Single cutoff time and its prediction window
    label_times = pd.DataFrame({'actor_account_id': [customer_id]})
    label_times['cutoff_time'] = pd.Timestamp(cutoff_time)
    label_times['cutoff_time'] = label_times['cutoff_time'].astype('datetime64[ns]')
    label_times['prediction_window_start'] = (label_times['cutoff_time']
                                              + pd.Timedelta(days=lead_time))
    label_times['prediction_window_end'] = (label_times['prediction_window_start']
                                            + pd.Timedelta(days=prediction_window))

    output_columns = ['actor_account_id', 'cutoff_time', 'label', 'days_to_churn', 'churn_date']

    # No churn anywhere in the logs: negative label, nothing else to compute
    if not logs['churn'].any():
        label_times['label'] = 0
        label_times['days_to_churn'] = np.nan
        label_times['churn_date'] = pd.NaT  # NaT (not float NaN) so the dtype matches the churn path
        labels = label_times[output_columns]
        if return_trans:
            return labels, logs
        return labels

    # Defaults when nothing can be determined for a cutoff time
    label_times['label'] = np.nan
    label_times['days_to_churn'] = np.nan
    label_times['churn_date'] = pd.Series(pd.NaT, index=label_times.index, dtype='datetime64[ns]')

    previous_churn_date = None

    # Iterate through every cutoff time
    for i, row in label_times.iterrows():
        window_start = row['prediction_window_start']
        window_end = row['prediction_window_end']

        # Churns whose date falls inside [window_start, window_end)
        in_window = (logs['churn_date'] >= window_start) & (logs['churn_date'] < window_end)
        churns = logs.loc[in_window, 'churn_date']

        if not churns.empty:
            # Positive label; the first matching churn is the one reported
            churn_date = churns.iloc[0]

            # Days to churn for cutoff times up to this churn and after the previous one
            earlier = label_times['cutoff_time'] <= churn_date
            if previous_churn_date is not None:
                earlier &= label_times['cutoff_time'] > previous_churn_date
            elapsed = churn_date - label_times.loc[earlier, 'cutoff_time']
            label_times.loc[earlier, 'days_to_churn'] = elapsed.dt.total_seconds() / (3600 * 24)

            label_times.loc[i, 'label'] = 1
            label_times.loc[i, 'churn_date'] = churn_date
            previous_churn_date = churn_date
        else:
            # No churn in the window: negative label only if some log 'time' lies
            # inside the window; otherwise the label stays unknown (NaN)
            times_before_end = logs.loc[logs['time'] < window_end, 'time']
            if (times_before_end >= window_start).any():
                label_times.loc[i, 'label'] = 0

    # When no cutoff time received a positive label, 'days_to_churn' and
    # 'churn_date' still hold the NaN / NaT defaults assigned above.

    if return_trans:
        return label_times.drop(columns=['actor_account_id']), logs

    return label_times[output_columns].copy()
