Created
June 18, 2019 14:24
-
-
Save DanielaLaura/e507285ab1a3c9670e0c2d8afd1b4ebf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def make_label_times(logs, prediction_date, churn_days):\n", | |
" \"\"\"\n", | |
" Make labels for an entire series of transactions. \n", | |
" \n", | |
" Params\n", | |
" --------\n", | |
" logs (dataframe): table of customer logs; must contain an 'actor_account_id' column\n", | |
" prediction_date (str): time at which predictions are made. Either \"MS\" for the first of the month\n", | |
" or \"SMS\" for the first and fifteenth of each month \n", | |
" churn_days (int): integer number of days without an active membership required for a churn. A churn is\n", | |
" defined by exceeding this number of days without an active membership.\n", | |
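" NOTE: lead_time and prediction_window are described here but are not parameters of this function.\n", | |
" lead_time (int): number of periods in advance to make predictions for. Defaults to 1 (predictions for one offset)\n", | |
" prediction_window (int): number of periods over which to consider churn. Defaults to 1.\n", | |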
" Return\n", | |
" --------\n", | |
" label_times (dataframe): a table with customer ids, cutoff times, binary label, regression label, \n", | |
" and date of churn. This table can then be used for feature engineering.\n", | |
" \"\"\"\n", | |
" \n", | |
" label_times = []\n", | |
" logs = logs.sort_values(['actor_account_id'])\n", | |
" \n", | |
" # Iterate through each customer and find labels\n", | |
" for customer_id, logins in logs.groupby('actor_account_id'):\n", | |
" lt_cust = label_customer(customer_id, logins, prediction_date, churn_days, \n", | |
" )\n", | |
" \n", | |
" label_times.append(lt_cust)\n", | |
" \n", | |
" # Concatenate into a single dataframe\n", | |
" return pd.concat(label_times)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"varInspector": { | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"delete_cmd_postfix": "", | |
"delete_cmd_prefix": "del ", | |
"library": "var_list.py", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"delete_cmd_postfix": ") ", | |
"delete_cmd_prefix": "rm(", | |
"library": "var_list.r", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
], | |
"window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment