Skip to content

Instantly share code, notes, and snippets.

@achad4
Created July 12, 2021 17:46
Show Gist options
  • Save achad4/6c888bf98914c54ea5e7c126a994dc66 to your computer and use it in GitHub Desktop.
Save achad4/6c888bf98914c54ea5e7c126a994dc66 to your computer and use it in GitHub Desktop.
# Build the "synthetic lease" table: for each of the coming months, size a
# cohort of new leases, draw that many rows from the historical leases, and
# give every sampled lease a simulated move-out date.
#
# Names read from the surrounding scope (not defined in this snippet):
# cert_ramp_seasonality_df, lease_df, trained_survival_curve_for_propert,
# new_units_per_month and running_new_launched_units. The last one is only
# incremented here, so it must already hold a number before the loop runs.
now = datetime.now()
month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
# Day 1 plus 31 days always falls inside the following month (on day 1-4,
# depending on the month length); snapping back to day 1 makes the first
# cohort start exactly on the first of next month.
start = (month_start + timedelta(days=31)).replace(day=1)
stop = start + timedelta(days=30 * 36)

# Collect the monthly cohorts and concatenate once after the loop; calling
# pd.concat inside the loop recopies the growing frame on every iteration.
cohorts = []
for dt in rrule.rrule(rrule.MONTHLY, dtstart=start, until=stop):
    # THE SEASONALITY CURVE IS MEASURED (in SQL) DIRECTLY FROM OUR DATA
    # Look the calendar month's row up once; both figures below come from it.
    # .iloc[0] raises IndexError if the table has no row for this month.
    seasonality = (
        cert_ramp_seasonality_df[
            cert_ramp_seasonality_df.month_num == dt.month
        ]
        .fillna(0)
        .iloc[0]
    )
    cohort_size = int(seasonality["total_new_certs_per_carrier"])

    # NEW SALES EXPECTATIONS DRIVEN BY BUSINESS STAKEHOLDERS
    running_new_launched_units += new_units_per_month
    ramp_perc = seasonality["avg_new_cert_perc"]
    cohort_size += int(ramp_perc * running_new_launched_units)

    # ONCE WE KNOW HOW MANY LEASES WE EXPECT LET'S SAMPLE FROM THE HISTORICAL
    # DATA AND SIMULATE THEIR LIFECYCLES
    cohort = lease_df.sample(cohort_size, replace=True).reset_index()
    # Bracket assignment always writes a column; attribute assignment only
    # does so when the column already exists.
    cohort["move_in_date"] = dt

    # Reset the index so the sampled rows line up positionally with `cohort`.
    # Series addition aligns on index labels, so keeping the sampled labels
    # would leave NaT wherever they do not match 0..cohort_size-1.
    # NOTE(review): sampling here is without replacement, so it raises when
    # cohort_size exceeds the number of rows in the survival table - confirm
    # that is intended.
    surv_samples = (
        trained_survival_curve_for_propert
        .sample(cohort_size)
        .reset_index(drop=True)
    )
    surv_samples["offsets"] = surv_samples.months_into_term.apply(
        lambda x: pd.offsets.DateOffset(months=x)
    )
    cohort["move_out_date"] = cohort["move_in_date"] + surv_samples["offsets"]

    cohorts.append(cohort)

# THIS DATAFRAME WILL BECOME OUR "SYNTHETIC LEASE" DIMENSION
# Row labels are kept as-is, so they repeat across cohorts exactly as they
# would when concatenating month by month.
synth_certs = pd.concat(cohorts) if cohorts else pd.DataFrame()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment