Skip to content

Instantly share code, notes, and snippets.

@michellemho
Last active September 19, 2018 18:59
Show Gist options
  • Save michellemho/82f1b0300bcc2d3702db154908524151 to your computer and use it in GitHub Desktop.
Save michellemho/82f1b0300bcc2d3702db154908524151 to your computer and use it in GitHub Desktop.
import numpy as np
from datetime import datetime, timedelta
start_date=datetime(2017,1,1)
end_date=datetime.today()
spring_peak = datetime(2017,4,5)
fall_peak = datetime(2017,9,15)
num_visits = 10000
def hour_prob(h):
# 10 AM and 16 PM are peak
hour_prob = np.exp(-(h - 10)**2/(2*4)) + 0.7*np.exp(-(h - 16)**2/(2*4)) + 0.05
return hour_prob
def daily_prob(d, start_date=start_date, end_date=end_date, spring_peak=spring_peak, fall_peak=fall_peak):
date = start_date + timedelta(days=d)
date_range_domain = (end_date - start_date).days
if date.weekday() > 5: # weekends
return 0.1
else:
days_to_spring_peak = spring_peak.timetuple().tm_yday - start_date.timetuple().tm_yday
days_to_fall_peak = fall_peak.timetuple().tm_yday - start_date.timetuple().tm_yday
eq_components = {}
for peak in [days_to_spring_peak, days_to_fall_peak]:
counter = peak
year = 0
while counter < date_range_domain:
equation = np.exp(-(d - (365*year + peak))**2/(2*5000))
eq_components[counter+peak] = equation
counter += 365
year += 1
return sum(eq_components.values(),0.05)
def calc_cpd(prob, domain):
norm_prob = [prob(a) for a in range(domain)] / np.sum([prob(a) for a in range(domain)])
cpd = np.cumsum(norm_prob)
return cpd
def sample_cpd(cpd, n_samples):
samples = np.random.uniform(size=n_samples)
return np.digitize(samples,cpd)
n_samples = num_visits
hours = sample_cpd(calc_cpd(hour_prob,24), n_samples)
days = sample_cpd(calc_cpd(daily_prob,(end_date - start_date).days), n_samples)
visit_dates = [start_date + timedelta(hours = int(h)) + timedelta(days = int(d)) for h, d in list(zip(hours, days))]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment