-
-
Save janzill/a961da02ac80cd961fe4688ab1398e25 to your computer and use it in GitHub Desktop.
larch.numba idce problems
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import larch | |
import numpy as np | |
import pandas as pd | |
import larch.numba as lx | |
from larch import P, X | |
# larch example data | |
hh, pp, tour, skims, emp = lx.example(200, ['hh', 'pp', 'tour', 'skims', 'emp']) | |
tour = tour.drop(columns=["TOURMODE", "TOURPURP", "N_STOPS", "N_TRIPS", "N_TRIPS_HBW", "N_TRIPS_HBO", "N_TRIPS_NHB"]) | |
tour = tour.merge(hh[["HHID", "HOMETAZ"]], on="HHID") | |
observations = tour[["TOURID", "DTAZ", "HOMETAZ"]].copy() | |
observations.TOURID += 1 | |
# turn idco tours into idca and attach distance and employment | |
distance = pd.DataFrame( | |
np.array(skims['AUTO_DIST']) | |
).rename_axis(index='otaz').unstack().reset_index().rename(columns={"level_0": "dtaz", 0: "distance"}) | |
distance.dtaz += 1 | |
distance.otaz += 1 | |
obs_ca = pd.merge( | |
emp[["TOTAL_EMP"]].reset_index(), | |
observations, | |
how="cross" | |
) | |
assert observations.shape[0] * emp.shape[0] == obs_ca.shape[0] | |
# for each O: grab all d, merge on TAZ | |
obs_ca = obs_ca.merge(distance.rename(columns={"otaz": "HOMETAZ", "dtaz": "TAZ"}), on=["HOMETAZ", "TAZ"], how="left") | |
# identify chosen alternative | |
obs_ca["chosen"] = (obs_ca.DTAZ == obs_ca.TAZ).astype('int') | |
obs_ca = obs_ca.set_index(["TOURID", "TAZ"]) | |
# technically not needed since all are available | |
obs_ca['avail'] = np.where((obs_ca.distance > 0) & (obs_ca.distance < np.inf) & (obs_ca.TOTAL_EMP > 0), 1, 0) | |
# Sample x% to produce idce data | |
frac_to_keep = 0.9 | |
obs_idce = pd.concat([ | |
obs_ca.loc[obs_ca.chosen == 0].sample(frac=frac_to_keep, replace=False, random_state=1), | |
obs_ca.loc[obs_ca.chosen == 1] | |
]).sort_index() | |
# Larch model | |
tree = lx.DataTree( | |
obs=lx.Dataset.construct.from_idce(obs_idce, crack=False), | |
#obs=lx.Dataset.construct.from_idca(obs_ca, crack=False), # idca for comparison - this works | |
) | |
m = lx.Model(datatree=tree) | |
m.choice_ca_var = "chosen" | |
m.quantity_ca = P.emp_p * X('TOTAL_EMP') | |
m.quantity_scale = P.Theta | |
m.utility_ca = P.distance * X.distance | |
#m.availability_ca_var = '(distance > 0) & (distance < np.inf) & (TOTAL_EMP > 0)' | |
m.availability_var = 'avail' | |
m.lock_values(emp_p=0, Theta=1) | |
m.set_cap(10) | |
ll_init = m.loglike() | |
print(f"init loglike = {ll_init}") | |
m.maximize_loglike() | |
print(f"final loglike = {m.loglike()}") | |
print(m.pfo()) | |
# m.calculate_parameter_covariance() | |
# m.parameter_summary() | |
# compare this to larch cython: | |
print("\n### pre numba larch ###") | |
d = larch.DataFrames(ce=obs_idce, ch='chosen', crack=False, av_name='avail') | |
m = larch.Model(dataservice=d) | |
m.quantity_ca = P.emp_p * X('TOTAL_EMP') | |
m.quantity_scale = P.Theta | |
m.utility_ca = P.distance * X.distance | |
m.lock_values(emp_p=0, Theta=1) | |
m.set_cap(10) | |
m.load_data() | |
ll_init = m.loglike() | |
print(f"init loglike = {ll_init}") | |
m.maximize_loglike() | |
print(f"final loglike = {m.loglike()}") | |
print(m.pfo()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment