Skip to content

Instantly share code, notes, and snippets.

@janzill
Created July 15, 2022 02:39
Show Gist options
  • Save janzill/a961da02ac80cd961fe4688ab1398e25 to your computer and use it in GitHub Desktop.
Save janzill/a961da02ac80cd961fe4688ab1398e25 to your computer and use it in GitHub Desktop.
larch.numba idce problems
import larch
import numpy as np
import pandas as pd
import larch.numba as lx
from larch import P, X
# larch example data
hh, pp, tour, skims, emp = lx.example(200, ['hh', 'pp', 'tour', 'skims', 'emp'])
tour = tour.drop(columns=["TOURMODE", "TOURPURP", "N_STOPS", "N_TRIPS", "N_TRIPS_HBW", "N_TRIPS_HBO", "N_TRIPS_NHB"])
tour = tour.merge(hh[["HHID", "HOMETAZ"]], on="HHID")
observations = tour[["TOURID", "DTAZ", "HOMETAZ"]].copy()
observations.TOURID += 1
# turn idco tours into idca and attach distance and employment
distance = pd.DataFrame(
np.array(skims['AUTO_DIST'])
).rename_axis(index='otaz').unstack().reset_index().rename(columns={"level_0": "dtaz", 0: "distance"})
distance.dtaz += 1
distance.otaz += 1
obs_ca = pd.merge(
emp[["TOTAL_EMP"]].reset_index(),
observations,
how="cross"
)
assert observations.shape[0] * emp.shape[0] == obs_ca.shape[0]
# for each O: grab all d, merge on TAZ
obs_ca = obs_ca.merge(distance.rename(columns={"otaz": "HOMETAZ", "dtaz": "TAZ"}), on=["HOMETAZ", "TAZ"], how="left")
# identify chosen alternative
obs_ca["chosen"] = (obs_ca.DTAZ == obs_ca.TAZ).astype('int')
obs_ca = obs_ca.set_index(["TOURID", "TAZ"])
# technically not needed since all are available
obs_ca['avail'] = np.where((obs_ca.distance > 0) & (obs_ca.distance < np.inf) & (obs_ca.TOTAL_EMP > 0), 1, 0)
# Sample x% to produce idce data
frac_to_keep = 0.9
obs_idce = pd.concat([
obs_ca.loc[obs_ca.chosen == 0].sample(frac=frac_to_keep, replace=False, random_state=1),
obs_ca.loc[obs_ca.chosen == 1]
]).sort_index()
# Larch model
tree = lx.DataTree(
obs=lx.Dataset.construct.from_idce(obs_idce, crack=False),
#obs=lx.Dataset.construct.from_idca(obs_ca, crack=False), # idca for comparison - this works
)
m = lx.Model(datatree=tree)
m.choice_ca_var = "chosen"
m.quantity_ca = P.emp_p * X('TOTAL_EMP')
m.quantity_scale = P.Theta
m.utility_ca = P.distance * X.distance
#m.availability_ca_var = '(distance > 0) & (distance < np.inf) & (TOTAL_EMP > 0)'
m.availability_var = 'avail'
m.lock_values(emp_p=0, Theta=1)
m.set_cap(10)
ll_init = m.loglike()
print(f"init loglike = {ll_init}")
m.maximize_loglike()
print(f"final loglike = {m.loglike()}")
print(m.pfo())
# m.calculate_parameter_covariance()
# m.parameter_summary()
# compare this to larch cython:
print("\n### pre numba larch ###")
d = larch.DataFrames(ce=obs_idce, ch='chosen', crack=False, av_name='avail')
m = larch.Model(dataservice=d)
m.quantity_ca = P.emp_p * X('TOTAL_EMP')
m.quantity_scale = P.Theta
m.utility_ca = P.distance * X.distance
m.lock_values(emp_p=0, Theta=1)
m.set_cap(10)
m.load_data()
ll_init = m.loglike()
print(f"init loglike = {ll_init}")
m.maximize_loglike()
print(f"final loglike = {m.loglike()}")
print(m.pfo())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment