Last active
June 10, 2024 19:00
-
-
Save peter-herman/faeea8ec032c4c2c13bcbc9c400cca9b to your computer and use it in GitHub Desktop.
OneSectroGE Example for "gegravity: General Equilibrium Gravity Modeling in Python"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__Author__ = "Peter Herman" | |
__Project__ = "gegravity" | |
__Created__ = "March 01, 2021" | |
__Updated__ = "June 06, 2024" | |
__Description__ = """A demonstration of the OneSectorGE model and module""" | |
# --- | |
# Load Packages | |
# --- | |
# Note: The import statements are set to run from the github repository. To run the example code using the packaged | |
# version of gegravity, remove "src." from the gegravity import statement. | |
import gegravity as ge | |
import pandas as pd | |
pd.set_option("display.max_columns", None) | |
pd.set_option('display.width', 1000) | |
# ---- | |
# Load some gravity data (data available on github: https://gist.github.com/peter-herman/13b056e52105008c53faa482db67ed4a) | |
# ---- | |
gravity_data_location = "examples/sample_data_set.dlm" | |
grav_data = pd.read_csv(gravity_data_location) | |
print(grav_data.head()) | |
''' | |
exporter importer year ... common_language lndist international | |
0 GBR AUS 2006 ... 1 9.7126 1 | |
1 FIN AUS 2006 ... 0 9.5997 1 | |
2 USA AUS 2006 ... 1 9.5963 1 | |
3 IND AUS 2006 ... 1 9.1455 1 | |
4 SGP AUS 2006 ... 1 8.6732 1 | |
''' | |
# Add a constant to the data | |
grav_data['constant'] = 1 | |
# ---- | |
# Prepare data and parameter inputs for GE Model | |
# ---- | |
# The model requires two main types of inputs: baseline data and cost parameters. Data frames contianing both sets of | |
# information are used to create two special objects in the gegravity package, which organize the data and ensure that | |
# all needed pieces can be consistently found and drawn on while solving the model. Creating the objects largely | |
# requires identifying the columns in which certain specific pieces of information can be found | |
# 1. Baseline data: | |
# Create a new BaselineData object to manage the input trade, output, expenditure, and cost variable data for the | |
# GE model. | |
baseline = ge.BaselineData(grav_data, | |
imp_var_name='importer', # Columns with importer identifiers | |
exp_var_name='exporter', # Column with exporter identifiers | |
year_var_name='year', # Column with year (time) identifier | |
trade_var_name='trade', # Column with trade values | |
expend_var_name='E', # Column with importer total expenditure values | |
output_var_name='Y') # Column with exporter total output values | |
# 2. Cost parameters: | |
# Define the parameters that will be used to construct trade costs. These values were estimated separately via PPML. | |
# These estimates were produced with Stata via the following code, although any appropriate software could be used to | |
# derive these values. | |
''' | |
import delimited "D:\work\Peter_Herman\projects\gegravity\examples\sample_data_set.dlm" | |
encode importer, gen(imp_fe) | |
encode exporter, gen(exp_fe) | |
ppmlhdfe trade lndist contiguity common_language pta international, absorb(i.exp_fe i.imp_fe) | |
''' | |
# Create DataFrame of coefficient estimates and standard errors. Variable names ('var') should match the column | |
# names in the baseline data above. Standard errors are used in the MonteCarloGE model but are not used by OneSectorGE | |
# model (but can still be provided) | |
ests = [ | |
# 'var', 'beta', 'stderr' | |
( 'lndist', -0.3898623, 0.0729915), | |
( 'contiguity', 0.891577, 0.1327354), | |
('common_language', 0.0326249, 0.0840702), | |
( 'pta', 0.4711383, 0.1076578), | |
( 'international', -3.412584, 0.2151235), | |
( 'constant', 16.32434, 0.4844137)] | |
ests = pd.DataFrame(ests, columns = ['var', 'beta', 'stderr']) | |
# Use the ests dataframe to define a CostCoeff object for the gegravity model, specifying the columns containing the variable identifiers, coefficient estimates, and standard errors | |
cost_params = ge.CostCoeffs(estimates = ests, # Dataframe with estimate values | |
identifier_col ='var', # Column with variable identifier | |
coeff_col ='beta', # Column with estimated coefficient values | |
stderr_col ='stderr') # Column with estimated standard errors | |
# ---- | |
# Define a GE model and calibrate the baseline version of the model | |
# ---- | |
# Define OneSectorGE model | |
ge_model = ge.OneSectorGE(baseline = baseline, # BaselineData input | |
cost_coeff_values = cost_params, # CostCoeff input | |
cost_variables = ['lndist', 'contiguity', # Variables to use to construct trade costs | |
'common_language', 'pta', | |
'international', 'constant'], | |
year = "2006", # Year to use for model | |
reference_importer = "DEU", # Reference importer to use (normalizes IMRs to DEU's IMR) | |
sigma = 5) # Elasticity of substitution | |
# The scaling of the outward multilateral resistance term can potentially cause issues when solving the model. Rescaling | |
# the terms can mitigate this numerical issue. The following method helps identify rescale factors that are likely to | |
# result in the model being solveable. | |
potential_factors = ge_model.check_omr_rescale(omr_rescale_range=4) | |
print(potential_factors) | |
''' | |
omr_rescale omr_rescale (alt format) solved message max_func_value mean_func_value reference_importer_omr | |
0 0.0001 10^-4 True The solution converged. 1.706264e-08 -2.081753e-10 0.050346 | |
1 0.0010 10^-3 True The solution converged. 9.829577e-10 5.869294e-11 0.050346 | |
2 0.0100 10^-2 True The solution converged. 1.566544e-09 1.595307e-10 0.050346 | |
3 0.1000 10^-1 True The solution converged. 3.543053e-08 -1.622280e-11 0.050346 | |
4 1.0000 10^0 True The solution converged. 1.663875e-09 -1.471645e-11 0.050346 | |
5 10.0000 10^1 True The solution converged. 1.648595e-09 1.652183e-10 0.050346 | |
6 100.0000 10^2 True The solution converged. 1.260880e-12 6.217249e-15 0.050346 | |
7 1000.0000 10^3 False The iteration is not making good progress, as ... 4.666270e-01 -3.827271e-02 0.053669 | |
8 10000.0000 10^4 False The iteration is not making good progress, as ... 4.664453e-01 -3.825787e-02 0.053667 | |
''' | |
# It looks like rescale factors between 0.0001 and 100 all yield a solveable model, produce similar solutions to the | |
# baseline model (reference importer OMR = 0.050346), and result in function values that are consistently close to zero, | |
# which are all good signs. Going forward, we'll use a factor of 1, which is the default (i.e. OMRs will not be | |
# rescaled) | |
# Build baseline (solves for baseline multilateral resistance trms and calibrates other model parameters) | |
ge_model.build_baseline(omr_rescale = 1) | |
''' | |
Solving for baseline MRs... | |
The solution converged. | |
''' | |
# Examine the solutions for the baseline multilateral resistances | |
print(ge_model.baseline_mr.head(12)) | |
# --- | |
# Define counterfactual experiment | |
# --- | |
# Create a copy of the baseline data | |
exp_data = ge_model.baseline_data.copy() | |
# Modify the copied data to reflect a counterfactual experiment in which Canada (CAN) and Japan (JPN) sign a | |
# preferential trade agreement (pta) by setting the 'pta' variable to one for these countries | |
exp_data.loc[(exp_data["importer"] == "CAN") & (exp_data["exporter"] == "JPN"), "pta"] = 1 | |
exp_data.loc[(exp_data["importer"] == "JPN") & (exp_data["exporter"] == "CAN"), "pta"] = 1 | |
# Define the model experiment by inputting this data into the GE model | |
ge_model.define_experiment(exp_data) | |
# Counterfactual experiment trade costs are constructed when defining the experiment and can be examined. Trade costs | |
# in the GE model are represented similarly to trade costs estimated in the econometric model (trade = exp(BX), where | |
# BX is the trade cost estimate as captured by the model covariates). As a result, the cost values are generally | |
# positive and higher values imply more trade. | |
print(ge_model.bilateral_costs.head()) | |
# Check the costs of Canadian exports to Japan | |
print(ge_model.bilateral_costs.loc[('CAN','JPN'),:]) | |
''' | |
baseline trade cost 11305.165421 | |
experiment trade cost 18108.800547 | |
trade cost change (%) 60.181650 | |
Name: (CAN, JPN), dtype: float64 | |
''' | |
## | |
# Simulate the counterfactual model | |
## | |
# Finally, we can simulate the effects of the counterfactual experiment | |
ge_model.simulate() | |
# ----- | |
# Access Results | |
# ----- | |
## | |
# Retrieve many of the different sets of model results | |
## | |
# A collection of many of the key country-level results (prices, total imports/exports, GDP, welfare, etc.) | |
country_results = ge_model.country_results | |
# The bilateral trade results | |
bilateral_results = ge_model.bilateral_trade_results | |
# A wider selection of aggregate, country-level trade results | |
agg_trade = ge_model.aggregate_trade_results | |
# country multilateral resistance (MR) terms | |
mr_terms = ge_model.country_mr_terms | |
# Get the solver diaganoistics, which is a dictionary containing many types of solver diagnostic info | |
solver_diagnostics = ge_model.solver_diagnostics | |
# ---- | |
# Export results | |
# ---- | |
# Export the results to a collection of spreadsheet (.csv) files and add trade values in levels to the outputs. | |
ge_model.export_results(directory="examples//",name="CAN_JPN_PTA_experiment", include_levels = True) | |
# It is also possible to add alternative country identifies such as full country names using the country_names argument. | |
# See the technical documentation for details | |
# ------- | |
# Post Estimation Analysis | |
# ------- | |
# Examine how the counterfactual experiment affected Canadian imports from NAFTA members USA and Mexico | |
nafta_share = ge_model.trade_share(importers = ['CAN'],exporters = ['USA','MEX']) | |
print(nafta_share) | |
# Calculate counterfactual experiment trade based on observed levels and estimated changes in trade | |
levels = ge_model.calculate_levels() | |
print(levels.head()) | |
# Calculate trade weighted shocks | |
# Start with bilateral level shocks | |
bilat_cost_shock = ge_model.trade_weighted_shock(how = 'bilateral') | |
print(bilat_cost_shock.head()) | |
# Now at country-level, which summarizes the bilateral shocks | |
country_cost_shock = ge_model.trade_weighted_shock(how='country', aggregations = ['mean', 'sum', 'max']) | |
print(country_cost_shock.head()) | |
# ---- | |
# Diagnose model solver problems | |
# ---- | |
# Define a new model to work with | |
new_model = ge.OneSectorGE(baseline = baseline, | |
cost_coeff_values = cost_params, | |
cost_variables = ['lndist', 'contiguity', | |
'common_language', 'pta', | |
'international', 'constant'], | |
year = "2006", | |
reference_importer = "DEU", | |
sigma = 5) | |
# In addition to the check_omr_rescale method, There are additional tools that can be used to help diagnose issues when | |
# a model fails to solve. One option is to test that the model system of equations is computable from the supplied data | |
# and parameters using the following method. | |
test_diagnostics = new_model.test_baseline_mr_function() | |
# This method computes the values of the MR system of equation at the initial values (not the solution) to insure | |
# that the functions are fully parameterized and can be computed. The various vectors and matrices should be filled | |
# completely with numeric values. The presence of any missing values (nan) is an indication that the baseline data | |
# and/or trade cost parameters are not complete or defined incorrectly. | |
# Check one set of parameters (trade cost x expenditure share), for example: | |
input_params = test_diagnostics['mr_params'] | |
print(input_params['cost_exp_shr']) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment