Skip to content

Instantly share code, notes, and snippets.

@xizzzz
Last active August 6, 2024 02:46
Show Gist options
  • Save xizzzz/02caadbaf760510e267cc1fa9d3a7971 to your computer and use it in GitHub Desktop.
Save xizzzz/02caadbaf760510e267cc1fa9d3a7971 to your computer and use it in GitHub Desktop.
from math import exp
import numpy as np
def entbal_in_mem_one_iteration(
    c_array: np.ndarray, t_x: np.ndarray, z: np.ndarray,
    v: np.ndarray, alpha: float, beta: float
) -> tuple:
    """
    In-memory implementation of one entropy-balancing gradient step.

    :param c_array: matrix of control users;
        each row is the covariates array of one user.
    :param t_x: An array of covariates' mean on the treated users
    :param z: parameter vector
    :param v: momentum vector
    :param alpha: learning rate
    :param beta: momentum parameter
    :return: (next_v, next_z) updated momentum and parameter vectors
    """
    # Per-row weight exp(-<c_i, z>), with the exponent clamped at log(1e8)
    # so exp() cannot overflow. Computed ONCE and reused for both the
    # denominator and the numerator (the original evaluated it twice).
    weights = np.exp(np.minimum(-c_array.dot(z), np.log(1e8)))
    denominator = np.sum(weights)  # scalar normalizer
    numerator = c_array.T.dot(weights)  # weighted covariate sums
    c_x = numerator / denominator  # weighted covariate means on controls
    next_v = - alpha * (t_x - c_x) + beta * v  # momentum update
    next_z = z + next_v
    return next_v, next_z
def entbal_map_reduce_one_iteration(
    c_df: DataFrame, t_x: np.ndarray, z: np.ndarray,
    v: np.ndarray, alpha: float, beta: float
) -> tuple:
    """
    Spark map-reduce implementation of one entropy-balancing gradient step.

    :param c_df: Dataframe for the matrix of control users. Column x is an
        array of covariates; each row represents one user.
    :param t_x: An array of covariates' mean on the treated users
    :param z: parameter vector
    :param v: momentum vector
    :param alpha: learning rate
    :param beta: momentum parameter
    :return: (next_v, next_z) updated momentum and parameter vectors
    """
    def _row_terms(p):
        # Per-row weight exp(-<x, z>), with the exponent clamped at
        # log(1e8) so exp() cannot overflow. Computed ONCE per row
        # (the original lambda evaluated the same exp() twice).
        w = exp(np.minimum(-z.dot(p.x), np.log(1e8)))
        return w, w * np.array(p.x)

    # Sum the weights and the weighted covariates across all control rows.
    total_w, weighted_x = c_df.rdd.map(_row_terms).reduce(
        lambda a, b: (a[0] + b[0], a[1] + b[1])
    )
    c_x = np.array(weighted_x) / total_w  # weighted covariate means
    next_v = - alpha * (t_x - c_x) + beta * v  # momentum update
    next_z = z + next_v
    return next_v, next_z
def microsynth_in_mem_one_iteration(
    c_array: np.array, t_x: np.array, c_count: int,
    z: np.array, v: np.array, alpha: float, beta: float
):
    """
    In memory implementation of one accelerated MicroSynth iteration.

    :param c_array: numpy array for the matrix of control users.
        Each row is the covariates array of one user.
    :param t_x: An array of covariates' mean on the treated users
    :param c_count: the number of rows in c_array
    :param z: parameter
    :param v: parameter
    :param alpha: learning rate
    :param beta: momentum parameter
    :return: updated parameters (v, z, beta)
    """
    # Hinge-style per-row weight, clipped at zero from below.
    row_weights = np.clip(1.0 - c_array.dot(z), 0.0, None)
    # Weighted covariate sums over all control rows.
    weighted_sum = c_array.T.dot(row_weights)
    grad = (weighted_sum - t_x) / c_count
    v_old, beta_old = v, beta
    v_new = z + alpha * grad
    # FISTA-style momentum schedule for beta.
    beta_new = (1 + np.sqrt(1 + 4 * beta * beta)) / 2
    z_new = v_new + (beta_old - 1) / beta_new * (v_new - v_old)
    return v_new, z_new, beta_new
def microsynth_map_reduce_one_iteration(
    c_df: DataFrame, t_x: np.array, c_count: int,
    z: np.array, v: np.array, alpha: float, beta: float
):
    """
    Spark implementation of one accelerated MicroSynth iteration.

    :param c_df: Dataframe for the matrix of control users. Column x is an
        array of covariates; each row represents one user.
    :param t_x: An array of covariates' mean on the treated users
    :param c_count: the number of rows in c_df
    :param z: parameter
    :param v: parameter
    :param alpha: learning rate
    :param beta: momentum parameter
    :return: updated parameters (v, z, beta)
    """
    def _weighted_covariates(p):
        # Hinge weight max(0, 1 - <x, z>) applied to the row's covariates.
        x = np.array(p.x)
        return max(0, 1 - np.dot(x, z)) * x

    # Single map+reduce pass. The original chained three RDD operations and
    # read p.weight only to immediately discard it, which forced c_df to
    # carry a 'weight' column that was never used.
    Aw = c_df.rdd.map(_weighted_covariates).reduce(lambda a, b: a + b)
    gradient = (Aw - t_x) / c_count
    prev_v = v
    v = z + alpha * gradient
    prev_beta = beta
    # FISTA-style momentum schedule for beta.
    beta = (1 + np.sqrt(1 + 4 * beta * beta)) / 2
    z = v + (prev_beta - 1) / beta * (v - prev_v)
    return v, z, beta
@mvejaraguayo
Copy link

Hello! I read your paper — thank you for your findings. To be honest, I think I will have to read it several more times to fully understand it. I am currently a student at the Rady School of Management, pursuing their MSBA program; my team and I will present, to the best of our abilities, what you created: whether the problem is faced by other companies, and how you solved it. Do you have any tips to help us present your work correctly? Also, thank you for posting the code — I will try to run it in VS Code to test it and understand it a little better.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment