-
-
Save xizzzz/02caadbaf760510e267cc1fa9d3a7971 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import exp | |
import numpy as np | |
def entbal_in_mem_one_iteration(
    c_array: np.ndarray, t_x: np.ndarray, z: np.ndarray,
    v: np.ndarray, alpha: float, beta: float
) -> tuple:
    """
    In memory implementation of one entropy balancing iteration.

    Each control row gets the unnormalised weight ``exp(-row . z)``; the
    weighted mean of the control covariates is compared with ``t_x`` and the
    difference drives a momentum update of ``z``.

    :param c_array: numpy array for the matrix of control users.
        Each row is the covariates array of one user.
    :param t_x: An array of covariates' mean on the treated users
    :param z: parameter (one entry per covariate; 1-D array expected)
    :param v: parameter (previous update step, same shape as ``z``)
    :param alpha: learning rate
    :param beta: momentum parameter
    :return: tuple ``(next_v, next_z)`` with the updated parameters
    """
    # Cap the exponent at log(1e8) so that no single weight exceeds 1e8 and
    # np.exp cannot overflow to inf.
    exponent = np.minimum(-c_array.dot(z), np.log(1e8))
    # Computed once and shared by the numerator and the denominator.
    weights = np.exp(exponent)

    denominator = np.sum(weights)  # scalar
    # .T is a no-op for 1-D weights; kept so shapes behave as before.
    numerator = c_array.T.dot(weights.T)
    c_x = numerator / denominator  # weighted mean of control covariates

    next_v = -alpha * (t_x - c_x) + beta * v
    next_z = z + next_v
    return next_v, next_z
def entbal_map_reduce_one_iteration(
    c_df: "DataFrame", t_x: np.ndarray, z: np.ndarray,
    v: np.ndarray, alpha: float, beta: float
) -> tuple:
    """
    Spark implementation of one entropy balancing iteration.

    Every row is mapped to its unnormalised weight and its weighted
    covariates; a single reduce sums both so the weighted control mean can
    be formed on the driver.

    :param c_df: Dataframe for the matrix of control users. Column x is an
        array of covariates. Each row represents one user.
    :param t_x: An array of covariates' mean on the treated users
    :param z: parameter
    :param v: parameter
    :param alpha: learning rate
    :param beta: momentum parameter
    :return: tuple ``(next_v, next_z)`` with the updated parameters
    """
    # Exponent cap: keeps each per-row weight at or below 1e8.
    log_cap = np.log(1e8)

    def weighted_row(p):
        # Evaluate the clipped exponential once per row and reuse it for
        # both the weight total and the weighted covariates.
        weight = exp(np.minimum(-z.dot(p.x), log_cap))
        return weight, weight * np.array(p.x)

    total_weight, weighted_sum = c_df.rdd.map(weighted_row).reduce(
        lambda a, b: (a[0] + b[0], a[1] + b[1])
    )

    c_x = np.array(weighted_sum) / total_weight
    next_v = -alpha * (t_x - c_x) + beta * v
    next_z = z + next_v
    return next_v, next_z
def microsynth_in_mem_one_iteration(
    c_array: np.ndarray, t_x: np.ndarray, c_count: int,
    z: np.ndarray, v: np.ndarray, alpha: float, beta: float
) -> tuple:
    """
    In memory implementation of one MicroSynth iteration.

    Row weights are ``max(0, 1 - row . z)``. The step taken from ``z`` is
    then extrapolated using the previous ``v`` with a coefficient derived
    from ``beta``, which itself is advanced by
    ``(1 + sqrt(1 + 4 * beta**2)) / 2`` (the recurrence used by accelerated,
    Nesterov/FISTA-style gradient methods).

    :param c_array: numpy array for the matrix of control users.
        Each row is the covariates array of one user.
    :param t_x: An array of covariates' mean on the treated users
    :param c_count: the number of rows in c_array
    :param z: parameter
    :param v: parameter
    :param alpha: learning rate
    :param beta: momentum parameter
    :return: tuple ``(v, z, beta)`` with the updated parameters
    """
    # Weights are clipped at zero so they can never go negative.
    weight = np.maximum(0, 1 - np.dot(c_array, z))
    Aw = np.dot(c_array.T, weight)
    gradient = (Aw - t_x) / c_count

    next_v = z + alpha * gradient
    next_beta = (1 + np.sqrt(1 + 4 * beta * beta)) / 2
    # Extrapolate along the change in v; the coefficient mixes the old and
    # the new beta.
    next_z = next_v + (beta - 1) / next_beta * (next_v - v)
    return next_v, next_z, next_beta
def microsynth_map_reduce_one_iteration(
    c_df: "DataFrame", t_x: np.ndarray, c_count: int,
    z: np.ndarray, v: np.ndarray, alpha: float, beta: float
) -> tuple:
    """
    Spark implementation of one MicroSynth iteration.

    Each row is mapped to its covariates scaled by the weight
    ``max(0, 1 - x . z)``; a reduce sums those vectors, after which the
    parameter update runs on the driver.

    :param c_df: Dataframe for the matrix of control users. Column x is an
        array of covariates. Each row represents one user.
    :param t_x: An array of covariates' mean on the treated users
    :param c_count: the number of rows in c_df
    :param z: parameter
    :param v: parameter
    :param alpha: learning rate
    :param beta: momentum parameter
    :return: tuple ``(v, z, beta)`` with the updated parameters
    """
    def weighted_row(p):
        # Only column x is read: the weight is recomputed from z on every
        # iteration, so no stored weight column is needed.
        x = np.array(p.x)
        return x * max(0, 1 - np.dot(x, z))

    Aw = c_df.rdd.map(weighted_row).reduce(lambda a, b: a + b)
    gradient = (Aw - t_x) / c_count

    next_v = z + alpha * gradient
    next_beta = (1 + np.sqrt(1 + 4 * beta * beta)) / 2
    # Extrapolate along the change in v; the coefficient mixes the old and
    # the new beta.
    next_z = next_v + (beta - 1) / next_beta * (next_v - v)
    return next_v, next_z, next_beta
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello! I read your paper — thank you for your findings. To be honest, I think I will have to read it several more times to fully understand it. I am currently a student in the MSBA program at the Rady School of Management, and my team and I will present, to the best of our abilities, what you created: whether the problem is faced by other companies, and how you solved it. Do you have any tips to help us present your work correctly? Also, thank you for posting the code; I will try to run it in VS Code to test it and to understand it a little better.