Skip to content

Instantly share code, notes, and snippets.

@jtrecenti
Last active September 14, 2023 09:02
Show Gist options
  • Save jtrecenti/b39435913b0765c0ff1c1f2f9874dffc to your computer and use it in GitHub Desktop.
Save jtrecenti/b39435913b0765c0ff1c1f2f9874dffc to your computer and use it in GitHub Desktop.
Exemplo simulado PSM
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
# Fix the global NumPy RNG state so every run draws the same sample.
np.random.seed(42)

# --- Simulated observational data -------------------------------------
n = 500  # sample size

# Covariates: integer age in [20, 60] (upper bound of randint is exclusive)
# and a binary sex indicator (0: Female, 1: Male).
age = np.random.randint(low=20, high=61, size=n)
sex = np.random.randint(low=0, high=2, size=n)

# Treatment assignment is confounded by the covariates: the log-odds of being
# treated increase with age and are higher for males, so the groups are not
# comparable without adjustment.
linear_predictor = 0.05 * age + 0.5 * sex - 4
prob_treatment = 1 / (1 + np.exp(-linear_predictor))

# One Bernoulli draw per individual (0: control, 1: treatment).
treatment = np.random.binomial(1, prob_treatment)

# Collect everything in a single table, one row per individual.
df = pd.DataFrame(dict(age=age, sex=sex, treatment=treatment))

# Preview of the first rows (only rendered when run as a notebook cell).
df.head()
# --- Propensity score matching -----------------------------------------
# The plots below compare the sample before and after matching, so the matched
# sample has to be built first (it was previously referenced without ever
# being defined, which raised a NameError).

# Step 1: estimate each individual's propensity score, i.e. the modelled
# probability of receiving treatment given the observed covariates.
covariates = ['age', 'sex']
propensity_model = LogisticRegression()
propensity_model.fit(df[covariates], df['treatment'])
# Column 1 of predict_proba corresponds to the second class (treatment == 1).
scored = df.assign(
    propensity_score=propensity_model.predict_proba(df[covariates])[:, 1]
)

# Step 2: greedy 1:1 nearest-neighbour matching without replacement. Each
# treated unit is paired with the still-unused control whose propensity score
# is closest; a control is removed from the pool once it has been used.
treated_scores = scored.loc[scored['treatment'] == 1, 'propensity_score']
available_controls = scored.loc[scored['treatment'] == 0, 'propensity_score']
matched_treated_idx = []
matched_control_idx = []
for treated_idx, treated_score in treated_scores.items():
    if available_controls.empty:
        # More treated units than controls: the remainder stays unmatched.
        break
    control_idx = (available_controls - treated_score).abs().idxmin()
    matched_treated_idx.append(treated_idx)
    matched_control_idx.append(control_idx)
    available_controls = available_controls.drop(control_idx)

# Matched sample: every matched treated unit together with its control.
matched_data = scored.loc[matched_treated_idx + matched_control_idx]

# --- Age distribution: Control and Treatment (Before and After Matching) ---
fig, axes = plt.subplots(2, 2, figsize=(14, 12))
fig.suptitle('Age Distribution: Control and Treatment (Before and After Matching)')

# One row per treatment group, one column per stage of the analysis.
groups = [(0, 'Control', 'blue'), (1, 'Treatment', 'orange')]
stages = [('Before', df), ('After', matched_data)]
for row, (group_flag, group_name, color) in enumerate(groups):
    for col, (stage, data) in enumerate(stages):
        ax = axes[row, col]
        ax.hist(
            data.loc[data['treatment'] == group_flag, 'age'],
            alpha=0.7,
            label=f'{group_name} ({stage})',
            bins=15,
            color=color,
        )
        ax.set_title(f'{group_name} Group: {stage} Matching')
        ax.set_xlabel('Age')
        ax.set_ylabel('Frequency')

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment