jtrecenti/gist:b39435913b0765c0ff1c1f2f9874dffc

## gistfile1.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

# Seed for reproducibility
np.random.seed(42)

# Generate synthetic data
n = 500  # Number of samples
# Generate age (integers from 20 to 60 inclusive) and sex (0: Female, 1: Male)
age = np.random.randint(20, 61, n)
sex = np.random.randint(0, 2, n)

# Generate a treatment variable (0: control, 1: treatment) that depends on age and sex.
# Both coefficients are positive, so older and male individuals are more likely
# to receive treatment.
prob_treatment = 1 / (1 + np.exp(-(0.05 * age + 0.5 * sex - 4)))
treatment = np.random.binomial(1, prob_treatment)

# Create a DataFrame
df = pd.DataFrame({'age': age, 'sex': sex, 'treatment': treatment})

# A bare `df.head()` only renders as the last expression of a notebook cell;
# print it so the preview is also visible when this runs as a script.
print(df.head())

# Estimate propensity scores: P(treatment = 1 | age, sex) from a logistic model.
covariates = ['age', 'sex']
propensity_model = LogisticRegression()
propensity_model.fit(df[covariates], df['treatment'])
# Column 1 of predict_proba is the probability of the second class (treatment == 1).
# The scores are kept in a separate Series so `df` keeps its original columns.
propensity = pd.Series(
    propensity_model.predict_proba(df[covariates])[:, 1],
    index=df.index,
)

# Greedy 1:1 nearest-neighbour matching on the propensity score, without
# replacement: each treated unit takes the closest still-unused control.
is_treated = df['treatment'] == 1
available_controls = propensity[~is_treated]
matched_treated_idx = []
matched_control_idx = []
for treated_idx, treated_score in propensity[is_treated].items():
    if available_controls.empty:
        # No controls left to pair with; remaining treated units stay unmatched.
        break
    control_idx = (available_controls - treated_score).abs().idxmin()
    matched_treated_idx.append(treated_idx)
    matched_control_idx.append(control_idx)
    available_controls = available_controls.drop(control_idx)

# Matched sample: the paired treated rows followed by their control rows.
matched_data = pd.concat([df.loc[matched_treated_idx], df.loc[matched_control_idx]])

# Create subplots for Age distribution: Control and Treatment (Before and After Matching)
fig, axes = plt.subplots(2, 2, figsize=(14, 12))
fig.suptitle('Age Distribution: Control and Treatment (Before and After Matching)')

# One panel per (group, stage): rows are control/treatment, columns are before/after.
panels = [
    (axes[0, 0], df, 0, 'Control', 'Before', 'blue'),
    (axes[0, 1], matched_data, 0, 'Control', 'After', 'blue'),
    (axes[1, 0], df, 1, 'Treatment', 'Before', 'orange'),
    (axes[1, 1], matched_data, 1, 'Treatment', 'After', 'orange'),
]
for ax, data, group, group_name, stage, color in panels:
    ax.hist(
        data[data['treatment'] == group]['age'],
        alpha=0.7,
        label=f'{group_name} ({stage})',
        bins=15,
        color=color,
    )
    ax.set_title(f'{group_name} Group: {stage} Matching')
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    # The histograms carry labels, so draw the legend that displays them.
    ax.legend()

plt.show()
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.linear_model import LogisticRegression

	# Seed for reproducibility
	np.random.seed(42)

	# Generate synthetic data
	n = 500 # Number of samples
	# Generate age (integers from 20 to 60 inclusive) and sex (0: Female, 1: Male)
	age = np.random.randint(20, 61, n)
	sex = np.random.randint(0, 2, n)

	# Generate a treatment variable (0: control, 1: treatment) that depends on age and sex.
	# Both coefficients are positive, so older and male individuals are more likely
	# to receive treatment.
	prob_treatment = 1 / (1 + np.exp(-(0.05 * age + 0.5 * sex - 4)))
	treatment = np.random.binomial(1, prob_treatment)

	# Create a DataFrame
	df = pd.DataFrame({'age': age, 'sex': sex, 'treatment': treatment})

	# A bare `df.head()` only renders as the last expression of a notebook cell;
	# print it so the preview is also visible when this runs as a script.
	print(df.head())

	# Estimate propensity scores: P(treatment = 1 | age, sex) from a logistic model.
	covariates = ['age', 'sex']
	propensity_model = LogisticRegression()
	propensity_model.fit(df[covariates], df['treatment'])
	# Column 1 of predict_proba is the probability of the second class (treatment == 1).
	# The scores are kept in a separate Series so `df` keeps its original columns.
	propensity = pd.Series(
		propensity_model.predict_proba(df[covariates])[:, 1],
		index=df.index,
	)

	# Greedy 1:1 nearest-neighbour matching on the propensity score, without
	# replacement: each treated unit takes the closest still-unused control.
	is_treated = df['treatment'] == 1
	available_controls = propensity[~is_treated]
	matched_treated_idx = []
	matched_control_idx = []
	for treated_idx, treated_score in propensity[is_treated].items():
		if available_controls.empty:
			# No controls left to pair with; remaining treated units stay unmatched.
			break
		control_idx = (available_controls - treated_score).abs().idxmin()
		matched_treated_idx.append(treated_idx)
		matched_control_idx.append(control_idx)
		available_controls = available_controls.drop(control_idx)

	# Matched sample: the paired treated rows followed by their control rows.
	matched_data = pd.concat([df.loc[matched_treated_idx], df.loc[matched_control_idx]])

	# Create subplots for Age distribution: Control and Treatment (Before and After Matching)
	fig, axes = plt.subplots(2, 2, figsize=(14, 12))
	fig.suptitle('Age Distribution: Control and Treatment (Before and After Matching)')

	# One panel per (group, stage): rows are control/treatment, columns are before/after.
	panels = [
		(axes[0, 0], df, 0, 'Control', 'Before', 'blue'),
		(axes[0, 1], matched_data, 0, 'Control', 'After', 'blue'),
		(axes[1, 0], df, 1, 'Treatment', 'Before', 'orange'),
		(axes[1, 1], matched_data, 1, 'Treatment', 'After', 'orange'),
	]
	for ax, data, group, group_name, stage, color in panels:
		ax.hist(
			data[data['treatment'] == group]['age'],
			alpha=0.7,
			label=f'{group_name} ({stage})',
			bins=15,
			color=color,
		)
		ax.set_title(f'{group_name} Group: {stage} Matching')
		ax.set_xlabel('Age')
		ax.set_ylabel('Frequency')
		# The histograms carry labels, so draw the legend that displays them.
		ax.legend()

	plt.show()