Skip to content

Instantly share code, notes, and snippets.

@mplatzer
Created November 17, 2024 17:23
Show Gist options
  • Save mplatzer/35d84c72e19a1b802f1f5dfc87322b91 to your computer and use it in GitHub Desktop.
Save mplatzer/35d84c72e19a1b802f1f5dfc87322b91 to your computer and use it in GitHub Desktop.
US Census Income Dataset - Differentially Private Synthetic Data with MOSTLY AI
# LOAD original data
import pandas as pd
# Read the census CSV (gzip-compressed, judging by the .csv.gz URL) from the
# public demo-data repository; the result is reused by every training run below.
census_df = pd.read_csv('https://github.com/mostly-ai/public-demo-data/raw/refs/heads/dev/census/census.csv.gz')
# INITIALIZE python client
from mostlyai import MostlyAI
# Constructed with no arguments; presumably the client picks up its API
# credentials / connection settings from the environment -- TODO confirm.
mostly = MostlyAI()
# TRAIN with Differential Privacy
# Sweep every (noise multiplier, max grad norm) combination: 9 x 3 = 27
# training runs, each submitted as its own named generator on the same
# census DataFrame.
for m in [0.25, 0.5, 1, 1.5, 2, 4, 8, 16, 32]:  # noise multipliers
    for g in [0.5, 1, 2]:  # grad norms
        mostly.train(
            config={
                # The name encodes the swept values so runs can be told apart.
                "name": f"Census with DP - {m} {g}",
                "tables": [
                    {
                        "name": "census",
                        "data": census_df,
                        "modelConfiguration": {
                            # Same limit for every run (presumably minutes --
                            # confirm against the SDK docs).
                            "maxTrainingTime": 60,
                            "differentialPrivacy": {
                                # None: no epsilon value is supplied here
                                # (presumably "no privacy-budget cap", so the
                                # run is bounded by training time -- confirm).
                                "maxEpsilon": None,
                                "noiseMultiplier": m,
                                "maxGradNorm": g,
                            },
                        },
                    },
                ],
            },
            # start=True / wait=False: presumably kicks off training right away
            # without blocking, so all runs are queued back to back -- confirm.
            start=True,
            wait=False,
        )
# TRAIN without Differential Privacy
# Baseline sweep: six training runs on the same census DataFrame that differ
# only in the epoch limit, with no differentialPrivacy section in the config.
for e in [1, 2, 4, 8, 16, 100]:  # max epochs
    mostly.train(
        config={
            # The name encodes the epoch limit so runs can be told apart.
            "name": f"Census without DP - {e}",
            "tables": [
                {
                    "name": "census",
                    "data": census_df,
                    "modelConfiguration": {
                        # Same limit as the DP runs (presumably minutes --
                        # confirm against the SDK docs).
                        "maxTrainingTime": 60,
                        "maxEpochs": e,
                    },
                },
            ],
        },
        # start=True / wait=False: presumably kicks off training right away
        # without blocking, so all runs are queued back to back -- confirm.
        start=True,
        wait=False,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment