Last active
July 22, 2020 17:20
-
-
Save zredlined/8298a5a5b7b9e4443aedcf8e5330cb1a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!pip install gretel-synthetics --upgrade

# Train a gretel-synthetics model on `training_set`, generate synthetic rows,
# and keep only the rows whose 'Class' column equals 1.

from pathlib import Path

from gretel_synthetics.batch import DataFrameBatch

# Configuration passed to DataFrameBatch below. The keys are gretel-synthetics
# settings; see that library's config documentation for exact semantics.
config_template = {
    "max_lines": 0,
    "max_line_len": 2048,
    "epochs": 7,
    "vocab_size": 20000,
    "gen_lines": 1000,
    "dp": False,
    "field_delimiter": ",",
    "overwrite": True,
    # Checkpoints are written under ./checkpoints relative to the current
    # working directory at the time this cell runs.
    "checkpoint_dir": str(Path.cwd() / "checkpoints"),
}

# train synthetic model
# NOTE(review): `training_set` is not defined in this snippet; it must already
# exist in the session (presumably a pandas DataFrame that includes a 'Class'
# column -- confirm against the preceding notebook cells).
# NOTE(review): `batch_size` looks like the number of columns handled per
# batch rather than a neural-network mini-batch size -- confirm in the
# DataFrameBatch documentation.
batcher = DataFrameBatch(df=training_set, batch_size=32, config=config_template)
batcher.create_training_data()
batcher.train_all_batches()

# generate synthetic dataset
status = batcher.generate_all_batch_lines(max_invalid=5000)
df_synthetic = batcher.batches_to_df()

# only keep fraudulent records created by our model
df_synthetic = df_synthetic[df_synthetic['Class'] == 1]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment