@zredlined
Created May 4, 2020 15:19
Gretel synthetic data configuration optimized for EHR datasets
from pathlib import Path

from gretel_synthetics.config import LocalConfig

# Path to the training data; `dest_file` was defined outside this snippet,
# so a placeholder is shown here. Replace with your local filepath or S3 URI.
dest_file = "data/ehr_dataset.csv"

# EHR configuration, optimized settings
# Note: this config is tuned for training on a GPU
config = LocalConfig(
    max_lines=0,              # read all lines of the training set (0 = no limit)
    epochs=30,                # 30 epochs for production-quality models
    vocab_size=25000,         # tokenizer vocabulary size
    character_coverage=1.0,   # fraction of characters covered by the tokenizer model
    gen_chars=0,              # maximum characters per generated line (0 = no limit)
    gen_lines=0,              # generate a dataset equal in size to the training set (0)
    rnn_units=256,            # dimensionality of the LSTM output space
    dropout_rate=0.2,         # fraction of the inputs to drop
    dp=True,                  # train with differential privacy (DP-SGD)
    dp_learning_rate=0.015,   # learning rate for the DP optimizer
    dp_noise_multiplier=1.1,  # how much noise is added to gradients
    dp_l2_norm_clip=1.0,      # bound the optimizer's sensitivity to any single training point
    dp_microbatches=256,      # split each batch into microbatches for per-example gradient clipping
    checkpoint_dir=(Path.cwd() / "checkpoints").as_posix(),
    input_data_path=dest_file,  # filepath or S3 URI
)
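
For context, a minimal sketch of how a config like this is typically consumed, assuming the train_rnn and generate_text entry points that shipped with gretel-synthetics at the time; with gen_lines=0 above, generation mirrors the training set size.

from gretel_synthetics.train import train_rnn
from gretel_synthetics.generate import generate_text

# Train the model on the file referenced by input_data_path
train_rnn(config)

# Stream synthetic lines from the trained model; no line validator
# is supplied here, so every generated line is printed as-is
for line in generate_text(config, line_validator=None):
    print(line.text)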
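As a rough check on what the DP settings buy, TensorFlow Privacy's accountant can estimate the (epsilon, delta) budget implied by the noise multiplier and epoch count above. This is a hedged sketch, not part of the gist: the dataset size and batch size below are illustrative assumptions.

from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import (
    compute_dp_sgd_privacy,
)

n = 50_000        # hypothetical number of training examples (assumption)
batch_size = 64   # hypothetical training batch size (assumption)
delta = 1.0 / n   # common rule of thumb: delta below 1/n

# noise_multiplier and epochs mirror the config values above
eps, opt_order = compute_dp_sgd_privacy(
    n=n,
    batch_size=batch_size,
    noise_multiplier=1.1,
    epochs=30,
    delta=delta,
)
print(f"DP-SGD yields ({eps:.2f}, {delta})-differential privacy")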