Alexander Watson zredlined

## bike-orders.json
[
    {
        "CustomerID": 26159,
        "Title": null,
        "FirstName": "Virginia",
        "MiddleName": null,
        "LastName": "Raman",
        "Suffix": null,
        "AddressLine1": "3242 Coralie Drive",
        "AddressLine2": null,

## setup-tensorflow-gpu-ubuntu-18_04.sh
# Shell script to set up GPU acceleration for TensorFlow on Ubuntu 18.04
# Tested on a default Ubuntu 18.04 VM image in Google Compute

# Install CUDA
# Install the apt pin file for NVIDIA's CUDA repository.
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
# Trust the repository signing key. NVIDIA rotated this key in April 2022
# (7fa2af80 was replaced by 3bf863cc); with only the old key installed,
# `apt-get update` rejects the repository with a signature error.
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
# Register the repository, refresh the package index, and install CUDA.
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
sudo apt-get update
sudo apt-get -y install cuda

## synthetics_creditcard_fraud_config.py
#!pip install gretel-synthetics --upgrade

from gretel_synthetics.batch import DataFrameBatch
from pathlib import Path

config_template = {
    "max_lines": 0,
    "max_line_len": 2048,
    "epochs": 7,
    "vocab_size": 20000,

## create_knn_dataset.py
#!pip install s3fs smart_open pandas sklearn

import pandas as pd
from smart_open import open
from sklearn.neighbors import NearestNeighbors

# Set params
NEAREST_NEIGHBOR_COUNT = 5
TRAINING_SET = 's3://gretel-public-website/datasets/creditcard_train.csv'

## customer_validator-uci.py
# Validate each generated record
# Note: This custom validator verifies the record structure matches
# the expected format for UCI healthcare data, and also that
# generated records are Female (i.e. column 1 is 0)

def validate_record(line):
    rec = line.strip().split(",")
    if not int(rec[1]) == 0:
        raise Exception("record generated must be female")
    if len(rec) == 14:

## gretel_synthetics_config-uci.py
from pathlib import Path

from gretel_synthetics.config import LocalConfig

# Create a config that we can use for both training and generating, with CPU-friendly settings
# The default values for ``max_chars`` and ``epochs`` are better suited for GPUs
config = LocalConfig(
    max_lines=0, # read all lines (zero)
    epochs=15, # 15-30 epochs for production
    vocab_size=20000, # tokenizer model vocabulary size

## ehr_config.py
from gretel_synthetics.config import LocalConfig

# EHR configuration, optimal settings
# Note: this config is optimized for calculation on a GPU
config = LocalConfig(
    max_lines=0, # read all lines (zero)
    epochs=30, # 30 epochs for production
    vocab_size=25000, # vocabulary size
    character_coverage=1.0, # tokenizer model character coverage percent
    gen_chars=0, # the maximum number of characters possible per-generated line of text

## example_ontonotes5_spacy_format.json
{
    "id": "fake",
    "paragraphs": [
        {
            "raw": "Israel has blockaded all West Bank cities after 10 people died in one of the worst days of Israeli-Palestinian violence in more than 10 weeks. Israeli tank-fire killed five Palestinians including four policemen in the West Bank town of Jenine. Israeli forces killed one Palestinian near Bethlehem and another in Arab East Jerusalem. Palestinian gunmen in the West Bank killed two Jewish settlers in a roadside ambush near Hebron and a third Israeli in an attack against a bus outside of Jericho.",
            "sentences": [
                {
                    "tokens": [
                        {
                            "dep": "",

## diffpriv_config.yml
# training settings
max_chars: 0 # use a non-zero number to limit training data
epochs: 30 # number of training epochs (typically 15-30)

# RNN settings
batch_size: 64 # training batches
buffer_size: 10000 # maximum buffer size
seq_length: 100 # max length sentence for a single input in characters
embedding_dim: 256 # the embedding dimension
rnn_units: 256 #1024 # number of RNN units

## dp_rmsprop_optimizer.py

# Record that the differentially private optimizer path is in use.
logging.info("Utilizing differential privacy in optimizer")

RMSPropOptimizer = tf.compat.v1.train.RMSPropOptimizer
DPRmsPropGaussianOptimizer = make_dp_gaussian_optimizer(RMSPropOptimizer)

optimizer = DPRmsPropGaussianOptimizer(
    l2_norm_clip=store.l2_norm_clip,
    noise_multiplier=store.noise_multiplier,
    num_microbatches=store.microbatches,
	[
	{
	"CustomerID": 26159,
	"Title": null,
	"FirstName": "Virginia",
	"MiddleName": null,
	"LastName": "Raman",
	"Suffix": null,
	"AddressLine1": "3242 Coralie Drive",
	"AddressLine2": null,
	# Shell script to setup GPU acceleration for TensorFlow on Ubuntu 18.04
	# Tested on a default Ubuntu 18.04 VM image in Google Compute

	# Install CUDA
	wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
	sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
	sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
	sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
	sudo apt-get update
	sudo apt-get -y install cuda
	#!pip install gretel-synthetics --upgrade

	from gretel_synthetics.batch import DataFrameBatch
	from pathlib import Path

	config_template = {
	"max_lines": 0,
	"max_line_len": 2048,
	"epochs": 7,
	"vocab_size": 20000,
	#!pip install s3fs smart_open pandas sklearn

	import pandas as pd
	from smart_open import open
	from sklearn.neighbors import NearestNeighbors

	# Set params
	NEAREST_NEIGHBOR_COUNT = 5
	TRAINING_SET = 's3://gretel-public-website/datasets/creditcard_train.csv'
	# Validate each generated record
	# Note: This custom validator verifies the record structure matches
	# the expected format for UCI healthcare data, and also that
	# generated records are Female (e.g. column 1 is 0)

	def validate_record(line):
	rec = line.strip().split(",")
	if not int(rec[1]) == 0:
	raise Exception("record generated must be female")
	if len(rec) == 14:
	from pathlib import Path

	from gretel_synthetics.config import LocalConfig

	# Create a config that we can use for both training and generating, with CPU-friendly settings
	# The default values for ``max_chars`` and ``epochs`` are better suited for GPUs
	config = LocalConfig(
	max_lines=0, # read all lines (zero)
	epochs=15, # 15-30 epochs for production
	vocab_size=20000, # tokenizer model vocabulary size
	from gretel_synthetics.config import LocalConfig

	# EHR configuration, optimal settings
	# Note: this config is optimized for calculation on a GPU
	config = LocalConfig(
	max_lines=0, # read all lines (zero)
	epochs=30, # 30 epochs for production
	vocab_size=25000, # vocabulary size
	character_coverage=1.0, # tokenizer model character coverage percent
	gen_chars=0, # the maximum number of characters possible per-generated line of text
	{
	"id": "fake",
	"paragraphs": [
	{
	"raw": "Israel has blockaded all West Bank cities after 10 people died in one of the worst days of Israeli-Palestinian violence in more than 10 weeks. Israeli tank-fire killed five Palestinians including four policemen in the West Bank town of Jenine. Israeli forces killed one Palestinian near Bethlehem and another in Arab East Jerusalem. Palestinian gunmen in the West Bank killed two Jewish settlers in a roadside ambush near Hebron and a third Israeli in an attack against a bus outside of Jericho.",
	"sentences": [
	{
	"tokens": [
	{
	"dep": "",
	# training settings
	max_chars: 0 # use a non-zero number to limit training data
	epochs: 30 # number of training epochs (typically 15-30)

	# RNN settings
	batch_size: 64 # training batches
	buffer_size: 10000 # maximum buffer size
	seq_length: 100 # max length sentence for a single input in characters
	embedding_dim: 256 # the embedding dimension
	rnn_units: 256 #1024 # number of RNN units

	# Record that the differentially private optimizer path is in use.
	logging.info("Utilizing differential privacy in optimizer")

	RMSPropOptimizer = tf.compat.v1.train.RMSPropOptimizer
	DPRmsPropGaussianOptimizer = make_dp_gaussian_optimizer(RMSPropOptimizer)

	optimizer = DPRmsPropGaussianOptimizer(
	l2_norm_clip=store.l2_norm_clip,
	noise_multiplier=store.noise_multiplier,
	num_microbatches=store.microbatches,