# richardliaw/synthetic_data.py

## synthetic_data.py
import ray.data

class DataGenerator:
    """Callable that runs a model over permuted variants of an input.

    An instance holds a model moved to the CUDA device and the permutation
    config it was constructed with. Calling the instance yields one model
    output per variant produced by ``self.permute``.
    """

    def __init__(self, permute_config):
        """Place the model on the GPU and store the permutation config.

        Args:
            permute_config: Value forwarded to ``self.permute`` on every call.
        """
        # torch is not imported at module level in this file, so import it
        # here; otherwise the next line fails with NameError.
        import torch

        device = torch.device("cuda")
        # NOTE(review): ``Model`` is not defined or imported in the visible
        # file -- it must be provided elsewhere; confirm.
        self.model = Model().to(device)
        self.config = permute_config

    def __call__(self, input):
        """Yield the model output for each permutation of ``input``.

        Args:
            input: Value handed to ``self.permute`` together with the stored
                config.

        Yields:
            ``self.model(variant)`` for every variant ``self.permute`` returns.
        """
        # NOTE(review): ``permute`` is not defined on this class as shown;
        # presumably supplied elsewhere -- confirm before running.
        # Use the config saved by __init__: the bare name ``permute_config``
        # is not in scope inside this method.
        for test_input in self.permute(self.config, input):
            # Feed the permuted variant to the model, not the original input.
            yield self.model(test_input)

# Load the user prompts from S3 as a Ray Dataset.
ds = ray.data.read_json("s3://path_to_bucket/user_prompts.json")
# map_batches returns a new Dataset and does not modify ``ds`` in place, so
# keep the result; otherwise the write below would emit the untransformed input.
# NOTE(review): DataGenerator.__init__ requires ``permute_config``; supply it
# via ``fn_constructor_args`` / ``fn_constructor_kwargs`` once a config value
# exists -- nothing in this file defines one.
ds = ds.map_batches(DataGenerator, num_gpus=1, concurrency=100)  # run on 100 GPUs
# Dataset has no generic ``write`` method; write_json matches the target format.
ds.write_json("s3://path_to_bucket/generated_prompts.json")
	import ray.data

	class DataGenerator:
	    """Callable that runs a model over permuted variants of an input.

	    An instance holds a model moved to the CUDA device and the permutation
	    config it was constructed with. Calling the instance yields one model
	    output per variant produced by ``self.permute``.
	    """

	    def __init__(self, permute_config):
	        """Place the model on the GPU and store the permutation config.

	        Args:
	            permute_config: Value forwarded to ``self.permute`` on every call.
	        """
	        # torch is not imported at module level in this file, so import it
	        # here; otherwise the next line fails with NameError.
	        import torch

	        device = torch.device("cuda")
	        # NOTE(review): ``Model`` is not defined or imported in the visible
	        # file -- it must be provided elsewhere; confirm.
	        self.model = Model().to(device)
	        self.config = permute_config

	    def __call__(self, input):
	        """Yield the model output for each permutation of ``input``.

	        Args:
	            input: Value handed to ``self.permute`` together with the stored
	                config.

	        Yields:
	            ``self.model(variant)`` for every variant ``self.permute`` returns.
	        """
	        # NOTE(review): ``permute`` is not defined on this class as shown;
	        # presumably supplied elsewhere -- confirm before running.
	        # Use the config saved by __init__: the bare name ``permute_config``
	        # is not in scope inside this method.
	        for test_input in self.permute(self.config, input):
	            # Feed the permuted variant to the model, not the original input.
	            yield self.model(test_input)

	# Load the user prompts from S3 as a Ray Dataset.
	ds = ray.data.read_json("s3://path_to_bucket/user_prompts.json")
	# map_batches returns a new Dataset and does not modify ``ds`` in place, so
	# keep the result; otherwise the write below would emit the untransformed input.
	# NOTE(review): DataGenerator.__init__ requires ``permute_config``; supply it
	# via ``fn_constructor_args`` / ``fn_constructor_kwargs`` once a config value
	# exists -- nothing in this file defines one.
	ds = ds.map_batches(DataGenerator, num_gpus=1, concurrency=100)  # run on 100 GPUs
	# Dataset has no generic ``write`` method; write_json matches the target format.
	ds.write_json("s3://path_to_bucket/generated_prompts.json")