Skip to content

Instantly share code, notes, and snippets.

@clarng
Created August 20, 2022 06:37
Show Gist options
  • Save clarng/b46a5543adfc770bd6f49cc706ed62e6 to your computer and use it in GitHub Desktop.
python release/air_tests/air_benchmarks/workloads/data_benchmark.py --dataset-size-gb=100 --num-workers=10
diff --git a/release/air_tests/air_benchmarks/workloads/data_benchmark.py b/release/air_tests/air_benchmarks/workloads/data_benchmark.py
index 0f65a8ff83aee..01b20c031c2a0 100644
--- a/release/air_tests/air_benchmarks/workloads/data_benchmark.py
+++ b/release/air_tests/air_benchmarks/workloads/data_benchmark.py
@@ -17,7 +17,7 @@ def make_ds(size_gb: int):
record_dim = 1280
record_size = record_dim * 8
num_records = int(total_size / record_size)
- dataset = ray.data.range_tensor(num_records, shape=(record_dim,))
+ dataset = ray.data.range_tensor(num_records, shape=(record_dim,), parallelism=20)
print("Created dataset", dataset, "of size", dataset.size_bytes())
return dataset
@@ -29,7 +29,7 @@ def run_ingest_bulk(dataset, num_workers, num_cpus_per_worker):
num_workers=num_workers,
trainer_resources={"CPU": 0},
resources_per_worker={"CPU": num_cpus_per_worker},
- _max_cpu_fraction_per_node=0.1,
+ _max_cpu_fraction_per_node=0.8,
),
datasets={"train": dataset},
preprocessor=dummy_prep,
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.