# Deploy a Single-Machine Multi-GPU Cluster

from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from dask_cuda.utils import get_device_total_memory

protocol = "tcp"            # "tcp" or "ucx"
visible_devices = "0,1,2,3" # Select devices to place workers
device_spill_frac = 0.9     # Spill GPU-worker memory to host at this limit.
                            # Reduce if spilling fails to prevent
                            # device memory errors.
dask_workdir = "/tmp/dask-workspace"  # Assumed scratch directory for worker spill files
capacity = get_device_total_memory()  # Total memory of device 0, in bytes

cluster = None              # (Optional) Specify existing scheduler port
if cluster is None:
    cluster = LocalCUDACluster(
        protocol=protocol,
        CUDA_VISIBLE_DEVICES=visible_devices,
        local_directory=dask_workdir,
        device_memory_limit=capacity * device_spill_frac,
    )

# Create the distributed client
client = Client(cluster)
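
# Optional sanity check (an addition, not part of the original gist):
# confirm the cluster is reachable and that one worker started per
# visible device. Both calls are standard dask.distributed Client APIs.
print(client.dashboard_link)  # URL of the diagnostic dashboard
n_workers = len(client.scheduler_info()["workers"])
assert n_workers == len(visible_devices.split(","))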