
@mrm8488
Created November 19, 2019 01:27
import tensorflow as tf  # required import, not shown in the original snippet

# Detect hardware
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    tpu = None
gpus = tf.config.experimental.list_logical_devices("GPU")

# Select the appropriate distribution strategy
if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # Going back and forth between TPU and host is expensive.
    # Better to run 128 batches on the TPU before reporting back.
    strategy = tf.distribute.experimental.TPUStrategy(tpu, steps_per_run=128)
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on CPU')

print("Number of accelerators: ", strategy.num_replicas_in_sync)
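
For reference, a minimal sketch of how the selected strategy is typically consumed: model construction and compilation go inside strategy.scope(), and the per-replica batch size is usually scaled by strategy.num_replicas_in_sync. The model architecture, dataset, and base batch size below are hypothetical placeholders, not part of the original gist.

# Hypothetical usage sketch (not part of the original gist).
BATCH_SIZE = 16 * strategy.num_replicas_in_sync  # scale the global batch with the replica count

with strategy.scope():  # variables created here are placed/replicated by the strategy
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# model.fit(train_dataset.batch(BATCH_SIZE), epochs=5)  # train as usual; the strategy handles distribution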