
@mrm8488
Created November 19, 2019 01:27
import tensorflow as tf  # required import, not shown in the original snippet

# Detect hardware
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    tpu = None
gpus = tf.config.experimental.list_logical_devices("GPU")

# Select the appropriate distribution strategy
if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # Going back and forth between TPU and host is expensive.
    # Better to run 128 batches on the TPU before reporting back.
    strategy = tf.distribute.experimental.TPUStrategy(tpu, steps_per_run=128)
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on CPU')

print("Number of accelerators: ", strategy.num_replicas_in_sync)
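
For reference, a minimal sketch of how the selected strategy is typically consumed: model construction and compilation go inside strategy.scope(), and the per-replica batch size is usually scaled by strategy.num_replicas_in_sync. The model architecture, dataset, and base batch size below are hypothetical placeholders, not part of the original gist.

# Hypothetical usage sketch (not part of the original gist).
BATCH_SIZE = 16 * strategy.num_replicas_in_sync  # scale the global batch with the replica count

with strategy.scope():  # variables created here are placed/replicated by the strategy
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# model.fit(train_dataset.batch(BATCH_SIZE), epochs=5)  # train as usual; the strategy handles distribution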