Keras/TensorFlow multi-GPU assignment and limitation of GPU and GPU-memory usage
def gpu_assignment(gpus, allow_growth=True, per_process_gpu_memory_fraction=0.95):
    # GPU assignment
    ######################################################
    # !! run this BEFORE importing TF or keras !!
    # run the code only on particular GPU(s)
    # from http://kawahara.ca/select-single-gpu-keras/
    ######################################################
    import os
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    """
    Environment Variable Syntax      Results
    CUDA_VISIBLE_DEVICES=1           Only device 1 will be seen
    CUDA_VISIBLE_DEVICES=0,1         Devices 0 and 1 will be visible
    CUDA_VISIBLE_DEVICES="0,1"       Same as above, quotation marks are optional
    CUDA_VISIBLE_DEVICES=0,2,3       Devices 0, 2 and 3 will be visible; device 1 is masked
    CUDA_VISIBLE_DEVICES=""          No GPU will be visible, CPU only
    """
    gpus_string = ""
    for gpu in gpus:
        gpus_string += "," + str(gpu)
    gpus_string = gpus_string[1:]  # drop the leading comma
    os.environ["CUDA_VISIBLE_DEVICES"] = gpus_string

    ###################################
    ## extra imports to set GPU options
    ###################################
    import tensorflow as tf
    from keras import backend as k

    # TensorFlow wizardry
    config = tf.ConfigProto()
    # Don't pre-allocate memory; allocate as needed
    config.gpu_options.allow_growth = allow_growth
    # Only allow a fraction of the total GPU memory to be allocated
    config.gpu_options.per_process_gpu_memory_fraction = per_process_gpu_memory_fraction
    # Create a session with the above options specified
    k.tensorflow_backend.set_session(tf.Session(config=config))

    return None
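
# --- Hypothetical illustrations (a sketch, not part of the original gist) ----
# How the CUDA_VISIBLE_DEVICES table above plays out for other calls; the GPU
# index 2 and the 0.5 memory fraction below are arbitrary example values.
#
#   gpu_assignment([2], allow_growth=True, per_process_gpu_memory_fraction=0.5)
#       -> CUDA_VISIBLE_DEVICES="2": only physical GPU 2 is visible to this
#          process, and at most half of its memory may be allocated.
#
#   gpu_assignment([])
#       -> CUDA_VISIBLE_DEVICES="": no GPU is visible, the session runs on CPU.
# ------------------------------------------------------------------------------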

# run this before the tensorflow or keras import
GPUS = [0, 1, 2, 3, 4, 5, 6, 7]
gpu_assignment(GPUS)

import tensorflow as tf
from keras.applications import Xception
from keras.utils import multi_gpu_model
import numpy as np

num_samples = 1000
height = 224
width = 224
num_classes = 1000

# Instantiate the base model (or "template" model).
# We recommend doing this under a CPU device scope,
# so that the model's weights are hosted on CPU memory.
# Otherwise they may end up hosted on a GPU, which would
# complicate weight sharing.
with tf.device('/cpu:0'):
    model = Xception(weights=None,
                     input_shape=(height, width, 3),
                     classes=num_classes)

# Replicates the model on 8 GPUs.
# This assumes that your machine has 8 available GPUs.
parallel_model = multi_gpu_model(model, gpus=len(GPUS))
parallel_model.compile(loss='categorical_crossentropy',
                       optimizer='rmsprop')

# Generate dummy data.
x = np.random.random((num_samples, height, width, 3))
y = np.random.random((num_samples, num_classes))

# This `fit` call will be distributed on 8 GPUs.
# Since the batch size is 256, each GPU will process 32 samples.
parallel_model.fit(x, y, epochs=20, batch_size=256)
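
# --- Optional sanity check (a sketch, not part of the original gist) ----------
# Lists the devices that survived the CUDA_VISIBLE_DEVICES mask once TensorFlow
# is imported. device_lib is the TF 1.x API used here; under TF 2.x,
# tf.config.list_physical_devices('GPU') would be the rough equivalent.
from tensorflow.python.client import device_lib

visible_devices = [d.name for d in device_lib.list_local_devices()]
print("Devices visible to TensorFlow:", visible_devices)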