from tensorflow.python.client import device_lib

def get_available_devices():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos]

print(get_available_devices())
# my output was => ['/device:CPU:0']
# the expected output is => ['/device:CPU:0', '/device:GPU:0']
TensorFlow needs a GPU with Compute Capability >= 3.5. Look up your card's capability (e.g. capability = 6.1 for a GTX 1050 Ti).
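If you want to query the capability instead of looking it up, newer drivers let nvidia-smi report it directly (a minimal sketch; the compute_cap query field only exists in reasonably recent driver versions, otherwise check NVIDIA's CUDA GPUs page):

import subprocess

# Ask the driver for the GPU name and its compute capability.
# Assumes nvidia-smi is on PATH and the driver supports the compute_cap field.
out = subprocess.run(
    ['nvidia-smi', '--query-gpu=name,compute_cap', '--format=csv,noheader'],
    capture_output=True, text=True)
print(out.stdout)  # e.g. "GeForce GTX 1050 Ti, 6.1"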
- Find your Python and TensorFlow versions
import sys
print(sys.version)
# 3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)]

import tensorflow as tf
print(tf.__version__)
# my output was => 2.4.1
- Find the right versions of the CUDA Toolkit and cuDNN SDK that support your TensorFlow version:
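For TensorFlow 2.4.x the tested combination in the official build-configuration table is CUDA 11.0 with cuDNN 8.0. You can also read the versions your installed wheel was built against (a small sketch; tf.sysconfig.get_build_info() is available in recent 2.x releases, and the CUDA/cuDNN keys are only present in GPU-enabled builds):

import tensorflow as tf

# Versions the installed TensorFlow wheel was compiled against
# (only meaningful for GPU-enabled builds).
info = tf.sysconfig.get_build_info()
print(info.get('cuda_version'))   # e.g. 11.0
print(info.get('cudnn_version'))  # e.g. 8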
- Install the CUDA Toolkit (the installation steps depend on your OS)
- Check your version of the toolkit:
nvcc --version
# nvcc: NVIDIA (R) Cuda compiler driver
# Copyright (c) 2005-2020 NVIDIA Corporation
# Built on Thu_Jun_11_22:26:48_Pacific_Daylight_Time_2020
# Cuda compilation tools, release 11.0, V11.0.194
# Build cuda_11.0_bu.relgpu_drvr445TC445_37.28540450_0
- Add the CUDA environment variables
Your system PATH variable must contain:
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\bin
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\libnvvp
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\extras\CUPTI\lib64
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include
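After editing the environment variables, open a new terminal and make sure the entries really ended up on PATH; a quick sanity check from Python (a sketch, using the v11.0 paths above):

import os

# The four CUDA v11.0 directories that should be on PATH (see above).
expected = [
    r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\bin',
    r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\libnvvp',
    r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\extras\CUPTI\lib64',
    r'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include',
]
path_entries = os.environ['PATH'].split(os.pathsep)
for p in expected:
    print(p, '->', 'on PATH' if p in path_entries else 'MISSING')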
- Download the cuDNN SDK that matches your CUDA Toolkit version
- Add the cuDNN environment variable
Add the directory that contains the cuDNN binaries to PATH as well:
C:\Program Files\NVIDIA\cuda\bin
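To check that Windows can actually locate the cuDNN library from that directory, you can try loading the DLL with ctypes (a sketch; the file name cudnn64_8.dll assumes cuDNN 8.x, the release that pairs with CUDA 11.0, so adjust it for other versions):

import ctypes

try:
    # cudnn64_8.dll assumes cuDNN 8.x -- adjust the name for other versions.
    ctypes.WinDLL('cudnn64_8.dll')
    print('cuDNN DLL found')
except OSError:
    print('cuDNN DLL not found -- check that the cuDNN bin directory is on PATH')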
- Reinstall TensorFlow with GPU support
pip uninstall tensorflow
pip install tensorflow-gpu

- Restart your PC
- Check that the GPU is now visible
print(get_available_devices())
# now this code should return => ['/device:CPU:0', '/device:GPU:0']
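Once the GPU shows up, you can also confirm its compute capability from TensorFlow itself (tf.config.experimental.get_device_details is available from TF 2.4 onwards):

import tensorflow as tf

for gpu in tf.config.list_physical_devices('GPU'):
    details = tf.config.experimental.get_device_details(gpu)
    # e.g. {'device_name': 'GeForce GTX 1050 Ti', 'compute_capability': (6, 1)}
    print(gpu.name, details)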
# Optional benchmark: compare how long the same matrix multiplication takes
# on CPU vs GPU (TF1-style graph mode via the compat.v1 API).
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

import time
import numpy as np
from tensorflow.python.framework import ops
from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())

cpu_times = []
sizes = [1, 10, 100, 500, 1000, 2000, 3000, 4000, 5000, 8000, 10000]

for size in sizes:
    ops.reset_default_graph()
    start = time.time()
    # Build the graph on the CPU.
    with tf.device('cpu:0'):
        v1 = tf.Variable(tf.random.normal((size, size)))
        v2 = tf.Variable(tf.random.normal((size, size)))
        op = tf.matmul(v1, v2)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(op)

    cpu_times.append(time.time() - start)
    print('cpu time took: {0:.4f}'.format(time.time() - start))

gpu_times = []
for size in sizes:
    ops.reset_default_graph()
    start = time.time()
    # Same graph, but placed on the GPU.
    with tf.device('gpu:0'):
        v1 = tf.Variable(tf.random.normal((size, size)))
        v2 = tf.Variable(tf.random.normal((size, size)))
        op = tf.matmul(v1, v2)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(op)

    gpu_times.append(time.time() - start)
    print('gpu time took: {0:.4f}'.format(time.time() - start))

# Plot the two timing curves against the matrix size.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(sizes, gpu_times, label='GPU')
ax.plot(sizes, cpu_times, label='CPU')
plt.xlabel('MATRIX SIZE')
plt.ylabel('TIME (sec)')
plt.legend()
plt.show()