Last active
September 18, 2017 15:36
-
-
Save gaebor/f02b8e24f71830f727344a44214c3f59 to your computer and use it in GitHub Desktop.
Test the cnmem library's memory preallocation, which prevents concurrent memory allocations on the GPU. Python 2 only!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Presupposes that Theano with GPU support is already installed.
FILENAME=theano_cnmem_text.log
# Preallocate half of the GPU memory for this process's Theano sessions.
export THEANO_FLAGS="gpuarray.preallocate=0.5,device=cuda,floatX=float32"
A_BIT=2   # seconds to wait for background processes to settle
pip install -U nvidia-ml-py   # provides pynvml, used below to query GPU memory
# ANSI color escape codes, see:
# http://stackoverflow.com/questions/5947742/how-to-change-the-output-color-of-echo-in-linux
RED='\033[0;31m'
NC='\033[0m' # No Color
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
# Print the current / total memory usage of GPU device 0 via pynvml.
# The bash color variables ${YELLOW}/${NC} are interpolated into the Python
# source before it runs; {0}/{1} are Python str.format placeholders.
function gpu_mem(){
    python -c "from pynvml import *
nvmlInit()
handle = nvmlDeviceGetHandleByIndex(0)
mem_info = nvmlDeviceGetMemoryInfo(handle)
# // keeps the MiB values integral on both Python 2 and Python 3
print('Mem: ${YELLOW}{0}${NC}MiB / {1}MiB'.format(mem_info.used//(1024*1024), mem_info.total//(1024*1024)))
nvmlShutdown()
"
}
# Countdown helper: rewrites one terminal line per second, counting down
# from the total to 1 before the announced event happens.
#   $1 - message to display
#   $2 - optional countdown length in seconds (default: 10)
function behold(){
    local total=10
    [[ $2 -gt 0 ]] && total=$2
    local remaining
    for (( remaining=total; remaining>=1; remaining-- ))
    do
        echo -ne "\r$1 in $remaining ... "
        sleep 1
    done
    echo
}
### fun part starts here ###
echo "idle usage:"
gpu_mem
behold "starting new theano session"
### importing theano preallocates a large chunk of GPU memory,
### then the process idles for a minute so we can observe it ###
python -c "import time
import theano
time.sleep(60)" &> "$FILENAME" &
PID=$!
echo -n "A python background process with PID $PID has been started ... "
sleep $A_BIT
if [[ -n $(jobs -r -p | grep "$PID") ]]
then
    echo -e "${GREEN}still running${NC}"
    gpu_mem
    behold "starting another theano session"
    ### the first process holds the preallocated memory, so this import
    ### is expected to crash ###
    if python -c "import theano"
    then
        echo -e "${GREEN}SUCCEEDED${NC}"
    else
        echo -e "${RED}FAILED${NC}"
    fi
    if [[ -n $(jobs -r -p | grep "$PID") ]]
    then
        echo "$PID still running"
    else
        echo "$PID crashed inbetween"
    fi
    gpu_mem
    behold "killing $PID"
    kill "$PID"
    sleep $A_BIT
    echo "gpu went back to:"
    gpu_mem
else
    # the background session never came up; show the tail of its log
    echo -e "${RED}$PID failed${NC}"
    sleep $A_BIT
    tail -n 5 "$FILENAME"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a deliberately memory-hungry Theano graph and report GPU memory
# usage via NVML before and after each evaluation.
import sys

import numpy
import theano
from pynvml import *  # NOTE(review): star import kept; NVML names are used unqualified below

# NVML must be initialised before any query; nvmlShutdown() is called at script end.
nvmlInit()
handle = nvmlDeviceGetHandleByIndex(0)  # assumes the GPU of interest is device 0 -- TODO confirm
def gpu_mem(handle):
    """Print the share of GPU memory currently in use on *handle*, as a percentage."""
    mem_info = nvmlDeviceGetMemoryInfo(handle)
    # used/total is a fraction in [0, 1]; scale by 100 so the '%' suffix is truthful
    # (the original printed e.g. 'Mem: 0.45%' when 45% of memory was in use)
    print('Mem: %.3g%%' % (100.0 * mem_info.used / mem_info.total))
# Problem sizes: z (n x l) = x (n x k) times y (k x l); overridable from argv.
n, k, l = 100, 200, 300
if len(sys.argv) >= 4:
    n, k, l = (int(arg) for arg in sys.argv[1:4])

floatX = theano.config.floatX
x_shared = theano.shared(numpy.zeros((n, k), dtype=floatX))
y_shared = theano.shared(numpy.zeros((k, l), dtype=floatX))
z_shared = theano.shared(numpy.zeros((n, l), dtype=floatX))

x = theano.tensor.matrix()
y = theano.tensor.matrix()
z = theano.tensor.matrix()

# The compiled function below:
# * is VERY inefficient in terms of memory usage: it forms the full
#   (n, k, l) outer product before summing over axis 1
# * keeps its temporaries on the device, allocated by Theano; we never
#   transfer them to or from the host
# * thereby lets us observe, through NVML, allocations that are otherwise
#   hidden inside the compiled computational graph
f = theano.function(
    [], [],
    updates={z_shared: (x[:, :, None] * y[None, :, :]).sum(axis=1)},
    givens=[(x, x_shared), (y, y_shared)],
)

gpu_mem(handle)
for _ in range(10):
    f()
    gpu_mem(handle)
nvmlShutdown()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you are using gpuarray, the custom backend (as described here), then you will have to do some things differently:
use
device=cuda
instead of gpu
and
gpuarray.preallocate
instead of lib.cnmem
!