@dmitrysarov
Created August 6, 2020 22:15
Start a process once a GPU has enough free memory
import os
import re
import subprocess
import time

import click


def find_free_gpu(amount_of_space):
    """Return the index of the first GPU with at least `amount_of_space` MiB free, else None."""
    nvidia_output = subprocess.check_output(['nvidia-smi']).decode()
    # Each GPU row of nvidia-smi contains a "usedMiB / totalMiB" memory column.
    gpu_memory = re.findall(r'(\d+)MiB\s/\s(\d+)MiB', nvidia_output)
    print(gpu_memory)
    for gpu_num, (used, total) in enumerate(gpu_memory):
        if int(total) - int(used) > amount_of_space:
            return gpu_num
    return None


@click.command()
@click.argument('mem')
@click.argument('command')
def main(mem, command):
    '''
    mem - required free memory, in MiB
    command - command to run once a GPU with enough free memory is found
    e.g.
    python start_if_free.py 21000 '/opt/conda/bin/python train.py with batch_size=10 gpu_ids=0'
    Be sure to set the GPU id inside your script to 0, because this script
    restricts GPU visibility for torch to the chosen device via CUDA_VISIBLE_DEVICES.
    '''
    print('Parsing command...')
    command_parts = command.split()
    print(command_parts)
    print('Looking for', mem, 'MiB of free GPU memory')
    while True:
        gpu_id = find_free_gpu(int(mem))
        print(gpu_id)
        if gpu_id is not None:
            break
        time.sleep(10)  # poll nvidia-smi periodically instead of busy-waiting
    print('GPU id', gpu_id, 'is free')
    my_env = os.environ.copy()  # copy so the parent environment stays untouched
    my_env['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    subprocess.run(command_parts, env=my_env)


if __name__ == '__main__':
    main()
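For reference, a small standalone sketch of the parsing step: it runs the same regex as find_free_gpu against a hand-written fragment of nvidia-smi's memory column (the numbers below are illustrative, not taken from the gist) to show what the function extracts per GPU.

import re

# Illustrative fragment of nvidia-smi's per-GPU memory column (values are made up).
sample = """
|  11019MiB / 24268MiB |
|    305MiB / 24268MiB |
"""

# Same pattern as in find_free_gpu: captures (used, total) MiB for each GPU row.
print(re.findall(r'(\d+)MiB\s/\s(\d+)MiB', sample))
# -> [('11019', '24268'), ('305', '24268')]

Each tuple corresponds to one GPU, so the free memory per GPU is simply total minus used.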