# SLURM launcher for bench_hybrid_cpu_gpu.py benchmark experiments.
# (Saved from gist mdouze/9ba876da1811de1164d9cae7b24885d6; the GitHub
# page boilerplate has been replaced by this header.)
set -e

#######################################
# Submit a command as a SLURM batch job, unless it was already submitted.
# A small wrapper script running the command under srun is written to
# $logdir and handed to sbatch; if that script already exists the job is
# assumed submitted and nothing is done (makes re-runs idempotent).
# Globals:   logdir (read) - directory for wrapper scripts and stdout logs
# Arguments: $1 - sbatch resource options as one string (intentionally
#                 expanded unquoted below so it word-splits into flags)
#            $2 - job name; also names the wrapper script and stdout log
#            remaining args - the command to run under srun
# Outputs:   prints the stdout log path, then sbatch's own output
#######################################
function run_on () {
    sys="$1"
    shift
    name="$1"
    shift
    script="$logdir/$name.sh"
    if [ -e "$script" ]; then
        echo script "$script" exists
        return
    fi
    # srun handles special characters fine, but the shell interpreter
    # does not, so shell-quote every argument before writing the script.
    escaped_cmd=$( printf "%q " "$@" )
    cat > "$script" <<EOF
#! /bin/bash
srun $escaped_cmd
EOF
    echo -n "$logdir/$name.stdout "
    sbatch -n1 -J "$name" \
        $sys \
        --comment='priority is the only one that works' \
        --output="$logdir/$name.stdout" \
        "$script"
}
# Wrappers around run_on that pre-select SLURM resources (CPUs, GPUs,
# memory, time limit, partition, GPU memory constraint). Each forwards
# "$@" = job name followed by the command to run in batch mode.
function run_on_1machine () {
    run_on "--cpus-per-task=80 --gres=gpu:0 --mem=500G --time=70:00:00 --partition=learnlab" "$@"
}
function run_on_1machine_1h () {
    run_on "--cpus-per-task=80 --gres=gpu:2 --mem=100G --time=1:00:00 --partition=learnlab" "$@"
}
function run_on_1machine_3h () {
    run_on "--cpus-per-task=80 --gres=gpu:2 --mem=100G --time=3:00:00 --partition=learnlab" "$@"
}
function run_on_4gpu_3h () {
    run_on "--cpus-per-task=40 --gres=gpu:4 --mem=100G --time=3:00:00 --partition=learnlab" "$@"
}
function run_on_8gpu () {
    run_on "--cpus-per-task=80 --gres=gpu:8 --mem=500G --time=70:00:00 --partition=learnlab" "$@"
}
# -C voltaXXgb constrains to nodes whose GPUs have that much memory
function run_on_1gpu_16gb () {
    run_on "--cpus-per-task=32 --gres=gpu:1 --mem=100G --time=3:00:00 --partition=learnlab -C volta16gb " "$@"
}
function run_on_1gpu_16gb_devlab () {
    run_on "--cpus-per-task=32 --gres=gpu:1 --mem=100G --time=3:00:00 --partition=devlab -C volta16gb " "$@"
}
function run_on_1gpu_32gb () {
    run_on "--cpus-per-task=32 --gres=gpu:1 --mem=100G --time=3:00:00 --partition=learnlab -C volta32gb " "$@"
}
function run_on_1gpu_32gb_devlab () {
    run_on "--cpus-per-task=32 --gres=gpu:1 --mem=100G --time=3:00:00 --partition=devlab -C volta32gb " "$@"
}
function run_on_4gpu_32gb_devlab () {
    run_on "--cpus-per-task=32 --gres=gpu:4 --mem=100G --time=3:00:00 --partition=devlab -C volta32gb " "$@"
}
function run_on_1gpu () {
    run_on "--cpus-per-task=32 --gres=gpu:1 --mem=50G --time=3:00:00 --partition=learnlab " "$@"
}
function run_on_1gpu_devlab () {
    run_on "--cpus-per-task=32 --gres=gpu:1 --mem=50G --time=3:00:00 --partition=devlab " "$@"
}
# prepare output directories
# basedir: set to some directory where all indexes, logs etc. can be written.
# NOTE(review): hardcoded personal /checkpoint path — adjust before reuse.
basedir=/checkpoint/matthijs/hybrid_cpu_gpu
# logdir is read by run_on to place wrapper scripts and stdout logs
logdir=$basedir/logs
mkdir -p $logdir
# NOTE(review): this whole section is guarded by "if false" — it archives
# experiment batches that were already submitted. Re-enable pieces by hand
# (or move them below the guard) to resubmit.
if false; then

###########################
# 10M experiments, PQ32
db=bigann10M
#for st in cpu gpu gpu_tiled gpu_flat_quantizer cpu_flat_gpu_quantizer gpu_ivf_quantizer; do
for st in gpu_ivf_quantizer; do
# gpu_ivf_quantizer needs an explicit batch size; others use the default
if [ $st == gpu_ivf_quantizer ]; then
extraopt="--batch_size 4096"
else
extraopt=""
fi
key=$db.IVF65kPQ32.st$st
run_on_1gpu_devlab $key.e \
python -u bench_hybrid_cpu_gpu.py \
--nq 100000 --search_type $st \
--stats $logdir/$key.pickle $extraopt
done

###########################
# 10M experiments, PQ32, optimize batch size
db=bigann10M
st=gpu_tiled
for bs in 256 1024 4096 16384; do
key=$db.IVF65kPQ32.st$st.bs$bs
run_on_1gpu_devlab $key.e \
python -u bench_hybrid_cpu_gpu.py \
--nq 100000 --search_type $st \
--db $db \
--batch_size $bs \
--stats $logdir/$key.pickle
done

###########################
# 1B experiments, PQ16
db=bigann1B
indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbbigann1B.OPQ16_128_IVF1048576_HNSW32_PQ16.faissindex
# for st in cpu gpu gpu_tiled gpu_flat_quantizer cpu_flat_gpu_quantizer gpu_ivf_quantizer; do
# for st in gpu_tiled gpu_ivf_quantizer cpu_flat_gpu_quantizer; do
for st in gpu_ivf_quantizer; do
key=$db.IVF1MPQ16.st$st
# per-search-type batch-size overrides
case $st in
gpu_tiled)
extraopt="--batch_size 16384";;
gpu_ivf_quantizer)
extraopt="--batch_size 4096";;
cpu_flat_gpu_quantizer)
extraopt="--batch_size 16384";;
*)
extraopt="";;
esac
run_on_1gpu_32gb_devlab $key.e \
python -u bench_hybrid_cpu_gpu.py \
--nq 100000 --search_type $st \
--db $db \
--indexname $indexname \
--stats $logdir/$key.pickle \
$extraopt
done

###########################
# 1B experiments, PQ16, tune batch size
db=bigann1B
indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbbigann1B.OPQ16_128_IVF1048576_HNSW32_PQ16.faissindex
# st=gpu_ivf_quantizer
st=gpu_tiled
for bs in 0 1024 4096 16384 65536; do
key=$db.nq500k.IVF1MPQ16.st$st.bs$bs
run_on_1gpu_32gb_devlab $key.a \
python -u bench_hybrid_cpu_gpu.py \
--nq 500000 --search_type $st --nt 32 \
--db $db \
--indexname $indexname \
--stats $logdir/$key.pickle \
--batch_size $bs
done

###########################
# 1B, PQ16, 4 GPU
db=bigann1B
indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbbigann1B.OPQ16_128_IVF1048576_HNSW32_PQ16.faissindex
#$ for st in multi_gpu multi_gpu_flat_quantizer; do
# for st in multi_gpu_sharded multi_gpu_flat_quantizer_sharded; do
# for st in multi_gpu_sharded_1quantizer; do
for st in multi_gpu_Csharded1 multi_gpu_Csharded1_flat multi_gpu_Csharded1_ivf; do
key=4GPU.$db.IVF1MPQ16.st$st
# NOTE(review): none of these case arms match the multi_gpu_* values in the
# loop above, so extraopt always ends up "" — presumably copy-pasted from
# the single-GPU section; confirm whether batch sizes were intended here.
case $st in
gpu_tiled)
extraopt="--batch_size 16384";;
gpu_ivf_quantizer)
extraopt="--batch_size 4096";;
cpu_flat_gpu_quantizer)
extraopt="--batch_size 16384";;
*)
extraopt="";;
esac
run_on_4gpu_32gb_devlab $key.b \
python -u bench_hybrid_cpu_gpu.py \
--nq 100000 --search_type $st --nt 32 \
--db $db \
--indexname $indexname \
--stats $logdir/$key.pickle \
$extraopt
done

###########################
# How to shard
db=bigann1B
indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbbigann1B.OPQ16_128_IVF1048576_HNSW32_PQ16.faissindex
#$ for st in multi_gpu multi_gpu_flat_quantizer; do
# for st in multi_gpu_sharded multi_gpu_flat_quantizer_sharded; do
for st in multi_gpu_sharded1 multi_gpu_sharded1_flat multi_gpu_sharded1_ivf; do
key=4GPU.$db.IVF1MPQ16.st$st
# NOTE(review): same dead case statement as above — extraopt is always ""
case $st in
gpu_tiled)
extraopt="--batch_size 16384";;
gpu_ivf_quantizer)
extraopt="--batch_size 4096";;
cpu_flat_gpu_quantizer)
extraopt="--batch_size 16384";;
*)
extraopt="";;
esac
run_on_4gpu_32gb_devlab $key.b \
python -u bench_hybrid_cpu_gpu.py \
--nq 100000 --search_type $st --nt 32 \
--db $db \
--indexname $indexname \
--stats $logdir/$key.pickle \
$extraopt
done

###########################
# try batch sizes
db=bigann1B
indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbbigann1B.OPQ16_128_IVF1048576_HNSW32_PQ16.faissindex
st=multi_gpu_sharded1
for bs in 0 1024 4096 16384; do
key=4GPU.$db.IVF1MPQ16.st$st.bs$bs
run_on_4gpu_32gb_devlab $key.a \
python -u bench_hybrid_cpu_gpu.py \
--nq 100000 --search_type $st --nt 32 \
--db $db \
--indexname $indexname \
--stats $logdir/$key.pickle \
--batch_size $bs
done

###########################
# try options (each opt becomes a --flag to the benchmark script)
db=bigann1B
indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbbigann1B.OPQ16_128_IVF1048576_HNSW32_PQ16.faissindex
st=gpu_flat_quantizer
for opt in useFloat16 useFloat16CoarseQuantizer usePrecomputed; do
key=$db.IVF1MPQ16.st$st.$opt
run_on_1gpu_32gb_devlab $key.a \
python -u bench_hybrid_cpu_gpu.py \
--nq 100000 --search_type $st --nt 32 \
--db $db \
--indexname $indexname \
--stats $logdir/$key.pickle \
--$opt
done

#########################
# shard types
db=bigann1B
indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbbigann1B.OPQ16_128_IVF1048576_HNSW32_PQ16.faissindex
# for st in multi_gpu_sharded1_flat multi_gpu_sharded1_ivf; do
for st in multi_gpu_Csharded1_flat multi_gpu_Csharded1_ivf multi_gpu_Csharded1; do
for shard_type in 4; do
key=4GPU.$db.IVF1MPQ16.st$st.shard_type$shard_type
run_on_4gpu_32gb_devlab $key.a \
python -u bench_hybrid_cpu_gpu.py \
--nq 100000 --search_type $st --nt 32 \
--db $db \
--indexname $indexname \
--stats $logdir/$key.pickle \
--shard_type $shard_type
done
done
fi   # end of disabled "if false" section
#########################
# PQ64 experiments (only sharded multi-GPU search types). This is the
# currently-active batch; each iteration submits one SLURM job.
# for db in bigann1B deep1B; do
for db in deep1B; do
    # pick the prebuilt OPQ64/IVF1M/PQ64 index matching the database
    if [ "$db" == bigann1B ]; then
        indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbbigann1B.OPQ64_128_IVF1048576_HNSW32_PQ64.faissindex
    elif [ "$db" == deep1B ]; then
        indexname=/checkpoint/matthijs/bench_all_ivf/indexes/NR.autotune.dbdeep1B.OPQ64_128_IVF1048576_HNSW32_PQ64.faissindex
    else
        echo "no index known for db=$db" >&2
        exit 1
    fi
    #for st in multi_gpu_sharded1 multi_gpu_Csharded1 multi_gpu_Csharded1_flat multi_gpu_Csharded1_ivf; do
    #for st in multi_gpu_sharded1 multi_gpu_Csharded1_flat; do
    for st in multi_gpu_Csharded1_flat; do
        key=4GPU.$db.IVF1MPQ64.st$st
        # some search types need an explicit batch size
        case $st in
        multi_gpu_sharded1)
            extraopt="--batch_size 16384";;
        *)
            extraopt="";;
        esac
        run_on_4gpu_32gb_devlab "$key.e" \
            python -u bench_hybrid_cpu_gpu.py \
            --nq 100000 --search_type "$st" --nt 32 \
            --db "$db" \
            --useFloat16 --useFloat16CoarseQuantizer \
            --indexname "$indexname" \
            --stats "$logdir/$key.pickle" \
            $extraopt   # intentionally unquoted: "" must vanish, "--batch_size N" must split
    done
done