Skip to content

Instantly share code, notes, and snippets.

@matteoferla
Last active December 21, 2023 10:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matteoferla/e0496d5766c12a0ae1738b943b41a536 to your computer and use it in GitHub Desktop.
Save matteoferla/e0496d5766c12a0ae1738b943b41a536 to your computer and use it in GitHub Desktop.
A bunch of scripts used on the HTCondor cluster
# ========================
# Sets presets...
# tries to source $HOME/.bashrc
# or $HOME2/.bashrc;
# or a fallback
# ========================
# source /data/xchem-fragalysis/shared/bashrc.sh
# universal fixes
export PS1="[\u@\h \W]\$"
export LANG=en_GB.UTF-8
export DATA=/data/xchem-fragalysis
export HOST=${HOST:-$(hostname)}
# `users` prints every logged-in session (possibly none, possibly several
# names), so ask for the current user instead; fall back to the numeric uid
# when the uid has no passwd entry.
export USER=${USER:-$(id -un 2>/dev/null || id -u)}
export HOME=${HOME:-$_CONDOR_SCRATCH_DIR}
export SHELL=/bin/bash
# provides PRETTY_NAME and friends
source /etc/os-release;
export PIP_NO_CACHE_DIR=1
export PIP_NO_USER=1
# lscpu -p prints '#' header lines followed by one line per CPU: count the
# CPU lines (taking the last CPU index would be one short of the count).
export NUMEXPR_MAX_THREADS=$(lscpu -p=CPU | grep -vc '^#')
# NOTE(review): credentials are kept in this shared file; consider moving
# them to a private file.
export GIPHY_API="👾👾👾👾"
export MANIFOLD_API_KEY="👾👾👾👾"
export PLOTLY_API_KEY='👾👾👾👾'
export OE_LICENSE="$DATA/mferla/ASAP-oe_license.txt"
# frag network
export KUBECONFIG=$DATA/mferla/config-fragnet
export NEO4J_USER=matteo
export NEO4J_PASS='👾👾👾👾'
export USE_NEO4J_INSTEAD_API=true
# Jedi cache
# HOME2 may not be set yet at this point: use the same default as the
# fallback branch below rather than ending up with /.cache
mkdir -p "${HOME2:-/data/xchem-fragalysis/mferla}/.cache"
export XDG_CACHE_HOME="${HOME2:-/data/xchem-fragalysis/mferla}/.cache"
# -------------------------------------
if [ -f "$HOME/.bashrc" ]; then
  source "$HOME/.bashrc"
elif [ -f "$HOME2/.bashrc" ]; then
  source "$HOME2/.bashrc"
else
  # fallback: Matteo's conda with the shared env folders
  export HOME2=${HOME2:-/data/xchem-fragalysis/mferla}
  export PYTHONUSERBASE=${PYTHONUSERBASE:-$HOME2/conda/local}
  export CONDA_ENVS_PATH=${CONDA_ENVS_PATH:-$DATA/mferla/.conda/envs:$DATA/sanchezg/app/miniconda3_2/envs:$DATA/mferla/rocky-conda/envs}
  export MAMBA_ALWAYS_YES=yes
  source "$DATA/mferla/rocky-conda/etc/profile.d/conda.sh"
  conda activate
fi
# A sourced .bashrc is not guaranteed to define HOME2: default it here so the
# Jupyter config dir below never resolves to /jupyter
export HOME2=${HOME2:-/data/xchem-fragalysis/mferla}
export JUPYTER_CONFIG_DIR=${JUPYTER_CONFIG_DIR:-$HOME2/jupyter}
#######################################
# Housekeeping to run after installing packages into the active conda.
# Purges the conda caches, then recursively opens up $CONDA_PREFIX so that
# every user can read and write it (the capital X adds execute only to
# directories and to files that are already executable).
# Globals:   CONDA_PREFIX (read)
# Arguments: none
# Outputs:   nothing; stdout and stderr of both commands are discarded
# Returns:   status of chmod, or 0 when CONDA_PREFIX is empty
#######################################
after_install() {
  # redirect stdout first, then stderr onto it, so that both are silenced
  conda clean --all -y > /dev/null 2>&1
  if [ -n "$CONDA_PREFIX" ]; then
    # -R (capital) recurses; a lowercase -r is parsed as the mode "remove read"
    chmod -R -f a=rwX -- "$CONDA_PREFIX" > /dev/null 2>&1
  fi
}
# One-second pause once the file has been sourced; the reason is not evident
# from this file — TODO confirm whether it is still needed.
sleep 1;
#!/bin/bash
# ========================
# JUMP!
# Run a permanent ssh reverse proxy connection on SSH_FORWARD_PORT:
# the port is opened on SSH_INNER_ADDRESS, which is reached by hopping
# through SSH_GATE_ADDRESS (ProxyCommand).
# Requires:
#   SSH_USER           remote user name (used on both hosts)
#   SSH_GATE_ADDRESS   gateway host
#   SSH_INNER_ADDRESS  host behind the gateway
#   a port, taken from the first that is set of SSH_FORWARD_PORT, JOB_PORT,
#   JUPYTER_PORT, APPTAINERENV_JUPYTER_PORT
# Optional:
#   SSH_FOLDER  folder holding the key(s) and known_hosts (default $HOME/.ssh)
#   SSH_KEY     key file name, or glob, inside SSH_FOLDER (default *)
# ========================

# Print a message on stderr and stop the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ -z "$SSH_USER" ]; then
  die "Your remote username SSH_USER ($SSH_USER) is not specified"
fi
if [ -z "$SSH_GATE_ADDRESS" ]; then
  die "Your remote gate address SSH_GATE_ADDRESS ($SSH_GATE_ADDRESS) is not specified"
fi
if [ -z "$SSH_INNER_ADDRESS" ]; then
  die "Your remote inner address SSH_INNER_ADDRESS ($SSH_INNER_ADDRESS) is not specified"
fi
if [ -n "$SSH_FORWARD_PORT" ]; then
  echo '$SSH_FORWARD_PORT provided directly.'
elif [ -n "$JOB_PORT" ]; then
  export SSH_FORWARD_PORT=$JOB_PORT
elif [ -n "$JUPYTER_PORT" ]; then
  export SSH_FORWARD_PORT=$JUPYTER_PORT
elif [ -n "$APPTAINERENV_JUPYTER_PORT" ]; then
  export SSH_FORWARD_PORT=$APPTAINERENV_JUPYTER_PORT
else
  die 'No $SSH_FORWARD_PORT provided'
fi
export DATA=/data/xchem-fragalysis;
export SSH_KEY=${SSH_KEY:-*}
export SSH_FOLDER=${SSH_FOLDER:-$HOME/.ssh}
#export SSH_PORT=${SSH_PORT:-22}
# most applications are okay with path//path but not ssh
export SSH_FOLDER=$(printf '%s\n' "$SSH_FOLDER" | sed -e 's|//|/|g' -e 's|/$||')

# Check that the folder can be written to (ssh-keygen and ssh both write
# known_hosts there); the probe file is removed again straight away.
write_probe="$SSH_FOLDER/.write_probe.$$"
if mkdir -p "$SSH_FOLDER" 2>/dev/null && touch "$write_probe" 2>/dev/null; then
  rm -f -- "$write_probe"
else
  echo "The folder $SSH_FOLDER is inaccessible"
  mkdir -p /tmp/ssh
  export SSH_FOLDER=/tmp/ssh
fi
echo "prep connections with the keys in $SSH_FOLDER"
touch "$SSH_FOLDER/known_hosts"
# ssh refuses keys that are readable by others
chmod 700 "$SSH_FOLDER"
chmod 600 "$SSH_FOLDER"/*
echo 'accepting fingerprints'
# drop any stale fingerprint of the gateway
ssh-keygen -R "$SSH_GATE_ADDRESS" -f "$SSH_FOLDER/known_hosts"

# One -i per key file: a bare `-i folder/*` would hand ssh every file after
# the first one as host / command. SSH_KEY is deliberately left unquoted so
# that a glob expands. Files that cannot be private keys are skipped.
identity_args=()
proxy_identity=''
for key_file in "$SSH_FOLDER"/$SSH_KEY; do
  [ -f "$key_file" ] || continue
  case "${key_file##*/}" in
    known_hosts | known_hosts.old | authorized_keys | config | test.txt | *.pub)
      continue
      ;;
  esac
  identity_args+=(-i "$key_file")
  # the ProxyCommand is a single string run by a shell, so quote for that shell
  proxy_identity+=" -i $(printf '%q' "$key_file")"
done
if [ "${#identity_args[@]}" -eq 0 ]; then
  echo "No key matching $SSH_FOLDER/$SSH_KEY found: ssh will try its default identities" >&2
fi

# Reconnect forever: ssh exits when the link drops or the forward fails.
while true; do
  ssh -N -v \
    -R "0.0.0.0:$SSH_FORWARD_PORT:0.0.0.0:$SSH_FORWARD_PORT" \
    -o ProxyCommand="ssh -v -W %h:%p -l $SSH_USER$proxy_identity -o StrictHostKeyChecking=no $SSH_GATE_ADDRESS" \
    "${identity_args[@]}" \
    -o ServerAliveInterval=180 \
    -o UserKnownHostsFile="$SSH_FOLDER/known_hosts" \
    -l "$SSH_USER" \
    -o ExitOnForwardFailure=yes \
    -o StrictHostKeyChecking=no \
    "$SSH_INNER_ADDRESS"
  echo "Connection to $SSH_GATE_ADDRESS > $SSH_INNER_ADDRESS lost" 1>&2
  sleep 600
done
#!/bin/bash
# Smoke test for a job slot: reports who and where the script runs as, lists
# the working directory and $HOME, then dumps the whole environment.
export HOST=${HOST:-$(hostname)}
# `users` prints every logged-in session (possibly none, possibly several
# names), so ask for the current user instead; fall back to the numeric uid
# when the uid has no passwd entry.
export USER=${USER:-$(id -un 2>/dev/null || id -u)}
# jobs may start without HOME: use the Condor scratch dir in that case
export HOME=${HOME:-$_CONDOR_SCRATCH_DIR}
# provides PRETTY_NAME
source /etc/os-release;
echo "************************"
echo "HELLO WORLD!"
echo "************************"
echo "Greet from script ${0} as $USER in $HOST which runs $PRETTY_NAME"
echo "ls $PWD"
ls -- "$PWD"
echo "ls $HOME"
ls -- "$HOME"
echo "printenv"
printenv
# ##############################
# install conda at $CONDA_PREFIX
# (in the example below the value is exported as APPTAINERENV_CONDA_PREFIX;
# presumably the container runtime passes it on as CONDA_PREFIX — confirm)
# example usage:
: '
export DATA=/data/xchem-fragalysis
export APPTAINERENV_CONDA_PREFIX=$DATA/mferla/waconda
export JOB_SCRIPT=$DATA/shared/singularity.sh
export JOB_INNER_SCRIPT=/data/xchem-fragalysis/shared/install_conda.sh
condor_submit $DATA/shared/target_script.condor
'
# ##############################
# set -e
export DATA=/data/xchem-fragalysis
if [[ -z "$CONDA_PREFIX" ]]; then
  echo "Must provide CONDA_PREFIX in environment" 1>&2
  exit 1
fi
installer="$DATA/shared/Miniconda3-latest-Linux-x86_64.sh"
if ! [ -f "$installer" ]; then
  # wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o $DATA/shared/Miniconda3-latest-Linux-x86_64.sh;
  # --fail: do not save an HTTP error page as if it were the installer;
  # --location: follow redirects
  if ! curl --fail --location https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output "$installer"; then
    # a partial download must not be mistaken for a cached installer next time
    rm -f -- "$installer"
    echo "Could not download the Miniconda installer" 1>&2
    exit 1
  fi
fi
# rm -r $CONDA_PREFIX
# The prefix is a directory: when it already exists the installer needs -u
# (update the existing installation), otherwise it refuses to proceed.
if [ -d "$CONDA_PREFIX" ]; then
  bash "$installer" -p "$CONDA_PREFIX" -b -u
else
  bash "$installer" -p "$CONDA_PREFIX" -b
fi
# everything below needs the `conda` shell function defined by this file
if ! source "$CONDA_PREFIX/etc/profile.d/conda.sh"; then
  echo "Conda was not installed at $CONDA_PREFIX" 1>&2
  exit 1
fi
export PIP_NO_CACHE_DIR=1
export PIP_NO_USER=1
export PYTHONUSERBASE=$CONDA_PREFIX
conda activate base
conda update -n base -y -c defaults conda
conda install -y -c conda-forge conda-libmamba-solver
conda config --set solver libmamba
# Jupyter stuff
conda install -y -n base -c conda-forge distro nodejs sqlite jupyterlab jupyter_http_over_ws nb_conda_kernels
conda update -y -c conda-forge nodejs # peace among worlds
python -m pip install -q jupyter_theme_editor
# install whatever you want here
python -m pip install -q pandas plotly seaborn pillow pandarallel pandera nglview pebble rdkit jupyterlab-spellchecker
conda install -y -n base -c conda-forge openssh nano
conda install -y -n base -c conda-forge util-linux
conda install -y -n base -c conda-forge openbabel plip git
conda install -y -n base -c conda-forge -c bioconda kalign2 hhsuite muscle mmseqs2
python -m pip install -q fragmenstein pyrosetta_help
python -m pip install -q "$DATA/shared/pyrosetta-2023.27+release.e3ce6ea9faf-cp311-cp311-linux_x86_64.whl"
# python -m pip cache purge # PIP_NO_CACHE_DIR conflict
conda install -y -c nvidia -c conda-forge cuda-toolkit cuda-nvcc cuda-command-line-tools gputil
conda install -y -c omnia -c conda-forge openmm openff-forcefields openff-toolkit openmmforcefields
conda install -y -c pytorch -c conda-forge pytorch torchvision matplotlib pandas
conda clean -y -t
conda clean -y -i
# A retro version for CentOS 7
# -y: there is nobody to answer the confirmation prompt in a batch job
CONDA_OVERRIDE_GLIBC=2.17 conda create -y -n glibc17 python=3.8
# source $CONDA_PREFIX/etc/profile.d/conda.sh
# conda activate glibc17 # not base!
#chmod -R a+r $CONDA_PREFIX
#find $CONDA_PREFIX -type d -exec chmod 777 {} \;
# ##############################
# Runs Jupyter lab on JUPYTER_PORT, restarting it whenever it exits.
# The port is the first that is set of JUPYTER_PORT, JOB_PORT,
# SSH_FORWARD_PORT; JUPYTER_CONFIG_DIR must be set as well.
# example usage:
: '
export DATA=/data/xchem-fragalysis
export APPTAINERENV_CONDA_PREFIX=$DATA/mferla/waconda
export JOB_SCRIPT=$DATA/shared/singularity.sh
export JOB_INIT_SCRIPT=/data/xchem-fragalysis/shared/stats_connection.sh
export JOB_INNER_SCRIPT=/data/xchem-fragalysis/shared/notebook.sh
export JOB_PORT=1300
export SSH_FORWARD_PORT=1300
export SSH_KEY=seiryu
export SSH_USER=ferla
export SSH_FOLDER=$DATA/mferla/singularity/tmp
export JUPYTER_CONFIG_DIR=$DATA/mferla/jupyter
export APPTAINERENV_CONDA_PREFIX=/data/xchem-fragalysis/mferla/waconda
export APPTAINERENV_JUPYTER_notebook_dir=/data/xchem-fragalysis/mferla
export APPTAINERENV_JUPYTER_CONFIG_DIR=$JUPYTER_CONFIG_DIR
export APPTAINER_HOSTNAME='lucky13'
condor_submit $DATA/shared/target_script.condor -a 'Requirements=(machine == "orpheus-worker-gpu-13.novalocal")'
'
# ##############################

# Print a message on stderr and stop the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

export HOST=${HOST:-$(hostname)}
# `users` prints every logged-in session (possibly none, possibly several
# names), so ask for the current user instead; fall back to the numeric uid
# when the uid has no passwd entry.
export USER=${USER:-$(id -un 2>/dev/null || id -u)}
export HOME=${HOME:-$_CONDOR_SCRATCH_DIR}
# provides PRETTY_NAME
source /etc/os-release;
if [ -n "$JUPYTER_PORT" ]; then
  echo "$JUPYTER_PORT set"
elif [ -n "$JOB_PORT" ]; then
  export JUPYTER_PORT=$JOB_PORT
elif [ -n "$SSH_FORWARD_PORT" ]; then
  export JUPYTER_PORT=$SSH_FORWARD_PORT
else
  die "Your JUPYTER_PORT is not specified"
fi
# make sure the value reaches the jupyter process whichever branch set it
export JUPYTER_PORT
if [ -z "$JUPYTER_CONFIG_DIR" ]; then
  die "Your JUPYTER_CONFIG_DIR is not specified either"
fi
echo "************************"
echo "HELLO JUPYTER!"
echo "************************"
echo "Greet from Jupyter lab script ${0} as $USER in $HOST which runs $PRETTY_NAME on $JUPYTER_PORT with settings from $JUPYTER_CONFIG_DIR"
source /data/xchem-fragalysis/shared/bashrc.sh;
# conda activate
#export JUPYTER_CONFIG_PATH=$HEADQUARTERS/.jupyter
# First time? Remember to set:
# jupyter notebook --generate-config
# yes invasion | jupyter server password
# port is JUPYTER_PORT
while true; do
  # the port is passed explicitly so that a port set in the config dir
  # cannot disagree with the one being forwarded
  jupyter lab --ip="0.0.0.0" --port="$JUPYTER_PORT" --no-browser
  echo "jupyter lab exited with status $?: restarting" >&2
  # back off briefly so that a broken install does not spin the CPU
  sleep 5
done

Premise

Matteo uses the following custom env convention

DATA=/data/xchem-fragalysis
HOME2=/data/xchem-fragalysis/mferla

Along with JOB_* which are script dependent.

About using Matteo's conda:

source $DATA/mferla/rocky-conda/etc/profile.d/conda.sh
conda activate base
export CONDA_ENVS_PATH=$DATA/mferla/.conda/envs:$DATA/sanchezg/app/miniconda3_2/envs:$DATA/mferla/rocky-conda/envs

You can add your own env space if you wish and still borrow Matteo & Ruben's envs. So the last line would be:

export CONDA_ENVS_PATH=👾👾👾:$DATA/mferla/.conda/envs:$DATA/sanchezg/app/miniconda3_2/envs:$DATA/mferla/rocky-conda/envs

where 👾👾👾 is an absolute path to your env.

target_script.condor

Fullpath: /data/xchem-fragalysis/shared/target_script.condor

This submit file runs $JOB_SCRIPT with the initial directory set to $HOME2 and requests a machine in its entirety. Add -a 'Requirements=(machine == "orpheus-worker-gpu-666.novalocal")' as a command-line argument to target a particular machine. For machine info see https://www.stats.ox.ac.uk/~ferla/info/condor.html or run condor_status -json

Envs used:

  • $HOME2 the fake home
  • $JOB_SCRIPT

The folder $HOME2/logs must exist beforehand.

nice scripts to use for JOB_SCRIPT

  • /data/xchem-fragalysis/shared/singularity.sh
  • /data/xchem-fragalysis/shared/helloworld.sh

helloworld.sh

This is just a test:

JOB_SCRIPT=/data/xchem-fragalysis/shared/helloworld.sh condor_submit /data/xchem-fragalysis/shared/target_script.condor

Then

echo 'Stdout'
cat $HOME2/logs/condor-log.615.0.out
echo 'Stderr'
cat $HOME2/logs/condor-log.615.0.err

singularity.sh

JOB_SCRIPT=/data/xchem-fragalysis/shared/singularity.sh \
APPTAINER_CONTAINER=/data/xchem-fragalysis/shared/singularity/rockyplus.def \
JOB_INNER_SCRIPT=/data/xchem-fragalysis/mferla/Zika/job.sh \
condor_submit /data/xchem-fragalysis/shared/target_script.condor

SSH jump

/etc/ssh/sshd_config is not readable, so I don't know whether AllowTcpForwarding is allowed. All variants of ssh -oProxyCommand='ssh -p22 qdf33232@ssh.diamond.ac.uk -W %h:%p -i $HOME/.ssh/diamond_rsa -F /tmp/blank' -p22 qdf33232@diamond-cpu -i $HOME/.ssh/diamond_rsa -v -F /tmp/blank -o ForwardAgent=no -o RequestTTY=yes fail.


Author: Matteo Date: 12 August 2023

# ========================
# Run a permanent ssh reverse tunnel: SSH_FORWARD_PORT is opened on
# SSH_ADDRESS and forwarded back to the same port on this machine.
# Requires:
#   SSH_USER     remote user name
#   SSH_ADDRESS  remote host
#   a port, taken from the first that is set of SSH_FORWARD_PORT, JOB_PORT,
#   JUPYTER_PORT, APPTAINERENV_JUPYTER_PORT
# Optional:
#   SSH_PORT    port of the remote ssh server (default 666)
#   SSH_FOLDER  folder holding the key(s) and known_hosts (default $HOME/.ssh)
#   SSH_KEY     key file name, or glob, inside SSH_FOLDER (default *)
# ========================

# Print a message on stderr and stop the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ -z "$SSH_USER" ]; then
  die "Your remote username SSH_USER ($SSH_USER) is not specified"
fi
if [ -z "$SSH_ADDRESS" ]; then
  die "Your remote address SSH_ADDRESS ($SSH_ADDRESS) is not specified"
fi
if [ -n "$SSH_FORWARD_PORT" ]; then
  echo '$SSH_FORWARD_PORT provided directly.'
elif [ -n "$JOB_PORT" ]; then
  export SSH_FORWARD_PORT=$JOB_PORT
elif [ -n "$JUPYTER_PORT" ]; then
  export SSH_FORWARD_PORT=$JUPYTER_PORT
elif [ -n "$APPTAINERENV_JUPYTER_PORT" ]; then
  export SSH_FORWARD_PORT=$APPTAINERENV_JUPYTER_PORT
else
  die 'No $SSH_FORWARD_PORT provided'
fi
export DATA=/data/xchem-fragalysis;
export SSH_KEY=${SSH_KEY:-*}
export SSH_FOLDER=${SSH_FOLDER:-$HOME/.ssh}
# ssh is given a single -p: when -p is repeated ssh keeps only the first one,
# which used to be a hard-coded 666 that made SSH_PORT ineffective. 666 is
# kept as the default so an unset SSH_PORT still connects to the same port.
export SSH_PORT=${SSH_PORT:-666}
# most applications are okay with path//path but not ssh
export SSH_FOLDER=$(printf '%s\n' "$SSH_FOLDER" | sed -e 's|//|/|g' -e 's|/$||')

# Check that the folder can be written to (ssh-keygen and ssh both write
# known_hosts there); the probe file is removed again straight away.
write_probe="$SSH_FOLDER/.write_probe.$$"
if mkdir -p "$SSH_FOLDER" 2>/dev/null && touch "$write_probe" 2>/dev/null; then
  rm -f -- "$write_probe"
else
  echo "The folder $SSH_FOLDER is inaccessible"
  mkdir -p /tmp/ssh
  export SSH_FOLDER=/tmp/ssh
fi
echo "prep connections with the keys in $SSH_FOLDER"
touch "$SSH_FOLDER/known_hosts"
# ssh refuses keys that are readable by others
chmod 700 "$SSH_FOLDER"
chmod 600 "$SSH_FOLDER"/*
echo 'accepting fingerprints'
# drop any stale fingerprint of the remote host
ssh-keygen -R "$SSH_ADDRESS" -f "$SSH_FOLDER/known_hosts"

# One -i per key file: a bare `-i folder/*` would hand ssh every file after
# the first one as host / command. SSH_KEY is deliberately left unquoted so
# that a glob expands. Files that cannot be private keys are skipped.
identity_args=()
for key_file in "$SSH_FOLDER"/$SSH_KEY; do
  [ -f "$key_file" ] || continue
  case "${key_file##*/}" in
    known_hosts | known_hosts.old | authorized_keys | config | test.txt | *.pub)
      continue
      ;;
  esac
  identity_args+=(-i "$key_file")
done
if [ "${#identity_args[@]}" -eq 0 ]; then
  echo "No key matching $SSH_FOLDER/$SSH_KEY found: ssh will try its default identities" >&2
fi

# Reconnect forever: ssh exits when the link drops or the forward fails.
while true; do
  ssh -N \
    -R "0.0.0.0:$SSH_FORWARD_PORT:localhost:$SSH_FORWARD_PORT" \
    -o ServerAliveInterval=180 \
    -o ExitOnForwardFailure=yes \
    "${identity_args[@]}" \
    -o UserKnownHostsFile="$SSH_FOLDER/known_hosts" \
    -l "$SSH_USER" \
    -p "$SSH_PORT" \
    "$SSH_ADDRESS"
  echo 'Connection to stats lost' 1>&2
  sleep 600
done
# Moves the files found in singularity/tmp (a path relative to the current
# working directory) into $HOME/.ssh/, then idles forever so that the job
# keeps running.
# Expansions are quoted so that a HOME containing spaces or glob characters
# cannot split into several arguments.
mkdir -p "$HOME/.ssh/"
mv -- singularity/tmp/* "$HOME/.ssh/"
#ssh-keygen -R www.matteoferla.com:666 -f $HOME/.ssh/
while true; do
  sleep 60
done
# ============================================================
# This submit file runs $JOB_SCRIPT (through /bin/bash) within initial dir $HOME2
# and requests the target slot's total CPUs, GPUs and memory.
# To specify a particular machine
# add `-a 'Requirements=(machine == "orpheus-worker-gpu-13.novalocal")'` as a cmd arg
# Envs used:
# * $HOME2 the fake home, e.g. /data/xchem-fragalysis/mferla
# * $JOB_SCRIPT
# nice scripts to use for JOB_SCRIPT
# /data/xchem-fragalysis/shared/singularity.sh
# /data/xchem-fragalysis/shared/helloworld.sh
# ============================================================
# bash is the executable; the path in $JOB_SCRIPT is handed to it as argument
Executable = /bin/bash
arguments = $ENV(JOB_SCRIPT)
Universe = vanilla
# environment variables of the submitting shell matching these patterns are
# passed on to the job
getenv = JOB_*,SINGULARITY_*,JUPYTER_*,CONDA_*,APPTAINER_*,APPTAINERENV_*,PYTHON*,HOME2,SSH_*
initialdir = $ENV(HOME2)
# stdout, stderr and the Condor event log go to $HOME2/logs, one set of files
# per cluster and process id
Output = $ENV(HOME2)/logs/condor-log.$(Cluster).$(Process).out
Error = $ENV(HOME2)/logs/condor-log.$(Cluster).$(Process).err
Log = $ENV(HOME2)/logs/condor-log.$(Cluster).$(Process).log
# ask for everything the matched slot has
request_cpus = Target.TotalSlotCpus
request_gpus = Target.TotalSlotGPUs
request_memory = Target.TotalSlotMemory
# custom job attribute; presumably read by the pool's whole-machine policy — TODO confirm
+RequiresWholeMachine = True
Queue
@matteoferla
Copy link
Author

This is stored here in case I am away and someone needs one of these files.
I will one day write this all up properly

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment