RA5 SLURM config

# cgroup.conf
CgroupAutomount=yes
CgroupReleaseAgentDir="/etc/slurm/cgroup"
ConstrainCores=yes
ConstrainDevices=yes
ConstrainRAMSpace=yes
#TaskAffinity=yes

# cgroup_allowed_devices_file.conf
/dev/null
/dev/urandom
/dev/zero
/dev/sda*
/dev/cpu/*/*
/dev/pts/*
/dev/nvidia*

# gres.conf
NodeName=thor Name=gpu File=/dev/nvidia0 CPUs=0-23
NodeName=thor Name=gpu File=/dev/nvidia1 CPUs=0-23
NodeName=thor Name=gpu File=/dev/nvidia2 CPUs=0-23
NodeName=thor Name=gpu File=/dev/nvidia3 CPUs=0-23
NodeName=thor Name=gpu File=/dev/nvidia4 CPUs=0-23
NodeName=thor Name=gpu File=/dev/nvidia5 CPUs=0-23
NodeName=thor Name=gpu File=/dev/nvidia6 CPUs=0-23
NodeName=thor Name=gpu File=/dev/nvidia7 CPUs=0-23
NodeName=thor Name=gpu File=/dev/nvidia8 CPUs=24-47
NodeName=thor Name=gpu File=/dev/nvidia9 CPUs=24-47
NodeName=thor Name=gpu File=/dev/nvidia10 CPUs=24-47
NodeName=thor Name=gpu File=/dev/nvidia11 CPUs=24-47
NodeName=thor Name=gpu File=/dev/nvidia12 CPUs=24-47
NodeName=thor Name=gpu File=/dev/nvidia13 CPUs=24-47
NodeName=thor Name=gpu File=/dev/nvidia14 CPUs=24-47
NodeName=thor Name=gpu File=/dev/nvidia15 CPUs=24-47
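
With the per-GPU CPU ranges above and GresTypes=gpu plus DefMemPerCPU=3000 in the slurm.conf below, a GPU job on thor can be submitted along these lines; the script name, GPU count, and CPU count are illustrative rather than taken from this gist:

#!/bin/bash
#SBATCH --partition=gpu
#SBATCH --gres=gpu:2          # two of thor's sixteen GPUs
#SBATCH --cpus-per-task=12    # fits inside one socket's 0-23 or 24-47 range
#SBATCH --mem-per-cpu=3000    # same value as the gpu partition's DefMemPerCPU

srun ./gpu_application        # placeholder for the actual binary

The CPUs= ranges in gres.conf tell the scheduler which cores sit on the same socket as each GPU, so it will prefer those cores when placing the job's tasks.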

#
# slurm.conf file generated by configurator.html (in doc/html);
# see the slurm.conf man page for more information.
#
ClusterName=compute-cluster
ControlMachine=heimdall
ControlAddr=192.168.0.1
#BackupController=
#BackupAddr=
#
SlurmUser=slurm
#SlurmdUser=root
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
StateSaveLocation=/var/spool/slurm/ctld
SlurmdSpoolDir=/var/spool/slurm/d
SwitchType=switch/none
MpiDefault=none
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
ProctrackType=proctrack/cgroup
PluginDir=/usr/lib/slurm
#FirstJobId=
ReturnToService=1
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=/etc/slurm/prolog.d/*
#Epilog=/etc/slurm/epilog.d/*
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
TaskPlugin=task/cgroup
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
#UsePAM=
#
# TIMERS
SlurmctldTimeout=300
SlurmdTimeout=300
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0
#
# SCHEDULING
SchedulerType=sched/backfill
#SchedulerAuth=
SelectType=select/cons_res
SelectTypeParameters=CR_Core_Memory
FastSchedule=1
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0
#
# LOGGING
SlurmctldDebug=3
SlurmctldLogFile=/var/log/slurmctld.log
SlurmdDebug=3
SlurmdLogFile=/var/log/slurmd.log
JobCompType=jobcomp/none
#JobCompLoc=
#
# ACCOUNTING
JobAcctGatherType=jobacct_gather/cgroup
#JobAcctGatherFrequency=30
#
AccountingStorageTRES=gres/gpu
DebugFlags=CPU_Bind,Gres
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=heimdall
#AccountingStorageLoc=
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStorageUser=slurm
#
# COMPUTE NODES
GresTypes=gpu
NodeName=thor Gres=gpu:16 CPUs=48 Sockets=2 CoresPerSocket=24 ThreadsPerCore=1 RealMemory=1546812 State=UNKNOWN
NodeName=epicur0[0-3] CPUs=64 RealMemory=64239 Sockets=2 CoresPerSocket=16 ThreadsPerCore=2 Feature=HyperThread
PartitionName=cpu Nodes=epicur0[0-3] Default=YES MaxTime=INFINITE State=UP
PartitionName=gpu Nodes=thor MaxTime=INFINITE State=UP DefMemPerCPU=3000
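
Once slurmctld on heimdall and the slurmd daemons on the nodes are running, the layout can be sanity-checked with standard Slurm client commands; the lines below are examples, not part of the configuration:

sinfo -Nel                                  # node states for epicur0[0-3] and thor
scontrol show node thor | grep -i gres      # should report gres/gpu:16
sbatch -p cpu -n 4 --wrap="srun hostname"   # smoke test on the default cpu partition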