Skip to content

Instantly share code, notes, and snippets.

@treydock
Last active March 5, 2024 17:54
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save treydock/b964c5599fd057b0aa6a to your computer and use it in GitHub Desktop.
Save treydock/b964c5599fd057b0aa6a to your computer and use it in GitHub Desktop.
SLURM job_submit lua script
--[[
SLURM job submit filter for QOS
Some code and ideas pulled from https://github.com/edf-hpc/slurm-llnl-misc-plugins/blob/master/job_submit.lua
--]]
--########################################################################--
--
-- Define constants
--
--########################################################################--
-- Partition name -> per-partition settings.
-- Every entry holds one field, `qos`, the QOS name associated with that
-- partition. The "default" entry is the fallback that get_partition_qos
-- picks when a partition name has no entry of its own.
-- Names containing '-' are not valid Lua identifiers and therefore keep the
-- bracketed string-key form.
PARTITION_TO_QOS = {
  hepx              = { qos = "hepx" },
  idhmc             = { qos = "idhmc" },
  serial            = { qos = "general" },
  ["serial-long"]   = { qos = "long" },
  ["mpi-core8"]     = { qos = "mpi" },
  ["mpi-core32"]    = { qos = "mpi" },
  ["mpi-core32-4g"] = { qos = "mpi" },
  background        = { qos = "background" },
  ["background-4g"] = { qos = "background" },
  grid              = { qos = "grid" },
  interactive       = { qos = "interactive" },
  default           = { qos = "general" },
}
-- UNUSED
--[[
PARTITION_ROUTES = {
['something'] = 'something-else',
}
]]
--########################################################################--
--
-- Define functions
--
--########################################################################--
--========================================================================--
-- Render a value as a single-line string, intended for log messages.
-- o : any Lua value
-- Returns a string. Non-table values are passed through tostring(). Tables
-- are rendered recursively as '{ [key] = value,[key] = value,} ' where
-- numeric keys appear bare and every other key is wrapped in double quotes.
-- Entry order follows pairs() and is therefore unspecified for hash keys.
-- NOTE: there is no cycle detection; a self-referencing table recurses
-- without bound.
function dump(o)
  if type(o) ~= 'table' then
    return tostring(o)
  end
  -- Collect the pieces and join once instead of growing a string with '..'
  -- on every iteration.
  local parts = { '{ ' }
  for k, v in pairs(o) do
    local key = k
    if type(key) ~= 'number' then
      -- tostring() first: booleans, tables, functions and userdata are legal
      -- table keys but cannot be concatenated directly.
      key = '"' .. tostring(key) .. '"'
    end
    parts[#parts + 1] = '[' .. key .. '] = ' .. dump(v) .. ','
  end
  parts[#parts + 1] = '} '
  return table.concat(parts)
end
--========================================================================--
-- UNUSED
--[[
function reroute_job(job_desc, routeT)
local partition = routeT["part"]
log_info("slurm_job_submit#reroute_job: Setting partition to %s", partition)
job_desc.partition = partition
if job_desc.qos == nil then
log_info("slurm_job_submit#reroute_job: Setting QOS to %s", qos)
job_desc.qos = qos
end
end
]]
--========================================================================--
-- Wrap every entry of the partition list handed over by SLURM in a Lua
-- record so its fields can be read through part_rec_meta.
-- part_list : array of partition entries as received by the job_submit hooks
-- Returns an array, index-aligned with part_list, whose elements have the
-- form { part_rec_ptr = <entry> } with part_rec_meta set as metatable.
function _build_part_table ( part_list )
  local records = {}
  for idx, entry in ipairs(part_list) do
    local rec = { part_rec_ptr = entry }
    setmetatable(rec, part_rec_meta)
    records[idx] = rec
  end
  return records
end
--========================================================================--
-- Find the partition flagged as default.
-- part_rec : array of partition records
-- Returns the `name` of the first record whose `flag_default` equals 1.
-- Nothing is returned when no record matches. The scan stops at the first
-- missing (nil/false) array slot.
function default_partition(part_rec)
  local idx = 1
  local entry = part_rec[idx]
  while entry do
    if entry.flag_default == 1 then
      return entry.name
    end
    idx = idx + 1
    entry = part_rec[idx]
  end
end
--========================================================================--
-- Look up a partition record by name.
-- part_rec : array of partition records
-- name     : partition name to search for
-- Returns the first record whose `name` field equals `name`. Nothing is
-- returned when there is no match. The scan stops at the first missing
-- (nil/false) array slot.
function get_partition(part_rec, name)
  local idx = 1
  local candidate = part_rec[idx]
  while candidate do
    if candidate.name == name then
      return candidate
    end
    idx = idx + 1
    candidate = part_rec[idx]
  end
end
--========================================================================--
-- Resolve the QOS configured for a partition.
-- partition : partition name; nil/false is treated as the name 'NONE'
-- Looks the name up in PARTITION_TO_QOS and falls back to its "default"
-- entry when the name has no entry of its own.
-- Returns the entry's `qos` field, or nil when neither a matching entry nor
-- a "default" entry exists. The lookup is traced through log_debug.
function get_partition_qos(partition)
  local part = partition
  if not part then
    part = 'NONE'
  end
  local entry = PARTITION_TO_QOS[part]
  if not entry then
    entry = PARTITION_TO_QOS["default"]
  end
  log_debug("slurm_job_submit#get_partition_qos: partition: %s", part)
  -- Guard clause: nothing configured for this partition and no fallback.
  if entry == nil then
    return nil
  end
  local qos = entry["qos"]
  log_debug("slurm_job_submit#get_partition_qos: partT: %s", dump(entry))
  log_debug("slurm_job_submit#get_partition_qos: partition: %s matched to qos: %s", part, qos)
  return qos
end
--########################################################################--
--
-- SLURM job_submit/lua interface:
--
--########################################################################--
-- job_submit/lua entry point, called for every job submission.
-- Assigns a QOS derived from the job's partition when the submission did not
-- request one.
-- job_desc   : job request; its fields are accessed through job_req_meta
-- part_list  : list of partitions as passed in by SLURM
-- submit_uid : numeric uid of the submitting user (only used for logging)
-- Returns 0 in every case, so the submission is never rejected here.
function slurm_job_submit ( job_desc, part_list, submit_uid )
  setmetatable (job_desc, job_req_meta)
  -- An explicitly requested QOS is left untouched; return before doing any
  -- partition work.
  if job_desc.qos ~= nil then
    return 0
  end
  -- The partition table is only needed to find the default partition, i.e.
  -- when the job did not name a partition itself.
  local partition = job_desc.partition
    or default_partition(_build_part_table(part_list))
  local qos = get_partition_qos(partition)
  if qos ~= nil then
    log_info("slurm_job_submit: job from uid %d, setting qos value: %s", submit_uid, qos)
    job_desc.qos = qos
  end
  return 0
end
-- job_submit/lua entry point, called when an existing job is modified.
-- When the modification moves the job to a different partition:
--   * a QOS derived from the new partition is set, unless the request
--     carries its own QOS;
--   * if the request carries no time limit and the job's current limit
--     exceeds the new partition's max_time, the limit is lowered to max_time.
-- job_desc   : modification request; fields accessed through job_req_meta
-- job_rec    : the existing job; fields accessed through job_rec_meta
-- part_list  : list of partitions as passed in by SLURM
-- modify_uid : numeric uid of the user requesting the change (logging only)
-- Returns 0 in every case, so the modification is never rejected here.
function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid )
  -- Instead of nil SLURM reports an unspecified time limit as 4294967294
  -- (0xfffffffe).
  local TIME_LIMIT_UNSET = 4294967294

  setmetatable (job_desc, job_req_meta)
  setmetatable (job_rec, job_rec_meta)
  local current_partition = job_rec.partition
  local new_partition = job_desc.partition or current_partition

  -- Nothing to do unless the partition changes.
  if current_partition == new_partition then
    return 0
  end

  -- If qos was not specified
  if job_desc.qos == nil then
    local qos = get_partition_qos(new_partition)
    if qos ~= nil then
      log_info("slurm_job_modify: for job %u from uid %d, qos value: %s", job_rec.job_id, modify_uid, qos)
      job_desc.qos = qos
    end
  end

  -- If time was not specified
  local requested_limit = job_desc.time_limit
  if requested_limit == nil or requested_limit == TIME_LIMIT_UNSET then
    local new_part_rec = get_partition(_build_part_table(part_list), new_partition)
    if new_part_rec == nil then
      -- get_partition found no record with this exact name; indexing the
      -- missing record would raise, so leave the time limit alone.
      log_info("slurm_job_modify: for job %u from uid %d, partition %s not found, time_limit unchanged", job_rec.job_id, modify_uid, tostring(new_partition))
    else
      local max_time = new_part_rec.max_time
      local current_limit = job_rec.time_limit
      -- Comparing against nil raises, so both values must be present.
      if max_time ~= nil and current_limit ~= nil and current_limit > max_time then
        log_info("slurm_job_modify: for job %u from uid %d, time_limit value: %s", job_rec.job_id, modify_uid, max_time)
        job_desc.time_limit = max_time
      end
    end
  end
  return 0
end
--########################################################################--
--
-- Initialization code:
--
-- Define functions for logging and accessing slurmctld structures
--
--########################################################################--
-- Short global aliases for the logging functions on the `slurm` table.
log_info    = slurm.log_info
log_verbose = slurm.log_verbose
log_debug   = slurm.log_debug
log_err     = slurm.error
log_user    = slurm.log_user

-- Field accessors used as metamethods below. Each one forwards to the
-- matching _get_*/_set_* function, passing the pointer stored on the record.
local function read_job_rec_field(rec, field)
  return _get_job_rec_field(rec.job_rec_ptr, field)
end

local function read_job_req_field(req, field)
  return _get_job_req_field(req.job_desc_ptr, field)
end

local function write_job_req_field(req, field, new_value)
  -- A nil/false value is forwarded as the empty string.
  return _set_job_req_field(req.job_desc_ptr, field, new_value or "")
end

local function read_part_rec_field(rec, field)
  return _get_part_rec_field(rec.part_rec_ptr, field)
end

-- Existing job records: read-only field access.
job_rec_meta = { __index = read_job_rec_field }

-- Job requests: field reads and writes.
job_req_meta = {
  __index = read_job_req_field,
  __newindex = write_job_req_field,
}

-- Partition records: read-only field access.
part_rec_meta = { __index = read_part_rec_field }

log_info("initialized")
return slurm.SUCCESS
-- Manual test driver for get_partition_qos.
-- NOTE(review): this fragment appears to be a separate test script. Lua
-- requires `return` to be the last statement of a chunk, so it cannot live in
-- the same file after the plugin's final `return slurm.SUCCESS`. It expects
-- get_partition_qos (and whatever that function needs) to be defined already.
--
-- Each case is a one-entry table { [default_account] = partition }.
tests = {
  { ["hepx"] = "hepx" },
  { ["hepx"] = "serial" },
  { ["hepx"] = "background" },
  { ["idhmc"] = "serial" },
  { ["idhmc"] = "background" },
  { ["iamcs"] = "serial" },
  { ["iamcs"] = "mpi-core8" },
  { ["iamcs"] = "mpi-core32" },
  { ["iamcs"] = "mpi-core32-4g" },
  { ["iamcs"] = "background" },
  { ["foo"] = "serial" },
  { ["foo"] = "mpi-core8" },
  { ["foo"] = "background" },
  { ["foo"] = "bar" },
}

-- Lua has no built-in printf. Reuse one if the surrounding harness defines
-- it, otherwise fall back to print + string.format.
local printf = printf or function(fmt, ...)
  print(string.format(fmt, ...))
end

for _, test in ipairs(tests) do
  for account, partition in pairs(test) do
    printf("TEST: default_account: %s , partition: %s", account, partition)
    local qos = get_partition_qos(partition) or ""
    -- Show the outcome; previously the looked-up value was discarded.
    printf("RESULT: qos: %s", qos)
    printf("-------------------------------------------------------------")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment