Newer job_submit.lua
--[[
For use, this script should be copied into a file named "job_submit.lua"
in the same directory as the SLURM configuration file, slurm.conf.
--]]
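--[[
Deployment note (standard Slurm setup, not specific to this script): the
controller only calls these hooks when slurm.conf enables the Lua
job-submit plugin:

    JobSubmitPlugins=lua

Depending on the Slurm version, slurmctld re-reads the script when its
timestamp changes or after an "scontrol reconfigure".
--]]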
function fix_undefined_partition(job_desc)
    if (job_desc.partition == nil) then
        local default_partition = "batch"
        job_desc.partition = default_partition
        slurm.log_info(
            "slurm_job_submit: No partition specified, moved to batch.")
    end
end
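-- Illustrative example: a plain submission such as
--     sbatch --wrap=hostname
-- reaches this hook with job_desc.partition == nil and leaves with
-- job_desc.partition == "batch".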
function move_to_gpu_partition(job_desc, submit_uid)
    -- batch       -> gpu
    -- interactive -> gpu-interactive
    -- debug       -> gpu-debug
    local partition = ""
    local qos = "gpu"
    if string.match(job_desc.partition, "gpu") then
        -- They requested a GPU partition explicitly, leave it alone
        partition = job_desc.partition
    else
        if (job_desc.partition == 'batch') then
            partition = "gpu"
        else
            partition = "gpu-"..job_desc.partition
        end
    end
    slurm.log_info("slurm_job_submit: for user %u, setting partition: %s",
        submit_uid, partition)
    job_desc.partition = partition
    job_desc.qos = qos
    return slurm.SUCCESS
end
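-- Illustrative routings produced by the mapping above (the partition
-- names are the ones this script assumes exist on the cluster):
--     sbatch --gres=gpu:1 ...                   batch       -> gpu
--     sbatch --gres=gpu:1 -p interactive ...    interactive -> gpu-interactive
--     sbatch --gres=gpu:1 -p debug ...          debug       -> gpu-debug
--     sbatch --gres=gpu:1 -p gpu ...            unchanged (already a GPU partition)
-- All four also get qos = "gpu".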
function fix_default_values(job_desc)
    -- slurm.NO_VAL16 (65534) marks an unset 16-bit field such as
    -- cpus_per_task or ntasks_per_node; slurm.NO_VAL marks an unset
    -- 32-bit field such as num_tasks.
    if (job_desc.cpus_per_task == slurm.NO_VAL16) then
        job_desc.cpus_per_task = 1
        slurm.log_info("slurm_job_submit: setting cpus_per_task = 1.")
    end
    local ntasks_per_node_specified = true
    local ntasks_specified = true
    if ((job_desc.ntasks_per_node == slurm.NO_VAL16) and
        (job_desc.num_tasks == slurm.NO_VAL)) then
        job_desc.ntasks_per_node = 1
        job_desc.num_tasks = 1
        ntasks_per_node_specified = false
        ntasks_specified = false
        slurm.log_info(
            "slurm_job_submit: setting num_tasks, ntasks_per_node = 1.")
    elseif ((job_desc.ntasks_per_node == slurm.NO_VAL16) and
            (job_desc.num_tasks ~= slurm.NO_VAL)) then
        ntasks_per_node_specified = false
        slurm.log_info("slurm_job_submit: num_tasks specified (%d), ntasks_per_node not specified.",
            job_desc.num_tasks)
    elseif ((job_desc.ntasks_per_node ~= slurm.NO_VAL16) and
            (job_desc.num_tasks == slurm.NO_VAL)) then
        ntasks_specified = false
        slurm.log_info("slurm_job_submit: ntasks_per_node specified (%d), num_tasks not specified.",
            job_desc.ntasks_per_node)
    end
    return ntasks_per_node_specified, ntasks_specified
end
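-- Summary of the three cases above:
--     ntasks_per_node unset, num_tasks unset -> both defaulted to 1
--     ntasks_per_node unset, num_tasks set   -> only the first flag cleared
--     ntasks_per_node set,   num_tasks unset -> only the second flag cleared
-- When both are set, both flags stay true and nothing is logged.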
function move_to_any_if_needed(job_desc,
                               submit_uid,
                               ntasks_per_node_specified,
                               ntasks_specified)
    --[[
    How many CPUs are requested per node?
    If ntasks_per_node was explicitly specified, it is
        cpus_per_task*ntasks_per_node
    If ntasks was explicitly specified, it could be anywhere in
        cpus_per_task <= cpus_per_node <= cpus_per_task*ntasks
    --]]
    if (job_desc.partition ~= 'bigmem' and job_desc.partition ~= 'hugemem' and
        not string.find(job_desc.partition, "gpu")) then
        if (ntasks_per_node_specified) then
            if ((job_desc.cpus_per_task)*(job_desc.ntasks_per_node) <= 12) then
                move_to_any_partition(job_desc, submit_uid)
            end
        elseif (ntasks_specified and
                (job_desc.cpus_per_task <= 12) and
                ((job_desc.num_tasks)/(job_desc.max_nodes) <= 12)) then
            move_to_any_partition(job_desc, submit_uid)
        end -- if job is small enough for anywhere queue
    end -- if job was not in bigmem queue or gpu queues
    return slurm.SUCCESS
end
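-- Worked example of the size test above: with --cpus-per-task=2 and
-- --ntasks-per-node=4, (cpus/task)*(tasks/node) = 2*4 = 8 <= 12, so the
-- job is a candidate for an "anywhere" partition; with --cpus-per-task=4
-- and --ntasks-per-node=4 it is 16 > 12 and the job stays where it is.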
function move_to_any_partition(job_desc, submit_uid)
    -- Note: when the caller took the num_tasks path, ntasks_per_node is
    -- still unset and the product logged below reflects that sentinel
    -- value rather than a real per-node CPU count.
    slurm.log_info("slurm_job_submit: candidate for anywhere queue, "..
        "(cpus/task)*(task/node)=(%d)*(%d)=%d.",
        job_desc.cpus_per_task, job_desc.ntasks_per_node,
        (job_desc.cpus_per_task)*(job_desc.ntasks_per_node))
    -- We'll route to the appropriate anywhere partition:
    -- interactive -> any-interactive
    -- debug       -> any-debug
    local partition = ""
    if (job_desc.partition == 'debug' or job_desc.partition == 'interactive') then
        partition = "any-"..job_desc.partition
        slurm.log_info("slurm_job_submit: for user %u, setting partition: %s",
            submit_uid, partition)
        job_desc.partition = partition
    end
    return slurm.SUCCESS
end
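-- Note that jobs in the plain "batch" partition can pass the size test
-- but are left in place here; only "debug" and "interactive" jobs have
-- "any-" counterparts in this scheme.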
function slurm_job_submit(job_desc, part_list, submit_uid)
    local test_user_table = {}
    test_user_table[10001] = 'testuser1'
    test_user_table[10002] = 'testuser2'
    -- local test_enabled = (test_user_table[submit_uid] ~= nil)
    local test_enabled = false
    if (test_enabled) then
        slurm.log_info("testing mode enabled")
    end
    -- The testing and production paths currently share the same logic;
    -- diverge below if experimental routing is needed for test users.
    -- As the default partition is set later by SLURM, we need to set it
    -- here using the same logic.
    fix_undefined_partition(job_desc)
    -- If we reserved a GPU,
    if (job_desc.gres ~= nil) then
        move_to_gpu_partition(job_desc, submit_uid)
    else
        -- Make default reservation values explicit for easier decision-making
        local ntasks_per_node_specified, ntasks_specified =
            fix_default_values(job_desc)
        -- Decide if job is small enough for "anywhere" partitions and
        -- move it if it is.
        move_to_any_if_needed(job_desc,
                              submit_uid,
                              ntasks_per_node_specified,
                              ntasks_specified)
    end -- if we reserved a GPU
    return slurm.SUCCESS
end
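-- One illustrative way to exercise the routing without queueing real
-- work (--test-only asks Slurm to validate and report on the job
-- instead of running it; whether it invokes this hook may depend on
-- the Slurm version):
--     sbatch --test-only --gres=gpu:1 --wrap=hostname
--     sbatch --test-only --ntasks=4 --cpus-per-task=2 --wrap=hostname
-- The slurm_job_submit log lines then appear in the slurmctld log.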
function slurm_job_modify(job_desc, job_rec, part_list, modify_uid)
    return slurm.SUCCESS
end
slurm.log_info("initialized")
return slurm.SUCCESS