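"""Generate hyperparameter sweep configs and a launch script for AllenNLP experiments.

Reads a TOML file describing a sweep, expands it into one config per parameter
setting and trial, renders the matching Jsonnet template into an AllenNLP JSON
config, and writes a shell script that launches every run (optionally via slurm).
"""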
import copy
import os
import random
import subprocess

import click
import toml
from sklearn.model_selection import ParameterGrid


def run_jsonnet(base_model: str, args: str, out_path: str):
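    """Render the base Jsonnet config with the given --tla-* arguments into out_path."""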
    subprocess.run(f"jsonnet {base_model} {args} > {out_path}", shell=True, check=True)


def clone_src(target_dir: str):
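    """Build the project with `setup.py` and copy the built `qb` package into target_dir."""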
    subprocess.run("python setup.py build", shell=True, check=True)
    subprocess.run(f"cp -r build/lib/qb {target_dir}", shell=True, check=True)


def random_experiment_id():
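    """Return a random id used to name a generated experiment."""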
    return str(random.randint(1_000_000, 2_000_000))


def hyper_to_configs(path: str):
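    """Expand a hyperparameter TOML file into one config dict per setting and trial.

    The file may define an optional `hyper` table mapping (dotted) parameter
    names to lists of values; the grid of all combinations is expanded, each
    repeated `n_trials` times. A hypothetical sweep file might look like:

        n_trials = 2

        [params]
        lr = 0.001

        [hyper]
        "params.lr" = [0.001, 0.01]
    """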
    # Read the parent config, e.g. rnn.toml
    with open(path) as f:
        hyper_conf = toml.load(f)
    configs = []
    n_trials = hyper_conf.get("n_trials", 1)
    # If it defines a hyperparameter sweep, generate a config for each setting
    if "hyper" in hyper_conf:
        # ParameterGrid takes a dict mapping parameter names to lists of
        # values and expands it into the full sweep
        grid = ParameterGrid(hyper_conf["hyper"])
        del hyper_conf["hyper"]
        for params in grid:
            for trial in range(n_trials):
                # Deep copy so each generated config gets its own nested tables
                conf = copy.deepcopy(hyper_conf)
                # Fill in each swept value, resolving dotted names like
                # "params.lr" into the nested config tables
                for name, val in params.items():
                    splits = name.split(".")
                    access = conf
                    for part in splits[:-1]:
                        access = access[part]
                    access[splits[-1]] = val
                # Record which trial this configuration belongs to
                conf["trial"] = trial
                configs.append(conf)
        return configs
    else:
        # No sweep defined: treat the file itself as a single-trial config
        hyper_conf["trial"] = 0
        return [hyper_conf]


@click.command()
@click.option("--slurm-job/--no-slurm-job", default=True)
@click.argument("hyper_conf_path")
@click.argument("base_json_conf")
@click.argument("name")
def hyper_cli(slurm_job: bool, hyper_conf_path: str, base_json_conf: str, name: str):
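    """Expand the sweep at hyper_conf_path and write one runnable experiment per config.

    For each generated config this renders base_json_conf (a Jsonnet template)
    into an AllenNLP JSON config, snapshots the source, and finally writes
    {name}-jobs.sh with one launch command per experiment.
    """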
    # 1) Generate all the configuration files and directories

    # hyper_conf_path is a toml file defining the hyperparameter sweep
    configs = hyper_to_configs(hyper_conf_path)
    for c in configs:
        conf_name = random_experiment_id()
        # os.path.join needs strings, so stringify the trial number
        trial = str(c["trial"])

        # This defines a path like config/generated/rnn/{random_experiment_id}/{trial}
        conf_dir = os.path.abspath(os.path.join("config", "generated", name, conf_name, trial))
        allennlp_conf_path = os.path.join(conf_dir, f"{conf_name}.json")
        conf_path = os.path.join(conf_dir, f"{conf_name}.toml")

        # This defines a path like model/generated/rnn/{random_experiment_id}/{trial}
        serialization_dir = os.path.abspath(
            os.path.join("model", "generated", name, conf_name, trial)
        )
        # Save all this information in the new configuration file. My code in
        # particular takes only this file and reads all of its arguments from it.
        c["generated_id"] = conf_name
        c["name"] = name
        c["allennlp_conf"] = allennlp_conf_path
        c["serialization_dir"] = serialization_dir
        c["conf_dir"] = conf_dir
        c["conf_path"] = conf_path
        os.makedirs(conf_dir, exist_ok=True)
        os.makedirs(serialization_dir, exist_ok=True)
        with open(conf_path, "w") as f:
            toml.dump(c, f)
        args = []
        for key, val in c["params"].items():
            # jsonnet has a quirk: string parameters need --tla-str while
            # other values need --tla-code
            if isinstance(val, str):
                args.append(f"--tla-str {key}={val}")
            else:
                args.append(f"--tla-code {key}={val}")
        args = " ".join(args)
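        # The assembled command looks like (illustrative values):
        #   jsonnet config/rnn.jsonnet --tla-code lr=0.001 > <conf_dir>/<generated_id>.json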
        # Generate the json config
        run_jsonnet(base_json_conf, args, allennlp_conf_path)
        # Copy the source using `setup.py` to the experiment directory
        clone_src(conf_dir)

    # 2) Generate the run script, optionally making it a slurm script
    with open(f"{name}-jobs.sh", "w") as f:
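        # Each line of the jobs file is one launch command, e.g. (illustrative):
        #   sbatch --qos gpu-long --time 4-00:00:00 slurm-allennlp.sh <conf_dir> <conf_path>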
        for c in configs:
            conf_dir = c["conf_dir"]
            conf_path = c["conf_path"]
            # Use the slurm settings if the config defines them, otherwise fall
            # back to defaults specific to the UMD cluster
            if "slurm" in c:
                slurm_time = c["slurm"].get("time", "4-00:00:00")
                slurm_qos = c["slurm"].get("qos", "gpu-long")
            else:
                slurm_time = "4-00:00:00"
                slurm_qos = "gpu-long"

            if slurm_job:
                args = [
                    "sbatch",
                    "--qos",
                    slurm_qos,
                    "--time",
                    slurm_time,
                    "slurm-allennlp.sh",
                    conf_dir,
                    conf_path,
                ]
                f.write(" ".join(args) + "\n")
            else:
                f.write(f"train.sh {conf_dir} {conf_path}\n")


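# Example invocation (hypothetical file names):
#   python hyper.py --slurm-job config/rnn-hyper.toml config/rnn.jsonnet rnn
#   bash rnn-jobs.sh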
if __name__ == "__main__":
    hyper_cli()