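"""Generate hyperparameter sweep configs and a launch script for AllenNLP experiments.

Reads a TOML file describing a sweep, expands it into one config per parameter
setting and trial, renders the matching Jsonnet template into an AllenNLP JSON
config, and writes a shell script that launches every run (optionally via slurm).
"""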
import copy
import os
import random
import subprocess

import click
import toml
from sklearn.model_selection import ParameterGrid


def run_jsonnet(base_model: str, args: str, out_path: str):
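    """Render the base Jsonnet config with the given --tla-* arguments into out_path."""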
    subprocess.run(f"jsonnet {base_model} {args} > {out_path}", shell=True, check=True)


def clone_src(target_dir: str):
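    """Build the project with `setup.py` and copy the built `qb` package into target_dir."""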
    subprocess.run("python setup.py build", shell=True, check=True)
    subprocess.run(f"cp -r build/lib/qb {target_dir}", shell=True, check=True)


def random_experiment_id():
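    """Return a random id used to name a generated experiment."""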
    return str(random.randint(1_000_000, 2_000_000))


def hyper_to_configs(path: str):
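    """Expand a hyperparameter TOML file into one config dict per setting and trial.

    The file may define an optional `hyper` table mapping (dotted) parameter
    names to lists of values; the grid of all combinations is expanded, each
    repeated `n_trials` times. A hypothetical sweep file might look like:

        n_trials = 2

        [params]
        lr = 0.001

        [hyper]
        "params.lr" = [0.001, 0.01]
    """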
    # Read the parent config, e.g. rnn.toml
    with open(path) as f:
        hyper_conf = toml.load(f)
    configs = []
    n_trials = hyper_conf.get("n_trials", 1)
    # If it defines a hyperparameter sweep, generate a config for each setting
    if "hyper" in hyper_conf:
        # ParameterGrid takes a dict mapping parameter names to lists of
        # values and expands it into the full sweep
        grid = ParameterGrid(hyper_conf["hyper"])
        del hyper_conf["hyper"]
        for params in grid:
            for trial in range(n_trials):
                # Deep copy so each generated config gets its own nested tables
                conf = copy.deepcopy(hyper_conf)
                # Fill in each swept value, resolving dotted names like
                # "params.lr" into the nested config tables
                for name, val in params.items():
                    splits = name.split(".")
                    access = conf
                    for part in splits[:-1]:
                        access = access[part]
                    access[splits[-1]] = val
                # Record which trial this configuration belongs to
                conf["trial"] = trial
                configs.append(conf)
        return configs
    else:
        # No sweep defined: treat the file itself as a single-trial config
        hyper_conf["trial"] = 0
        return [hyper_conf]


@click.command()
@click.option("--slurm-job/--no-slurm-job", default=True)
@click.argument("hyper_conf_path")
@click.argument("base_json_conf")
@click.argument("name")
def hyper_cli(slurm_job: bool, hyper_conf_path: str, base_json_conf: str, name: str):
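    """Expand the sweep at hyper_conf_path and write one runnable experiment per config.

    For each generated config this renders base_json_conf (a Jsonnet template)
    into an AllenNLP JSON config, snapshots the source, and finally writes
    {name}-jobs.sh with one launch command per experiment.
    """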
    # 1) Generate all the configuration files and directories

    # hyper_conf_path is a toml file defining the hyperparameter sweep
    configs = hyper_to_configs(hyper_conf_path)
    for c in configs:
        conf_name = random_experiment_id()
        # os.path.join needs strings, so stringify the trial number
        trial = str(c["trial"])

        # This defines a path like config/generated/rnn/{random_experiment_id}/{trial}
        conf_dir = os.path.abspath(os.path.join("config", "generated", name, conf_name, trial))
        allennlp_conf_path = os.path.join(conf_dir, f"{conf_name}.json")
        conf_path = os.path.join(conf_dir, f"{conf_name}.toml")

        # This defines a path like model/generated/rnn/{random_experiment_id}/{trial}
        serialization_dir = os.path.abspath(
            os.path.join("model", "generated", name, conf_name, trial)
        )
        # Save all this information in the new configuration file. My code in
        # particular takes only this file and reads all of its arguments from it.
        c["generated_id"] = conf_name
        c["name"] = name
        c["allennlp_conf"] = allennlp_conf_path
        c["serialization_dir"] = serialization_dir
        c["conf_dir"] = conf_dir
        c["conf_path"] = conf_path
        os.makedirs(conf_dir, exist_ok=True)
        os.makedirs(serialization_dir, exist_ok=True)
        with open(conf_path, "w") as f:
            toml.dump(c, f)
        args = []
        for key, val in c["params"].items():
            # jsonnet has a quirk: string parameters need --tla-str while
            # other values need --tla-code
            if isinstance(val, str):
                args.append(f"--tla-str {key}={val}")
            else:
                args.append(f"--tla-code {key}={val}")
        args = " ".join(args)
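        # The assembled command looks like (illustrative values):
        #   jsonnet config/rnn.jsonnet --tla-code lr=0.001 > <conf_dir>/<generated_id>.json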
        # Generate the json config
        run_jsonnet(base_json_conf, args, allennlp_conf_path)
        # Copy the source using `setup.py` to the experiment directory
        clone_src(conf_dir)

    # 2) Generate the run script, optionally making it a slurm script
    with open(f"{name}-jobs.sh", "w") as f:
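        # Each line of the jobs file is one launch command, e.g. (illustrative):
        #   sbatch --qos gpu-long --time 4-00:00:00 slurm-allennlp.sh <conf_dir> <conf_path>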
        for c in configs:
            conf_dir = c["conf_dir"]
            conf_path = c["conf_path"]
            # Use the slurm settings if the config defines them, otherwise fall
            # back to defaults specific to the UMD cluster
            if "slurm" in c:
                slurm_time = c["slurm"].get("time", "4-00:00:00")
                slurm_qos = c["slurm"].get("qos", "gpu-long")
            else:
                slurm_time = "4-00:00:00"
                slurm_qos = "gpu-long"

            if slurm_job:
                args = [
                    "sbatch",
                    "--qos",
                    slurm_qos,
                    "--time",
                    slurm_time,
                    "slurm-allennlp.sh",
                    conf_dir,
                    conf_path,
                ]
                f.write(" ".join(args) + "\n")
            else:
                f.write(f"train.sh {conf_dir} {conf_path}\n")


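# Example invocation (hypothetical file names):
#   python hyper.py --slurm-job config/rnn-hyper.toml config/rnn.jsonnet rnn
#   bash rnn-jobs.sh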
if __name__ == "__main__":
    hyper_cli()