Last active
July 13, 2023 01:19
-
-
Save samhavens/0e8b61b370fb1d8a343ccc017110b492 to your computer and use it in GitHub Desktop.
LLM Foundry Schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"$schema": "http://json-schema.org/draft-07/schema#", | |
"type": "object", | |
"properties": { | |
"name": { | |
"type": "string" | |
}, | |
"integrations": { | |
"type": "array", | |
"items": { | |
"type": "object", | |
"properties": { | |
"entity": { | |
"type": "string" | |
}, | |
"integration_type": { | |
"type": "string" | |
}, | |
"project": { | |
"type": "string" | |
}, | |
"tags": { | |
"type": "array", | |
"items": { | |
"type": "string" | |
} | |
}, | |
"git_branch": { | |
"type": "string" | |
}, | |
"git_repo": { | |
"type": "string" | |
}, | |
"git_commit": { | |
"type": "string" | |
}, | |
"pip_install": { | |
"type": "string" | |
} | |
} | |
} | |
}, | |
"cluster": { | |
"type": "string" | |
}, | |
"gpu_num": { | |
"type": "integer" | |
}, | |
"gpu_type": {
  "type": "string",
  "enum": [
    "h100_80gb",
    "a100_80gb",
    "a100_40gb"
  ]
},
"image": { | |
"type": "string" | |
}, | |
"priority": {
  "type": "string",
  "enum": [
    "high",
    "medium",
    "low"
  ]
},
"command": { | |
"type": "string" | |
}, | |
"parameters": { | |
"type": "object", | |
"properties": { | |
"run_name": { | |
"type": [ | |
"string", | |
"null" | |
] | |
}, | |
"tokenizer_name": { | |
"type": "string" | |
}, | |
"global_train_batch_size": { | |
"type": "integer" | |
}, | |
"max_duration": { | |
"type": "string" | |
}, | |
"max_seq_len": { | |
"type": "integer" | |
}, | |
"device_train_microbatch_size": { | |
"oneOf": [ | |
{ | |
"type": "integer" | |
}, | |
{ | |
"type": "string", | |
"enum": [ | |
"auto" | |
] | |
} | |
] | |
}, | |
"device_eval_batch_size": { | |
"type": "integer" | |
}, | |
"eval_first": { | |
"type": "boolean" | |
}, | |
"eval_interval": { | |
"type": [ | |
"integer", | |
"string" | |
] | |
}, | |
"save_folder": { | |
"type": "string" | |
}, | |
"save_interval": { | |
"type": "string" | |
}, | |
"save_num_checkpoints_to_keep": { | |
"type": "integer" | |
}, | |
"global_seed": { | |
"type": "integer" | |
}, | |
"seed": { | |
"type": "integer" | |
}, | |
"precision": { | |
"type": "string", | |
"enum": [ | |
"amp_bf16", | |
"amp_fp16", | |
"fp32" | |
] | |
}, | |
"load_weights_only": { | |
"type": "boolean" | |
}, | |
"autoresume": { | |
"type": "boolean" | |
}, | |
"python_log_level": { | |
"type": "string" | |
}, | |
"log_to_console": { | |
"type": "boolean" | |
}, | |
"progress_bar": { | |
"type": "boolean" | |
}, | |
"loggers": { | |
"type": "object", | |
"properties": { | |
"wandb": { | |
"type": "object" | |
}, | |
"tensorboard": { | |
"type": "object" | |
} | |
}, | |
"additionalProperties": false | |
}, | |
"model": { | |
"oneOf": [ | |
{ | |
"type": "object", | |
"properties": { | |
"name": { | |
"type": "string", | |
"enum": [ | |
"mpt_causal_lm" | |
] | |
}, | |
"init_device": { | |
"type": "string" | |
}, | |
"d_model": { | |
"type": "integer" | |
}, | |
"n_heads": { | |
"type": "integer" | |
}, | |
"n_layers": { | |
"type": "integer" | |
}, | |
"expansion_ratio": { | |
"type": "integer" | |
}, | |
"emb_pdrop": { | |
"type": "number" | |
}, | |
"resid_pdrop": { | |
"type": "number" | |
}, | |
"max_seq_len": { | |
"type": "integer" | |
}, | |
"vocab_size": { | |
"type": "integer" | |
}, | |
"tokenizer_name": { | |
"type": "string" | |
}, | |
"attn_config": { | |
"type": "object", | |
"properties": { | |
"attn_impl": { | |
"type": "string" | |
}, | |
"alibi": { | |
"type": "boolean" | |
}, | |
"clip_qkv": {
  "type": [
    "number",
    "null"
  ]
},
"attn_uses_sequence_id": { | |
"type": "boolean" | |
}, | |
"attn_pdrop": { | |
"type": "number" | |
} | |
} | |
}, | |
"norm_type": { | |
"type": "string" | |
}, | |
"no_bias": { | |
"type": "boolean" | |
}, | |
"init_config": { | |
"type": "object", | |
"properties": { | |
"name": { | |
"type": "string" | |
}, | |
"init_nonlinearity": { | |
"type": "string" | |
} | |
} | |
} | |
} | |
}, | |
{ | |
"type": "object", | |
"properties": { | |
"name": { | |
"type": "string", | |
"enum": [ | |
"hf_causal_lm" | |
] | |
}, | |
"init_device": { | |
"type": "string" | |
}, | |
"pretrained": { | |
"type": "boolean" | |
}, | |
"use_auth_token": { | |
"type": "boolean" | |
}, | |
"pretrained_model_name_or_path": { | |
"type": "string" | |
}, | |
"config_overrides": { | |
"type": "object", | |
"properties": { | |
"max_seq_len": { | |
"type": "integer" | |
}, | |
"attn_config": { | |
"type": "object", | |
"properties": { | |
"attn_impl": { | |
"type": "string" | |
}, | |
"attn_uses_sequence_id": { | |
"type": "boolean" | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
] | |
}, | |
"tokenizer": { | |
"type": "object", | |
"properties": { | |
"kwargs": { | |
"type": [ | |
"object", | |
"null" | |
], | |
"properties": { | |
"model_max_length": { | |
"type": "integer" | |
} | |
} | |
}, | |
"name": { | |
"type": "string" | |
} | |
} | |
}, | |
"algorithms": { | |
"type": "object", | |
"properties": { | |
"gradient_clipping": { | |
"type": "object", | |
"properties": { | |
"clipping_threshold": {
  "type": "number"
},
"clipping_type": { | |
"type": "string" | |
} | |
}, | |
"additionalProperties": false | |
}, | |
"alibi": { | |
"type": "object" | |
}, | |
"fused_layernorm": { | |
"type": "object" | |
}, | |
"gated_linear_units": { | |
"type": "object" | |
}, | |
"low_precision_layernorm": { | |
"type": "object" | |
} | |
}, | |
"additionalProperties": false | |
}, | |
"callbacks": { | |
"type": "object", | |
"properties": { | |
"lr_monitor": { | |
"type": "object", | |
"additionalProperties": false | |
}, | |
"memory_monitor": { | |
"type": "object", | |
"additionalProperties": false | |
}, | |
"speed_monitor": { | |
"type": "object", | |
"properties": { | |
"window_size": { | |
"type": "integer" | |
}, | |
"gpu_flops_available": { | |
"type": "number" | |
} | |
}, | |
"additionalProperties": false | |
}, | |
"fdiff": { | |
"type": "object", | |
"additionalProperties": true | |
}, | |
"runtime_estimator": { | |
"type": "object", | |
"additionalProperties": false | |
}, | |
"optimizer_monitor": { | |
"type": "object", | |
"properties": { | |
"log_optimizer_metrics": { | |
"type": "boolean" | |
} | |
}, | |
"additionalProperties": false | |
}, | |
"generate_callback": { | |
"type": "object", | |
"properties": { | |
"prompts": { | |
"type": "array", | |
"items": { | |
"type": "string" | |
} | |
} | |
}, | |
"additionalProperties": true | |
}, | |
"global_lr_scaling": { | |
"type": "object", | |
"additionalProperties": true | |
}, | |
"layer_freezing": { | |
"type": "object", | |
"additionalProperties": true | |
}, | |
"mono_ckpt_saver": { | |
"type": "object", | |
"additionalProperties": true | |
}, | |
"scheduled_gc": { | |
"type": "object", | |
"additionalProperties": true | |
} | |
}, | |
"additionalProperties": false | |
}, | |
"console_log_interval": { | |
"type": "string" | |
}, | |
"train_loader": { | |
"type": "object", | |
"properties": { | |
"dataset": { | |
"type": "object", | |
"properties": { | |
"allow_pad_trimming": { | |
"type": "boolean" | |
}, | |
"decoder_only_format": { | |
"type": "boolean" | |
}, | |
"hf_name": { | |
"type": "string" | |
}, | |
"max_seq_len": { | |
"type": "integer" | |
}, | |
"packing_ratio": {
  "type": "number"
},
"shuffle": { | |
"type": "boolean" | |
}, | |
"split": { | |
"type": "string" | |
}, | |
"tokenizer_name": { | |
"type": "string" | |
} | |
} | |
}, | |
"drop_last": { | |
"type": "boolean" | |
}, | |
"name": { | |
"type": "string" | |
}, | |
"num_workers": { | |
"type": "integer" | |
}, | |
"persistent_workers": { | |
"type": "boolean" | |
}, | |
"pin_memory": { | |
"type": "boolean" | |
}, | |
"prefetch_factor": { | |
"type": "integer" | |
}, | |
"timeout": { | |
"type": "integer" | |
} | |
}, | |
"additionalProperties": false | |
}, | |
"fsdp_config": { | |
"type": "object", | |
"properties": { | |
"activation_checkpointing": { | |
"type": "boolean" | |
}, | |
"activation_checkpointing_reentrant": { | |
"type": "boolean" | |
}, | |
"activation_cpu_offload": { | |
"type": "boolean" | |
}, | |
"limit_all_gathers": { | |
"type": "boolean" | |
}, | |
"mixed_precision": { | |
"type": "string" | |
}, | |
"sharding_strategy": { | |
"type": "string" | |
}, | |
"state_dict_type":{ | |
"type": "string", | |
"enum": [ | |
"full", | |
"local", | |
"sharded" | |
] | |
}, | |
"sync_module_states": { | |
"type": "boolean" | |
}, | |
"verbose": { | |
"type": "boolean" | |
} | |
}, | |
"additionalProperties": false | |
}, | |
"optimizer": { | |
"oneOf": [ | |
{ | |
"type": "object", | |
"properties": { | |
"name": { | |
"type": "string", | |
"enum": [ | |
"decoupled_adamw", | |
"decoupled_lionw" | |
] | |
}, | |
"betas": { | |
"type": "array", | |
"items": { | |
"type": "number" | |
} | |
}, | |
"eps": { | |
"type": "number" | |
}, | |
"lr": { | |
"type": "number" | |
}, | |
"weight_decay": { | |
"type": "number", | |
"minimum": 0, | |
"maximum": 1.0e-3 | |
} | |
}, | |
"required": [ | |
"name", | |
"betas", | |
"eps", | |
"lr", | |
"weight_decay" | |
], | |
"additionalProperties": false | |
}, | |
{ | |
"type": "object", | |
"properties": { | |
"name": { | |
"type": "string", | |
"enum": [ | |
"clip_lion" | |
] | |
}, | |
"betas": { | |
"type": "array", | |
"items": { | |
"type": "number" | |
} | |
}, | |
"lr": { | |
"type": "number" | |
}, | |
"weight_decay": {
  "type": "number"
},
"outlier_threshold": { | |
"type": "number" | |
} | |
}, | |
"required": [ | |
"name", | |
"betas", | |
"lr", | |
"weight_decay", | |
"outlier_threshold" | |
], | |
"additionalProperties": false | |
}, | |
{ | |
"type": "object", | |
"properties": { | |
"name": { | |
"type": "string", | |
"enum": [ | |
"adalr_lion" | |
] | |
}, | |
"betas": { | |
"type": "array", | |
"items": { | |
"type": "number" | |
} | |
}, | |
"lr": { | |
"type": "number" | |
}, | |
"weight_decay": {
  "type": "number"
},
"outlier_threshold": { | |
"type": "number" | |
}, | |
"timeout": { | |
"type": "integer" | |
}, | |
"lr_penalty": { | |
"type": "number" | |
}, | |
"min_scale": { | |
"type": "number" | |
} | |
}, | |
"required": [ | |
"name", | |
"betas", | |
"lr", | |
"weight_decay", | |
"outlier_threshold", | |
"timeout", | |
"lr_penalty", | |
"min_scale" | |
], | |
"additionalProperties": false | |
} | |
] | |
}, | |
"scheduler": { | |
"type": "object", | |
"properties": { | |
"alpha_f": {
  "type": "number"
},
"name": { | |
"type": "string", | |
"enum": [ | |
"constant_with_warmup", | |
"cosine_with_warmup", | |
"linear_decay_with_warmup" | |
] | |
}, | |
"t_warmup": { | |
"type": "string" | |
} | |
}, | |
"additionalProperties": false, | |
"required": [ | |
"name", | |
"t_warmup" | |
] | |
} | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment