config.yaml for Community Model Build for granite-3.0-8b-base
# Chat configuration section.
chat:
  # Predefined setting or environment that influences the behavior and responses of
  # the chat assistant. Each context is associated with a specific prompt that
  # guides the assistant on how to respond to user inputs. Available contexts:
  # default, cli_helper.
  # Default: default
  context: default
  # Directory where chat logs are stored.
  # Default: /home/instructlab/.local/share/instructlab/chatlogs
  logs_dir: /home/instructlab/.local/share/instructlab/chatlogs
  # The maximum number of tokens that can be generated in the chat completion. Be
  # aware that larger values use more memory.
  # Default: None
  max_tokens:
  # Model to be used for chatting with.
  # Default: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  model: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  # Filepath of a dialog session file.
  # Default: None
  session:
  # Controls the randomness of the model's responses. Lower values make the output
  # more deterministic, while higher values produce more random results.
  # Default: 1.0
  temperature: 1.0
  # Enable vim keybindings for chat.
  # Default: False
  vi_mode: false
  # Renders vertical overflow if enabled, displays ellipses otherwise.
  # Default: True
  visible_overflow: true
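  # A hedged example override (the value below is illustrative, not part of the
  # original config): capping completion length reduces memory use on smaller
  # machines. Uncomment to enable:
  # max_tokens: 512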
# Evaluate configuration section.
evaluate:
  # Base taxonomy branch.
  # Default: None
  base_branch:
  # Base model to compare with 'model' for mt_bench_branch and mmlu_branch.
  # Default: instructlab/granite-7b-lab
  base_model: /home/instructlab/.cache/instructlab/models/instructlab/granite-7b-lab
  # Taxonomy branch containing custom skills/knowledge that should be used for
  # evaluation runs.
  # Default: None
  branch:
  # Number of GPUs to use for running evaluation.
  # Default: None
  gpus: 4
  # MMLU benchmarking settings.
  mmlu:
    # Batch size for evaluation. Valid values are a positive integer or 'auto' to
    # select the largest batch size that will fit in memory.
    # Default: auto
    batch_size: auto
    # Number of question-answer pairs provided in the context preceding the question
    # used for evaluation.
    # Default: 5
    few_shots: 5
  # Settings to run MMLU against a branch of taxonomy containing custom
  # skills/knowledge used for training.
  mmlu_branch:
    # Directory where custom MMLU tasks are stored.
    # Default: /home/instructlab/.local/share/instructlab/datasets
    tasks_dir: /home/instructlab/.local/share/instructlab/datasets
  # Model to be evaluated.
  # Default: None
  model: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  # Multi-turn benchmarking settings for skills.
  mt_bench:
    # Judge model for mt_bench and mt_bench_branch.
    # Default: prometheus-eval/prometheus-8x7b-v2.0
    judge_model: /home/instructlab/.cache/instructlab/models/prometheus-eval/prometheus-8x7b-v2.0
    # Number of workers to use for evaluation with mt_bench or mt_bench_branch. Must
    # be a positive integer or 'auto'.
    # Default: auto
    max_workers: auto
    # Directory where evaluation results are stored.
    # Default: /home/instructlab/.local/share/instructlab/internal/eval_data
    output_dir: /home/instructlab/.local/share/instructlab/internal/eval_data
  # Settings to run MT-Bench against a branch of taxonomy containing custom
  # skills/knowledge used for training.
  mt_bench_branch:
    # Path to where the base taxonomy is stored.
    # Default: /home/instructlab/.local/share/instructlab/taxonomy
    taxonomy_path: /home/instructlab/.local/share/instructlab/taxonomy
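# A hedged usage sketch for this section (the exact flags vary across
# InstructLab releases; verify with `ilab model evaluate --help`):
#   ilab model evaluate --benchmark mmlu
#   ilab model evaluate --benchmark mt_bench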
# General configuration section.
general:
  # Debug level for logging.
  # Default: 0
  debug_level: 0
  # Log format. https://docs.python.org/3/library/logging.html#logrecord-attributes
  # Default: %(levelname)s %(asctime)s %(name)s:%(lineno)d: %(message)s
  log_format: '%(levelname)s %(asctime)s %(name)s:%(lineno)d: %(message)s'
  # Log level for logging.
  # Default: INFO
  log_level: INFO
# Generate configuration section.
generate:
  # Maximum number of words per chunk.
  # Default: 1000
  chunk_word_count: 1000
  # The maximum number of tokens for the model to generate during knowledge
  # generation. A lower number yields less data but a faster SDG run; lowering
  # this value is recommended on consumer hardware.
  # Default: 4096
  max_num_tokens: 4096
  # Teacher model that will be used to synthetically generate training data.
  # Default: /home/instructlab/.cache/instructlab/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf
  model: /home/instructlab/.cache/instructlab/models/mistralai/Mixtral-8x7B-Instruct-v0.1
  # Number of CPU cores to use for generation.
  # Default: 10
  num_cpus: 10
  # Number of instructions to use.
  # Default: -1
  # Deprecated: see 'sdg_scale_factor' instead.
  num_instructions: -1
  # Directory where generated datasets are stored.
  # Default: /home/instructlab/.local/share/instructlab/datasets
  output_dir: /home/instructlab/.local/share/instructlab/datasets
  # Data generation pipeline to use. Available: 'simple', 'full', or a valid path to
  # a directory of pipeline workflow YAML files. Note that 'full' requires a larger
  # teacher model, Mixtral-8x7B.
  # Default: full
  pipeline: full
  # The total number of instructions to be generated.
  # Default: 30
  sdg_scale_factor: 30
  # Path to the seed file to be used for generation.
  # Default: /home/instructlab/.local/share/instructlab/internal/seed_tasks.json
  # Deprecated
  seed_file: /home/instructlab/.local/share/instructlab/internal/seed_tasks.json
  # Branch of taxonomy used to calculate the diff against.
  # Default: origin/main
  taxonomy_base: origin/main
  # Directory where the taxonomy is stored and accessed from.
  # Default: /home/instructlab/.local/share/instructlab/taxonomy
  taxonomy_path: /home/instructlab/.local/share/instructlab/taxonomy
  # Teacher configuration.
  teacher:
    # Serving backend to use to host the model.
    # Default: None
    # Examples:
    # - vllm
    # - llama-cpp
    backend: vllm
    # Chat template to supply to the model. Possible values: 'auto' (default),
    # 'tokenizer', or a path to a jinja2 file.
    # Default: None
    # Examples:
    # - auto
    # - tokenizer
    # - A filesystem path expressing the location of a custom template
    chat_template:
    # Host and port to serve on.
    # Default: 127.0.0.1:8000
    host_port: 127.0.0.1:8000
    # llama-cpp serving settings.
    llama_cpp:
      # Number of model layers to offload to GPU. -1 means all layers.
      # Default: -1
      gpu_layers: -1
      # Large Language Model family.
      # Default: ''
      # Examples:
      # - granite
      # - mixtral
      llm_family: ''
      # Maximum number of tokens that can be processed by the model.
      # Default: 4096
      max_ctx_size: 4096
    # Directory where the model to be served is stored.
    # Default: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
    model_path: /home/instructlab/.cache/instructlab/models/mistralai/Mixtral-8x7B-Instruct-v0.1
    # vLLM serving settings.
    vllm:
      # Number of GPUs to use.
      # Default: None
      gpus: 4
      # Large Language Model family.
      # Default: ''
      # Examples:
      # - granite
      # - mixtral
      llm_family: mixtral
      # Maximum number of attempts to start the vLLM server.
      # Default: 120
      max_startup_attempts: 120
      # vLLM-specific arguments. All settings can be passed as a list of strings, see:
      # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
      # Default: []
      # Examples:
      # - ['--dtype', 'auto']
      # - ['--lora-alpha', '32']
      vllm_args: []
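  # Consistency note: 'pipeline: full' above requires the larger Mixtral-8x7B
  # teacher, which is why 'model', 'teacher.model_path', and
  # 'teacher.vllm.llm_family' all point at Mixtral rather than the default
  # Mistral-7B GGUF. A hedged invocation sketch (verify the exact command with
  # `ilab data generate --help`):
  #   ilab data generate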
# Metadata pertaining to the specifics of the system which the Configuration is
# meant to be applied to.
metadata:
  # Manufacturer, family, and SKU of the system CPU, ex: Apple M3 Max
  # Default: None
  cpu_info:
  # Number of GPUs on the system, ex: 8
  # Default: None
  gpu_count: 8
  # Family of the system GPU, ex: H100
  # Default: None
  gpu_family: A100
  # Manufacturer of the system GPU, ex: Nvidia
  # Default: None
  gpu_manufacturer: Nvidia
  # Specific SKU-related information about the given GPU, ex: PCIe, NVL
  # Default: None
  gpu_sku:
  - 80GB HBM3
  - NVL
  - PCIe
# Serve configuration section.
serve:
  # Serving backend to use to host the model.
  # Default: None
  # Examples:
  # - vllm
  # - llama-cpp
  backend: vllm
  # Chat template to supply to the model. Possible values: 'auto' (default),
  # 'tokenizer', or a path to a jinja2 file.
  # Default: None
  # Examples:
  # - auto
  # - tokenizer
  # - A filesystem path expressing the location of a custom template
  chat_template: auto
  # Host and port to serve on.
  # Default: 127.0.0.1:8000
  host_port: 127.0.0.1:8000
  # llama-cpp serving settings.
  llama_cpp:
    # Number of model layers to offload to GPU. -1 means all layers.
    # Default: -1
    gpu_layers: -1
    # Large Language Model family.
    # Default: ''
    # Examples:
    # - granite
    # - mixtral
    llm_family: ''
    # Maximum number of tokens that can be processed by the model.
    # Default: 4096
    max_ctx_size: 4096
  # Directory where the model to be served is stored.
  # Default: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  model_path: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  # vLLM serving settings.
  vllm:
    # Number of GPUs to use.
    # Default: None
    gpus: 4
    # Large Language Model family.
    # Default: ''
    # Examples:
    # - granite
    # - mixtral
    llm_family: ''
    # Maximum number of attempts to start the vLLM server.
    # Default: 120
    max_startup_attempts: 120
    # vLLM-specific arguments. All settings can be passed as a list of strings, see:
    # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
    # Default: []
    # Examples:
    # - ['--dtype', 'auto']
    # - ['--lora-alpha', '32']
    vllm_args:
    - --tensor-parallel-size
    - '4'
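  # Note: '--tensor-parallel-size 4' shards the served model across the four
  # GPUs configured in 'serve.vllm.gpus' above; in vLLM, the tensor parallel
  # size should match the number of GPUs used for serving.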
# Train configuration section.
train:
  # Additional arguments to pass to the training script. These arguments are passed
  # as key-value pairs to the training script.
  # Default: {}
  additional_args:
    learning_rate: 2e-5
    lora_alpha: 32
    lora_dropout: 0.1
    warmup_steps: 25
    use_dolomite: true
  # Save a checkpoint at the end of each epoch.
  # Default: True
  checkpoint_at_epoch: true
  # Directory where periodic training checkpoints are stored.
  # Default: /home/instructlab/.local/share/instructlab/checkpoints
  ckpt_output_dir: /home/instructlab/.local/share/instructlab/checkpoints
  # Directory where the processed training data is stored (post
  # filtering/tokenization/masking).
  # Default: /home/instructlab/.local/share/instructlab/internal
  data_output_dir: /home/instructlab/.local/share/instructlab/internal
  # For the training library (primary training method), this specifies the path to
  # the dataset file. For legacy training (macOS/Linux), this specifies the path to
  # the directory.
  # Default: /home/instructlab/.local/share/instructlab/datasets
  data_path: /home/instructlab/.local/share/instructlab/datasets
  # Allow CPU offload for the DeepSpeed optimizer.
  # Default: False
  deepspeed_cpu_offload_optimizer: true
  # PyTorch device to use. Use 'cpu' for 'simple' and 'full' training on Linux. Use
  # 'mps' for 'full' training with macOS Metal Performance Shaders. Use 'cuda' for
  # Nvidia CUDA / AMD ROCm GPUs. Use 'hpu' for Intel Gaudi GPUs.
  # Default: cpu
  # Examples:
  # - cpu
  # - mps
  # - cuda
  # - hpu
  device: cuda
  # Whether or not to disable the use of flash-attention during training. This is
  # useful when using older GPUs.
  # Default: False
  disable_flash_attn: false
  # Distributed training backend framework for GPU-accelerated full fine-tuning.
  # Default: fsdp
  distributed_backend: fsdp
  # The number of samples in a batch that the model should see before its parameters
  # are updated.
  # Default: 64
  effective_batch_size: 128
  # Allow CPU offload for the FSDP optimizer.
  # Default: False
  fsdp_cpu_offload_optimizer: false
  # Boolean to indicate if the model being trained is a padding-free transformer
  # model such as Granite.
  # Default: False
  is_padding_free: false
  # The data type for quantization in LoRA training. Valid options are 'None' and
  # 'nf4'.
  # Default: nf4
  # Examples:
  # - nf4
  lora_quantize_dtype:
  # Rank of the low-rank matrices to be used during training.
  # Default: 0
  lora_rank: 0
  # Maximum tokens per GPU for each batch that will be handled in a single step. If
  # running into out-of-memory errors, this value can be lowered, but not below
  # `max_seq_len`.
  # Default: 5000
  max_batch_len: 60000
  # Maximum sequence length to be included in the training set. Samples exceeding
  # this length will be dropped.
  # Default: 4096
  max_seq_len: 4096
  # Directory where the model to be trained is stored.
  # Default: instructlab/granite-7b-lab
  #model_path: /home/instructlab/.cache/instructlab/models/instructlab/granite-7b-lab
  model_path: /home/instructlab/.cache/instructlab/models/ibm-granite/granite-3.0-8b-base
  # Number of GPUs to use for training. This value is not supported in legacy
  # training or on macOS.
  # Default: 1
  nproc_per_node: 8
  # Number of epochs to run training for.
  # Default: 10
  num_epochs: 8
  # Base directory for organization of end-to-end intermediate outputs.
  # Default: /home/instructlab/.local/share/instructlab/phased
  phased_base_dir: /home/instructlab/.local/share/instructlab/phased
  # Judge model path for phased MT-Bench evaluation.
  # Default: /home/instructlab/.cache/instructlab/models/prometheus-eval/prometheus-8x7b-v2.0
  phased_mt_bench_judge: /home/instructlab/.cache/instructlab/models/prometheus-eval/prometheus-8x7b-v2.0
  # Phased phase1 effective batch size.
  # Default: 128
  phased_phase1_effective_batch_size: 128
  # Learning rate for phase1 knowledge training.
  # Default: 2e-05
  phased_phase1_learning_rate: 2e-05
  # Number of epochs to run training for during phase1 (experimentally optimal
  # number is 7).
  # Default: 7
  phased_phase1_num_epochs: 7
  # Number of samples the model should see before saving a checkpoint during phase1.
  # Disabled when set to 0.
  # Default: 0
  phased_phase1_samples_per_save: 0
  # Phased phase2 effective batch size.
  # Default: 3840
  phased_phase2_effective_batch_size: 1024
  # Learning rate for phase2 skills training.
  # Default: 6e-06
  phased_phase2_learning_rate: 6e-06
  # Number of epochs to run training for during phase2.
  # Default: 10
  phased_phase2_num_epochs: 10
  # Number of samples the model should see before saving a checkpoint during phase2.
  # Disabled when set to 0.
  # Default: 0
  phased_phase2_samples_per_save: 0
  # Training pipeline to use. 'simple' is for systems with limited resources, 'full'
  # is for more capable consumer systems (64 GB of RAM), and 'accelerated' is for
  # systems with a dedicated GPU.
  # Default: full
  # Examples:
  # - simple
  # - full
  # - accelerated
  pipeline: accelerated
  # Number of samples the model should see before saving a checkpoint.
  # Default: 250000
  save_samples: 0
  # Optional path to a yaml file that tracks the progress of multiphase training.
  # Default: None
  training_journal:
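  # Phased-training note: phase1 trains knowledge and phase2 trains skills, with
  # the Prometheus judge configured above scoring MT-Bench between phases. A
  # hedged invocation sketch (the strategy name is assumed from the standard
  # InstructLab CLI and may differ by release; verify with
  # `ilab model train --help`):
  #   ilab model train --strategy lab-multiphase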
# Configuration file structure version.
# Default: 1.0.0
version: 1.0.0