config.yaml for Community Model Build for granite-3.0-8b-base
# Chat configuration section.
chat:
  # Predefined setting or environment that influences the behavior and responses of
  # the chat assistant. Each context is associated with a specific prompt that
  # guides the assistant on how to respond to user inputs. Available contexts:
  # default, cli_helper.
  # Default: default
  context: default
  # Directory where chat logs are stored.
  # Default: /home/instructlab/.local/share/instructlab/chatlogs
  logs_dir: /home/instructlab/.local/share/instructlab/chatlogs
  # The maximum number of tokens that can be generated in the chat completion. Be
  # aware that larger values use more memory.
  # Default: None
  max_tokens:
  # Model to be used for chatting with.
  # Default: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  model: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  # Filepath of a dialog session file.
  # Default: None
  session:
  # Controls the randomness of the model's responses. Lower values make the output
  # more deterministic, while higher values produce more random results.
  # Default: 1.0
  temperature: 1.0
  # Enable vim keybindings for chat.
  # Default: False
  vi_mode: false
  # Renders vertical overflow if enabled, displays ellipses otherwise.
  # Default: True
  visible_overflow: true
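  # Usage note (illustrative, not part of the generated config): this section is
  # read by `ilab model chat`. A more deterministic session can be requested by
  # lowering the temperature here, for example:
  #   temperature: 0.7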
# Evaluate configuration section.
evaluate:
  # Base taxonomy branch.
  # Default: None
  base_branch:
  # Base model to compare with 'model' for mt_bench_branch and mmlu_branch.
  # Default: instructlab/granite-7b-lab
  base_model: /home/instructlab/.cache/instructlab/models/instructlab/granite-7b-lab
  # Taxonomy branch containing custom skills/knowledge that should be used for
  # evaluation runs.
  # Default: None
  branch:
  # Number of GPUs to use for running evaluation.
  # Default: None
  gpus: 4
  # MMLU benchmarking settings.
  mmlu:
    # Batch size for evaluation. Valid values are a positive integer or 'auto' to
    # select the largest batch size that will fit in memory.
    # Default: auto
    batch_size: auto
    # Number of question-answer pairs provided in the context preceding the question
    # used for evaluation.
    # Default: 5
    few_shots: 5
  # Settings to run MMLU against a branch of taxonomy containing custom
  # skills/knowledge used for training.
  mmlu_branch:
    # Directory where custom MMLU tasks are stored.
    # Default: /home/instructlab/.local/share/instructlab/datasets
    tasks_dir: /home/instructlab/.local/share/instructlab/datasets
  # Model to be evaluated.
  # Default: None
  model: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  # Multi-turn benchmarking settings for skills.
  mt_bench:
    # Judge model for mt_bench and mt_bench_branch.
    # Default: prometheus-eval/prometheus-8x7b-v2.0
    judge_model: /home/instructlab/.cache/instructlab/models/prometheus-eval/prometheus-8x7b-v2.0
    # Number of workers to use for evaluation with mt_bench or mt_bench_branch. Must
    # be a positive integer or 'auto'.
    # Default: auto
    max_workers: auto
    # Directory where evaluation results are stored.
    # Default: /home/instructlab/.local/share/instructlab/internal/eval_data
    output_dir: /home/instructlab/.local/share/instructlab/internal/eval_data
  # Settings to run MT-Bench against a branch of taxonomy containing custom
  # skills/knowledge used for training.
  mt_bench_branch:
    # Path to where base taxonomy is stored.
    # Default: /home/instructlab/.local/share/instructlab/taxonomy
    taxonomy_path: /home/instructlab/.local/share/instructlab/taxonomy
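  # Usage note (illustrative): `ilab model evaluate` reads this section; the
  # benchmark is selected on the command line, e.g.:
  #   ilab model evaluate --benchmark mt_bench
  # The mmlu, mmlu_branch, and mt_bench_branch settings above configure the
  # other benchmark choices.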
# General configuration section.
general:
  # Debug level for logging.
  # Default: 0
  debug_level: 0
  # Log format. https://docs.python.org/3/library/logging.html#logrecord-attributes
  # Default: %(levelname)s %(asctime)s %(name)s:%(lineno)d: %(message)s
  log_format: '%(levelname)s %(asctime)s %(name)s:%(lineno)d: %(message)s'
  # Log level for logging.
  # Default: INFO
  log_level: INFO
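  # Illustrative example of a line emitted under the log_format above (the
  # logger name and values are hypothetical):
  #   INFO 2024-12-13 18:03:00,123 instructlab.model.serve:142: Starting server process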
# Generate configuration section.
generate:
  # Maximum number of words per chunk.
  # Default: 1000
  chunk_word_count: 1000
  # The maximum number of tokens for the model to generate during knowledge
  # generation. A lower number yields less data but a faster SDG run; lowering it
  # is recommended on consumer hardware.
  # Default: 4096
  max_num_tokens: 4096
  # Teacher model that will be used to synthetically generate training data.
  # Default: /home/instructlab/.cache/instructlab/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf
  model: /home/instructlab/.cache/instructlab/models/mistralai/Mixtral-8x7B-Instruct-v0.1
  # Number of CPU cores to use for generation.
  # Default: 10
  num_cpus: 10
  # Number of instructions to use.
  # Default: -1
  # Deprecated: see 'sdg_scale_factor' instead.
  num_instructions: -1
  # Directory where generated datasets are stored.
  # Default: /home/instructlab/.local/share/instructlab/datasets
  output_dir: /home/instructlab/.local/share/instructlab/datasets
  # Data generation pipeline to use. Available: 'simple', 'full', or a valid path to
  # a directory of pipeline workflow YAML files. Note that 'full' requires a larger
  # teacher model, Mixtral-8x7B.
  # Default: full
  pipeline: full
  # The total number of instructions to be generated.
  # Default: 30
  sdg_scale_factor: 30
  # Path to seed file to be used for generation.
  # Default: /home/instructlab/.local/share/instructlab/internal/seed_tasks.json
  # Deprecated
  seed_file: /home/instructlab/.local/share/instructlab/internal/seed_tasks.json
  # Branch of taxonomy used to calculate diff against.
  # Default: origin/main
  taxonomy_base: origin/main
  # Directory where taxonomy is stored and accessed from.
  # Default: /home/instructlab/.local/share/instructlab/taxonomy
  taxonomy_path: /home/instructlab/.local/share/instructlab/taxonomy
  # Teacher configuration.
  teacher:
    # Serving backend to use to host the model.
    # Default: None
    # Examples:
    # - vllm
    # - llama-cpp
    backend: vllm
    # Chat template to supply to the model. Possible values: 'auto' (default),
    # 'tokenizer', or a path to a jinja2 file.
    # Default: None
    # Examples:
    # - auto
    # - tokenizer
    # - A filesystem path expressing the location of a custom template
    chat_template:
    # Host and port to serve on.
    # Default: 127.0.0.1:8000
    host_port: 127.0.0.1:8000
    # llama-cpp serving settings.
    llama_cpp:
      # Number of model layers to offload to GPU. -1 means all layers.
      # Default: -1
      gpu_layers: -1
      # Large Language Model Family
      # Default: ''
      # Examples:
      # - granite
      # - mixtral
      llm_family: ''
      # Maximum number of tokens that can be processed by the model.
      # Default: 4096
      max_ctx_size: 4096
      # Directory where the model to be served is stored.
      # Default: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
      model_path: /home/instructlab/.cache/instructlab/models/mistralai/Mixtral-8x7B-Instruct-v0.1
    # vLLM serving settings.
    vllm:
      # Number of GPUs to use.
      # Default: None
      gpus: 4
      # Large Language Model Family
      # Default: ''
      # Examples:
      # - granite
      # - mixtral
      llm_family: mixtral
      # Maximum number of attempts to start the vLLM server.
      # Default: 120
      max_startup_attempts: 120
      # vLLM-specific arguments. All settings can be passed as a list of strings, see:
      # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
      # Default: []
      # Examples:
      # - ['--dtype', 'auto']
      # - ['--lora-alpha', '32']
      vllm_args: []
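  # Usage note (illustrative): `ilab data generate` consumes this section. The
  # 'full' pipeline selected above relies on the larger Mixtral-8x7B teacher,
  # which the teacher.vllm settings serve across 4 GPUs for the duration of the
  # SDG run.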
# Metadata pertaining to the specifics of the system that the configuration is
# meant to be applied to.
metadata:
  # Manufacturer, Family, and SKU of the system CPU, ex: Apple M3 Max
  # Default: None
  cpu_info:
  # Number of GPUs on the system, ex: 8
  # Default: None
  gpu_count: 8
  # Family of the system GPU, ex: H100
  # Default: None
  gpu_family: A100
  # Manufacturer of the system GPU, ex: Nvidia
  # Default: None
  gpu_manufacturer: Nvidia
  # Specific SKU-related information about the given GPU, ex: PCIe, NVL
  # Default: None
  gpu_sku:
  - 80GB HBM3
  - NVL
  - PCIe
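  # Consistency note (observational): this build targets an 8x Nvidia A100 80GB
  # host; training below uses all 8 GPUs (nproc_per_node: 8), while serving and
  # evaluation use 4 of them (gpus: 4, --tensor-parallel-size 4).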
# Serve configuration section.
serve:
  # Serving backend to use to host the model.
  # Default: None
  # Examples:
  # - vllm
  # - llama-cpp
  backend: vllm
  # Chat template to supply to the model. Possible values: 'auto' (default),
  # 'tokenizer', a path to a jinja2 file.
  # Default: None
  # Examples:
  # - auto
  # - tokenizer
  # - A filesystem path expressing the location of a custom template
  chat_template: auto
  # Host and port to serve on.
  # Default: 127.0.0.1:8000
  host_port: 127.0.0.1:8000
  # llama-cpp serving settings.
  llama_cpp:
    # Number of model layers to offload to GPU. -1 means all layers.
    # Default: -1
    gpu_layers: -1
    # Large Language Model Family
    # Default: ''
    # Examples:
    # - granite
    # - mixtral
    llm_family: ''
    # Maximum number of tokens that can be processed by the model.
    # Default: 4096
    max_ctx_size: 4096
    # Directory where the model to be served is stored.
    # Default: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
    model_path: /home/instructlab/.cache/instructlab/models/granite-7b-lab-Q4_K_M.gguf
  # vLLM serving settings.
  vllm:
    # Number of GPUs to use.
    # Default: None
    gpus: 4
    # Large Language Model Family
    # Default: ''
    # Examples:
    # - granite
    # - mixtral
    llm_family: ''
    # Maximum number of attempts to start the vLLM server.
    # Default: 120
    max_startup_attempts: 120
    # vLLM-specific arguments. All settings can be passed as a list of strings, see:
    # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
    # Default: []
    # Examples:
    # - ['--dtype', 'auto']
    # - ['--lora-alpha', '32']
    vllm_args:
    - --tensor-parallel-size
    - '4'
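  # Usage note (illustrative): `ilab model serve` reads this section. The
  # vllm_args above shard the served model across 4 GPUs via tensor
  # parallelism, matching the `gpus: 4` setting.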
# Train configuration section.
train:
  # Additional arguments to pass to the training script. These arguments are passed
  # as key-value pairs to the training script.
  # Default: {}
  additional_args:
    learning_rate: 2e-5
    lora_alpha: 32
    lora_dropout: 0.1
    warmup_steps: 25
    use_dolomite: true
  # Save a checkpoint at the end of each epoch.
  # Default: True
  checkpoint_at_epoch: true
  # Directory where periodic training checkpoints are stored.
  # Default: /home/instructlab/.local/share/instructlab/checkpoints
  ckpt_output_dir: /home/instructlab/.local/share/instructlab/checkpoints
  # Directory where the processed training data is stored (post
  # filtering/tokenization/masking).
  # Default: /home/instructlab/.local/share/instructlab/internal
  data_output_dir: /home/instructlab/.local/share/instructlab/internal
  # For the training library (primary training method), this specifies the path to
  # the dataset file. For legacy training (MacOS/Linux), this specifies the path to
  # the directory.
  # Default: /home/instructlab/.local/share/instructlab/datasets
  data_path: /home/instructlab/.local/share/instructlab/datasets
  # Allow CPU offload for the DeepSpeed optimizer.
  # Default: False
  deepspeed_cpu_offload_optimizer: true
  # PyTorch device to use. Use 'cpu' for 'simple' and 'full' training on Linux. Use
  # 'mps' for 'full' training on MacOS Metal Performance Shader. Use 'cuda' for
  # Nvidia CUDA / AMD ROCm GPUs. Use 'hpu' for Intel Gaudi GPUs.
  # Default: cpu
  # Examples:
  # - cpu
  # - mps
  # - cuda
  # - hpu
  device: cuda
  # Whether or not we should disable the use of flash-attention during training.
  # This is useful when using older GPUs.
  # Default: False
  disable_flash_attn: false
  # Pick a distributed training backend framework for GPU-accelerated full fine-
  # tuning.
  # Default: fsdp
  distributed_backend: fsdp
  # The number of samples in a batch that the model should see before its parameters
  # are updated.
  # Default: 64
  effective_batch_size: 128
  # Allow CPU offload for the FSDP optimizer.
  # Default: False
  fsdp_cpu_offload_optimizer: false
  # Boolean to indicate if the model being trained is a padding-free transformer
  # model such as Granite.
  # Default: False
  is_padding_free: false
  # The data type for quantization in LoRA training. Valid options are 'None' and
  # 'nf4'.
  # Default: nf4
  # Examples:
  # - nf4
  lora_quantize_dtype:
  # Rank of the low-rank matrices to be used during training.
  # Default: 0
  lora_rank: 0
  # Maximum tokens per GPU for each batch that will be handled in a single step. If
  # running into out-of-memory errors, this value can be lowered but not below the
  # `max_seq_len`.
  # Default: 5000
  max_batch_len: 60000
  # Maximum sequence length to be included in the training set. Samples exceeding
  # this length will be dropped.
  # Default: 4096
  max_seq_len: 4096
  # Directory where the model to be trained is stored.
  # Default: instructlab/granite-7b-lab
  # model_path: /home/instructlab/.cache/instructlab/models/instructlab/granite-7b-lab
  model_path: /home/instructlab/.cache/instructlab/models/ibm-granite/granite-3.0-8b-base
  # Number of GPUs to use for training. This value is not supported in legacy
  # training or on MacOS.
  # Default: 1
  nproc_per_node: 8
  # Number of epochs to run training for.
  # Default: 10
  num_epochs: 8
  # Base directory for organization of end-to-end intermediate outputs.
  # Default: /home/instructlab/.local/share/instructlab/phased
  phased_base_dir: /home/instructlab/.local/share/instructlab/phased
  # Judge model path for phased MT-Bench evaluation.
  # Default: /home/instructlab/.cache/instructlab/models/prometheus-eval/prometheus-8x7b-v2.0
  phased_mt_bench_judge: /home/instructlab/.cache/instructlab/models/prometheus-eval/prometheus-8x7b-v2.0
  # Phased phase1 effective batch size.
  # Default: 128
  phased_phase1_effective_batch_size: 128
  # Learning rate for phase1 knowledge training.
  # Default: 2e-05
  phased_phase1_learning_rate: 2e-05
  # Number of epochs to run training for during phase1 (the experimentally optimal
  # number is 7).
  # Default: 7
  phased_phase1_num_epochs: 7
  # Number of samples the model should see before saving a checkpoint during phase1.
  # Disabled when set to 0.
  # Default: 0
  phased_phase1_samples_per_save: 0
  # Phased phase2 effective batch size.
  # Default: 3840
  phased_phase2_effective_batch_size: 1024
  # Learning rate for phase2 skills training.
  # Default: 6e-06
  phased_phase2_learning_rate: 6e-06
  # Number of epochs to run training for during phase2.
  # Default: 10
  phased_phase2_num_epochs: 10
  # Number of samples the model should see before saving a checkpoint during phase2.
  # Disabled when set to 0.
  # Default: 0
  phased_phase2_samples_per_save: 0
  # Training pipeline to use. 'simple' is for systems with limited resources, 'full'
  # is for more capable consumer systems (64 GB of RAM), and 'accelerated' is for
  # systems with a dedicated GPU.
  # Default: full
  # Examples:
  # - simple
  # - full
  # - accelerated
  pipeline: accelerated
  # Number of samples the model should see before saving a checkpoint.
  # Default: 250000
  save_samples: 0
  # Optional path to a yaml file that tracks the progress of multiphase training.
  # Default: None
  training_journal:
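  # Usage note (illustrative): the phased_phase1_*/phased_phase2_* settings
  # above drive multiphase training, invoked with something like
  # `ilab model train --strategy lab-multiphase`; intermediate checkpoints are
  # scored with the Prometheus judge model set in phased_mt_bench_judge.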
# Configuration file structure version.
# Default: 1.0.0
version: 1.0.0