Last active
July 21, 2023 18:45
-
-
Save chengscott/3193bdcb73c73b78aedbf6556d423350 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/megatron/arguments.py b/megatron/arguments.py | |
index 4bf1d72..bcea6ce 100644 | |
--- a/megatron/arguments.py | |
+++ b/megatron/arguments.py | |
@@ -1090,7 +1090,7 @@ def _add_distributed_args(parser): | |
default=False, help='If set, use custom-built ring exchange ' | |
'for p2p communications. Note that this option will require ' | |
'a custom built image that support ring-exchange p2p.') | |
- group.add_argument('--local_rank', type=int, default=None, | |
+ group.add_argument('--local-rank', type=int, default=None, | |
help='local rank passed from distributed launcher.') | |
group.add_argument('--lazy-mpu-init', type=bool, required=False, | |
help='If set to True, initialize_megatron() ' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "train_batch_size" : GLOBAL_BATCH,
  "train_micro_batch_size_per_gpu": MICRO_BATCH,
  "gradient_accumulation_steps": 1,
  "steps_per_print": 1,
  "wall_clock_breakdown" : true,
  "zero_optimization": {
    "stage": ZERO_STAGE,
    "allgather_partitions": true,
    "reduce_scatter": true,
    "allgather_bucket_size": 5e8,
    "overlap_comm": true,
    "contiguous_gradients": true
  },
  "fp16": {
    "enabled": true,
    "initial_scale_power": 12
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash -xe
# Launch pretrain_gpt.py with DeepSpeed through mpiexec on the hosts of a PBS job.
#
# Required environment variables (the script exits with status 1 if any is empty):
#   MODEL_SIZE       label used only in the generated DeepSpeed config file name
#   NLAYERS          forwarded as --num-layers
#   HIDDEN           forwarded as --hidden-size
#   ATEN_HEADS       forwarded as --num-attention-heads
#   PBS_NODEFILE     hostfile handed to mpiexec; its line count is used as the node count
#   ZERO_STAGE       substituted into the DeepSpeed config and forwarded as --zero-stage
#   DATA_PREFIX      path prefix for the dataset, vocab file and merge file
#   CHECKPOINT_PATH  forwarded as --save
#
# The DeepSpeed config is rendered from ds.tpl.json in the current directory and
# removed again when the script exits.

# Fail fast with one uniform message per missing variable.
for required in MODEL_SIZE NLAYERS HIDDEN ATEN_HEADS PBS_NODEFILE ZERO_STAGE \
                DATA_PREFIX CHECKPOINT_PATH; do
  if [[ -z "${!required}" ]]; then
    echo "${required} is not set"
    exit 1
  fi
done

NNODES=$(wc -l < "${PBS_NODEFILE}")
GPUS_PER_NODE=4
WORLD_SIZE=$((GPUS_PER_NODE * NNODES))

DDP_IMPL="local"
MPSIZE=1
MICRO_BATCH=1
GLOBAL_BATCH=$((WORLD_SIZE * MICRO_BATCH / MPSIZE))

DS_CFG="ds-${NNODES}-${MODEL_SIZE}-zero${ZERO_STAGE}-${DDP_IMPL}.json"

# The shebang enables -e, so a failing mpiexec would abort the script before a
# trailing `rm` could run. Register the cleanup up front so the generated config
# is removed on every exit path.
trap 'rm -f "${DS_CFG}"' EXIT

# Render the template: replace each placeholder token with its concrete value.
sed -e "s/GLOBAL_BATCH/${GLOBAL_BATCH}/g" \
    -e "s/MICRO_BATCH/${MICRO_BATCH}/g" \
    -e "s/ZERO_STAGE/${ZERO_STAGE}/g" \
    ds.tpl.json > "${DS_CFG}"

date -R

mpiexec -np "${WORLD_SIZE}" --ppn "${GPUS_PER_NODE}" --hostfile "${PBS_NODEFILE}" \
  --env MASTER_ADDR="${HOSTNAME}" \
  --env MASTER_PORT=5566 \
  python pretrain_gpt.py \
    --DDP-impl "${DDP_IMPL}" \
    --deepspeed \
    --deepspeed_mpi \
    --deepspeed_config "${DS_CFG}" \
    --zero-stage "${ZERO_STAGE}" \
    --tensor-model-parallel-size "${MPSIZE}" \
    --no-pipeline-parallel \
    --num-layers "${NLAYERS}" \
    --hidden-size "${HIDDEN}" \
    --num-attention-heads "${ATEN_HEADS}" \
    --micro-batch-size "${MICRO_BATCH}" \
    --global-batch-size "${GLOBAL_BATCH}" \
    --seq-length 1024 \
    --max-position-embeddings 1024 \
    --train-iters 200000 \
    --lr-decay-iters 320000 \
    --save "${CHECKPOINT_PATH}" \
    --num-workers 1 \
    --data-path "${DATA_PREFIX}/my-gpt2_text_document" \
    --vocab-file "${DATA_PREFIX}/gpt2-vocab.json" \
    --merge-file "${DATA_PREFIX}/gpt2-merges.txt" \
    --data-impl mmap \
    --split 949,50,1 \
    --distributed-backend nccl \
    --lr 0.00015 \
    --lr-decay-style cosine \
    --min-lr 1.0e-5 \
    --weight-decay 1e-2 \
    --clip-grad 1.0 \
    --lr-warmup-fraction .01 \
    --checkpoint-activations \
    --log-interval 100 \
    --save-interval 10000 \
    --eval-interval 1000 \
    --eval-iters 10 \
    --fp16
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment