Training command line and DeepSpeed config for CodeGen 16B, 3xA100 GPUs
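Two files follow: a DeepSpeed ZeRO stage-3 config (fp16, with both optimizer state and parameters offloaded to CPU, presumably to fit the 16B model in three GPUs' memory) and a SLURM batch script that launches Hugging Face's run_clm.py inside a Singularity container. With 3 GPUs, a per-device micro batch of 1, and 32 gradient-accumulation steps, the effective global batch size is 3 × 1 × 32 = 96 sequences of 2048 tokens each.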
ds_config_AdamW_16B_reduce_mem.json:

{
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "initial_scale_power": 16,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": "auto",
      "weight_decay": "auto"
    }
  },
  "scheduler": {
    "type": "WarmupDecayLR",
    "params": {
      "warmup_min_lr": "auto",
      "warmup_max_lr": "auto",
      "warmup_num_steps": "auto",
      "total_num_steps": "auto"
    }
  },
  "zero_optimization": {
    "stage": 3,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": false
    },
    "offload_param": {
      "device": "cpu",
      "pin_memory": false
    },
    "overlap_comm": true,
    "contiguous_gradients": true,
    "reduce_bucket_size": "auto",
    "stage3_prefetch_bucket_size": "auto",
    "stage3_param_persistence_threshold": "auto",
    "sub_group_size": 1e9,
    "stage3_max_live_parameters": 1e9,
    "stage3_max_reuse_distance": 1e9,
    "stage3_gather_16bit_weights_on_model_save": true
  },
  "gradient_accumulation_steps": 32,
  "gradient_clipping": "auto",
  "steps_per_print": 2000,
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "wall_clock_breakdown": false
}
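The "auto" values are not DeepSpeed defaults: when this file is handed to Hugging Face's Trainer via the --deepspeed flag, the transformers DeepSpeed integration fills each one in from the matching TrainingArguments value (learning rate, warmup steps, batch sizes, and so on), so the launch command below remains the single source of truth. Values hard-coded here, like gradient_accumulation_steps: 32, must agree with the corresponding command-line flag or the integration raises an error. Before queueing a week-long job, a cheap sanity check that the file is well-formed JSON (the filename comes from the --deepspeed flag in the launch script):

# Exits non-zero on a JSON syntax error, so typos surface before submission.
python -m json.tool ds_config_AdamW_16B_reduce_mem.json > /dev/null && echo "config OK"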
#!/bin/bash
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=4
#SBATCH --mem=400GB
#SBATCH --gres=gpu:a100:3
#SBATCH --job-name=codegenV16B
#SBATCH --time=166:00:00

module purge
singularity exec --nv \
    --overlay /scratch/st4920/pytorch-codegen-v2.ext3:ro \
    /scratch/work/public/singularity/cuda11.3.0-cudnn8-devel-ubuntu20.04.sif \
    /bin/bash -c "source /ext3/env.sh; conda activate codegen_deepspeed; \
        deepspeed run_clm.py \
        --model_name_or_path=Salesforce/codegen-16B-multi \
        --save_steps=500 \
        --per_device_train_batch_size=1 \
        --num_train_epochs 1 \
        --output_dir=CodeGen/codegen-16B-verilog-1-epochs \
        --report_to 'wandb' \
        --dataset_name code_segments_verilog_unfiltered \
        --tokenizer_name Salesforce/codegen-16B-multi \
        --learning_rate 2e-5 \
        --block_size 2048 \
        --do_train --fp16 \
        --gradient_accumulation_steps 32 \
        --overwrite_output_dir \
        --deepspeed ds_config_AdamW_16B_reduce_mem.json"
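The gist does not name the batch script, so the filename below is hypothetical; saved under any name, it would be submitted and monitored like any other SLURM job:

# Hypothetical filename; submit the job, then watch the queue.
sbatch train_codegen_16b.slurm
squeue -u $USER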