Last active
September 25, 2021 09:32
-
-
Save gchhablani/070d41ec7b02a0b3b0429d04cadee557 to your computer and use it in GitHub Desktop.
Bash script to download the MultiBERTs intermediate checkpoints, convert them to PyTorch, and push them to the Hugging Face Hub.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/bash
#######################################
# Download one MultiBERTs intermediate TensorFlow checkpoint, convert it to a
# PyTorch weight file, and push it to the matching google/<repo> model
# repository on huggingface.co.
#
# Must be run from a directory that contains README.md, config.json and
# bert_config.json, and from which the relative path
# ../src/transformers/models/bert/ resolves to the conversion script.
#
# Arguments:
#   $1 - seed number (used in the bucket path and the repository name)
#   $2 - training step of the checkpoint; the repository name uses $2 / 1000
# Returns:
#   0 when every required step succeeded, non-zero otherwise. The two working
#   directories are removed in both cases and the caller's working directory
#   is left unchanged.
#######################################
intermediate() {
  local seed=$1
  local ckpt=$2
  local step=$((ckpt / 1000))
  local repo="multiberts-seed-${seed}-${step}k"
  local tf_dir="${repo}_orig"
  local bucket="gs://multiberts/public/intermediates/seed_${seed}/step_${ckpt}"
  local file
  local status=0

  # The work runs in a subshell so that the `cd` below can never leak into the
  # caller, and so that the first failing step skips everything after it.
  (
    mkdir -p -- "${repo}" "${tf_dir}" || exit 1

    # Failure is tolerated here, as in the original script (the repository may
    # already exist from an earlier run); a missing repository is caught by
    # the clone that follows.
    huggingface-cli repo create --yes "${repo}" --organization google

    # Skip downloading LFS objects: only the repository skeleton is needed.
    GIT_LFS_SKIP_SMUDGE=1 git clone "https://huggingface.co/google/${repo}" \
      || exit 1

    for file in bert.ckpt.data-00000-of-00001 bert.ckpt.index bert.ckpt.meta checkpoint; do
      gsutil cp "${bucket}/${file}" "${tf_dir}/${file}" || exit 1
    done

    python ../src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py \
      --tf_checkpoint_path "${tf_dir}/bert.ckpt" \
      --bert_config_file bert_config.json \
      --pytorch_dump_path "${repo}/pytorch_model.bin" \
      || exit 1

    cp -- README.md config.json "${repo}/" || exit 1
    cd -- "${repo}" || exit 1

    # Specialise the template README (written for seed 0, checkpoint 0).
    # The expressions are applied in this order on every line.
    sed -i \
      -e "s/seed-0/seed-${seed}/g" \
      -e "s/Seed 0/Seed ${seed}/g" \
      -e "s/Checkpoint 0/Checkpoint ${step}/g" \
      -e "s/checkpoint 0/checkpoint ${step}/g" \
      -e "s/-0k/-${step}k/g" \
      README.md || exit 1

    git add . || exit 1
    # `git diff --cached --quiet` exits non-zero when something is staged, so
    # "nothing to commit" is not mistaken for a commit failure.
    if ! git diff --cached --quiet; then
      git commit -m "Add or Fix Model" || exit 1
    fi
    git push
  ) || status=$?

  # Always remove the working directories so a later run starts clean.
  rm -rf -- "${repo}" "${tf_dir}"
  return "${status}"
}
# Publish every intermediate checkpoint for seeds 0 through 4.
# Steps 0..200000 are taken every 20000 steps, then 300000..2000000 every
# 100000 steps. The second range starts at 300000 because step 200000 is
# already produced by the first range; listing it twice would download,
# convert and push that checkpoint a second time for every seed.
for seed in {0..4}; do
  for ckpt in {0..200000..20000} {300000..2000000..100000}; do
    # Keep going after a failed checkpoint, but say which one failed.
    intermediate "${seed}" "${ckpt}" \
      || printf 'intermediate failed for seed %s, step %s\n' "${seed}" "${ckpt}" >&2
  done
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment