Skip to content

Instantly share code, notes, and snippets.

@justheuristic
Last active June 11, 2022 22:58
Show Gist options
  • Save justheuristic/ff549f7f6e0006469aa31bdcdcbb8855 to your computer and use it in GitHub Desktop.
Save justheuristic/ff549f7f6e0006469aa31bdcdcbb8855 to your computer and use it in GitHub Desktop.
# Meant to be run in a Jupyter notebook: lines starting with `!` are IPython shell escapes, not plain Python.
import os
import time
from IPython.display import clear_output
from huggingface_hub import Repository

# --- Configuration -----------------------------------------------------------
# Every `TODO...` name below is a placeholder that must be replaced with a real
# value before running; left as-is it raises NameError.

# Hugging Face repository the logs are pushed to, and the folder it is cloned into.
repo_url = 'https://huggingface.co/bigscience/dechonk-logs-1'
local_repo_folder = './dechonk-logs-1'
# Sent as the `Authorization: OAuth <token>` header when downloading the log tars.
yt_token = TODO      # auth for our cluster
# Passed to `Repository(..., use_auth_token=...)` for pushing.
hf_write_token = TODO  # https://huggingface.co/settings/token
# Folder that receives one `<experiment name>.tar` per experiment.
download_path = "./downloaded_log_tars"  # will be created
# Folder that receives one extracted sub-folder per experiment.
out_logs_path = './main_exp_logs'
# Absolute form of `out_logs_path`, used as the copy source when publishing.
# NOTE(review): presumably absolute because the commit context may change the
# working directory -- confirm against huggingface_hub.
out_logs_abspath = os.path.abspath(out_logs_path)
# Pause between two download-and-push iterations.
upload_period = 8 * 60 * 60  # seconds


# Experiment name -> download URL of that experiment's log tarball.
# The name is reused as the tar file name and as the extraction sub-folder name.
exp_urls = {
    "bloom-smaller-from-scratch-0.5hid-lr2e-4": TODO_DOWNLOAD_URL_FOR_LOGS_TAR,
    "bloom-6b3-from-scratch-warmup1000": TODO_DOWNLOAD_URL_FOR_LOGS_TAR,
    "bloom-6b3-156Btokens-continue-warmup1000": TODO_DOWNLOAD_URL_FOR_LOGS_TAR,
}
# Fail fast, before the long-running loop starts: every configured URL must
# begin with the Nirvana `resultItem` API prefix and end with `/data`.
for name, url in exp_urls.items():
    assert url.startswith('https://nirvana.yandex-team.ru/api/ui/resultItem'), url
    assert url.endswith('/data'), url


# Create the working directories up front. `os.makedirs` is used instead of a
# `!mkdir -p` shell escape so that paths are not re-parsed by a shell and a
# failure (e.g. the path exists as a regular file) raises instead of only
# being printed.
os.makedirs(download_path, exist_ok=True)
os.makedirs(out_logs_path, exist_ok=True)

def _download_and_extract(exp_name):
    """Download one experiment's log tarball and unpack it under `out_logs_path`.

    The archive is fetched with `wget` into `download_path/<exp_name>.tar`,
    extracted with `tar` into a freshly emptied `out_logs_path/<exp_name>`,
    and then flattened by one level: every non-hidden entry found inside the
    archive's top-level directories is moved directly into that folder.

    The download happens before the old extraction is removed, so a failed
    download leaves the previously extracted logs untouched.

    Args:
        exp_name: key of `exp_urls` naming the experiment to refresh.

    Raises:
        AssertionError: if `exp_name` is not a key of `exp_urls`.
        subprocess.CalledProcessError: if `wget` or `tar` exits with a
            non-zero status.
        shutil.Error: if flattening would overwrite an existing entry.
    """
    # Imported locally so the helper does not depend on the notebook's
    # top-level import cell being extended.
    import glob
    import shutil
    import subprocess

    assert exp_name in exp_urls
    tar_path = os.path.join(download_path, exp_name + '.tar')
    untar_folder = os.path.join(out_logs_path, exp_name)

    # Arguments are passed as a list (no shell), so the token and URL are never
    # re-parsed by a shell; `check=True` turns a failed download into an
    # exception instead of silently continuing with an empty/partial tar.
    # NOTE(review): wget/tar output now goes to the kernel's stdout/stderr --
    # confirm it is still displayed in your notebook front-end.
    os.makedirs(download_path, exist_ok=True)
    subprocess.run(
        [
            'wget',
            f'--header=Authorization: OAuth {yt_token}',
            exp_urls[exp_name],
            '-O',
            tar_path,
        ],
        check=True,
    )

    # Start from an empty folder on every call. Otherwise the flattening step
    # below would, on later iterations, also match the already-flattened
    # content and could not replace existing non-empty directories.
    if os.path.isdir(untar_folder):
        shutil.rmtree(untar_folder)
    os.makedirs(untar_folder)
    subprocess.run(['tar', '-xvf', tar_path, '-C', untar_folder], check=True)

    # Equivalent of `mv <untar_folder>/*/* <untar_folder>`: like the shell
    # pattern, `glob` skips hidden entries. The folder name is escaped so that
    # glob metacharacters in an experiment name are taken literally.
    nested_pattern = os.path.join(glob.escape(untar_folder), '*', '*')
    for nested_path in glob.glob(nested_pattern):
        shutil.move(nested_path, untar_folder)


while True:
    clear_output(True)
    try:
        for exp_name in exp_urls:
            _download_and_extract(exp_name)

        repo = Repository(local_repo_folder, clone_from=repo_url, use_auth_token=hf_write_token)
        repo.git_pull()
        with repo.commit("push-o-matic"):
            !rm -r {os.path.join(repo.local_dir, 'tensorboard')}
            !cp -r {out_logs_abspath} {os.path.join(repo.local_dir, 'tensorboard')}
        repo.git_push()
    except Exception as e:
        print(e)
    print(f"Next attempt in {upload_period/60/60:.3f} hours")
    time.sleep(upload_period)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment