# meant for jupyter notebook
# Periodically mirrors training-log tarballs from a Nirvana/YT cluster into a
# Hugging Face Hub repo so tensorboard logs are publicly browsable.
import os
import time
from IPython.display import clear_output
from huggingface_hub import Repository
# Target Hub repo that receives the mirrored tensorboard logs.
repo_url = 'https://huggingface.co/bigscience/dechonk-logs-1'
# Local checkout location for the Hub repo (created by Repository on first run).
local_repo_folder = './dechonk-logs-1'
yt_token = TODO # auth for our cluster
hf_write_token = TODO # https://huggingface.co/settings/token
download_path = "./downloaded_log_tars" # will be created
out_logs_path = './main_exp_logs'
# Resolve to an absolute path once, so the `cp -r` in the upload loop below
# stays valid even if the working directory changes (NOTE(review): repo.commit
# presumably chdirs into the repo — confirm against huggingface_hub docs).
out_logs_abspath = os.path.abspath(out_logs_path)
upload_period = 8 * 60 * 60 # seconds
# Experiment name -> Nirvana resultItem URL serving that experiment's logs tar.
exp_urls = {
"bloom-smaller-from-scratch-0.5hid-lr2e-4": TODO_DOWNLOAD_URL_FOR_LOGS_TAR,
"bloom-6b3-from-scratch-warmup1000": TODO_DOWNLOAD_URL_FOR_LOGS_TAR,
"bloom-6b3-156Btokens-continue-warmup1000": TODO_DOWNLOAD_URL_FOR_LOGS_TAR,
}
# Fail fast on copy-paste mistakes: every URL must be a Nirvana resultItem
# "data" endpoint.
for name, url in exp_urls.items():
    assert url.startswith('https://nirvana.yandex-team.ru/api/ui/resultItem'), url
    assert url.endswith('/data'), url
# IPython shell magic: create the working directories (no-op if they exist).
!mkdir -p {download_path}
!mkdir -p {out_logs_path}
def _download_and_extract(exp_name):
    """Download the logs tarball for `exp_name` and unpack it into out_logs_path.

    Reads module-level config: exp_urls, yt_token, download_path, out_logs_path.
    Side effects: writes `{download_path}/{exp_name}.tar`, extracts it into
    `{out_logs_path}/{exp_name}`, and flattens the single wrapper directory
    the tar contains (equivalent of `mv {untar_folder}/*/* {untar_folder}`).

    Raises urllib.error.URLError / HTTPError on download failure and
    tarfile.TarError on a corrupt archive — unlike the previous `!wget`/`!tar`
    shell magics, which silently ignored errors.
    """
    # Local imports keep the file's top-of-notebook import cell unchanged.
    import shutil
    import tarfile
    import urllib.request

    assert exp_name in exp_urls
    tar_path = os.path.join(download_path, exp_name + '.tar')
    untar_folder = os.path.join(out_logs_path, exp_name)
    os.makedirs(untar_folder, exist_ok=True)
    # Authenticated download; passing the token as a header object (instead of
    # interpolating it into a `!wget` command line) keeps it out of the process
    # list and avoids shell-quoting issues in the URL.
    request = urllib.request.Request(
        exp_urls[exp_name], headers={'Authorization': f'OAuth {yt_token}'})
    with urllib.request.urlopen(request) as response, open(tar_path, 'wb') as f:
        shutil.copyfileobj(response, f)
    with tarfile.open(tar_path) as archive:
        archive.extractall(untar_folder)
    # Flatten one directory level: the archive wraps everything in a top-level
    # folder. Like the original `mv`, the (now empty) wrapper dirs are left behind.
    for entry in os.listdir(untar_folder):
        inner = os.path.join(untar_folder, entry)
        if os.path.isdir(inner):
            for item in os.listdir(inner):
                shutil.move(os.path.join(inner, item), untar_folder)
# Main loop: every `upload_period` seconds, re-download all experiment logs and
# push them to the Hub repo. Runs forever; stop with a kernel interrupt.
while True:
    # Wipe the previous iteration's output so the notebook cell stays small.
    clear_output(True)
    try:
        for exp_name in exp_urls:
            _download_and_extract(exp_name)
        # Clones on the first iteration; afterwards reuses the existing checkout.
        repo = Repository(local_repo_folder, clone_from=repo_url, use_auth_token=hf_write_token)
        repo.git_pull()
        # The commit context stages whatever is in the repo dir when it exits.
        with repo.commit("push-o-matic"):
            # Replace the tensorboard folder wholesale with the fresh logs.
            # NOTE(review): `rm -r` prints an error on the very first run (the
            # folder does not exist yet) — harmless under IPython's `!`.
            !rm -r {os.path.join(repo.local_dir, 'tensorboard')}
            !cp -r {out_logs_abspath} {os.path.join(repo.local_dir, 'tensorboard')}
        repo.git_push()
    except Exception as e:
        # Best-effort mirror: swallow any failure and retry on the next cycle.
        print(e)
    print(f"Next attempt in {upload_period/60/60:.3f} hours")
    time.sleep(upload_period)
# ---------------------------------------------------------------------------
# (GitHub Gist page text accidentally captured along with the code; kept below
# as comments so the file remains valid Python.)
# Save justheuristic/ff549f7f6e0006469aa31bdcdcbb8855 to your computer and use
# it in GitHub Desktop.
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.