Skip to content

Instantly share code, notes, and snippets.

@pmrowla
Last active December 15, 2020 15:24
Show Gist options
  • Save pmrowla/bbaea8fd71be49d38ce1bc002c5977c4 to your computer and use it in GitHub Desktop.
Save pmrowla/bbaea8fd71be49d38ce1bc002c5977c4 to your computer and use it in GitHub Desktop.
DVC experiments migration script
#!/usr/bin/env python3
"""Migrate DVC experiments from .dvc/experiments to refs/exps.
Requires DVC API.
"""
import argparse
import os
import re
from dvc.repo import Repo
from dvc.repo.experiments.base import ExpRefInfo
from dvc.scm import SCM
# Pattern for legacy experiment branch names:
#
#     <baseline_rev>-<exp_sha>[-checkpoint][-<suffix>]
#
# * ``baseline_rev`` - exactly 7 lowercase hex characters.
# * ``exp_sha``      - one or more lowercase hex characters.
# * ``checkpoint``   - optional literal ``-checkpoint`` marker.
#
# A single trailing dash is consumed (without capturing) so that whatever
# follows ``match.end()`` in the branch name is the bare suffix text.
BRANCH_RE = re.compile(
    r"^(?P<baseline_rev>[a-f0-9]{7})-(?P<exp_sha>[a-f0-9]+)"
    r"(?P<checkpoint>-checkpoint)?(?:-)?"
)
def migrate(path=os.curdir, dry_run=False):
    """Copy legacy experiment branches into experiment refs of the repo.

    Opens the DVC repo at ``path``, looks for an ``experiments`` directory
    inside its DVC dir, and for every branch there whose name matches
    ``BRANCH_RE`` builds an ``ExpRefInfo`` and fetches the branch into it.

    Args:
        path: Location passed to ``Repo``; defaults to the current directory.
        dry_run: When true, only print what was found; nothing is fetched.
    """
    repo = Repo(path)
    print(f"Looking for experiments in '{repo.root_dir}'...")

    exp_dir = os.path.join(repo.dvc_dir, "experiments")
    if not os.path.isdir(exp_dir):
        print(f"Nothing to do, '{exp_dir}' is not a valid experiments dir.")
        return

    legacy_scm = SCM(exp_dir)
    for branch in legacy_scm.list_branches():
        match = BRANCH_RE.match(branch)
        if match is None:
            # Branch name does not follow the legacy experiment naming.
            continue

        # Expand the abbreviated baseline revision via the legacy SCM.
        baseline = legacy_scm.resolve_rev(match.group("baseline_rev"))
        exp_hash = match.group("exp_sha")
        # Text after the matched prefix is used as the experiment name;
        # otherwise fall back to a name derived from the experiment hash.
        name = branch[match.end():] or f"exp-{exp_hash[:5]}"
        exp_ref = ExpRefInfo(baseline_sha=baseline, name=name)

        print("Found experiment:")
        print(f"\tOld: {branch}")
        print(f"\tNew: {exp_ref}")
        if dry_run:
            continue

        # NOTE(review): force=True presumably overwrites an existing ref
        # with the same name - confirm against the DVC SCM API.
        repo.scm.fetch_refspecs(
            exp_dir,
            [f"refs/heads/{branch}:{exp_ref}"],
            force=True,
        )
        print(f"\tMigrated experiment '{exp_ref.name}'")

    if dry_run:
        return
    print("Experiments have been migrated.")
    print(f"'{exp_dir}' is no longer needed and can be safely removed.")
def main(argv=None):
    """Parse command-line options and run the migration.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse falls back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Migrate DVC experiments from .dvc/experiments to refs/exps."
        ),
    )
    parser.add_argument(
        "-n",
        "--dry-run",
        action="store_true",
        help=(
            # Trailing space matters: adjacent literals are concatenated.
            "Print actions that would be taken but do not actually migrate "
            "any experiments."
        ),
    )
    args = parser.parse_args(argv)
    migrate(dry_run=args.dry_run)


# Run the migration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@pmrowla
Copy link
Author

pmrowla commented Dec 15, 2020

To migrate experiments, run this script from the root of a DVC repository. The script must be run from a Python environment that has the DVC package installed (i.e., the virtual environment into which you ran `pip install dvc`).

-n/--dry-run can be used to show experiments which would be migrated (without performing the actual migration). After all experiments have been migrated, the .dvc/experiments directory can be removed, as it is no longer used in current DVC releases.

Example:

$ dvc-exp-migrate.py
Looking for experiments in '/Users/pmrowla/git/scratch/checkpoint-test'...
Found experiment:
        Old: def680f-44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a
        New: refs/exps/de/f680f7a14852025e6bc3d6503037d5933a0533/exp-44136
        Migrated experiment 'exp-44136'
Found experiment:
        Old: def680f-6285acfea6063fc5b580a0e223750980c46a511227dadd779f9f62567cf745ff-checkpoint
        New: refs/exps/de/f680f7a14852025e6bc3d6503037d5933a0533/exp-6285a
        Migrated experiment 'exp-6285a'
Experiments have been migrated.
'/Users/pmrowla/git/scratch/checkpoint-test/.dvc/experiments' is no longer needed and can be safely removed.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment