Last active
October 16, 2021 10:16
-
-
Save skshetry/07a3e26e6b06783e1ad7a4b6db6479da to your computer and use it in GitHub Desktop.
Migrator for DVC from old-style stage files to the new pipeline file (use at your own risk; requires `dvc>=1.0.0`).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import logging | |
import os | |
# Module-level logger for this script; its level is lowered to INFO so the
# progress messages logged via logger.info() pass the logger's level check.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
def _get_repo():
    """Return a DVC ``Repo`` constructed with no arguments.

    The ``dvc.repo`` import happens at call time, not when this module is
    imported.
    """
    from dvc.repo import Repo as DvcRepo

    return DvcRepo()
def migrate(dvc, path, name):
    """Move the stage from a single-stage file into 'dvc.yaml' and 'dvc.lock'.

    The stage is loaded from ``path``, assigned ``name``, and dumped to the
    pipeline file 'dvc.yaml' and its lockfile. The original stage file
    (``dvcfile.path``) is then renamed with a '.bak' suffix so that it can be
    reviewed or used for a rollback.

    Args:
        dvc: Repo object handed to ``SingleStageFile`` and ``PipelineFile``.
        path: Path to the old-style single-stage file.
        name: Name to assign to the stage before it is dumped.
    """
    from dvc.dvcfile import SingleStageFile, PipelineFile

    dvcfile = SingleStageFile(dvc, path)
    stage = dvcfile.stage
    stage.name = name

    p_file = PipelineFile(dvc, "dvc.yaml")
    # Using internal APIs, because there are checks on `dump()`.
    p_file._dump_pipeline_file(stage)
    p_file._dump_lockfile(stage)
    # The placeholder must actually be filled in; otherwise a literal '{}'
    # is logged instead of the stage name.
    logger.info(
        "'{}' has been added to 'dvc.yaml' and 'dvc.lock'.".format(name)
    )

    os.rename(dvcfile.path, dvcfile.path + ".bak")
    logger.info("'{0}' has been renamed to '{0}.bak'.".format(dvcfile.path))
    logger.info(
        "Delete it after carefully reviewing"
        " 'dvc.lock' and 'dvc.yaml' or use it to rollback."
    )
def rollback(dvc, path, name):
    """Dump the stage ``name`` from 'dvc.yaml' back into the file ``path``.

    The matching entries in 'dvc.yaml' and 'dvc.lock' are NOT removed; a
    warning is logged asking the user to delete them by hand.

    Args:
        dvc: Repo object handed to ``PipelineFile``.
        path: Destination path passed to ``dump_stage_file``.
        name: Key of the stage to look up in the pipeline file's stages.
    """
    from dvc.utils.stage import dump_stage_file
    from dvc.dvcfile import PipelineFile

    pipeline_stage = PipelineFile(dvc, "dvc.yaml").stages[name]
    serialized = pipeline_stage.dumpd()
    dump_stage_file(path, serialized)

    # There is no API for removing an entry, so ask the user to do it.
    reminder = (
        "Please remove entries regarding '{name}' from 'dvc.yaml' and 'dvc.lock'."
    )
    logger.warning(reminder.format(name=name))
if __name__ == "__main__":
    # Usage: ./migrator.py <dot-dvc-file> <stage_name>
    #        ./migrator.py featurize.dvc featurize
    #
    # Pass -r/--rollback to write the named stage from 'dvc.yaml' back out to
    # <dot-dvc-file> instead of migrating it.

    # Attach a handler to the root logger. With no handler configured,
    # Python falls back to its last-resort handler, which only emits WARNING
    # and above, so the INFO progress messages would be silently dropped.
    # basicConfig() does nothing if the root logger already has handlers.
    logging.basicConfig(format="%(message)s")

    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--rollback", action="store_true")
    parser.add_argument(
        "path", help="Path to dvcfile (to output in terms of rollback)"
    )
    parser.add_argument(
        "name", help="Name of stage to output (to use in terms of rollback)"
    )
    args = parser.parse_args()

    repo = _get_repo()
    if args.rollback:
        rollback(repo, args.path, args.name)
    else:
        migrate(repo, args.path, args.name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
related blog post: https://towardsdatascience.com/automatically-migrate-your-project-from-dvc-0-94-to-dvc-1-x-416a5b9e837b which links to https://github.com/DAGsHub/Dvc1Migration