Created
February 27, 2024 12:53
-
-
Save Harry-Chen/84510f93ea8a82383cf49e0cc0c13769 to your computer and use it in GitHub Desktop.
Recover LFS file tree from GitLab object directory (hashed path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import click | |
import pathlib | |
import shutil | |
@click.command()
@click.option('-d', '--delim', default=' - ', help='Delimiter used in the LFS object list')
@click.option('-l', '--level', default=2, type=click.IntRange(0, 31), help='Number of levels in the LFS object tree')
@click.argument("lfs-obj-list", type=click.File('r'))
@click.argument("lfs-obj-dir", type=click.Path(exists=True, file_okay=False))
@click.argument("output-dir", type=click.Path(exists=False, file_okay=False))
def main(delim, level, lfs_obj_list, lfs_obj_dir, output_dir):
    """
    This script will take a list of LFS objects and a directory containing
    the LFS objects with hashed path, and construct the LFS object tree.

    LFS_OBJ_LIST: generated from `git-lfs ls-files --long`\n
    LFS_OBJ_DIR: directory containing LFS objects with hashed path\n
    OUTPUT_DIR: directory to store the LFS object tree
    """
    # NOTE: the docstring above is what click prints for --help, so it is
    # written for end users. Developer notes:
    #   delim        -- separator between the object id and the path on each
    #                   line of the list
    #   level        -- number of two-character directory levels peeled off
    #                   the front of the object id (0..31, enforced by click)
    #   lfs_obj_list -- text file object already opened by click
    #   lfs_obj_dir  -- existing directory holding the hashed object files
    #   output_dir   -- directory to populate; created when missing
    # Objects missing from lfs_obj_dir are reported on stderr and skipped.
    # A malformed list line aborts the run with a click error.

    # ensure the output directory exists
    dest = pathlib.Path(output_dir)
    dest.mkdir(parents=True, exist_ok=True)
    lfs_base = pathlib.Path(lfs_obj_dir)

    count = 0
    # Iterate the file lazily; the line number is kept for error messages.
    for lineno, line in enumerate(lfs_obj_list, start=1):
        if not line.strip():
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue

        # Split on the FIRST delimiter only, so a path that itself contains
        # the delimiter (e.g. "a - b.bin") is kept intact.
        oid, sep, path = line.partition(delim)
        if not sep:
            raise click.ClickException(
                f"Line {lineno}: delimiter {delim!r} not found in {line.strip()!r}"
            )
        oid = oid.strip()
        path = path.strip()
        # Explicit check instead of `assert`, which is removed under -O.
        if len(oid) != 64:
            raise click.ClickException(
                f"Line {lineno}: invalid hash length {len(oid)} (expected 64)"
            )

        # construct the hashed path: <base>/<aa>/<bb>/.../<rest of the oid>
        hash_file = lfs_base
        remainder = oid
        for _ in range(level):
            hash_file = hash_file / remainder[:2]
            remainder = remainder[2:]
        hash_file = hash_file / remainder

        if not hash_file.exists():
            # Report the full object id, not the truncated remainder.
            print(f"File {path} with hash {oid} does not exist in object directory", file=sys.stderr)
            continue

        # copy the file to the output directory
        dest_file = dest / path
        dest_file.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(hash_file, dest_file)
        count += 1

    print(f"Total {count} files copied to {dest}")
# Invoke the click command only when this file is run as a script, so that
# importing the module does not trigger argument parsing or any copying.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment