Skip to content

Instantly share code, notes, and snippets.

@Harry-Chen
Created February 27, 2024 12:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Harry-Chen/84510f93ea8a82383cf49e0cc0c13769 to your computer and use it in GitHub Desktop.
Save Harry-Chen/84510f93ea8a82383cf49e0cc0c13769 to your computer and use it in GitHub Desktop.
Recover LFS file tree from GitLab object directory (hashed path)
#!/usr/bin/env python3
import sys
import click
import pathlib
import shutil
@click.command()
@click.option('-d', '--delim', default=' - ', help='Delimiter used in the LFS object list')
@click.option('-l', '--level', default=2, type=click.IntRange(0, 31), help='Number of levels in the LFS object tree')
@click.argument("lfs-obj-list", type=click.File('r'))
@click.argument("lfs-obj-dir", type=click.Path(exists=True, file_okay=False))
@click.argument("output-dir", type=click.Path(exists=False, file_okay=False))
def main(delim, level, lfs_obj_list, lfs_obj_dir, output_dir):
    '''
    This script will take a list of LFS objects and a directory containing
    the LFS objects with hashed path, and construct the LFS object tree.
    LFS_OBJ_LIST: generated from `git-lfs ls-files --long`\n
    LFS_OBJ_DIR: directory containing LFS objects with hashed path\n
    OUTPUT_DIR: directory to store the LFS object tree
    '''
    # NOTE: the docstring above doubles as the `--help` text rendered by click,
    # so it is intentionally kept as user-facing wording.
    #
    # Parameters (as delivered by click):
    #   delim        -- separator between the object hash and the file path
    #   level        -- number of two-character directory levels (0..31)
    #   lfs_obj_list -- open text file, one "<hash><delim><path>" per line
    #   lfs_obj_dir  -- existing directory holding the hashed objects
    #   output_dir   -- directory to populate; created if missing
    #
    # Malformed list lines abort the run with a click error (exit code 1);
    # objects missing from the object directory are reported on stderr and
    # skipped.

    # ensure the output directory exists
    dest = pathlib.Path(output_dir)
    dest.mkdir(parents=True, exist_ok=True)
    lfs_base = pathlib.Path(lfs_obj_dir)

    # Number of leading hash characters consumed by the directory levels.
    prefix_len = 2 * level

    count = 0
    # Iterate the file lazily instead of materializing it with readlines().
    for lineno, line in enumerate(lfs_obj_list, start=1):
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue

        # Split on the FIRST delimiter only, so that paths which themselves
        # contain the delimiter (e.g. "a - b.bin") stay intact.
        oid, sep, path = line.partition(delim)
        oid = oid.strip()
        path = path.strip()
        if not sep or not path:
            raise click.ClickException(
                f"Line {lineno}: expected '<hash>{delim}<path>', got {line.rstrip()!r} "
                "(check --delim)"
            )
        # Explicit check instead of `assert`, which is stripped under -O.
        if len(oid) != 64:
            raise click.ClickException(f"Line {lineno}: Invalid hash length")

        # construct the hashed path: <base>/<h[0:2]>/<h[2:4]>/.../<remaining>
        shards = [oid[i:i + 2] for i in range(0, prefix_len, 2)]
        hash_file = lfs_base.joinpath(*shards, oid[prefix_len:])
        if not hash_file.exists():
            # Report the full hash, not just the part left after sharding.
            print(f"File {path} with hash {oid} does not exist in object directory", file=sys.stderr)
            continue

        # copy the file to the output directory
        dest_file = dest / path
        dest_file.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(hash_file, dest_file)
        count += 1

    print(f"Total {count} files copied to {dest}")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment