public
Last active

A script to extract a tree from a git repository

  • Download Gist
extract-tree-from-git.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
#!/usr/bin/python3.1
 
from subprocess import call, Popen, PIPE, check_call
import re
import sys
import os
import errno
 
# !! n.b. You probably shouldn't be using this - it's vastly faster and safer to use
# one of the other methods mentioned here:
# http://stackoverflow.com/questions/3364575/whats-the-best-way-to-extract-a-tree-from-a-git-repository
# I'm leaving this script up just for interest.
 
# os.umask() can only report the current mask by installing a new one, so a
# throwaway value is set and the original is put straight back. The saved mask
# is used later to pick permissions for extracted files.
original_umask = os.umask(0)
os.umask(original_umask)

# Exactly two arguments are required: the tree-ish and the output directory.
if len(sys.argv) != 3:
    print("Usage: {} <tree-ish> <output-directory>".format(sys.argv[0]),
          file=sys.stderr)
    sys.exit(1)

treeish = sys.argv[1]
output_directory = sys.argv[2]
 
# From http://stackoverflow.com/questions/35817/whats-the-best-way-to-escape-os-system-calls-in-python
def shellquote(s):
    """Return s wrapped in single quotes for use as one shell word.

    Every single quote inside s is replaced by the four-character sequence
    quote, backslash, quote, quote: it closes the quoted string, emits an
    escaped literal quote, and reopens the quoted string.
    """
    escaped = s.replace("'", "'\\''")
    return "".join(("'", escaped, "'"))
 
# Get the output of a command, split on line endings or NUL ('\0'):
def command_to_lines(command,nul=False):
    """Run command and return its standard output as a list of strings.

    command is an argument list passed to Popen (no shell is involved).
    When nul is true the decoded output is split on NUL characters;
    otherwise it is split into lines with the line endings removed.

    If the command exits with a non-zero status, a message is printed to
    standard error and the script exits with status 1.

    Note that NUL-terminated output (as opposed to NUL-separated) produces
    a trailing empty string in the returned list.
    """
    p = Popen(command, stdout=PIPE)
    output = p.communicate()[0]
    if p.returncode != 0:
        # file=sys.stderr must be an argument to print(), not to format(),
        # otherwise the diagnostic ends up on standard output.
        print("'{}' failed.".format(' '.join(command)), file=sys.stderr)
        sys.exit(1)
    str_output = output.decode()
    if nul:
        return str_output.split('\0')
    return str_output.splitlines()
 
# A generator that parses the output of "git ls-tree -r -z" to yield a
# tuple for each blob in the tree:
def get_blobs_in_tree(tree,path_prefix=""):
    """Yield (object_name, path, mode_int) for each blob in tree.

    tree is passed to "git ls-tree -r -z". mode_int is the entry's file
    mode parsed as an octal integer. Entries whose type is not "blob" are
    skipped. path_prefix is accepted for interface compatibility but is
    not used.
    """
    for line in command_to_lines(["git","ls-tree","-r","-z",tree],nul=True):
        # Each record is "<mode> SP <type> SP <object> TAB <path>". Splitting
        # on the first TAB keeps the path intact even when it starts with
        # whitespace or contains a newline, which is possible with -z.
        metadata, tab, path = line.partition("\t")
        fields = metadata.split()
        if not tab or len(fields) != 3:
            # Not a tree entry, e.g. the empty string left after the final
            # NUL terminator.
            continue
        mode, object_type, object_name = fields
        if object_type == "blob":
            yield (object_name, path, int(mode, 8))
 
# Print out information about a blob:
def print_blob( object_name, path, git_file_mode ):
    """Print one line describing a blob: its name, octal mode and path.

    The three fields are separated by single spaces and the mode is shown
    in octal without a prefix.
    """
    fields = (object_name, git_file_mode, path)
    summary = "{} {:o} {}".format(*fields)
    print(summary)
 
# Extract a blob to an output directory + leading directory components:
def extract_blob_to(object_name,path,git_file_mode,output_directory):
    """Write the blob object_name to output_directory/path.

    Any missing leading directories of path are created under
    output_directory. A blob whose mode is 0o120000 is treated as a symlink:
    its content is used as the link target. Any other blob is written out as
    a regular file whose permissions are the low nine mode bits with the
    process umask (original_umask) applied.

    Raises CalledProcessError if "git show" or "ln" fails, and OSError if a
    directory or file cannot be created or its mode cannot be changed.
    """
    # Imported here because the file's top-level subprocess import does not
    # include check_output.
    from subprocess import check_output

    dirname, basename = os.path.split(path)
    destination = os.path.join(output_directory, dirname)
    try:
        os.makedirs(destination)
    except OSError as e:
        # A directory that already exists is fine; anything else is fatal.
        if e.errno != errno.EEXIST:
            raise
    destination_filename = os.path.join(destination, basename)
    if git_file_mode == 0o120000:
        # Then this is a symlink. check_output raises if "git show" fails, so
        # a link is never created from partial or empty output.
        symlink_destination = check_output(["git", "show", object_name])
        # "--" stops ln from parsing a target that starts with "-" as an
        # option. The target is kept as raw bytes.
        check_call(["ln", "-s", "--", symlink_destination, destination_filename])
    else:
        # Stream the blob straight into the file; no shell is involved, so
        # the destination name needs no quoting.
        with open(destination_filename, "wb") as output_file:
            check_call(["git", "show", object_name], stdout=output_file)
        # Only regular files get a mode: for a symlink the low bits are zero
        # and chmod would act on the link's target.
        permissions = (git_file_mode % 0o1000) & ~original_umask
        os.chmod(destination_filename, permissions)
 
# Walk every blob in the requested tree: report it, then write it out.
for blob in get_blobs_in_tree(treeish):
    print_blob(*blob)
    blob_name, blob_path, blob_mode = blob
    extract_blob_to(blob_name, blob_path, blob_mode, output_directory)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.