# A script to extract a tree from a git repository.
from subprocess import call, Popen, PIPE, check_call
import re
import sys
import os
import errno
# !! N.B. You probably shouldn't be using this - it's vastly faster and safer
# to use one of git's own export mechanisms (for example "git archive").
# I'm leaving this script up just for interest.
# Find the umask, so we can extract files with appropriate permissions.
# The only way to read the umask is to set it, so put the previous value
# straight back instead of leaving the process running with a umask of 0.
original_umask = os.umask(0)
os.umask(original_umask)

if len(sys.argv) != 3:
    print("Usage: {} <tree-ish> <output-directory>".format(sys.argv[0]),
          file=sys.stderr)
    # Stop here: without exactly two arguments the unpacking below would
    # fail with an unrelated ValueError.
    sys.exit(1)

treeish, output_directory = sys.argv[1:]
# Quote a string so that a POSIX shell treats it as one literal word:
def shellquote(s):
    """Return `s` wrapped in single quotes for use in a shell command line.

    Each embedded single quote is replaced by the sequence '\\'' (close the
    quoted section, emit an escaped quote, reopen the quoted section).

    s -- the string to quote.
    """
    return "'" + s.replace("'", "'\\''") + "'"
# Get the output of a command, split on line endings or NUL ('\0'):
def command_to_lines(command, nul=False):
    """Run `command` and return its standard output as a list of strings.

    command -- the argument list to execute (passed to Popen without a shell).
    nul     -- if true, split the output on NUL characters; otherwise split
               on line endings (the line endings themselves are dropped).

    If the command exits with a non-zero status a message is written to
    standard error; whatever output was captured is still split and returned.
    """
    p = Popen(command, stdout=PIPE)
    output = p.communicate()[0]
    if p.returncode != 0:
        # file= belongs to print(), not to str.format(): as a format()
        # keyword it was silently ignored and the message went to stdout.
        print("'{}' failed.".format(' '.join(command)), file=sys.stderr)
    str_output = output.decode()
    if nul:
        return str_output.split('\0')
    return str_output.splitlines(False)
# A generator that parses the output of "git ls-tree -r -z" to yield a
# tuple for each blob in the tree:
def get_blobs_in_tree(tree, path_prefix=""):
    """Yield (object_name, path, mode_int) for every blob under `tree`.

    tree        -- the tree-ish handed to "git ls-tree".
    path_prefix -- accepted for compatibility; not used by this function.

    mode_int is the entry's mode parsed as an octal number.  Entries whose
    type is not "blob", and records that do not look like an ls-tree entry
    (such as the empty string after the final NUL), are skipped.
    """
    # Each record is "<mode> <type> <object>\t<path>".  Anchoring on the tab
    # and using DOTALL keeps paths intact even when they start with
    # whitespace or contain newlines, which -z output allows.
    entry_re = re.compile(r'(\S+)\s+(\S+)\s+(\S+)\t(.*)', re.DOTALL)
    for line in command_to_lines(["git", "ls-tree", "-r", "-z", tree], nul=True):
        m = entry_re.match(line)
        if not m:
            continue
        mode, object_type, object_name, path = m.groups()
        if object_type == "blob":
            mode_int = int(mode, 8)
            yield (object_name, path, mode_int)
# Print out information about a blob:
def print_blob(object_name, path, git_file_mode):
    """Print one line describing a blob: object name, octal mode and path.

    object_name   -- the blob's object name.
    path          -- the blob's path.
    git_file_mode -- the mode as an integer; it is printed in octal.
    """
    print("{} {:o} {}".format(object_name, git_file_mode, path))
# Extract a blob to an output directory + leading directory components:
def extract_blob_to(object_name, path, git_file_mode, output_directory):
    """Write the blob `object_name` to `path` underneath `output_directory`.

    object_name      -- the object name passed to "git show".
    path             -- the blob's path; its directory part is created
                        below output_directory if it does not exist yet.
    git_file_mode    -- the entry's mode as an integer; 0o120000 marks a
                        symlink, whose blob content is the link target.
    output_directory -- root directory to extract into.

    Raises OSError if a directory, file or symlink cannot be created, and
    CalledProcessError if "git show" fails.

    NOTE(review): the body of this function was incomplete (an `except`
    without a `try`, an unterminated check_call(...) expression, and
    `permissions` computed but never used).  The directory creation, the
    symlink creation and the final chmod are reconstructed from those
    fragments - please confirm against the intended behaviour.
    """
    dirname, basename = os.path.split(path)
    destination = os.path.join(output_directory, dirname)
    try:
        os.makedirs(destination)
    except OSError as e:
        # An already existing directory is fine; anything else is not.
        if e.errno != errno.EEXIST:
            raise
    destination_filename = os.path.join(destination, basename)
    if git_file_mode == 0o120000:
        # Then this is a symlink:
        p = Popen(["git", "show", object_name], stdout=PIPE)
        symlink_destination = p.communicate()[0]
        if p.returncode != 0:
            from subprocess import CalledProcessError
            raise CalledProcessError(p.returncode, ["git", "show", object_name])
        # git returns the target as bytes; decode it with the filesystem
        # encoding so both arguments to os.symlink() are str.
        os.symlink(os.fsdecode(symlink_destination), destination_filename)
    else:
        # Stream the blob straight into the file instead of going through
        # a "git show ... > ..." shell command line.
        with open(destination_filename, "wb") as blob_file:
            check_call(["git", "show", object_name], stdout=blob_file)
        # Keep only the permission bits of the git mode and apply the
        # umask the script was started with.
        permissions = (git_file_mode % 0o1000) & ~original_umask
        os.chmod(destination_filename, permissions)
# Report every blob in the requested tree and extract it into the output
# directory:
for object_name, path, git_file_mode in get_blobs_in_tree(treeish):
    print_blob(object_name, path, git_file_mode)
    extract_blob_to(object_name, path, git_file_mode, output_directory)
