Skip to content

Instantly share code, notes, and snippets.

@Elzair
Created July 28, 2016 17:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Elzair/b6aaf451ca7345423b666d2943b8ec90 to your computer and use it in GitHub Desktop.
Save Elzair/b6aaf451ca7345423b666d2943b8ec90 to your computer and use it in GitHub Desktop.
git-extract-subdir
#!/usr/bin/env ruby
require 'fileutils'
require 'optparse'
require 'ostruct'
require 'pathname'
require 'pp'
# Global verbosity flag. Set to true by the -v/--verbose option; when set,
# Git.cmd echoes each git command and main prints extra diagnostics.
$verbose = false
# Helpers for running shell commands and for manipulating path/URL strings.
class Util
  # Runs +cmd+ through backticks and returns its standard output.
  #
  # When the command does not finish successfully, an error message is
  # written to STDERR and the script terminates with exit status 1 (a bare
  # `exit` would report success to the calling shell).
  def self.exec(cmd)
    output = `#{cmd}`
    unless $?.success?
      STDERR.puts("Error executing '#{cmd}'")
      exit(1)
    end
    output
  end

  # Returns the longest prefix shared by every string in +strs+.
  #
  # strs     - Array of strings to compare.
  # dir_only - when true, the prefix is cut back to (and including) its last
  #            "/"; if the prefix contains no "/", "" is returned.
  #
  # An empty +strs+ yields "".
  def self.getCommonBeginning(strs, dir_only = false)
    return "" if strs.empty?

    strchars = strs.map(&:chars)
    first = strchars.first
    idx = 0
    # Bound the scan by the first string's length: without it, a single
    # string (or several identical ones) would compare nil against nil
    # forever once the index runs past the end.
    idx += 1 while idx < first.length && strchars.all? { |chars| chars[idx] == first[idx] }
    common = first[0...idx].join

    # Strip out any characters after the final slash
    if dir_only
      last_dir_idx = common.rindex("/")
      common = last_dir_idx ? common[0..last_dir_idx] : ""
    end
    common
  end

  # Removes the first +common.length+ characters from every string in +strs+.
  def self.filterCommonBeginning(strs, common)
    strs.map { |str| str[common.length..-1] }
  end

  # Reduces a repository location to a short form.
  #
  # For http(s)/ssh URLs and scp-like "user@host:path" addresses the result is
  # the last path component with a trailing ".git" removed; anything else is
  # returned with a leading "file://" stripped.
  def self.filterUrl(url)
    if url.match(/\A(https|http|ssh):\/\/|@.*:/)
      url.sub(/\.git\Z/, "").split("/")[-1]
    else
      url.sub(/\Afile:\/\//, "")
    end
  end
end
# Parses and validates the command-line options of the script.
class Options
  # Parses +args+ and returns an OpenStruct describing the requested run.
  #
  # +args+ is consumed in place (OptionParser#parse!). Fields of the result:
  #   base_path - value of -b/--basepath ("" when absent)
  #   commit    - value of -c/--commit ("" when absent)
  #   new_path  - value of -p/--path ("" when absent)
  #   no_subdir - true when -n/--no-common-subdirs was given
  #   tmp_path  - value of -P/--tmppath (defaults to "tmp")
  #   subdirs1  - value of -s/--subdirs: a String for a single entry, an Array
  #               of Strings for several, [] when absent
  #   subdirs2  - value of -S/--oldsubdirs, same shape as subdirs1
  #   use_temp  - true when -S/--oldsubdirs was given
  #
  # -v/--verbose sets the global $verbose. If base_path, new_path or subdirs1
  # is missing, a message is written to STDERR and the script exits with
  # status 1 so that callers can detect the invalid invocation.
  def self.parse(args)
    options = OpenStruct.new
    options.base_path = ""
    options.commit = ""
    options.new_path = ""
    options.no_subdir = false
    options.tmp_path = "tmp"
    options.subdirs1 = []
    options.subdirs2 = []
    options.use_temp = false

    opt_parser = OptionParser.new do |opts|
      # Show the name the script was actually invoked as.
      opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"
      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-b [URL]", "--basepath [URL]",
              "Clone new repository to work on from URL (or filepath)") do |url|
        options.base_path = url || ""
      end

      opts.on("-c [SHA1]", "--commit [SHA1]",
              "Commit SHA1 to reset temporary repository to") do |sha|
        options.commit = sha || ""
      end

      opts.on("-n", "--no-common-subdirs",
              "When multiple subdirectories are specified, do not try to make a common directory the root directory") { options.no_subdir = true }

      opts.on("-p path/to/new/repository", "--path path/to/new/repository",
              "Relative path to new repository") do |new_path|
        options.new_path = new_path
      end

      opts.on("-P path/to/tmp/repository", "--tmppath path/to/tmp/repository",
              "Relative path to temporary repository") do |tmp_path|
        options.tmp_path = tmp_path || "tmp"
      end

      opts.on("-s \"sd1/sd2,sd3,sd4\"", "--subdirs \"sd1/sd2,sd3,sd4\"",
              "Comma separated list of subdirectories to factor out into new repository") do |subdirs|
        options.subdirs1 = split_subdirs(subdirs)
      end

      opts.on("-S \"sd1/sd2,sd3,sd4\"", "--oldsubdirs \"sd1/sd2,sd3,sd4\"",
              "Comma separated list of subdirectories to factor out into temporary repository") do |subdirs|
        options.subdirs2 = split_subdirs(subdirs)
        options.use_temp = true
      end

      opts.on("-v", "--verbose",
              "Display verbose output") { $verbose = true; puts("Enabled verbose output") }
    end

    opt_parser.parse!(args)

    # Do some verifications of arguments
    if options.base_path.empty? || options.new_path.empty? || options.subdirs1.empty?
      STDERR.puts("You must specify a URL to the old repository, a path to the new directory, and a list of subdirectories to extract")
      exit(1)
    end

    options
  end

  # Splits a comma separated list; a list with exactly one entry is returned
  # as that bare String rather than a one-element Array.
  def self.split_subdirs(list)
    dirs = list.split(/,/)
    dirs.length == 1 ? dirs[0] : dirs
  end
  private_class_method :split_subdirs
end
# Thin wrappers around the git command line. Every method acts on the
# repository in the current working directory (fullClone changes into the
# clone and back again) and runs its command through Git.cmd.
class Git
  # Registers +url+ as a remote called +name+.
  def self.addRemote(url,name="origin")
    cmd("remote add #{name} #{url}")
  end

  # Checks out one branch, or each branch of a collection in turn.
  # With +track+ set, a local branch is created from origin/<branch>.
  def self.checkout(branches, track=false)
    return branches.each { |name| checkout(name, track) } if branches.respond_to?(:each)

    sleep(0.01) # If we do not add a slight delay, git will step on itself
    # Checkout new branch that has the same state as the remote branch
    git_args = track ? "checkout -b #{branches} origin/#{branches}" : "checkout #{branches}"
    cmd(git_args)
  end

  # Runs "git <cmd>" via Util.exec (echoing the command line first when
  # $verbose is set) and returns what Util.exec returns.
  def self.cmd(cmd)
    command_line = "git #{cmd}"
    puts(command_line) if $verbose
    Util.exec(command_line)
  end

  # Clones +url+ into +path+.
  def self.clone(url, path)
    cmd("clone #{url} #{path}")
  end

  # Fetches from +remote+.
  def self.fetch(remote="origin")
    cmd("fetch #{remote}")
  end

  # Rewrites history with git filter-branch.
  #
  # subdir            - nil: plain rewrite of all refs; a collection: keep only
  #                     those subdirectories (optionally re-rooted at their
  #                     common parent first); otherwise: make that single
  #                     subdirectory the repository root.
  # no_common_subdirs - skip the re-rooting step for collections.
  # update_tags       - pass "--tag-name-filter cat" to the filtering runs.
  # clean_repo        - afterwards delete refs/original, expire the reflog
  #                     and garbage-collect.
  def self.filter(subdir=nil, no_common_subdirs=false, update_tags=true, clean_repo=true)
    tag_option = update_tags ? "--tag-name-filter cat" : ""

    if subdir.nil?
      cmd("filter-branch -- --all")
    elsif !subdir.respond_to?(:each)
      cmd("filter-branch -f --prune-empty --subdirectory-filter #{subdir}/ #{tag_option} -- --all")
    else
      # If all specified subdirectories share a common path, first filter commits based on that path
      shared = Util.getCommonBeginning(subdir, true)
      if !no_common_subdirs && shared != ""
        cmd("filter-branch -f --prune-empty --subdirectory-filter #{shared} #{tag_option} -- --all")
        subdir = Util.filterCommonBeginning(subdir, shared)
      end
      kept = subdir.join("/ ")
      cmd("filter-branch -f --prune-empty --index-filter 'git rm --cached -r -q -- . ; git reset -q $GIT_COMMIT -- #{kept}/' #{tag_option} -- --all")
    end

    # Remove unused material from repository
    return unless clean_repo

    cmd('for-each-ref --format="%(refname)" refs/original/ | xargs -n 1 git update-ref -d')
    cmd("reflog expire --expire=now --all")
    cmd("gc --prune=now")
  end

  # Clones +url+ into +path+, fetches, and creates a local tracking branch
  # for every listed remote branch that has no local counterpart yet.
  # A non-empty +commit+ restricts the branch listings to branches that
  # contain it. The working directory is restored before returning.
  def self.fullClone(url, path, commit)
    origin_dir = FileUtils.pwd
    puts("Creating #{path} from #{url}")
    clone(url, path)
    FileUtils.cd(path)
    puts("Fetching all remote branches for #{path}")
    fetch()
    puts("Creating matching branches for #{path}")
    existing = getBranches(:local, commit)
    pp(existing)
    remote_names = getBranches(:remote, commit).map { |name| name.sub(/\Aorigin\//, "") }
    missing = remote_names.reject { |name| existing.include?(name) }
    pp(missing)
    checkout(missing, true)
    FileUtils.cd(origin_dir)
  end

  # Lists branch names as printed by "git branch".
  #
  # type   - :local, :remote (-r) or :all (-a).
  # commit - when not "", only branches containing this commit are listed.
  #
  # The leading "*"/whitespace is stripped and HEAD entries are dropped.
  def self.getBranches(type=:local, commit="")
    scope_flag = { :local => "", :remote => "-r", :all => "-a" }[type]
    contains_flag = commit == "" ? "" : "--contains #{commit}"
    listing = cmd("branch #{scope_flag} #{contains_flag}").split(/\n/)
    names = listing.map { |line| line.sub(/\A\*?\p{Space}+/, "") }
    names.reject { |name| name.match(/\A(origin\/HEAD|HEAD)/) }
  end

  # Returns, per branch, the first line of "git rev-list <branch>".
  # Defaults to all local branches.
  def self.getHeads(branches=nil)
    branches = getBranches(:local) if branches.nil?
    branches.map { |name| revisions(name).first }
  end

  # Returns, per branch, the last line of "git rev-list <branch>".
  # Defaults to all local branches.
  def self.getTails(branches=nil)
    branches = getBranches(:local) if branches.nil?
    branches.map { |name| revisions(name).last }
  end

  # Removes the remote called +name+.
  def self.rmRemote(name="origin")
    cmd("remote rm #{name}")
  end

  # Hard-resets to +sha+. When +branches+ is a collection, each branch is
  # checked out and reset in turn; otherwise only the current branch is reset.
  def self.reset(sha, branches=nil)
    unless branches.respond_to?(:each)
      return cmd("reset --hard #{sha}")
    end

    branches.each do |name|
      checkout(name)
      reset(sha)
    end
  end

  # Output lines of "git rev-list <branch>".
  def self.revisions(branch)
    cmd("rev-list #{branch}").split(/\n/)
  end
  private_class_method :revisions
end
# Script entry point.
#
# Clones the base repository into the new path and filters it down to the
# subdirectories given with -s. When -S was given as well, a temporary
# repository is cloned first (the new one is then cloned from it), optionally
# reset to the requested commit, filtered down to the -S subdirectories, and
# finally grafted underneath the new repository's history.
def main(args)
  # Process shell arguments and get current directory
  options = Options.parse(args)
  start_dir = FileUtils.pwd
  if $verbose
    pp(args)
    pp(options)
  end
  with_tmp = options.use_temp

  # Get full paths to new repo (and tmp repo, if specified)
  base = Pathname.new(start_dir)
  new_path = base.join(options.new_path).realdirpath
  tmp_path = with_tmp ? base.join(options.tmp_path).realdirpath : nil

  # Clone new repository (and temp repository, if needed) from old repository
  # Make temp repository origin of new repository for grafting purposes
  # Checkout all needed branches as well
  FileUtils.cd(start_dir)
  if with_tmp
    Git.fullClone(options.base_path, tmp_path, options.commit)
    Git.fullClone(tmp_path, new_path, options.commit)

    # Remove old origin from new repository
    FileUtils.cd(new_path)
    puts("Removing 'origin' from #{new_path}")
    Git.rmRemote("origin")
  else
    Git.fullClone(options.base_path, new_path, options.commit)
  end

  # Filter new repository
  FileUtils.cd(new_path)
  puts("Extracting subdirectories #{options.subdirs1} in #{new_path}")
  Git.filter(options.subdirs1, options.no_subdir)

  # Everything below only applies when a temporary repository is in use
  return unless with_tmp

  # Filter temporary repository
  FileUtils.cd(tmp_path)
  # Reset all branches to a certain commit (if specified)
  unless options.commit == ""
    puts("Resetting all branches in #{tmp_path} to #{options.commit}")
    Git.reset(options.commit, Git.getBranches(:local))
  end
  puts("Extracting subdirectories #{options.subdirs2} in #{tmp_path}")
  Git.filter(options.subdirs2, options.no_subdir)

  # Now stitch together both repositories
  FileUtils.cd(new_path)
  # Add new filtered tmp repo as origin
  Git.addRemote(tmp_path, "origin")
  Git.fetch("origin")
  tails = Git.getTails
  heads = Git.getHeads(Git.getBranches(:remote))
  # One "<tail> <head>" line per distinct pairing
  comb = tails.zip(heads).uniq.map { |tail, head| "#{tail} #{head}" }.join("\n")
  if $verbose
    puts("Heads length: #{heads.length}, Tails length: #{tails.length}")
    pp(comb)
  end

  # Create grafts file in new repository
  File.open('.git/info/grafts', 'a') { |grafts| grafts.write(comb) }

  # Call git filter-branch to stitch history together
  Git.filter
end
# Run the script with the arguments given on the command line.
main(ARGV)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment