Created
July 28, 2016 17:13
-
-
Save Elzair/b6aaf451ca7345423b666d2943b8ec90 to your computer and use it in GitHub Desktop.
git-extract-subdir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
require 'fileutils'
require 'optparse'
require 'ostruct'
require 'pathname'
require 'pp'
require 'shellwords'
# Global verbosity flag. Switched on by the -v/--verbose command line option;
# when true, git commands and parsed options are echoed before use.
$verbose = false
# Small helpers for running shell commands and for string/path manipulation.
class Util
  # Runs +cmd+ through the shell and returns its standard output.
  #
  # If the command does not finish successfully, an error message is written
  # to STDERR and the script terminates with a non-zero exit status so that
  # callers of the script can detect the failure.
  def self.exec(cmd)
    output = `#{cmd}`
    unless $?.success?
      STDERR.puts("Error executing '#{cmd}'")
      exit(1)
    end
    output
  end

  # Returns the longest prefix shared by every string in +strs+.
  #
  # strs     - Array of strings to compare.
  # dir_only - When true, the prefix is cut back to (and including) its last
  #            "/"; if the prefix contains no "/" the result is "".
  #
  # An empty list yields "".
  def self.getCommonBeginning(strs, dir_only = false)
    return "" if strs.empty?

    first = strs[0]
    # The prefix can never be longer than the shortest string. Bounding the
    # scan this way guarantees termination even when all strings are equal
    # or only a single string was supplied.
    limit = strs.map { |str| str.length }.min
    idx = 0
    idx += 1 while idx < limit && strs.all? { |str| str[idx] == first[idx] }
    common = first[0...idx]

    # Strip out any characters after the final slash
    if dir_only
      last_dir_idx = common.rindex("/")
      common = last_dir_idx ? common[0..last_dir_idx] : ""
    end
    common
  end

  # Removes the first +common.length+ characters from every string in +strs+.
  def self.filterCommonBeginning(strs, common)
    strs.map { |str| str[common.length..-1] }
  end

  # Reduces a repository location to a short name/path.
  #
  # For http(s)/ssh URLs and scp-like "user@host:path" addresses, a trailing
  # ".git" is dropped and the last "/"-separated component is returned.
  # Anything else is returned with a leading "file://" removed.
  def self.filterUrl(url)
    if url =~ %r{\A(https|http|ssh)://|@.*:}
      url.sub(/\.git\Z/, "").split("/")[-1]
    else
      url.sub(%r{\Afile://}, "")
    end
  end
end
# Command line option handling.
class Options
  # Parses +args+ (consumed in place by OptionParser) and returns an
  # OpenStruct with these fields:
  #
  #   base_path - URL or file path of the repository to clone (-b)
  #   commit    - SHA1 given with -c, "" when not given
  #   new_path  - path of the new repository (-p)
  #   no_subdir - true when -n was given
  #   tmp_path  - path of the temporary repository (-P), default "tmp"
  #   subdirs1  - subdirectories for the new repository (-s): a String when
  #               exactly one was listed, otherwise an Array
  #   subdirs2  - same, for the temporary repository (-S)
  #   use_temp  - true when -S was given
  #
  # -v additionally sets the global $verbose flag.
  #
  # If the base path, the new path or the subdirectory list is missing, an
  # error message and the usage text are written to STDERR and the script
  # exits with status 1.
  def self.parse(args)
    options = OpenStruct.new
    options.base_path = ""
    options.commit = ""
    options.new_path = ""
    options.no_subdir = false
    options.tmp_path = "tmp"
    options.subdirs1 = []
    options.subdirs2 = []
    options.use_temp = false

    opt_parser = OptionParser.new do |opts|
      # Show the name the script was actually invoked as.
      opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"
      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-b [URL]", "--basepath [URL]",
              "Clone new repository to work on from URL (or filepath)") do |url|
        options.base_path = url || ""
      end

      opts.on("-c [SHA1]", "--commit [SHA1]",
              "Commit SHA1 to reset temporary repository to") do |sha|
        options.commit = sha || ""
      end

      opts.on("-n", "--no-common-subdirs",
              "When multiple subdirectories are specified, do not try to make a common directory the root directory") { options.no_subdir = true }

      opts.on("-p path/to/new/repository", "--path path/to/new/repository",
              "Relative path to new repository") do |new_path|
        options.new_path = new_path
      end

      opts.on("-P path/to/tmp/repository", "--tmppath path/to/tmp/repository",
              "Relative path to temporary repository") do |tmp_path|
        options.tmp_path = tmp_path || "tmp"
      end

      opts.on("-s \"sd1/sd2,sd3,sd4\"", "--subdirs \"sd1/sd2,sd3,sd4\"",
              "Comma separated list of subdirectories to factor out into new repository") do |subdirs|
        options.subdirs1 = splitSubdirs(subdirs)
      end

      opts.on("-S \"sd1/sd2,sd3,sd4\"", "--oldsubdirs \"sd1/sd2,sd3,sd4\"",
              "Comma separated list of subdirectories to factor out into temporary repository") do |subdirs|
        options.subdirs2 = splitSubdirs(subdirs)
        options.use_temp = true
      end

      opts.on("-v", "--verbose",
              "Display verbose output") { $verbose = true; puts("Enabled verbose output") }
    end

    opt_parser.parse!(args)

    # Do some verifications of arguments
    if options.base_path == "" || options.new_path == "" || options.subdirs1 == []
      STDERR.puts("You must specify a URL to the old repository, a path to the new directory, and a list of subdirectories to extract")
      STDERR.puts(opt_parser)
      # A usage error is a failure: report it through the exit status.
      exit(1)
    end

    options
  end

  # Splits a comma separated list. A list with exactly one entry is returned
  # as that single String rather than a one-element Array.
  def self.splitSubdirs(list)
    dirs = list.split(/,/)
    dirs.length == 1 ? dirs[0] : dirs
  end
  private_class_method :splitSubdirs
end
# Thin wrapper around the git command line. Every method builds a git
# command string and runs it through Util.exec in the current directory.
# Caller-supplied values (URLs, paths, branch names, SHAs, subdirectories)
# are shell-escaped before being placed into the command line.
class Git
  # Adds a remote called +name+ that points at +url+.
  def self.addRemote(url, name="origin")
    self.cmd("remote add #{quote(name)} #{quote(url)}")
  end

  # Checks out one branch, or each branch of a list in turn.
  #
  # branches - a branch name, or any object responding to #each.
  # track    - when true, creates a local branch of the same name from
  #            origin/<branch> instead of checking out an existing one.
  def self.checkout(branches, track=false)
    if branches.respond_to?('each')
      branches.each { |branch| self.checkout(branch, track) }
    else
      sleep(0.01) # If we do not add a slight delay, git will step on itself
      branch = quote(branches)
      # Checkout new branch that has the same state as the remote branch
      if track
        self.cmd("checkout -b #{branch} origin/#{branch}")
      else
        self.cmd("checkout #{branch}")
      end
    end
  end

  # Runs "git <cmd>" and returns its output; echoes the full command first
  # when $verbose is set. +cmd+ is passed to the shell as given.
  def self.cmd(cmd)
    str = "git #{cmd}"
    puts(str) if $verbose
    Util.exec(str)
  end

  # Clones +url+ into +path+.
  def self.clone(url, path)
    self.cmd("clone #{quote(url)} #{quote(path)}")
  end

  # Fetches from +remote+.
  def self.fetch(remote="origin")
    self.cmd("fetch #{quote(remote)}")
  end

  # Rewrites history with git filter-branch.
  #
  # subdir            - nil: plain "filter-branch -- --all";
  #                     a list: keep only those subdirectories (index filter),
  #                     optionally re-rooting at their common parent first;
  #                     otherwise: a single subdirectory that becomes the
  #                     repository root (subdirectory filter).
  # no_common_subdirs - when true, skip the re-rooting step for a list.
  # update_tags       - when true, pass "--tag-name-filter cat".
  # clean_repo        - when true, delete refs/original/*, expire the reflog
  #                     and run "gc --prune=now" afterwards.
  def self.filter(subdir=nil, no_common_subdirs=false, update_tags=true, clean_repo=true)
    tagcmd = update_tags ? "--tag-name-filter cat" : ""

    if subdir == nil
      self.cmd("filter-branch -- --all")
    elsif subdir.respond_to?('each')
      # If all specified subdirectories share a common path, first filter commits based on that path
      common = Util.getCommonBeginning(subdir, true)
      if common != "" && !no_common_subdirs
        self.cmd("filter-branch -f --prune-empty --subdirectory-filter #{quote(common)} #{tagcmd} -- --all")
        subdir = Util.filterCommonBeginning(subdir, common)
      end

      # The index filter is itself a shell script run by git, so the paths
      # are escaped once for that script and the script as a whole is
      # escaped again for the shell that launches git.
      paths = subdir.map { |dir| quote("#{dir}/") }.join(" ")
      index_filter = "git rm --cached -r -q -- . ; git reset -q $GIT_COMMIT -- #{paths}"
      self.cmd("filter-branch -f --prune-empty --index-filter #{quote(index_filter)} #{tagcmd} -- --all")
    else
      self.cmd("filter-branch -f --prune-empty --subdirectory-filter #{quote("#{subdir}/")} #{tagcmd} -- --all")
    end

    # Remove unused material from repository
    if clean_repo
      self.cmd('for-each-ref --format="%(refname)" refs/original/ | xargs -n 1 git update-ref -d')
      self.cmd("reflog expire --expire=now --all")
      self.cmd("gc --prune=now")
    end
  end

  # Clones +url+ into +path+, fetches origin and creates a local tracking
  # branch for every remote branch that has no local counterpart. When
  # +commit+ is not "", only branches containing that commit are considered.
  # The working directory is restored before returning.
  def self.fullClone(url, path, commit)
    cwd = FileUtils.pwd()

    puts("Creating #{path} from #{url}")
    self.clone(url, path)
    FileUtils.cd(path)

    puts("Fetching all remote branches for #{path}")
    self.fetch()

    puts("Creating matching branches for #{path}")
    local_branches = self.getBranches(:local, commit)
    pp(local_branches)
    new_branches = self.getBranches(:remote, commit)
                       .map { |b| b.sub(/\Aorigin\//, "") }
                       .reject { |b| local_branches.include?(b) }
    pp(new_branches)
    self.checkout(new_branches, true)

    FileUtils.cd(cwd)
  end

  # Lists branch names as printed by "git branch".
  #
  # type   - :local (no flag), :remote (-r) or :all (-a).
  # commit - when not "", restrict to branches containing this commit.
  #
  # The leading "* " marker and indentation are stripped, and entries that
  # start with "HEAD" or "origin/HEAD" are dropped.
  def self.getBranches(type=:local, commit="")
    flag1 = case type
            when :local
              ""
            when :remote
              "-r"
            when :all
              "-a"
            end
    flag2 = commit != "" ? "--contains #{quote(commit)}" : ""

    self.cmd("branch #{flag1} #{flag2}")
        .split(/\n/)
        .map { |b| b.sub(/\A\*?\p{Space}+/, "") }
        .reject { |b| b =~ /\A(origin\/HEAD|HEAD)/ }
  end

  # Returns the newest commit SHA of each branch (default: all local ones).
  def self.getHeads(branches=nil)
    branches = self.getBranches(:local) if branches == nil
    # Only the first line is needed, so ask git for a single commit instead
    # of listing the whole history.
    branches.map { |b| self.cmd("rev-list -n 1 #{quote(b)}").split(/\n/)[0] }
  end

  # Returns the last commit listed by "git rev-list" for each branch
  # (default: all local ones).
  def self.getTails(branches=nil)
    branches = self.getBranches(:local) if branches == nil
    branches.map { |b| self.cmd("rev-list #{quote(b)}").split(/\n/)[-1] }
  end

  # Removes the remote called +name+.
  def self.rmRemote(name="origin")
    self.cmd("remote rm #{quote(name)}")
  end

  # Hard-resets to +sha+. Given a list of branches, each one is checked out
  # and reset in turn; otherwise the current branch is reset.
  def self.reset(sha, branches=nil)
    if branches.respond_to?('each')
      branches.each { |branch| self.checkout(branch); self.reset(sha) }
    else
      self.cmd("reset --hard #{quote(sha)}")
    end
  end

  # Escapes +value+ (converted with #to_s, so Pathname objects work) for use
  # as a single shell word.
  def self.quote(value)
    Shellwords.escape(value.to_s)
  end
  private_class_method :quote
end
# Script entry point.
#
# Parses +args+, clones the source repository into the new path and filters
# it down to the requested subdirectories. When a temporary repository was
# requested (options.use_temp), the source is first cloned into the temporary
# path, the new repository is cloned from that, the temporary repository is
# filtered with its own subdirectory list, and finally both histories are
# joined through a grafts file followed by another filter-branch run.
def main(args)
  # Process shell arguments and get current directory
  options = Options.parse(args)
  start_dir = FileUtils.pwd
  if $verbose
    pp(args)
    pp(options)
  end

  stitch = options.use_temp

  # Get full paths to new repo (and tmp repo, if specified)
  new_repo = Pathname.new(start_dir).join(options.new_path).realdirpath
  tmp_repo = stitch ? Pathname.new(start_dir).join(options.tmp_path).realdirpath : nil

  # Clone new repository (and temp repository, if needed) from old repository.
  # With a temp repository, the new repository is cloned from it and its
  # 'origin' remote is removed again right away.
  FileUtils.cd(start_dir)
  if stitch
    Git.fullClone(options.base_path, tmp_repo, options.commit)
    Git.fullClone(tmp_repo, new_repo, options.commit)

    FileUtils.cd(new_repo)
    puts("Removing 'origin' from #{new_repo}")
    Git.rmRemote("origin")
  else
    Git.fullClone(options.base_path, new_repo, options.commit)
  end

  # Filter new repository
  FileUtils.cd(new_repo)
  puts("Extracting subdirectories #{options.subdirs1} in #{new_repo}")
  Git.filter(options.subdirs1, options.no_subdir)

  # Everything below only applies when a temporary repository is in use
  return unless stitch

  # Filter temporary repository, resetting all of its branches to the
  # requested commit first (if one was specified)
  FileUtils.cd(tmp_repo)
  unless options.commit == ""
    puts("Resetting all branches in #{tmp_repo} to #{options.commit}")
    Git.reset(options.commit, Git.getBranches(:local))
  end
  puts("Extracting subdirectories #{options.subdirs2} in #{tmp_repo}")
  Git.filter(options.subdirs2, options.no_subdir)

  # Now stitch together both repositories:
  # add the filtered tmp repo as origin of the new repo
  FileUtils.cd(new_repo)
  Git.addRemote(tmp_repo, "origin")
  Git.fetch("origin")

  # Pair each local branch's last-listed commit with a remote branch's head
  tails = Git.getTails
  heads = Git.getHeads(Git.getBranches(:remote))
  graft_lines = tails.zip(heads).uniq.map { |tail, head| "#{tail} #{head}" }.join("\n")
  if $verbose
    puts("Heads length: #{heads.length}, Tails length: #{tails.length}")
    pp(graft_lines)
  end

  # Create grafts file in new repository
  File.write('.git/info/grafts', graft_lines, mode: 'a')

  # Call git filter-branch to stitch history together
  Git.filter
end
# Run only when executed as a script, not when the file is loaded or required.
main(ARGV) if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment