Skip to content

Instantly share code, notes, and snippets.

@mhl
Created January 3, 2011 13:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mhl/763439 to your computer and use it in GitHub Desktop.
Save mhl/763439 to your computer and use it in GitHub Desktop.
#!/usr/bin/python2.6
# This script "unsubmodulizes" a git repository - i.e. it takes a git
# repository with submodules and replaces the submodules with the
# history of the submodule merged into a subdirectory of the same
# name. I knocked this up quickly as an answer to this stackoverflow
# question:
#
# http://stackoverflow.com/questions/4542729/undo-submodulization-in-git
#
# ... and so it hasn't been tested much: use entirely at your own
# risk, etc. Afterwards, your git repository will have multiple root
# commits.
import datetime
import re
import sys
from subprocess import Popen, PIPE, check_call, call
# Matches a gitlink entry (mode 160000, i.e. a submodule) in the output of
# "git ls-files --stage", capturing the commit hash and the path.
SUBMODULE_ENTRY_RE = re.compile(r'^160000 ([a-f0-9]{40}) \d+\s+(.*)$')


def ensure_clean_tree():
    """Exit with status 1 unless the working tree matches the index and
    the index matches HEAD."""
    if 0 != call(["git", "diff", "--exit-code"]):
        sys.stderr.write("There are unstaged changes - git status should be clean\n")
        sys.exit(1)
    if 0 != call(["git", "diff", "--cached", "--exit-code"]):
        sys.stderr.write("There are changes staged but not committed - git status should be clean\n")
        sys.exit(1)


def parse_submodules(ls_files_output):
    """Return a list of (submodule, commit) tuples found in the text
    output of "git ls-files --stage".

    Lines that are not gitlink (mode 160000) entries are ignored.
    Raises Exception if a submodule name contains ':' or '+'.
    """
    submodules = []
    for line in ls_files_output.splitlines(False):
        m = SUBMODULE_ENTRY_RE.search(line)
        if not m:
            continue
        c, s = m.groups()
        if re.search('[:+]', s):
            # ':' or '+' in the submodule name will certainly cause
            # trouble when we create a remote name based on it, but
            # this isn't an exhaustive check (FIXME)
            raise Exception("A remote name created from " + s + " is unlikely to be valid")
        submodules.append((s, c))
    return submodules


def find_submodules():
    """Find the name of every submodule and the commit that the
    submodule should be at, as a list of (submodule, commit) tuples.

    Raises Exception if "git ls-files" fails.
    """
    p = Popen(["git", "ls-files", "--error-unmatch", "--stage"], stdout=PIPE)
    output = p.communicate()[0]
    if p.returncode != 0:
        raise Exception("Finding the submodules failed")
    return parse_submodules(output.decode())


def get_submodule_url(submodule):
    """Return the URL recorded in .gitmodules for the given submodule.

    The lookup key is 'submodule.<submodule>.url', so this assumes the
    submodule's name in .gitmodules is the same as its path.
    Raises Exception if "git config" fails.
    """
    p = Popen(["git", "config", "-f", ".gitmodules", "--get",
               'submodule.' + submodule + '.url'], stdout=PIPE)
    submodule_url = p.communicate()[0].decode().strip()
    if p.returncode != 0:
        raise Exception("Getting the submodule URL failed")
    return submodule_url


def unsubmodulize(submodule, commit, timestamp):
    """Replace one submodule with its history merged into a subdirectory
    of the same name.

    submodule -- path of the submodule in the index
    commit    -- the commit the submodule is recorded at
    timestamp -- string used to make temporary names unique

    The old submodule working copy is moved aside to
    "<submodule>.<timestamp>" and is not removed by this function.
    Any failing git command raises subprocess.CalledProcessError.
    """
    print("=== Doing %s" % (submodule,))
    # Create temporary names for the remote and branch, taking very
    # cursory steps to avoid collisions with existing branches /
    # remotes:
    remote_name = "submodule-" + submodule + "-" + timestamp
    branch_name = "tmp-" + submodule + "-" + timestamp
    # Find the URL of the submodule:
    submodule_url = get_submodule_url(submodule)
    print("Got submodule: %s at commit %s with URL %s"
          % (submodule, commit, submodule_url))
    # Add a remote for the submodule's URL:
    check_call(["git", "remote", "add", remote_name, submodule_url])
    # Fetch all the objects required for the submodule:
    check_call(["git", "fetch", remote_name])
    # Create a temporary branch based on the committed submodule version:
    check_call(["git", "branch", branch_name, commit])
    # Commit removal of the submodule:
    check_call(["git", "rm", "--cached", submodule])
    check_call(["git", "commit", "-m", "Removed the submodule " + submodule])
    # Move the existing submodule out of the way:
    check_call(["mv", submodule, submodule + "." + timestamp])
    # Merge in the branch to the subdirectory; "-s ours --no-commit"
    # records the merge parent without touching the tree, and read-tree
    # then places the branch's tree under the submodule's path:
    check_call(["git", "merge", "-s", "ours", "--no-commit", branch_name])
    check_call(["git", "read-tree", "--prefix=" + submodule, "-u", branch_name])
    check_call(["git", "commit", "-m", "Merge in " + submodule + " as a subdirectory"])
    # Remove the branch and remote that the script created:
    check_call(["git", "branch", "-d", branch_name])
    check_call(["git", "remote", "rm", remote_name])
    # Finally, get the subdirectory from the index:
    check_call(["git", "checkout", "--", submodule])


def main():
    """Check the tree is clean, then convert every submodule in turn."""
    ensure_clean_tree()
    submodules = find_submodules()
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    for submodule, commit in submodules:
        unsubmodulize(submodule, commit, timestamp)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment