Last active
December 12, 2017 18:48
-
-
Save bgribble/0931a7ca4f994031813b0e80e57f3706 to your computer and use it in GitHub Desktop.
Split a source file into chunks, preserving git history
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
''' | |
git-chunks - split a source file into multiple chunks, preserving Git | |
history of the source text | |
You have a too-big file foo.py that is old and crufty. In | |
your editor split it into multiple files foo-bar.py, | |
foo-baz.py, and foo-quux.py, with some desiccated remains | |
in foo.py still. So from Git's perspective you have unstaged | |
changes in foo.py and untracked files foo-bar.py, foo-baz.py, | |
and foo-quux.py. | |
If you just "git add" those 3, "git blame" will forget all | |
about the history of the contents of those files and show you as | |
the author on today's date. | |
So: | |
    git-chunks.py foo.py foo-bar.py foo-baz.py foo-quux.py
After that, you will still have unstaged changes in foo.py | |
and the other 3 files will be committed with intact history. | |
Inspiration from: | |
https://beyermatthias.de/blog/2014/09/24/splitting-files-while-preserving-history-in-git/ | |
''' | |
import argparse | |
import random | |
import string | |
from subprocess import getoutput | |
debug = False | |
dry_run = False | |
def shelly(cmd, force_run=False):
    """Run ``cmd`` through the shell and return its captured output.

    Behaviour is steered by the module-level ``debug`` and ``dry_run`` flags:

    * In dry-run mode the command is only printed, not executed, unless
      ``force_run`` is true; a command that is not executed yields ``None``.
    * In debug mode (and not dry-run) a separator, the command, and each
      line of its output are echoed.

    Args:
        cmd: Shell command line to execute.
        force_run: Execute the command even when ``dry_run`` is set.

    Returns:
        The command's output as returned by ``subprocess.getoutput``, or
        ``None`` when the command was skipped.
    """
    verbose = debug and not dry_run

    # Execute first so the echo below can include the captured output.
    result = getoutput(cmd) if (force_run or not dry_run) else None

    if verbose:
        print("-------------------------------")
    if debug or dry_run:
        print(" [shell] %s" % cmd)
    if verbose:
        for line in result.strip().split('\n'):
            print(" ", line)
    return result
def salty(salt_size):
    """Return a random string of ``salt_size`` uppercase ASCII letters."""
    alphabet = string.ascii_uppercase
    letters = random.choices(alphabet, k=salt_size)
    return ''.join(letters)
def safe_name(name):
    """Return ``name`` with every ``/`` replaced by ``-``.

    The result contains no path separators, so it can be embedded in a
    single file-name component or a branch-name fragment.
    """
    return '-'.join(name.split('/'))
def add_chunk(source_file, chunk_file):
    """Commit ``chunk_file`` so that it carries the history of ``source_file``.

    Steps (all run through ``shelly``):

    1. Move the untracked chunk file out of the way.
    2. On a temporary branch, ``git mv`` the source file to the chunk's
       path and commit, then put the real chunk contents in place and
       commit again.
    3. Back on the original branch, merge the temporary branch without
       committing, restore the source file from ``HEAD``, take the chunk
       from the temporary branch, and commit the merge.
    4. Delete the temporary branch.

    If the current branch cannot be determined (detached HEAD, not a git
    repository, ...) the chunk is skipped and nothing is modified.

    Args:
        source_file: Path of the tracked file the chunk was cut from.
        chunk_file: Path of the untracked file holding the chunk.
    """
    # Local import keeps this block self-contained.
    from shlex import quote

    print(" Adding chunk file %s" % chunk_file)

    # Ask git for the branch name directly instead of scraping the
    # human-readable (and localisable) first line of "git status".
    orig_branch = shelly("git rev-parse --abbrev-ref HEAD", force_run=True).strip()

    # "HEAD" means detached; anything that is not a single token is an
    # error message. Bail out before touching the working tree.
    if orig_branch == "HEAD" or len(orig_branch.split()) != 1:
        print(" Cannot determine the current branch (detached HEAD or not a "
              "git repository?); skipping %s" % chunk_file)
        return

    chunk_branch = '%s_chunk-%s_%s' % (orig_branch, safe_name(chunk_file), salty(6))
    tmp_file = ".%s_chunk_tmp" % safe_name(chunk_file)

    # Shell-quote everything interpolated into a command line so names
    # containing spaces or quote characters survive. quote() leaves
    # ordinary names untouched.
    src = quote(source_file)
    chunk = quote(chunk_file)
    tmp = quote(tmp_file)
    branch = quote(chunk_branch)

    # save chunk file out of the way
    shelly("mv %s %s" % (chunk, tmp))

    # checkout a new branch
    shelly("git checkout -b %s" % branch)

    # rename the source file so the chunk path inherits its history
    shelly("git mv %s %s" % (src, chunk))
    shelly("git commit -m %s" % quote("Create %s from %s" % (chunk_file, source_file)))

    # copy over the saved chunk file and commit the real contents
    shelly("mv %s %s" % (tmp, chunk))
    shelly("git add %s" % chunk)
    shelly("git commit -m %s" % quote("Save chunk %s" % chunk_file))

    # checkout original branch
    shelly("git checkout %s" % quote(orig_branch))

    # merge the chunk branch, holding the commit so the tree can be fixed up
    shelly("git merge --no-commit --no-ff %s" % branch)

    # get back the source file that the rename removed
    shelly("git checkout HEAD %s" % src)
    shelly("git checkout --theirs %s" % chunk)
    shelly("git commit -m %s" % quote("Merge chunk %s of %s" % (chunk_file, source_file)))

    # remove the temp branch
    shelly("git branch -d %s" % branch)
def split_chunks(source_file, chunk_files):
    """Add every chunk in ``chunk_files`` with the history of ``source_file``.

    The edited (uncommitted) source file is parked in a temporary file, the
    committed version is restored as the basis for the per-chunk branches,
    each chunk is processed by ``add_chunk``, and finally the edited source
    is moved back so it is left with its uncommitted changes.

    Args:
        source_file: Path of the tracked file that was split up.
        chunk_files: Paths of the untracked files holding the chunks.
    """
    # Local import keeps this block self-contained.
    from shlex import quote

    print("Splitting original file %s into chunks" % source_file)

    # The temp name must be a single path component: with a raw
    # "dir/file" the "mv" below would target a non-existent ".dir/"
    # directory and fail, and the following checkout would then discard
    # the user's uncommitted edits.
    tmp = quote(".%s_chunk_tmp" % safe_name(source_file))
    src = quote(source_file)

    # save state of original file (should be uncommitted)
    shelly("mv %s %s" % (src, tmp))
    try:
        # restore source file HEAD as basis for branches; "--" keeps git
        # from treating the path as a branch name
        shelly("git checkout -- %s" % src)

        # add all the chunk files
        for chunk in chunk_files:
            add_chunk(source_file, chunk)
    finally:
        # restore the edited version of the source file (will leave it
        # with uncommitted changes), even if a chunk step was interrupted
        shelly("mv %s %s" % (tmp, src))
if __name__ == "__main__":
    # Command-line entry point: parse options, publish the two mode flags
    # as module globals (read by shelly), report the starting commit, then
    # split the file.
    cli = argparse.ArgumentParser(description="Split file into multiple chunks")
    cli.add_argument('--debug', action="store_true", help="Print extra debug info")
    cli.add_argument('--dry-run', action="store_true", help="Don't execute commands")
    cli.add_argument('source_file', help='Original file to split')
    cli.add_argument('chunk_files', nargs='+',
                     help='Paths to untracked working files containing chunks')
    options = cli.parse_args()

    debug = options.debug
    dry_run = options.dry_run

    if debug:
        print("Arguments:", vars(options))

    # Always executed, even in dry-run mode, so the user knows where to
    # reset to.
    start_commit = shelly("git log | head -1 | cut -d ' ' -f 2", force_run=True)
    print("HEAD before starting is commit %s" % start_commit)

    split_chunks(options.source_file, options.chunk_files)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment