-
-
Save anonymous/f6902cb4e3534f07ba48 to your computer and use it in GitHub Desktop.
The scripts I used to convert from SVN to Git
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use SVN::Core; | |
use SVN::Ra; | |
use Text::Glob; | |
use Data::Dumper; | |
# Usage: perl buildSVNTree.pl > svnBranches.txt | |
# (messages will appear on STDERR) | |
# Convert a path to the canonical branch name | |
# For branches this is just the directory name | |
# For tags, it includes tags/directory name to avoid name clashes with branches | |
# Reduce an svn path to its canonical branch name.
#   branch: the last directory component        (/branches/foo -> foo)
#   tag:    "tags/" plus the last component     (/tags/foo     -> tags/foo)
# Keeping the "tags/" prefix stops a tag from clashing with a branch of the
# same name.  A path the pattern does not match is returned unchanged.
sub path2name {
    my ($path) = @_;

    # work on a copy so the caller's value is never touched
    ( my $name = $path ) =~ s{^.*?((?:tags/)?[^/]+)$}{$1};

    return $name;
}
# Glob (Text::Glob syntax) naming every location a branch may live in.
# This repository is laid out as /trunk/project rather than /project/trunk;
# adjust the pattern to match your own layout.
my $branchLocations = '/{trunk,branches/*,tags/*}';

# State collected while walking the svn log:
#   @interesting_copies - one hashref per svn copy: { path, rev, fromPath, fromRev }
#   @branch_revs        - indexed by rev: root path of the likely branch created
#                         in that rev, or 0 when the rev creates none
#   @log_msgs           - indexed by rev: first line of the log message (debug aid)
my ( @interesting_copies, @branch_revs, @log_msgs );
# SVN::Ra get_log callback: record every svn copy found in one revision.
#
# Arguments (as passed by get_log):
#   $pathHash - { full path => changed-path object }
#   $rev      - revision number being reported
#   $author, $date, $pool - unused here
#   $logmsg   - log message; may be undef
#
# Side effects on file-level state:
#   $log_msgs[$rev]     - first line of the log message (when one is defined)
#   $branch_revs[$rev]  - shallowest copied path in this rev, or 0 if none
#   @interesting_copies - one { path, rev, fromPath, fromRev } entry per copy
sub buildCopyList {
    my ( $pathHash, $rev, $author, $date, $logmsg, $pool ) = @_;

    # progress indicator: one line every 500 revisions
    print STDERR "$rev\n" if $rev % 500 == 0;

    # keep only the first line of the message to save display space later
    if ( defined $logmsg ) {
        $log_msgs[$rev] = ( split( /\n/, $logmsg ) )[0];
    }

    # start out as "not the root of a branch"
    $branch_revs[$rev] = 0;

    for my $path ( keys %{$pathHash} ) {
        my $change   = $pathHash->{$path};
        my $fromRev  = $change->copyfrom_rev();     # See SVN::Core
        my $fromPath = $change->copyfrom_path();    # See SVN::Core

        # not a copy at all
        next if $fromRev == $SVN::Core::INVALID_REVNUM;

        # not fool-proof, but eliminates most of cvs2svn's noise
        next if $change->action() ne 'A';

        my $knownRoot = $branch_revs[$rev];
        if ( !$knownRoot ) {
            # first copy seen in this rev: it is the branch root for now
            $branch_revs[$rev] = $path;
        }
        else {
            # cvs2svn can produce several copies in one rev when it creates
            # a branch or tag; keep the path with the fewest components
            # because the top-most directory is the one wanted
            my @newParts  = split( m{/}, $path );
            my @rootParts = split( m{/}, $knownRoot );
            $branch_revs[$rev] = $path if @newParts < @rootParts;
        }

        # remember the copy for the classification pass
        push @interesting_copies,
          { path => $path, rev => $rev, fromPath => $fromPath, fromRev => $fromRev };
    }
}
# get_log callback that hands back the revision it was invoked for through
# the file-level $globalParentRev.  Each invocation overwrites the previous
# value, so after a get_log call the variable holds the last rev reported.
my $globalParentRev;

sub setParentRev {
    # Callback arguments: changed paths, rev, author, date, log message and
    # svn pool.  Only the revision number is of interest.
    my ( undef, $rev ) = @_;
    $globalParentRev = $rev;
}
# get_log callback that hands back, through the file-level
# $globalNumChangedFiles, how many paths the reported commit changed.
# Handy for telling cvs2svn copies (many paths) from ordinary svn copies
# (usually one or two paths).
my $globalNumChangedFiles;

sub setNumChangedFiles {
    # Callback arguments: changed paths, rev, author, date, log message and
    # svn pool.  Only the changed-path hash is of interest.
    my ($pathHash) = @_;
    $globalNumChangedFiles = scalar keys %{$pathHash};
}
# Any svn URL will do, but a local mirror makes the run much faster.
my $svnURL  = 'file:///path/to/svn/repo';
my $ra      = SVN::Ra->new($svnURL);
my $svnHead = $ra->get_latest_revnum();

# Pass 1: walk the whole history and collect every revision holding a copy.
#   $paths  ''        - don't limit paths at this time
#   $start  0         - start at root
#   $end    $svnHead  - run to head
#   $limit  0         - call the callback as many times as needed
my ( $paths, $start, $end, $limit ) = ( '', 0, $svnHead, 0 );

#   $discover_changed_paths 1 - tell the callback what paths were modified
#   $strict_node_history    0 - equivalent to stop-on-copy; probably
#                               doesn't matter for this run
my ( $discover_changed_paths, $strict_node_history ) = ( 1, 0 );

$ra->get_log(
    $paths, $start, $end, $limit,
    $discover_changed_paths, $strict_node_history,
    \&buildCopyList
);

# Re-aim the shared get_log arguments for the per-copy lookups below (some
# later SVN::Ra calls supply their own values instead):
#   $end 0   - run backward in time to root
#   $limit 1 - just want the most recent edit to that path
#   $discover_changed_paths 0 - don't need path information
#   $strict_node_history 1    - feel free to stop-on-copy
( $end, $limit, $discover_changed_paths, $strict_node_history ) = ( 0, 1, 0, 1 );

# Branch tree under construction.  It eventually looks like:
#   path -> { branchName -> { branchrev, deleted, parent, children } }
# The trunk is preloaded because it is not copied from anywhere.
my %branches = ( '/trunk' => { trunk => { branchrev => 1, deleted => 0 } } );

# our svn layout is /trunk/project, so some operations need to know project name
my $project = "insert-project-name-here";
# Pass 2: decide which of the recorded svn copies are real branches/tags and
# grow the %branches tree accordingly:
#   path -> { branchName -> { branchrev, deleted, parent, children } }
# Note: the path tests are dependent on svn layout; adjust them as needed.
foreach my $copy (@interesting_copies) {
    my $fromPath = $copy->{'fromPath'};
    my $fromRev  = $copy->{'fromRev'};
    my $path     = $copy->{'path'};
    my $rev      = $copy->{'rev'};

    my $svnFromPath = $fromPath;
    $svnFromPath =~ s{^/}{};    # Apparently recent versions of SVN don't like leading slashes
    my $fileType = $ra->check_path( $svnFromPath, $fromRev );

    # only directory copies can be branches; anything else is ignored silently
    next unless $fileType == $SVN::Node::dir;

    # internal copy within the trunk
    my $withinTrunk = ( $fromPath =~ m{^/trunk} && $path =~ m{^/trunk} );

    # internal copy within one branch or tag; the captured names are quoted
    # so that regex metacharacters in a branch name match literally
    my $withinBranch = 0;
    if ( $fromPath =~ m{^/(branches|tags)/([^/]+)/} ) {
        my ( $area, $name ) = ( $1, $2 );
        $withinBranch = 1 if $path =~ m{^/\Q$area\E/\Q$name\E/};
    }

    # vendor branch from CVS (personal preference)
    my $fromVendor = ( $fromPath =~ m{^/vendor} );

    if ( $withinTrunk || $withinBranch || $fromVendor ) {
        print STDERR "*** rejected copy from $fromPath to $path @ $rev\n";
        next;
    }

    # only the highest directory copied in a rev is treated as the branch root
    next unless $branch_revs[$rev] eq $path;

    # ------------------------------------------------------------------
    # Copies that do not connect two accepted branch locations are only
    # reported, so that a human can decide what they are.
    # ------------------------------------------------------------------
    unless ( Text::Glob::match_glob( $branchLocations, $fromPath )
        && Text::Glob::match_glob( $branchLocations, $path ) )
    {
        # find the last rev that actually changed the parent
        $ra->get_log( $svnFromPath, $fromRev, $end, $limit, $discover_changed_paths,
            $strict_node_history, \&setParentRev );
        $fromRev         = $globalParentRev;
        $globalParentRev = undef;    # At least throw a warning if the parent rev isn't found

        # get the number of files changed in the current rev (svn copy produces
        # usually just one or two paths, cvs2svn creates lots)
        $ra->get_log( $paths, $rev, $rev, 1, 1, $strict_node_history, \&setNumChangedFiles );
        my $numChangedFiles = $globalNumChangedFiles;
        $globalNumChangedFiles = undef;    # At least throw a warning if not set

        # These fall into a couple categories:
        # 1) actual branches/tags that are copied from a subdir (should be mostly gone now)
        # 2) "merging" (copying) a directory from one branch to another (ignore!)
        # 3) screwed up branches/tags (usually deleted and redone in nearby revs) (ignore!)
        # 4) something that looks like a branch, but is a file-by-file copy of a
        #    directory's contents (probably an svn copy where the destination
        #    already existed; probably ignore as it's basically a merge)
        # A large number of changed files tends to be a cvs2svn artifact.
        print STDERR "Possible branch: $fromPath @ $fromRev -> $path @ $rev ($numChangedFiles files changed)\n";

        # Show the log messages of this rev and the two after it.  Revisions
        # without a stored message (past HEAD, or empty message) print as
        # an empty string instead of raising an "uninitialized" warning.
        foreach my $nearby ( $rev .. $rev + 2 ) {
            my $msg = defined $log_msgs[$nearby] ? $log_msgs[$nearby] : '';
            print STDERR "\tr" . $nearby . ": " . $msg . "\n";
        }
        next;
    }

    # ------------------------------------------------------------------
    # A real branch or tag: register it.
    # ------------------------------------------------------------------

    # strip the project subdirectory if it was explicitly named in copy
    $path =~ s{(.+?)/\Q$project\E$}{$1};
    my $branchName = path2name($path);

    if ( defined $branches{$path} ) {
        # branch at this location already exists, rename to avoid clashes
        if ( defined $branches{$path}{$branchName} ) {
            # Previous branch at this location hasn't been renamed yet; do it
            my $origRev = $branches{$path}{$branchName}{'branchrev'};
            my $newName = $branchName . "@" . $origRev;

            # move the entry to its new name, removing the ambiguous one
            my $renamed = delete $branches{$path}{$branchName};
            $branches{$path}{$newName} = $renamed;

            # Update any children for the new branch name
            if ( defined $renamed->{'children'} ) {
                foreach my $child ( @{ $renamed->{'children'} } ) {
                    my $childPath = $child->{'path'};
                    my $childName = $child->{'name'};
                    if ( $branches{$childPath}{$childName}{'parent'}{'name'} eq $branchName ) {
                        $branches{$childPath}{$childName}{'parent'}{'name'} = $newName;
                    }
                }
            }

            # Update the parent for the new branch name.  The lookup is done
            # step by step: the preloaded trunk has no parent at all and a
            # branch copied from an unknown location has a parent that is
            # not in %branches.  A plain nested lookup would autovivify
            # empty entries, which would then be dumped as if they were
            # branch locations.
            my $parent = $renamed->{'parent'};
            if ( defined $parent ) {
                my ( $oldParentPath, $oldParentName ) = ( $parent->{'path'}, $parent->{'name'} );
                if (   defined $oldParentPath
                    && defined $oldParentName
                    && exists $branches{$oldParentPath}
                    && exists $branches{$oldParentPath}{$oldParentName}
                    && defined $branches{$oldParentPath}{$oldParentName}{'children'} )
                {
                    foreach my $childOfParent ( @{ $branches{$oldParentPath}{$oldParentName}{'children'} } ) {
                        if ( $childOfParent->{'name'} eq $branchName ) {
                            $childOfParent->{'name'} = $newName;
                            last;
                        }
                    }
                }
            }
        }

        # mark all other branches at this location as deleted
        foreach my $oldBranch ( keys( %{ $branches{$path} } ) ) {
            $branches{$path}{$oldBranch}{'deleted'} = 1;
        }
    }

    # Now find the last rev that actually modified the parent path.
    # (Because SVN revs are global, just entering 'svn cp foo bar' will result
    # in copyfrom_rev being set to HEAD rather than the last rev that edited
    # the path being copied.  For Git, we want the last edit to the parent path.)
    $svnFromPath .= "/$project"
      unless $svnFromPath =~ m{/\Q$project\E$}
      or $ra->check_path( "$svnFromPath/$project", $fromRev ) == $SVN::Node::none;
    $ra->get_log( $svnFromPath, $fromRev, $end, $limit, $discover_changed_paths,
        $strict_node_history, \&setParentRev );
    my $lastParentRev = $globalParentRev;
    $globalParentRev = undef;    # At least throw a warning if the parent rev isn't found

    # Walk up the tree to find the parent branch if not copied from the
    # parent root.  Falling back to '/' when the substitution does not match
    # guarantees that the loop terminates.
    my $parentPath = $fromPath;
    until ( defined( $branches{$parentPath} ) or $parentPath eq '/' ) {
        $parentPath =~ s{/(.*?)/?[^/]+$}{/$1} or $parentPath = '/';
    }

    my $parentNameGuess = path2name($parentPath);
    my %parentInfo = ( name => $parentNameGuess, path => $parentPath, rev => $lastParentRev );
    my %childInfo  = ( name => $branchName,      path => $path,       rev => $rev );

    if ( $parentPath eq '/' ) {
        # Somehow we don't yet know about the branch at this location.
        # This should only happen if we're going backwards in history or somehow skipped revs
        # For now, print an error and go on...
        print STDERR "Branch at $path copied from $fromPath which is not a known branch.\n";
        print STDERR "\tChild info not recorded\n";
    }
    else {
        # The branch location is known; work out which branch at that
        # location was alive at $fromRev.
        my $parentName;
        if ( defined( $branches{$parentPath}{$parentNameGuess} )
            && $branches{$parentPath}{$parentNameGuess}{'branchrev'} <= $fromRev )
        {
            $parentName = $parentNameGuess;
        }
        else {
            # The fromPath is defined, but the name guess or the rev is wrong.
            # Find the most recent branch that is still older than fromRev.
            my $bestRev = -1;
            foreach my $candidate ( keys %{ $branches{$parentPath} } ) {
                my $candidateRev = $branches{$parentPath}{$candidate}{'branchrev'};
                if ( $candidateRev <= $fromRev && $candidateRev > $bestRev ) {
                    $parentName = $candidate;
                    $bestRev    = $candidateRev;
                }
            }
            $parentInfo{'name'} = $parentName;
        }

        if ( defined $parentName ) {
            # child information is not required (it's not used elsewhere), but it
            # sometimes helps the human-in-the-loop figure out what's going on
            push( @{ $branches{$parentPath}{$parentName}{'children'} }, \%childInfo );
        }
        else {
            # No branch at the parent location is old enough.  Recording the
            # child under an undefined name would create a bogus '' branch.
            print STDERR "Branch at $path copied from $fromPath @ r$fromRev, but no branch at $parentPath is that old.\n";
            print STDERR "\tChild info not recorded\n";
        }
    }

    # Check if path exists in HEAD
    # If path doesn't exist in HEAD, it's flagged as "deleted" which is
    # used by the hideFromGit.pl script to move refs to a hidden namespace
    my $svnPath = $path;
    $svnPath =~ s{^/}{};    # Trim leading slash
    my $headType      = $ra->check_path( $svnPath, $svnHead );
    my $branchDeleted = $headType == $SVN::Node::none ? 1 : 0;

    $branches{$path}{$branchName}{'branchrev'} = $rev;
    $branches{$path}{$branchName}{'deleted'}   = $branchDeleted;
    $branches{$path}{$branchName}{'parent'}    = \%parentInfo;

    # Print branching information
    # print STDERR "$fromPath -> $path @ $rev";
    # print STDERR " ($parentPath)" if $parentPath ne $fromPath;
    # print STDERR " (deleted in HEAD)" if $branchDeleted;
    # print STDERR "\n";
}

# Serialize it all for use by a later Perl script
print Dumper( \%branches );
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
# svn log svn-url | perl fetchSVNNames.pl | |
# Collect the distinct author names found in `svn log` output read from
# STDIN (or from the files named on the command line).
#
# A revision header line looks like
#     r1234 | author | 2010-01-01 12:00:00 +0000 (Fri, 01 Jan 2010) | 2 lines
# The author is everything between the first two " | " separators, so names
# containing '.', '_', '-', '@' and similar characters are captured whole
# rather than being cut off at the first unexpected character.
my %nameHash;
while ( my $line = <> ) {
    next unless $line =~ /^r(\d+)\s\|\s(.+?)\s\|\s/;
    print "$1\n";    # echo each revision number as it is seen
    $nameHash{$2} = 1;
}

# one name per line, sorted so that repeated runs give identical output
foreach my $name ( sort keys(%nameHash) ) {
    print "$name\n";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use Cwd; | |
use IO::File; | |
use Data::Dumper; | |
# SIGINT handler, installed only to make cancelling a run a little easier.
# Because of how this script is structured, CTRL-C still has to be pressed a
# couple of times in a row (once to kill the children, once for this script).
$SIG{INT} = sub {
    # Experiments suggest Perl forwards signals to child processes during
    # system() calls, so this process should only see SIGINT while no child
    # is running; exiting without reaping children is therefore believed to
    # be safe.
    print "received SIGINT, exiting...\n";
    exit 1;
};
# Return the one-line usage message (newline terminated) for this script.
sub usage {
    return sprintf( "Usage: %s svnBranchFile\n", $0 );
}
# exactly one argument is expected: the serialized svn branch file
die usage() if @ARGV != 1;

# Shell snippet handed to `git filter-branch --env-filter`; git evaluates it
# for every commit to translate svn author names into git names and emails.
# Add or modify clauses as necessary (fetchSVNNames.pl can be helpful).
my $authorScript = <<"EndOfScript";
if [ "\$GIT_COMMITTER_NAME" = "(no author)" ]; then
export GIT_COMMITTER_NAME="nobody"
export GIT_AUTHOR_NAME=\$GIT_COMMITTER_NAME
export GIT_COMMITTER_EMAIL="none\@none.com"
export GIT_AUTHOR_EMAIL=\$GIT_COMMITTER_EMAIL
elif [ "\$GIT_COMMITTER_NAME" = "nobody" ]; then
export GIT_COMMITTER_NAME="nobody"
export GIT_AUTHOR_NAME=\$GIT_COMMITTER_NAME
export GIT_COMMITTER_EMAIL="none\@none.com"
export GIT_AUTHOR_EMAIL=\$GIT_COMMITTER_EMAIL
else
echo "Unknown author \$GIT_COMMITTER_NAME";
fi
EndOfScript

# drop the trailing newline to keep rest of git command on the same line
chomp($authorScript);
# Run an external command without going through a shell and report any
# failure on STDOUT.
#
# Arguments: the program followed by its arguments.
# $? is left exactly as system() set it, so callers can inspect it.
sub do_cmd {
    my @args    = @_;
    my $program = $args[0];

    # For debugging purposes uncomment the next line to print out the shell
    # command that will be run:
    # unshift( @args, 'echo' );

    # The indirect-object form of system never invokes a shell
    # (see perldoc -f exec).
    system {$program} @args;

    # Decode the wait status left in $?
    my $status   = $?;
    my $signal   = $status & 127;
    my $exitCode = $status >> 8;

    if ( $status == -1 ) {
        print "$0: failed to execute $program: $!\n";
    }
    elsif ($signal) {
        my $core = ( $status & 128 ) ? 'with' : 'without';
        printf( "$0: $program died with signal %d, %s coredump\n", $signal, $core );
    }
    elsif ($exitCode) {
        printf( "$0: $program exited with nonzero value %d\n", $exitCode );
        print "$0: child error message: $!\n";
    }
}
# Load SVN branch history
my %svnBranches;

# Anonymous block to lexically hide serialized perl structure
{
    # Load up the serialized hash structure named by the first argument
    my $filename       = shift(@ARGV);
    my $svnHistoryFile = IO::File->new( $filename, 'r' )
      or die "Unable to open svn history file '$filename': $!\n";

    # slurp the whole file; local restores the record separator on scope exit
    my $svnHistory = do { local $/; <$svnHistoryFile> };
    $svnHistoryFile->close();
    $svnHistory = '' unless defined($svnHistory);

    # Eval into the current scope and store in the visible hash.
    # The file holds Data::Dumper output ("$VAR1 = {...};"); prefixing "my"
    # keeps it valid under strict.
    # SECURITY: string eval executes whatever the file contains -- only feed
    # this script files you generated yourself.
    my $tmpRef = eval "my $svnHistory";
    die "Loading svn history failed: $@\n" unless defined($tmpRef);
    die "Loading svn history failed: '$filename' does not contain a hash\n"
      unless ref($tmpRef) eq 'HASH';
    %svnBranches = %$tmpRef;
}
# The single large git repository produced from svn by svn-fe
my $parentRepo = "/path/to/massive/git/repo/from/svn-fe";

# Scratch directory handed to git filter-branch; a RAM disk is suggested:
#   MacOS Ram Disk: diskutil erasevolume HFS+ "ramdisk" `hdiutil attach -nomount ram://1165430`
#   Linux: use tmpfs
#   Windows: ???
my $tempdir = "/Volumes/ramdisk/git";

# directory the script was started from
my $startDir = Cwd::cwd();

my $svnRepoName = "repoName-given-to-svn-fe";    # for building revmaps
my $workDir     = $svnRepoName . "-git-repos";
my $cleanDir    = $svnRepoName . "-git-repos-small";

# our svn layout is /trunk/project, so some operations need to know project name
my $project = "insert-project-name-here";

# collected for the summary printed at the end of the run
my ( @emptyPaths, @missingBranchRevs );

# progress counters
my $totalPaths = scalar keys %svnBranches;
my $curPathNum = 1;
# Main conversion loop: build one git repository per svn branch location by
# cloning the big repository, filtering it down to the location's
# subdirectory and recreating the svn branches/tags as git refs.
foreach my $path ( keys(%svnBranches) ) {
    my $repoName = $path;
    $repoName =~ s{^/}{};    # remove leading slash
    my $gitPath = "$path/$project";    # kinda hackish to pull out project subdir
    $gitPath =~ s{^/}{};     # remove leading slash
    my $curTempDir = "$tempdir";    # could specialize this per branch, but doesn't seem to be necessary
    my $branchRepo = "$workDir/$repoName.git";

    # helpful status
    print "Begin repo $repoName ($curPathNum/$totalPaths)\n";
    $curPathNum++;

    # This script doesn't use Git.pm; it simply shells out to git.

    # git clone --bare parentRepo dirRepo
    do_cmd( qw( git clone --bare ), $parentRepo, $branchRepo );

    # cd dirRepo
    # If the clone failed there is nothing to enter.  Carrying on would run
    # filter-branch against whatever repository the current directory
    # belongs to, so skip this location instead.
    unless ( chdir($branchRepo) ) {
        print "$0: unable to enter $branchRepo ($!), skipping $repoName\n";
        $? = 0;
        print "\n";    # blank line before starting next branch
        next;
    }

    # git filter-branch --env-filter authorCmd --subdirectory-filter $dir -d tempdir -- --all
    $? = 0;    # dangerous, but we don't want errors from previous commands getting flagged as filter-branch errors
    do_cmd( qw( git filter-branch --env-filter ), $authorScript,
        '--subdirectory-filter', $gitPath, '-d', $curTempDir, '--', '--all' );

    # detect empty repository
    if ( $? >> 8 == 1 ) {    # filter-branch exits 1 when nothing matches the subdirectory filter
        push( @emptyPaths, $path );
        print "Cleaning up empty repo: $repoName\n";
        chdir($startDir) or die "$0: unable to return to $startDir: $!\n";
        do_cmd( qw( rm -rf ), $branchRepo );
        $? = 0;
        print "\n";    # blank line before starting next branch
        next;          # continue to next path
    }
    $? = 0;

    # Don't need to do any ref translations for the trunk
    unless ( $path eq '/trunk' ) {
        # Create a revmap since git object ids have changed
        my $gitLogString = `git log --reverse`;    # reverse causes commits to be listed in oldest-first order
        $gitLogString = '' unless defined($gitLogString);

        my ( $sha, @svnRevs, @gitCommits, %rev2idx );
        my $revIdx = 0;
        foreach my $line ( split( /\n/, $gitLogString ) ) {
            # process it similar to creating revmaps from the command line
            # there are more efficient ways, but this logic has already been tested
            $sha = $1 if $line =~ /^commit\s([0-9a-fA-F]+)/;

            # the repo name is quoted so that it is matched literally
            if ( $line =~ /\Q$svnRepoName\E\@(\d+)/ ) {
                my $svnRev = $1;
                push( @svnRevs, $svnRev );
                $rev2idx{$svnRev} = $revIdx++;
                push( @gitCommits, $sha );
            }
        }

        # Translate svn branches/tags to git branches/tags
        print "Converting svn branches/tags to git refs\n";

        # sort the branches based on svn creation rev
        my @branches =
          sort { $svnBranches{$path}{$a}{'branchrev'} <=> $svnBranches{$path}{$b}{'branchrev'} }
          keys( %{ $svnBranches{$path} } );

        foreach my $idx ( 0 .. $#branches ) {
            my $branch = $branches[$idx];

            if ( !defined( $rev2idx{ $svnBranches{$path}{$branch}{'branchrev'} } ) ) {
                # if the branch didn't make it through the subdirectory filter, don't create a branch/tag for it
                print "\tBranch $branch not found in current revmap, skipping...\n";
                push( @missingBranchRevs, $branch );
                next;
            }

            # By default the branch/tag goes all the way to the most recent
            # rev touching this path.
            my $refObj = $gitCommits[-1];
            if ( $idx < $#branches ) {
                # Another branch/tag follows this one at the same path. Therefore the
                # current branch needs to point to the commit object just before the
                # new branch is created.
                my $nextBranchRev = $svnBranches{$path}{ $branches[ $idx + 1 ] }{'branchrev'};    # svn rev when the next branch/tag is created
                my $nextBranchIdx = $rev2idx{$nextBranchRev};    # index into gitCommits that next branch is created
                if ( defined($nextBranchIdx) && $nextBranchIdx > 0 ) {
                    $refObj = $gitCommits[ $nextBranchIdx - 1 ];    # one git commit before new branch/tag is created
                }
                else {
                    # Without a usable index the arithmetic would quietly
                    # yield -1; say so and keep the most recent commit.
                    print "\tNo commit precedes r$nextBranchRev in current revmap, using most recent commit for $branch\n";
                }
            }

            # Create git branch or tag
            if ( $branch =~ /^tags/ ) {
                my $tagName = $branch;
                $tagName =~ s{^tags/}{};    # git tags are independent of branches, so don't need leading tags/

                # Read the metadata of the commit being tagged.  Fields are
                # NUL separated (%x00) so that quotes inside a log message
                # cannot break the parse, and the list form of open avoids
                # the shell.
                my $format = '--pretty=format:%cn%x00%ce%x00%cD%x00%B';
                open( my $gitLog, '-|', 'git', 'log', '-1', $format, $refObj )
                  or die "Unable to run git log for $refObj: $!\n";
                my $gitVars = do { local $/; <$gitLog> };
                close($gitLog);
                $gitVars = '' unless defined($gitVars);

                my ( $committerName, $committerEmail, $committerDate, $logMsg ) =
                  split( /\0/, $gitVars, 4 );
                die "Unable to parse log output: $gitVars\n" unless defined($logMsg);

                # Set up environment variables so tag object is created with
                # the correct metadata.  local restores the previous values
                # when this block ends so later git commands are unaffected.
                local @ENV{
                    qw( GIT_COMMITTER_NAME GIT_COMMITTER_EMAIL GIT_COMMITTER_DATE
                      GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL GIT_AUTHOR_DATE )
                } = ( $committerName, $committerEmail, $committerDate ) x 2;

                print "\tTagging $tagName from $refObj\n";
                do_cmd( qw( git tag -a -m ), $logMsg, $tagName, $refObj );
            }
            else {
                print "\tCreating branch $branch from $refObj\n";
                do_cmd( qw( git branch ), $branch, $refObj );
            }
        }
    }

    #cd ..
    chdir($startDir) or die "$0: unable to return to $startDir: $!\n";

    # The slow way to remove dead objects (but doesn't require another directory, also saves space by packing objects)
    # rm -r .git/refs/original/
    # git reflog expire --expire=now --all
    # git gc --aggressive
    # git prune
    # The fast way
    # clone treats a file URL as a URL and thus doesn't make hard links
    do_cmd( qw( git clone --bare ), "file:///$startDir/$branchRepo", "$startDir/$cleanDir/$repoName.git" );
    print "\n";    # blank line before starting next branch
}

print "The following are branches with missing initial revs:\n";
print Dumper( \@missingBranchRevs );
print "\nThe following are branches that produced empty repositories in git:\n";
print Dumper( \@emptyPaths );
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
# Usage: grep . *.revmap | perl genJointRevMap.pl > joint.revmap
# Or: find . -name "*.revmap" -exec grep . '{}' + | genJointRevmap.pl > joint.revmap | |
# Merge per-repository revmaps into one joint revmap.
#
# Input lines are grep-style: "<repo>.revmap:<svnRev><whitespace><gitSHA>".
# Output has one line per svn rev, in ascending rev order:
#     rev <tab> repo <tab> sha
# or, when several repositories contain the rev:
#     rev <tab> multi-repo <tab> repo1 <tab> sha1 <tab> repo2 <tab> sha2 ...
my @revs;    # indexed by svn rev; holds the finished output line for that rev
while ( my $line = <> ) {
    # The dot before "revmap" is escaped so that only a real ".revmap"
    # suffix ends the repository name.
    next unless $line =~ m{(?:\./)?([\w.\-/]+?)\.revmap:(\d+)\s+([0-9a-fA-F]+)};
    my ( $repo, $rev, $sha ) = ( $1, $2, $3 );

    if ( $revs[$rev] ) {
        # Two (or more) commits have the same rev touching them
        my $old_rev = $revs[$rev];
        $old_rev =~ s/^\d+\s(?:multi-repo\s)?//;    # strip rev number (and marker, if present)
        $revs[$rev] = "$rev\tmulti-repo\t$old_rev\t$repo\t$sha";
    }
    else {
        $revs[$rev] = "$rev\t$repo\t$sha";
    }
}

# revs never seen leave holes in the array; skip them
foreach my $entry (@revs) {
    print "$entry\n" if $entry;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use Cwd; | |
use IO::File; | |
use Data::Dumper; | |
# Note: revmapDestDir/gitRepo.revmap must exist. Thus if gitRepo contains | |
# directories (path/to/gitRepo), then destDir/path/to/gitRepo must exist | |
# Return the one-line usage message (newline terminated) for this script.
sub usage {
    return sprintf( "Usage: %s gitRepo svnRepoName revmapDestDir\n", $0 );
}
die usage() unless @ARGV == 3;

my $gitRepo = shift(@ARGV);
$gitRepo =~ s{/\s*$}{};    # strip trailing slash and whitespace
my $svnRepoName = shift(@ARGV);
my $destDir     = shift(@ARGV);

# Collect the complete log of the repository, then return to where we started
# so that a relative $destDir still resolves.
my $startDir = Cwd::cwd();
chdir($gitRepo) or die "Unable to enter git repository '$gitRepo': $!\n";
my $gitLog = `git log --all --date-order`;    # note: multiple commit objects can have the same svn rev
chdir($startDir) or die "Unable to return to '$startDir': $!\n";
$gitLog = '' unless defined($gitLog);

my $revmapPath = "$destDir/$gitRepo.revmap";
my $fh = IO::File->new( $revmapPath, 'w' )
  or die "Unable to write revmap '$revmapPath': $!\n";

# Walk the log line by line and pair every commit with the first git-svn-id
# trailer found in that same commit.  Tracking the current commit explicitly
# keeps a commit without a trailer from being paired with the svn rev of a
# later commit.  The repo name is quoted so it is matched literally.
my $sha;
foreach my $line ( split( /\n/, $gitLog ) ) {
    if ( $line =~ /^commit\s([0-9a-fA-F]{40})/ ) {
        $sha = $1;    # git commit ID
        next;
    }
    if ( defined($sha) && $line =~ /^\s+git-svn-id:\s\Q$svnRepoName\E\@(\d+)/ ) {
        # svnRev (tab) gitSHA
        print {$fh} "$1\t$sha\n";
        $sha = undef;    # at most one entry per commit
    }
}

$fh->close() or die "Unable to finish writing '$revmapPath': $!\n";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use SVN::Core; | |
use SVN::Ra; | |
use Git; | |
use IO::File; | |
use IO::Pty::Easy; | |
use Data::Dumper; | |
# Concept: use git patch-id to verify that diffs in SVN were correctly | |
# translated to Git (note that the git diff is dependent on history, so | |
# this also checks that parentage was handled correctly). Both VCSs are | |
# forced to use the system diff for consistency (the two algorithms are
# slightly different). In our repo, this script does find differences, | |
# but almost all of them can be explained. The tricky one to watch for | |
# is a SVN commit that changes multiple branches -- this will almost | |
# always fail because I didn't build jointRevmap concepts into it... | |
# Usage: gitValidation.pl svnBranches.txt nonJointRevmap | |
# (This script was written before I got in the habit of | |
# generating joint revmaps for a single repository, so | |
# it just deals with the fact that some svn revs get clobbered) | |
die "Must supply svn history file and revmap\n" unless @ARGV == 2;

# Load SVN branch history
my %svnBranchHistory;

# Anonymous block to lexically hide serialized perl structure
{
    # Load up the serialized hash structure named by the first argument
    my $filename       = shift(@ARGV);
    my $svnHistoryFile = IO::File->new( $filename, 'r' )
      or die "Unable to open svn history file '$filename': $!\n";

    # slurp the whole file; local restores the record separator on scope exit
    my $svnHistory = do { local $/; <$svnHistoryFile> };
    $svnHistoryFile->close();
    $svnHistory = '' unless defined($svnHistory);

    # Eval into the current scope and store in the visible hash.
    # The file holds Data::Dumper output ("$VAR1 = {...};"); prefixing "my"
    # keeps it valid under strict.
    # SECURITY: string eval executes whatever the file contains -- only feed
    # this script files you generated yourself.
    my $tmpRef = eval "my $svnHistory";
    die "Loading svn history failed: $@\n" unless defined($tmpRef);
    die "Loading svn history failed: '$filename' does not contain a hash\n"
      unless ref($tmpRef) eq 'HASH';
    %svnBranchHistory = %$tmpRef;
}
# Load revmap
#   $revmap[$rev] is a git SHA string for an ordinary line, or a
#   { repo path => sha } hash ref for a "multi-repo" line.
my @revmap;

# Anonymous block to lexically hide loading stuff
{
    my $filename   = shift(@ARGV);
    my $revmapFile = IO::File->new( $filename, 'r' )
      or die "Unable to open revmap '$filename': $!\n";

    while ( my $line = <$revmapFile> ) {
        if ( $line =~ m#^(\d+)\smulti-repo# ) {
            my $rev = $1;
            my %repos;
            while ( $line =~ m#([\w.\-/]+?)\s([0-9a-fA-F]{40})#g ) {
                # path => sha
                $repos{$1} = $2;
            }
            $revmap[$rev] = \%repos;    # later this will trip a manual graft
        }
        elsif ( $line =~ m#^(\d+).+?([0-9a-fA-F]{40})# ) {
            my ( $rev, $sha ) = ( $1, $2 );
            $revmap[$rev] = $sha;    # TODO this is clobbering the old rev if multiple shas go to a single rev
        }
    }
    $revmapFile->close();
}
# Filled by findChangedPaths: { full path => svn action letter }
my %globalChangedFiles;

# SVN::Ra get_log callback: copy the action of every changed path of the
# reported revision into %globalChangedFiles.
#
# Arguments (as passed by get_log):
#   $pathHash - { full path => changed-path object }
#   $rev, $author, $date, $logmsg, $pool - unused here
sub findChangedPaths {
    my ( $pathHash, $rev, $author, $date, $logmsg, $pool ) = @_;

    # Iterate over the keys only: looping over the hash itself would also
    # visit every value and use it as a (stringified) path.
    foreach my $path ( keys %{$pathHash} ) {
        my $change = $pathHash->{$path};

        # Cache the action because later SVN::Ra calls will munge the memory
        $globalChangedFiles{$path} = $change->action() if defined($change);
    }
}
# Any svn URL will do, but a local mirror makes the run much faster.
my $svnURL = 'file:///path/to/svn/repo';
my $ra     = SVN::Ra->new($svnURL);

my $gitRepo = 'repo-produced-by-repoFusion.pl';

# GIT_EXTERNAL_DIFF forces git to use the system diff (for svn use
# --diff-cmd); GIT_DIR points every git command at the converted repository.
@ENV{ 'GIT_EXTERNAL_DIFF', 'GIT_DIR' } = ( "/path/to/mygitdiff.sh", $gitRepo );

# git patch-id runs behind a pseudo-TTY (pty) to get around output
# buffering; IO::Pty::Easy is used for that instead of Git.pm.
my @gitPatchIDArgs = ( '/path/to/git', 'patch-id' );
my $git            = $gitPatchIDArgs[0];
my $gitPatchID     = IO::Pty::Easy->new;
$gitPatchID->spawn(@gitPatchIDArgs);

my $totalIter = 500;                          # number of revs to test
my $maxRev    = $ra->get_latest_revnum();
# our svn layout is /trunk/project, so some operations need to know project name
# (this script used $project without ever declaring it)
my $project = "insert-project-name-here";

# Run a command without a shell and return everything it wrote to STDOUT
# ('' when it wrote nothing).  File names therefore need no quoting.
sub _capture_cmd {
    my @cmd = @_;
    open( my $pipe, '-|', @cmd ) or die "Unable to run $cmd[0]: $!\n";
    my $output = do { local $/; <$pipe> };
    close($pipe);
    return defined($output) ? $output : '';
}

# Spot-check $totalIter randomly chosen svn revisions: for every modified
# file of a revision, the patch-id of the svn diff must equal the patch-id
# of the corresponding git diff.
# NOTE: trials that cannot be checked are repeated ($iter--), so a revmap
# that covers none of the revisions keeps this loop running forever.
for ( my $iter = 0 ; $iter < $totalIter ; $iter++ ) {
    my $rev = int( rand($maxRev) ) + 1;
    print "r$rev: ";
    print "\t" if $rev < 10000;

    my $limit = 1;
    $ra->get_log( '', $rev, $rev, $limit, 1, 0, \&findChangedPaths );
    my %changedFiles = %globalChangedFiles;
    %globalChangedFiles = ();

    my $gitSHA = $revmap[$rev];
    unless ( defined($gitSHA) ) {
        print "not found in revmap, skipping...\n";
        $iter--;
        next;
    }
    if ( ref($gitSHA) eq 'HASH' ) {
        # TODO we won't get here because the revmap isn't a joint revmap,
        # just a single repo revmap with multiple sha's per svn rev
        # dereference multi repo commits to the right sha
        die "multi-repo commit $rev\n";
    }

    my $allFilesMatch = 1;
    my @skippedCreation;
    my @skippedOutOfScope;
    my @skippedBinary;

    foreach my $file ( keys(%changedFiles) ) {
        $file =~ s{^/}{};    # svn doesn't like leading /

        # skip directories and other non-file nodes
        unless ( $ra->check_path( $file, $rev ) == $SVN::Node::file ) {
            push( @skippedOutOfScope, "/$file" );
            next;
        }
        unless ( $changedFiles{"/$file"} eq 'M' ) {
            # TODO check git for "added file" and potentially diff files
            push( @skippedCreation, "/$file" );
            next;
        }
        unless ( $file =~ m{^.+?/\Q$project\E/} ) {
            # skip files outside desired path
            push( @skippedOutOfScope, "/$file" );
            next;
        }

        # need @rev because svn isn't consistent about when in time it extracts file information
        my $svnDiff = _capture_cmd( 'svn', 'diff', '--diff-cmd', 'diff', '-c', $rev,
            "$svnURL/$file\@$rev" );
        unless ($svnDiff) {
            # In very rare cases SVN will return an empty diff, just skip the file
            push( @skippedOutOfScope, "/$file" );
            next;
        }
        if ( $svnDiff =~ /Cannot display: file marked as a binary type/ ) {
            push( @skippedBinary, "/$file" );
            next;
        }
        $svnDiff =~ s{^Index:.+?=+$}{diff --git}ms;    # remove svn header
        $svnDiff =~ s{\s+\(revision \d+\)}{}msg;       # remove svn revision from files

        # get the git diff
        my $gitFile = $file;
        $gitFile =~ s{.+?/\Q$project\E/(.+)}{$1};      # just strip the trunk/$project part
        my $gitDiff = _capture_cmd( $git, 'diff', "$gitSHA^..$gitSHA", '--', $gitFile );
        $gitDiff = "diff --git\n" . $gitDiff;
        $gitDiff =~ s{^---.+?_([^/\s]+).*?$}{--- $1}ms;       # make src filename look like svn
        $gitDiff =~ s{^\+\+\+.+?_([^/\s]+).*?$}{+++ $1}ms;    # make dest filename look like svn

        # feed both diffs to the long-running git patch-id process
        $gitPatchID->write("$svnDiff\n");
        my $svnPatchID = $gitPatchID->read(0.5);    # small but non-zero timeout seems to work
        die "git patch-id didn't return svn patch ID for $file\@$rev" unless defined $svnPatchID;
        $svnPatchID =~ s/^([0-9a-fA-F]{40,40})\s+.+\s+/$1/;

        $gitPatchID->write("$gitDiff\n");
        my $gitID = $gitPatchID->read(0.5);         # own name: must not hide the pty object
        die "git patch-id didn't return git patch ID for $gitFile\@$gitSHA" unless defined $gitID;
        $gitID =~ s/^([0-9a-fA-F]{40,40})\s+.+\s+/$1/;

        unless ( $svnPatchID eq $gitID ) {
            print "\n\t$file: diffs don't match";
            $allFilesMatch = 0;
            # die;
        }
    }

    if ($allFilesMatch) {
        my $total = keys(%changedFiles);

        # every file that was not compared counts as skipped, binaries included
        my $numSkipped = @skippedCreation + @skippedOutOfScope + @skippedBinary;

        unless ( $numSkipped > 0.8 * $total ) {    # a reasonable percentage of the commit was checked
            print "ok";
            print " (skipped $numSkipped/$total)" if $numSkipped > 0;
        }
        else {
            print "skipped ";
            if ( $numSkipped == $total ) {
                print "all ";
            }
            else {
                print "most ";
            }
            print "files ($numSkipped/$total), trial doesn't count";
            $iter--;
        }
    }
    print "\n";
}

$gitPatchID->close();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use IO::File; | |
# The concept of this script is to move git refs based on deleted svn
# paths to a hidden namespace. In our history this comes from retagging,
# reusing branch names, or just flat out deleting directories from svn.
# | |
# Since svn merge information has not been captured by git, if these refs | |
# are deleted git gc would remove the objects they point to. We don't | |
# want that (though it is personal preference), so we move all these refs | |
# to refs/hidden/heads or refs/hidden/tags. These refs remain in the | |
# central repository (git gc will not remove any objects they point to), | |
# but are not cloned (though they can be explicitly fetched), so they're | |
# there when you need, but not when you don't. | |
# Usage: hideFromGit.pl svnBranches.txt | |
die "Must supply svn history file\n" unless @ARGV == 1;

# Load SVN branch history
my %svnBranches;

# Anonymous block to lexically hide serialized perl structure
{
    # Slurp the serialized hash structure named on the command line.
    my $filename = shift( @ARGV );
    my $svnHistoryFile = IO::File->new( $filename, '<' )
        or die "Unable to open svn history file '$filename': $!\n";

    # Localise $/ so slurp mode cannot leak out of this expression.
    my $svnHistory = do { local $/; <$svnHistoryFile> };
    $svnHistoryFile->close();

    # Eval into current scope and store in visible hash.
    # NOTE(review): string eval executes whatever the history file contains;
    # only feed this script files you generated yourself.
    my $tmpRef = eval "my $svnHistory";
    die "Loading svn history failed: $@\n" unless defined( $tmpRef );
    %svnBranches = %$tmpRef;
}
# Location of the fused repository whose refs are being hidden.
my $gitRepoName = "repo-produced-by-repoFusion.pl";
my $cleanDir = "$gitRepoName-git-repos-small";
my $finalRepo = "$cleanDir/$gitRepoName.git";

# read packed-refs if it exists
my $packedRefsFile = IO::File->new( "$finalRepo/packed-refs", '<' );
my $packedRefs = '';

# Test the handle, not $packedRefs (which is always defined at this point):
# the open returns undef when the repository has no packed-refs file.
if( defined( $packedRefsFile ) )
{
    # slurp entire file at once, without disturbing $/ for anyone else
    $packedRefs = do { local $/; <$packedRefsFile> };
    $packedRefs = '' unless defined( $packedRefs );
    $packedRefsFile->close();

    # just to be safe; list-form system keeps odd characters in the path
    # away from the shell, and a failed backup is reported rather than ignored
    system( 'cp', "$finalRepo/packed-refs", "$finalRepo/backup-packed-refs" ) == 0
        or print STDERR "Warning: unable to back up $finalRepo/packed-refs\n";
}
# Move every ref that corresponds to a deleted svn path into refs/hidden/.
# Loose refs are renamed on disk; packed refs are rewritten in $packedRefs.
require File::Basename;
require File::Path;

foreach my $path ( keys( %svnBranches ) )
{
    foreach my $branch ( keys( %{ $svnBranches{$path} } ) )
    {
        next unless $svnBranches{$path}{$branch}{'deleted'};

        # branches are in refs/heads/branchName, tags are refs/tags/tagName
        my $branchName = $branch;
        unless( $branchName =~ m{^tags/} )
        {
            $branchName = "heads/$branch";
        }

        my $looseRef  = "$finalRepo/refs/$branchName";
        my $hiddenRef = "$finalRepo/refs/hidden/$branchName";

        if( -e $looseRef )
        {
            if( -e $hiddenRef )
            {
                print STDERR "Unable to hide $branchName, another ref already exists\n";
                next;
            }

            # rename() cannot create missing parent directories, so make
            # sure refs/hidden/heads (or .../tags, or deeper) exists first.
            # Failure here is tolerated; the rename below will report it.
            eval { File::Path::make_path( File::Basename::dirname( $hiddenRef ) ); };

            rename( $looseRef, $hiddenRef )
                or print STDERR "Unable to hide $branchName: $!\n";
        }
        elsif( $packedRefs =~ s{(?<= )refs/\Q$branchName\E$}{refs/hidden/$branchName}m )
        {
            # only get here if substitution succeeded, so nothing more to do
            # (\Q...\E keeps '.', '+' etc. in branch names literal; the
            # lookbehind pins the match to the ref column of a packed-refs line)
        }
        else
        {
            print STDERR "Unable to hide $branchName (probably doesn't exist in git repo?)\n";
        }
    }
}
# Write the (possibly rewritten) packed-refs back, but only if there was
# something to write in the first place.
if( $packedRefs )
{
    my $packedRefsPath = "$finalRepo/packed-refs";
    my $packedRefsFile = IO::File->new( $packedRefsPath, '>' );
    die "Unable to open $packedRefsPath for writing: $!\n" unless defined( $packedRefsFile );

    # Buffered write errors may only surface at close, so check both steps.
    print {$packedRefsFile} $packedRefs
        or die "Unable to write $packedRefsPath: $!\n";
    $packedRefsFile->close()
        or die "Unable to finish writing $packedRefsPath: $!\n";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Wrapper for git to use the system diff
#
# git calls an external diff program with seven arguments:
#   path old-file old-hex old-mode new-file new-hex new-mode
# so $2 is the old version of the file and $5 is the new one.
LEFT="$2"
RIGHT="$5"

# Quote the paths so names containing spaces or glob characters survive,
# and use -- so a name starting with '-' is not taken as an option.
diff -u -- "$LEFT" "$RIGHT"

# diff exits 1 when the files differ; always report success so git does not
# treat an ordinary difference as a failed external diff.
exit 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use Cwd; | |
use IO::File; | |
use Data::Dumper; | |
# Make CTRL-C a little friendlier.
# Because of how this script is structured, cancelling still takes a couple
# of CTRL-C presses in a row (first to kill the children, then this script).
sub _exit_on_sigint
{
    # Experimental data implies Perl forwards signals to child processes
    # during system() calls, so this process should only catch SIGINT when no
    # children are running; exiting without reaping them is therefore
    # believed (not proven) to be safe.
    print "received SIGINT, exiting...\n";
    exit 1;
}
$SIG{INT} = \&_exit_on_sigint;
# Build the one-line usage message (newline-terminated) for this script.
sub usage
{
    my $message = "Usage: $0 svnBranchFile jointRevmapFile\n";
    return $message;
}
# Exactly two arguments are required: the svn branch file and the revmap file.
@ARGV == 2 or die usage();
# Run an external command without a shell and report any failure on STDOUT.
#
# Arguments: the program to run followed by its arguments.
# Returns:   true when the command ran and exited with status 0,
#            false otherwise (existing callers may keep ignoring the result).
sub do_cmd
{
    my @args = @_;

    unless( @args )
    {
        print "$0: do_cmd called without a command\n";
        return 0;
    }

    # For debugging purposes uncomment the next line to print out the shell
    # command that will be run:
    # unshift( @args, 'echo' );

    # Use the indirect-object (list) form of system to avoid invoking a shell
    # (see perldoc -f exec)
    system { $args[0] } @args;
    my $status = $?;

    # Check output of system. $0 and the program name are passed as printf
    # arguments so a stray '%' in either cannot corrupt the format string.
    if( $status == -1 )
    {
        # $! is only meaningful in this branch (the exec itself failed)
        print "$0: failed to execute $args[0]: $!\n";
        return 0;
    }
    elsif( $status & 127 )
    {
        printf( "%s: %s died with signal %d, %s coredump\n",
                $0, $args[0], ($status & 127), ($status & 128) ? 'with' : 'without' );
        return 0;
    }
    elsif( $status >> 8 )
    {
        # No "child error message" here: $! does not describe why a child
        # exited nonzero, so printing it would only mislead.
        printf( "%s: %s exited with nonzero value %d\n", $0, $args[0], $status >> 8 );
        return 0;
    }

    return 1;
}
# Load SVN branch history
my %svnBranches;

# Anonymous block to lexically hide serialized perl structure
{
    # Slurp the serialized hash structure named by the first argument.
    my $filename = shift( @ARGV );
    my $svnHistoryFile = IO::File->new( $filename, '<' )
        or die "Unable to open svn history file '$filename': $!\n";

    # Localise $/ so slurp mode cannot leak out of this expression.
    my $svnHistory = do { local $/; <$svnHistoryFile> };
    $svnHistoryFile->close();

    # Eval into current scope and store in visible hash.
    # NOTE(review): string eval executes whatever the history file contains;
    # only feed this script files you generated yourself.
    my $tmpRef = eval "my $svnHistory";
    die "Loading svn history failed: $@\n" unless defined( $tmpRef );
    %svnBranches = %$tmpRef;
}
# Load revmap
# $revmap[$rev] is either a SHA string (the revision touched a single git
# repo) or a hashref of { path => sha } (a "multi-repo" revision).
my @revmap;

# Anonymous block to lexically hide loading stuff
{
    my $filename = shift( @ARGV );
    my $revmapFile = IO::File->new( $filename, '<' )
        or die "Unable to open revmap file '$filename': $!\n";

    while( my $line = <$revmapFile> )
    {
        if( $line =~ m#^(\d+)\smulti-repo# )
        {
            my $rev = $1;
            my %repos;

            # Collect every "path sha" pair found on the line.
            while( $line =~ m#([\w.\-/]+?)\s([0-9a-fA-F]{40})#g )
            {
                # path => sha
                $repos{$1} = $2;
            }
            $revmap[$rev] = \%repos; # later this will trip some custom logic
        }
        elsif( $line =~ m#^(\d+).+?([0-9a-fA-F]{40})# )
        {
            my $rev = $1;
            my $sha = $2;
            $revmap[$rev] = $sha;
        }
    }
    $revmapFile->close();
}
# Scratch directory handed to git filter-branch via -d.
# MacOS Ram Disk: diskutil erasevolume HFS+ "ramdisk" `hdiutil attach -nomount ram://1165430`
# Linux: use tmpfs
# Windows: ???
my $tempdir = "/Volumes/ramdisk/git";

# Directory the script was started from; the repo paths below are relative to it.
my $startDir = Cwd::cwd();

# Really should take output from filterBranch.pl to determine where all the
# repos are, but it's a lot easier to just keep the variables the same and hope
# for the best.
my $svnRepoName = "svnRepoName-used-in-filterBranch";
my $workDir = "$svnRepoName-git-repos";
my $cleanDir = "$svnRepoName-git-repos-small";
# Similar to filterBranch.pl: No, this script doesn't use Git.pm. I ran into
# too many problems with it (I don't remember the specifics), so it ended up
# being easier to shell out to git instead...

# Start with the trunk
my $trunkRepo = "$cleanDir/trunk.git";
my $fusionRepo = "$workDir/$svnRepoName.git";
do_cmd( qw( git clone --bare ), $trunkRepo, $fusionRepo );

# Everything below uses paths relative to the fusion repo, so stop right here
# if we cannot get into it (e.g. because the clone failed).
chdir( $fusionRepo ) or die "Unable to chdir to $fusionRepo: $!\n";

unless( -e "info" )
{
    mkdir( "info" ) or die "Unable to create $fusionRepo/info: $!\n";
}

# git tries to read the grafts during the fetches, so keep them out of the way
my $graftFile = IO::File->new( "info/working_grafts", '>' );
die "Unable to open $fusionRepo/info/working_grafts for writing: $!\n" unless defined( $graftFile );
# Pull branches/tags from small repos
# For every svn path other than /trunk: fetch the matching refs from that
# path's small git repo into the fusion repo (the current directory) and
# append one "child parent" graft line per fetched branch to $graftFile.
my $totalPaths = keys( %svnBranches );
my $curPathNum = 2; # already did 1 ;)
my $graftFixupReqd = 0;
foreach my $path ( keys( %svnBranches ) )
{
    next if $path eq '/trunk';

    unless( $curPathNum % 150 )
    {
        # my system eventually starts complaining about "too many open files"
        # so run git gc occasionally to clean up everything
        # (150 is pretty arbitrary)
        print "Running git gc...\n";
        do_cmd( qw( git gc ) );
        print "\n";
    }

    my $repoName = $path;
    $repoName =~ s{^/}{}; # remove leading slash

    # The small repos live next to trunk.git under $cleanDir. We have already
    # chdir'ed into the fusion repo, so the path must be absolute.
    # NOTE(review): location inferred from how trunk.git and the revmap keys
    # ("$repoName.git") are named - confirm against filterBranch.pl's output.
    my $branchRepo = "$startDir/$cleanDir/$repoName.git";

    unless( -e $branchRepo )
    {
        print "Skipping non-existant repo $repoName ($curPathNum/$totalPaths)\n\n";
        $curPathNum++;
        next;
    }

    print "Fetching from repo $repoName ($curPathNum/$totalPaths)\n";
    $curPathNum++;

    # List the refs the small repo actually has. The list form of pipe open
    # bypasses the shell, so spaces etc. in the path are harmless.
    my $remoteRefs = '';
    if( open( my $lsRemote, '-|', 'git', 'ls-remote', $branchRepo ) )
    {
        local $/; # slurp
        my $output = <$lsRemote>;
        $remoteRefs = $output if defined( $output );
        close( $lsRemote );
    }
    else
    {
        print "\tUnable to run git ls-remote on $branchRepo: $!\n";
    }

    my @refs;
    my $grafts;
    foreach my $branch ( keys( %{ $svnBranches{$path} } ) )
    {
        my $branchName = $branch;

        # branches are in refs/heads/branchName, tags are refs/tags/tagName
        unless( $branchName =~ m{^tags/} )
        {
            $branchName = "heads/$branch";
        }

        # Skip branch if remote repo doesn't have a matching ref
        # (\Q...\E keeps '.', '+' etc. in branch names literal)
        next unless $remoteRefs =~ m{refs/\Q$branchName\E$}m;

        # Add branch to list of refs to fetch from remote
        push( @refs, "refs/$branchName:refs/$branchName" );

        # Create grafts
        # Grab revs of interest
        my $childRev = $svnBranches{$path}{$branch}{'branchrev'};
        my $parentRev = $svnBranches{$path}{$branch}{'parent'}{'rev'};

        my $childSHA = $revmap[$childRev];
        if( ref( $childSHA ) eq 'HASH' )
        {
            # The child comes from an svn rev that touches multiple git repos
            # The revmap should have enough information in it to resolve the
            # ambiguity, so just do it.
            $childSHA = $childSHA->{"$repoName.git"};
        }
        if( !defined( $childSHA ) )
        {
            # Either the rev is missing from the revmap or a multi-repo entry
            # had nothing for this repo; leave a marker for manual fixing.
            print "\tUndefined child rev r$childRev in revmap\n";
            $graftFixupReqd = 1;
            $childSHA = "* r$childRev @ $path ($svnBranches{$path}{$branch}{'parent'}{'name'})";
        }

        my $parentSHA = $revmap[$parentRev];
        if( ref( $parentSHA ) eq 'HASH' )
        {
            # The parent comes from an svn rev that touches multiple git repos
            # The revmap should have enough information in it to resolve the
            # ambiguity, so just do it.
            my $parentRepoName = $svnBranches{$path}{$branch}{'parent'}{'path'};
            $parentRepoName =~ s{^/}{}; # remove leading slash
            $parentSHA = $parentSHA->{"$parentRepoName.git"};
        }
        if( !defined( $parentSHA ) )
        {
            print "\tUndefined parent rev r$parentRev in revmap\n";
            $graftFixupReqd = 1;
            $parentSHA = "* r$parentRev @ $svnBranches{$path}{$branch}{'parent'}{'path'} ($branch)";
        }

        $grafts .= "$childSHA $parentSHA\n";
    }

    do_cmd( qw( git fetch ), $branchRepo, @refs );

    if( defined( $grafts ) )
    {
        print "Adding grafts...\n";
        print $graftFile $grafts;
    }
    print "\n"; # blank line before starting next branch
}
# Flush the collected grafts to disk; buffered write errors surface at close.
$graftFile->close()
    or die "Unable to finish writing $fusionRepo/info/working_grafts: $!\n";

if( $graftFixupReqd )
{
    # pause here and let the user fix the manual grafts
    print "Manual graft fixing is required. Please edit $fusionRepo/info/working_grafts before continuing\n";
    print "Press Enter to continue...";
    <STDIN>; # read the keypress from the terminal explicitly
    print "\n";
}

# Move the grafts to where git looks for them. If this fails, filter-branch
# would run without any grafts, so stop instead of rewriting history wrongly.
rename( "info/working_grafts", "info/grafts" )
    or die "Unable to rename info/working_grafts to info/grafts: $!\n";
# Make the grafts permanent with filter-branch.
# "--tag-name-filter cat" is needed so tags can be rewritten to point at the
# new objects; -d sends filter-branch's scratch work to $tempdir.
print "Committing grafts to permant history...\n";
my @filterBranchCmd = ( 'git', 'filter-branch', '--tag-name-filter', 'cat',
                        '-d', $tempdir, '--', '--all' );
do_cmd( @filterBranchCmd );

# work around a bug in clone that breaks tags if refs/original exists
do_cmd( 'rm', '-rf', 'refs/original' );
print "\n";

# Back to where the script was started.
chdir( $startDir );

# Clone through a file:/// URL to remove any cruft: clone treats a file URL
# as a URL and thus doesn't make hard links.
my $fusionURL = "file:///$startDir/$fusionRepo";
my $cleanClone = "$startDir/$cleanDir/$svnRepoName.git";
do_cmd( 'git', 'clone', '--bare', $fusionURL, $cleanClone );
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment