Skip to content

Instantly share code, notes, and snippets.

@bos
Created March 14, 2012 03:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bos/2033956 to your computer and use it in GitHub Desktop.
Save bos/2033956 to your computer and use it in GitHub Desktop.
5x to 8x speedup in "hg convert"
# HG changeset patch
# User Bryan O'Sullivan <bos@serpentine.com>
# Date 1331697209 25200
# Branch stable
# Node ID 9c15f20c0418fbad1da202f72dc894372538beba
# Parent 6344043924497cd06d781d9014c66802285072e4
imported patch libgit2.patch
diff -r 634404392449 -r 9c15f20c0418 hgext/convert/git.py
--- a/hgext/convert/git.py Sun Jan 01 13:37:30 2012 -0600
+++ b/hgext/convert/git.py Tue Mar 13 20:53:29 2012 -0700
@@ -12,7 +12,22 @@
from common import NoRepo, commit, converter_source, checktool
-class convert_git(converter_source):
+class convert_git_base(converter_source):
+ def __init__(self, ui, path, rev):
+ """Check that path looks like a Git repository and remember it.
+
+ Raises NoRepo when no "objects" entry exists under the resolved path.
+ """
+ super(convert_git_base, self).__init__(ui, path, rev=rev)
+
+ # a checkout keeps its repository data in a ".git" subdirectory;
+ # otherwise path itself is treated as the git directory
+ if os.path.isdir(path + "/.git"):
+ path += "/.git"
+ if not os.path.exists(path + "/objects"):
+ raise NoRepo(_("%s does not look like a Git repository") % path)
+
+ # NOTE(review): presumably verifies that the git executable is
+ # available, since diffs are still produced by shelling out - confirm
+ checktool('git', 'git')
+ self.path = path
+
+ # pygit2 0.16 has no support for diffing, so we have to shell out
+ # to git for diffs. the methods below would otherwise belong in
+ # convert_git_plain.
+
# Windows does not support GIT_DIR= construct while other systems
# cannot remove environment variable. Just assume none have
# both issues.
@@ -39,47 +54,6 @@
else:
return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb')
- def gitread(self, s):
- fh = self.gitopen(s)
- data = fh.read()
- return data, fh.close()
-
- def __init__(self, ui, path, rev=None):
- super(convert_git, self).__init__(ui, path, rev=rev)
-
- if os.path.isdir(path + "/.git"):
- path += "/.git"
- if not os.path.exists(path + "/objects"):
- raise NoRepo(_("%s does not look like a Git repository") % path)
-
- checktool('git', 'git')
-
- self.path = path
-
- def getheads(self):
- if not self.rev:
- heads, ret = self.gitread('git rev-parse --branches --remotes')
- heads = heads.splitlines()
- else:
- heads, ret = self.gitread("git rev-parse --verify %s" % self.rev)
- heads = [heads[:-1]]
- if ret:
- raise util.Abort(_('cannot retrieve git heads'))
- return heads
-
- def catfile(self, rev, type):
- if rev == hex(nullid):
- raise IOError()
- data, ret = self.gitread("git cat-file %s %s" % (type, rev))
- if ret:
- raise util.Abort(_('cannot read %r object at %s') % (type, rev))
- return data
-
- def getfile(self, name, rev):
- data = self.catfile(rev, "blob")
- mode = self.modecache[(name, rev)]
- return data, mode
-
def getchanges(self, version):
self.modecache = {}
fh = self.gitopen("git diff-tree -z --root -m -r %s" % version)
@@ -106,6 +80,105 @@
raise util.Abort(_('cannot read changes in %s') % version)
return (changes, {})
+ def getchangedfiles(self, version, i):
+ """Return the list of file names changed in commit version.
+
+ With i None the names are parsed from "git diff-tree -m" output for
+ the commit; otherwise the commit is compared against its parent
+ number i + 1 (the "version^N" syntax) using --name-only.
+
+ Raises util.Abort when closing the git pipe reports a failure.
+ """
+ changes = []
+ if i is None:
+ fh = self.gitopen("git diff-tree --root -m -r %s" % version)
+ for l in fh:
+ # only lines containing a tab carry a file name
+ if "\t" not in l:
+ continue
+ # l[:-1] drops the trailing newline; the name follows the tab
+ m, f = l[:-1].split("\t")
+ changes.append(f)
+ else:
+ fh = self.gitopen('git diff-tree --name-only --root -r %s "%s^%s" --'
+ % (version, version, i + 1))
+ changes = [f.rstrip('\n') for f in fh]
+ # a true value from close() is treated as command failure
+ if fh.close():
+ raise util.Abort(_('cannot read changes in %s') % version)
+
+ return changes
+
+def hexoid(obj):
+ """Return obj.oid hex-encoded, as a byte string."""
+ # pygit2's "hex" property is unicode, but "oid" is str
+ return obj.oid.encode('hex')
+
+class convert_git_pygit2(convert_git_base):
+    """Git conversion source that reads objects in-process through pygit2.
+
+    Heads, commits, file contents and tags are looked up on a pygit2
+    repository object; the diff-based methods come from convert_git_base.
+    """
+
+    def __init__(self, ui, path, rev):
+        super(convert_git_pygit2, self).__init__(ui, path, rev=rev)
+        # imported here so that a missing pygit2 raises ImportError from
+        # the constructor, which convert_git() catches to fall back
+        import pygit2
+        # open the existing repository at the directory the base class
+        # resolved (self.path may have had "/.git" appended).
+        # init_repository() would instead initialise a new bare repository
+        # at the caller's original path.
+        self.repo = pygit2.Repository(self.path)
+
+    def getheads(self):
+        """Return the hex ids of the heads to convert.
+
+        Without self.rev, every reference under refs/heads/ or
+        refs/remotes/ is resolved. With self.rev, it is first tried as a
+        reference name and, on KeyError, looked up directly in the
+        repository.
+        """
+        if not self.rev:
+            return [hexoid(self.repo.lookup_reference(r).resolve())
+                    for r in self.repo.listall_references()
+                    if (r.startswith('refs/heads/') or
+                        r.startswith('refs/remotes/'))]
+        else:
+            try:
+                ref = self.repo.lookup_reference(self.rev)
+                return [hexoid(ref.resolve())]
+            except KeyError:
+                return [hexoid(self.repo[self.rev])]
+
+    def getcommit(self, rev):
+        """Return a commit record built from the object with hex id rev."""
+        def prettyname(sig):
+            # "Name <email>" when a name is present, else the bare email
+            if sig.name:
+                return self.recode('%s <%s>' % (sig.name, sig.email))
+            return self.recode(sig.email)
+
+        c = self.repo[rev.decode('hex')]
+        message = c.message
+        author = prettyname(c.author)
+        committer = prettyname(c.committer)
+        # keep the committer visible when it differs from the author
+        if author != committer:
+            message += '\ncommitter: %s\n' % committer
+        return commit(parents=[hexoid(p) for p in c.parents],
+                      date='%s %s' % (c.commit_time, c.commit_time_offset),
+                      author=author, desc=self.recode(message), rev=rev)
+
+    # evaluated once in the class body rather than on every getfile() call
+    hex_nullid = hex(nullid)
+
+    def getfile(self, name, rev):
+        """Return (data, mode) for the object with hex id rev.
+
+        Raises IOError when rev is the null id. The mode is read from
+        self.modecache, keyed by (name, rev).
+        """
+        if rev == self.hex_nullid:
+            raise IOError
+        return self.repo[rev.decode('hex')].data, self.modecache[(name, rev)]
+
+    def gettags(self):
+        """Return a dict mapping tag name to the resolved hex id."""
+        # split('/', 2)[-1] strips the leading "refs/tags/" components
+        return dict((r.split('/', 2)[-1],
+                     hexoid(self.repo.lookup_reference(r).resolve()))
+                    for r in self.repo.listall_references()
+                    if r.startswith('refs/tags/'))
+
+
+class convert_git_plain(convert_git_base):
+ def gitread(self, s):
+ """Run command s via gitopen(); return (output, result of close())."""
+ fh = self.gitopen(s)
+ data = fh.read()
+ return data, fh.close()
+
+ def getheads(self):
+ """Return the list of head ids reported by "git rev-parse".
+
+ Without self.rev all branches and remotes are listed; otherwise
+ self.rev alone is verified. Raises util.Abort when the command's
+ close status is true.
+ """
+ if not self.rev:
+ heads, ret = self.gitread('git rev-parse --branches --remotes')
+ heads = heads.splitlines()
+ else:
+ heads, ret = self.gitread("git rev-parse --verify %s" % self.rev)
+ # drop the last character of the output (the trailing newline)
+ heads = [heads[:-1]]
+ if ret:
+ raise util.Abort(_('cannot retrieve git heads'))
+ return heads
+
+ def catfile(self, rev, type):
+ """Return the output of "git cat-file <type> <rev>".
+
+ Raises IOError when rev is the null id, and util.Abort when the
+ command's close status is true.
+ """
+ if rev == hex(nullid):
+ raise IOError()
+ data, ret = self.gitread("git cat-file %s %s" % (type, rev))
+ if ret:
+ raise util.Abort(_('cannot read %r object at %s') % (type, rev))
+ return data
+
+ def getfile(self, name, rev):
+ """Return (data, mode) for blob rev; mode is looked up by (name, rev)."""
+ data = self.catfile(rev, "blob")
+ # NOTE(review): modecache is reset in getchanges(); presumably it is
+ # also filled there, so getchanges() must have run first - confirm
+ mode = self.modecache[(name, rev)]
+ return data, mode
+
def getcommit(self, version):
c = self.catfile(version, "commit") # read the commit hash
end = c.find("\n\n")
@@ -159,24 +232,6 @@
return tags
- def getchangedfiles(self, version, i):
- changes = []
- if i is None:
- fh = self.gitopen("git diff-tree --root -m -r %s" % version)
- for l in fh:
- if "\t" not in l:
- continue
- m, f = l[:-1].split("\t")
- changes.append(f)
- else:
- fh = self.gitopen('git diff-tree --name-only --root -r %s "%s^%s" --'
- % (version, version, i + 1))
- changes = [f.rstrip('\n') for f in fh]
- if fh.close():
- raise util.Abort(_('cannot read changes in %s') % version)
-
- return changes
-
def getbookmarks(self):
bookmarks = {}
@@ -203,3 +258,9 @@
pass
return bookmarks
+
+def convert_git(ui, path, rev=None):
+ """Return a Git conversion source, preferring the pygit2-backed one.
+
+ Takes over the name of the former convert_git class. When constructing
+ convert_git_pygit2 raises ImportError (it imports pygit2 in its
+ __init__), the git-subprocess implementation is returned instead.
+ """
+ try:
+ return convert_git_pygit2(ui, path, rev)
+ except ImportError:
+ return convert_git_plain(ui, path, rev)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment