Skip to content

Instantly share code, notes, and snippets.

@5263
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 5263/7e7b74c43b4bb500eff1 to your computer and use it in GitHub Desktop.
Save 5263/7e7b74c43b4bb500eff1 to your computer and use it in GitHub Desktop.
blobhash recursive
#!/usr/bin/env python
def blobhash(str1):
    """Return the hex SHA-1 that identifies *str1* as a git blob.

    The digest is computed over the header "blob <length>" followed by a
    NUL byte and then the content itself.

    str1: the blob content. Byte strings are hashed as given; a text
        (unicode) string is encoded as UTF-8 first so that the length in
        the header counts bytes, not characters.

    Returns the 40 character hexadecimal digest.
    """
    import hashlib
    if not isinstance(str1, (bytes, bytearray)):
        str1 = str1.encode('utf-8')
    # hashlib only accepts bytes on Python 3, so the header is encoded too
    # (a no-op for the ASCII header on Python 2).
    header = ("blob %d\0" % len(str1)).encode('ascii')
    digest = hashlib.sha1(header)
    digest.update(str1)
    return digest.hexdigest()
def walk(dir1,extension=".py"):
    """Print "<blob hash> <path>" for every matching file below *dir1*.

    dir1: directory that is traversed recursively with os.walk.
    extension: file name suffix to select; compared case-insensitively.

    Paths are printed with backslashes replaced by forward slashes.
    Returns None.
    """
    import os
    # The file name is lower-cased before the comparison, so the suffix has
    # to be lower-cased as well or an upper-case argument could never match.
    suffix = extension.lower()
    for root, _dirs, files in os.walk(dir1):
        for name in files:
            if not name.lower().endswith(suffix):
                continue
            filepath = os.path.join(root, name)
            # Close the file deterministically instead of leaking the handle.
            with open(filepath, 'rb') as handle:
                h1 = blobhash(handle.read())
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s %s" % (h1, filepath.replace('\\', '/')))
def findblobs(hashes,ref='HEAD',stopat=None):
    """Find, for several blobs at once, the commits between which they exist.

    Runs "git log <ref>" (or "git reflog" when *ref* is None) and lists the
    tree of every commit it prints with "git ls-tree -r". Each object hash
    in a tree is compared, via startswith, with every entry of *hashes*.

    hashes: iterable of blob hashes or hash prefixes.
    ref: revision handed to "git log"; None selects "git reflog" instead.
    stopat: controls when the scan ends early.
        None  - stop once every hash has been seen and the current commit
                contains none of them.
        True  - additionally stop at the first commit that contains every
                hash.
        other - treated as a commit hash prefix; stop at the first commit
                whose hash starts with it.

    Returns a tuple (result, commit). result maps each hash to
    (first, last): "last" is the first commit processed that contained the
    blob and "first" the last one processed that contained it (git log
    lists the newest commits first by default, so "first" is the oldest).
    Both are None when the blob was never seen. commit is the hash of the
    last commit line that was parsed, or None if there was none.
    """
    import os
    # Materialise once: the collection is iterated many times below.
    hashes = tuple(hashes)
    firsts = {}
    lasts = {}
    if ref is not None:
        logcmd = "git log %s --pretty=format:\"%%T %%H %%s\"" % ref
    else:
        logcmd = "git reflog --pretty=format:\"%T %H %s\""
    mbfh = os.popen(logcmd)
    try:
        loglines = mbfh.read().split('\n')
    finally:
        mbfh.close()
    commit = None
    for cline in loglines:
        if not cline.strip():
            continue
        try:
            tree, commit, _subject = cline.split(' ', 2)
        except ValueError:
            # Not a "<tree> <commit> <subject>" line: report it and move on
            # rather than evaluating the stop conditions with a stale (or
            # still missing) commit.
            print(cline)
            continue
        try:
            lstr = os.popen("git ls-tree -r %s" % tree)
            try:
                treelines = lstr.read().split('\n')
            finally:
                # Actually close the pipe; a bare "lstr.close" does nothing.
                lstr.close()
            for bline in treelines:
                if not bline.strip():
                    continue
                odata, _opath = bline.split('\t', 1)
                _oflags, _otype, ohash = odata.split(' ', 2)
                for h1 in hashes:
                    if ohash.startswith(h1):
                        firsts[h1] = commit
                        lasts.setdefault(h1, commit)
        except ValueError:
            # Unparseable ls-tree output: report the commit line and keep
            # whatever was collected for this tree so far.
            print(cline)
        seen_all = all(lasts.get(h1) is not None for h1 in hashes)
        if seen_all and all(firsts.get(h1) != commit for h1 in hashes):
            # Every blob was found earlier and none is in this commit.
            break
        if stopat is True and seen_all and \
                all(firsts.get(h1) == commit for h1 in hashes):
            # This commit contains all requested blobs.
            break
        if stopat is not None and stopat is not True and \
                commit.startswith(stopat):
            break
    return (dict((h1, (firsts.get(h1), lasts.get(h1))) for h1 in hashes), commit)
def findblob(h1,ref='HEAD'):
    """Search the commits listed by "git log <ref>" for a single blob.

    Every commit's tree is listed with "git ls-tree -r" and each object
    hash is compared with *h1* via startswith. Each match is printed. The
    search stops at the first commit that no longer contains the blob after
    it has been seen, so separate later occurrences are not reported.

    h1: blob hash or hash prefix.
    ref: revision handed to "git log".

    Returns (first, last): the last and the first processed commit (as
    abbreviated hashes) that contained the blob. With git log's default
    newest-first order "first" is the commit that introduced the blob.
    Returns None when the blob was not found at all.
    Raises ValueError for a non-blank line that cannot be parsed.
    """
    import os
    mbfh = os.popen("git log %s --pretty=format:\"%%T %%h %%s\"" % ref)
    try:
        loglines = mbfh.read().split('\n')
    finally:
        mbfh.close()
    first = None
    last = None
    lastcommit = lastsubject = lastbline = None
    for cline in loglines:
        # Empty output (bad ref, no repository) yields one blank "line";
        # skip it instead of failing to unpack it.
        if not cline.strip():
            continue
        tree, commit, subject = cline.split(' ', 2)
        lstr = os.popen("git ls-tree -r %s" % tree)
        try:
            treelines = lstr.read().split('\n')
        finally:
            lstr.close()
        for bline in treelines:
            if not bline.strip():
                continue
            odata, _opath = bline.split('\t', 1)
            _oflags, _otype, ohash = odata.split(' ', 2)
            if ohash.startswith(h1):
                first = commit
                if last is None:
                    last = commit
                print("%s %s %s" % (commit, subject, bline))
                lastcommit, lastsubject, lastbline = commit, subject, bline
        if last is not None and first != commit:
            # The blob was seen before but is absent from this commit.
            break
    if last is None:
        return None
    # Also reached when the blob is present down to the last listed commit,
    # a case that previously fell off the end and returned None.
    print('first appeard in ')
    print("%s %s %s" % (lastcommit, lastsubject, lastbline))
    return (first, last)
def refcount(ref):
    """Return the number printed by "git rev-list --count <ref>".

    The command is run with --no-replace-objects.

    ref: revision to count from. None is answered with None directly
        instead of asking git about a revision literally named "None".

    Returns the count as an int, or None when the output is not a number
    (for example because git reported an error).
    """
    import os
    if ref is None:
        return None
    mbfh = os.popen("git --no-replace-objects rev-list --count %s" % ref)
    try:
        output = mbfh.read()
    finally:
        # The pipe was previously left open.
        mbfh.close()
    try:
        return int(output.strip())
    except ValueError:
        return None
def refparseverify(ref):
    """Resolve *ref* through "git rev-parse --verify".

    Returns the stripped command output when closing the pipe reports no
    exit status (close() returns None), and None otherwise.
    """
    import os
    pipe = os.popen("git rev-parse --verify %s" % ref)
    resolved = pipe.read().strip()
    status = pipe.close()
    if status is not None:
        return None
    return resolved
def gitfiletype(ref):
    """Ask "git cat-file -t" for the type of the object named by *ref*.

    Returns the stripped output of the command if the pipe closes without
    an exit status; in every other case the result is None.
    """
    import os
    pipe = os.popen("git cat-file -t %s" % ref)
    objtype = pipe.read().strip()
    return objtype if pipe.close() is None else None
def searchblob(str1,ref='HEAD',stopat=None):
    """Report for a list of blobs where in the history of *ref* they occur.

    str1: text with one "<hash> <name>" entry per line; blank lines are
        ignored. An entry without a name is listed with the name "?".
        Entries whose hash git does not report as a blob are printed as
        "not in repo" and skipped. A hash listed several times is searched
        once and its names are joined with commas.
    ref: revision to search from; resolved with refparseverify().
    stopat: passed through to findblobs().

    For every remaining hash one line is printed with the first eight hash
    characters, refcount() of the first and last commit found and the
    name(s), followed by a summary line. Returns None.

    Raises ValueError('invalid ref') if *ref* cannot be resolved.
    """
    hashes = []
    names = {}
    for line in str1.split('\n'):
        line = line.strip()
        if not line:
            continue
        # partition() tolerates an entry that consists of a hash only.
        h1, _sep, name = line.partition(' ')
        name = name or '?'
        if gitfiletype(h1) != 'blob':
            print('%s %s not in repo' % (h1, name))
            continue
        if h1 in names:
            # Same blob under another name: extend the label, but do not
            # queue the hash again or it would be reported twice.
            names[h1] += ',' + name
        else:
            hashes.append(h1)
            names[h1] = name
    ref = refparseverify(ref)
    if ref is None:
        raise ValueError('invalid ref')
    d1, stoppedat = findblobs(hashes, ref=ref, stopat=stopat)
    for h1 in hashes:
        first, last = d1.get(h1, (None, None))
        print('%s %s..%s %s' % (h1[:8],
              refcount(first), refcount(last), names.get(h1, '?')))
    print('searched %s..%s starting with %s' %
          (refcount(stoppedat), refcount(ref), ref))
    # NOTE: blobs that were not found could additionally be looked up with
    # findblobs(..., ref='--all') or in the reflog (ref=None); an earlier
    # draft of that idea was removed here as commented-out code.
def findbaseforpath(patch):
    """Print the commit at which the scan for a patch's pre-image blobs ends.

    The "index <old>..<new> [mode]" header lines of *patch* are collected,
    e.g. "index fec529f..4b20705 100644". Every <old> hash that is not all
    zeros is resolved with refparseverify() and the resulting blobs are
    handed to findblobs() with ref='HEAD' and stopat=True.

    patch: text of a diff.

    Prints the set of resolved hashes, the set of hashes as written in the
    patch and the commit returned by findblobs(). Returns None.

    Raises ValueError if one of the <old> blobs cannot be resolved.
    """
    import re
    # Match real index headers only. Lines are stripped before matching, so
    # without the hex check a context line that merely begins with "index "
    # would be mistaken for a header. The hash length is not fixed because
    # abbreviations can be longer than seven characters.
    index_re = re.compile(r'index ([0-9a-f]+)\.\.[0-9a-f]+(?: |$)')
    shorthashes = set()
    fullhashes = set()
    for line in patch.split('\n'):
        match = index_re.match(line.strip())
        if match is None:
            continue
        shorthash = match.group(1)
        if not shorthash.strip('0'):
            # An all-zero hash means there is no pre-image to look for.
            continue
        shorthashes.add(shorthash)
        fullhash = refparseverify(shorthash)
        if fullhash is None:
            raise ValueError('Blob %s not present' % shorthash)
        fullhashes.add(fullhash)
    print(fullhashes)
    print(shorthashes)
    _results, lastcommit = findblobs(tuple(fullhashes), ref='HEAD',
                                     stopat=True)
    print(lastcommit)
# Sample input in the "<hash> <name>" per-line format that searchblob()
# parses; only referenced from the commented-out call in the main section.
testdata="""cee38a2c1830bfde8b8d4dd46aeb41a64a71303f Draft.py
c9272acba2a0243a5f8eda6f57c04908a20de462 importDWG.py
00f845039587cd0ce43be796b610b0263186bc57 importDXF.py
f9f48bd8ad766841cb90049ab5c4b7c257909378 importSVG.py
6c2f613bcfc0379ac1f9bb90a01efd24df3f33c1 TestDraft.py
"""
if __name__ == '__main__':
    import sys
    # Script entry point: read a patch from standard input and look up the
    # commit its pre-image blobs belong to.
    # NOTE(review): sys.argv normally holds at least the script name, so
    # this check looks always true - confirm what was intended.
    if sys.argv:
        findbaseforpath(sys.stdin.read())
    # Alternative entry points kept from the original script:
    #for dir1 in (sys.argv[1:] or (".")):
    #    walk(dir1)
    #print findblob(*sys.argv[1:])
    #for key,value in findblobs(sys.argv[1].split(','),sys.argv[2]).iteritems():
    #    print '%s %s..%s' % (key,refcount(value[0]),refcount(value[1]))
    #searchblob(testdata,ref='origin/master',\
    #    stopat='691fd1128672c8bd472cece87c9e9d07b71d6fee')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment