Skip to content

Instantly share code, notes, and snippets.

@thcipriani
Last active March 23, 2016 04:04
Show Gist options
  • Save thcipriani/f9c19c548d423ef4fbcd to your computer and use it in GitHub Desktop.
Save thcipriani/f9c19c548d423ef4fbcd to your computer and use it in GitHub Desktop.
A Dumb Git Object Parser.
#!/usr/bin/env python
#
# git-obj-parser.py
# ~~~~~~~~~~~~~~~~~
# Run from a git work dir, creates a list of all objects with their
# contents in a format like:
#
# > BLOB 1b9f426a8407ffee551ad2993c5d7d3780296353
# -----------------------------------------------
# > TREE 098e6de29daf4e55f83406b49f5768df9bc7d624
# BLOB 1b9f426a8407ffee551ad2993c5d7d3780296353 README
# -----------------------------------------------
#
import os
import glob
import re
import zlib
# Directory (relative to the current working directory) that main() globs
# for object files.
BASE_PATH = os.path.join('.git', 'objects')
# Object type names that main() compares against the start of each
# decompressed object to decide how to display it.
TYPES = ['blob', 'tree', 'commit']
def tree(obj):
    """Print one line per entry of a decompressed git tree object.

    Each entry is printed as ``TYPE<TAB>sha<TAB>name`` where TYPE is TREE
    for mode 40000, COMMIT for mode 160000 (a gitlink, i.e. a submodule)
    and BLOB for every other mode.

    :param obj: the raw, zlib-decompressed object as a byte string,
        including its leading ``tree <size>\\0`` header.
    :raises ValueError: if ``obj`` does not start with a tree header.
    """
    # Bytes pattern so it works on Python 3 as well as Python 2.  DOTALL is
    # required: the SHA is 20 arbitrary bytes and may contain 0x0a, which
    # '.' would otherwise refuse to match.
    entry_pattern = re.compile(br'''
        (?P<octal>[0-9]+)   # Octal file mode
        [ ]                 # Single space
        (?P<file>[^\0]+)    # Filename
        \0                  # Null
        (?P<sha>.{20})      # Raw 20-byte SHA1
        ''', re.VERBOSE | re.DOTALL)

    # The header ends at the first NUL; cutting there (rather than
    # regex-splitting the whole object) cannot be fooled by payload bytes
    # that happen to look like a header.
    header, nul, body = obj.partition(b'\0')
    if not nul or not header.startswith(b'tree '):
        raise ValueError('not a tree object')

    pos = 0
    while pos < len(body):
        # Entries are packed back to back, so each one must match exactly
        # where the previous one ended; stop at malformed/truncated data
        # instead of resynchronising on garbage.
        match = entry_pattern.match(body, pos)
        if match is None:
            break
        pos = match.end()

        mode = match.group('octal')
        if mode == b'40000':
            obj_type = 'tree'
        elif mode == b'160000':
            obj_type = 'commit'
        else:
            obj_type = 'blob'

        name = match.group('file')
        if not isinstance(name, str):
            # Python 3: turn the raw bytes into printable text.
            name = name.decode('utf-8', 'replace')

        # bytearray yields ints on both Python 2 and 3.
        sha = ''.join('%02x' % byte for byte in bytearray(match.group('sha')))

        print('{type}\t{sha}\t{name}'.format(
            type=obj_type.upper(), name=name, sha=sha))
def commit(obj):
    """Print the tree and parent references of a decompressed commit object.

    One ``TYPE<TAB>sha`` line is printed for the ``tree`` header and for
    each ``parent`` header.  Only the header section (everything before the
    first blank line) is examined, so text in the commit message that looks
    like ``parent <sha>`` is not reported.

    :param obj: the raw, zlib-decompressed object as a byte string,
        including its leading ``commit <size>\\0`` header.
    :raises ValueError: if ``obj`` does not start with a commit header.
    """
    # Bytes pattern so it works on Python 3 as well as Python 2.
    header_pattern = re.compile(br'''
        (?P<type>tree|parent)   # Either a tree or a parent object
        [ ]                     # Single space
        (?P<sha>[a-f0-9]{40})   # SHA1
        \Z                      # Nothing else on the line
        ''', re.VERBOSE)

    # The object header ends at the first NUL.
    header, nul, body = obj.partition(b'\0')
    if not nul or not header.startswith(b'commit '):
        raise ValueError('not a commit object')

    # Commit headers are separated from the free-form message by the first
    # blank line.
    fields = body.split(b'\n\n', 1)[0]
    for line in fields.split(b'\n'):
        # match() anchors at the start of the line, so continuation lines
        # of multi-line headers (which begin with a space) never match.
        match = header_pattern.match(line)
        if match is None:
            continue
        print('{type}\t{sha}'.format(
            type=match.group('type').decode('ascii').upper(),
            sha=match.group('sha').decode('ascii')))
def blob(obj):
    """Accept a blob object and deliberately do nothing with it.

    Blob contents are not part of the listing, so the argument is ignored
    and nothing is printed.

    :param obj: the blob object; unused.
    :returns: None
    """
    return None
def main():
    """List every loose object under BASE_PATH with a summary of its content.

    For each object whose type is listed in TYPES a ``> TYPE<TAB>sha``
    heading is printed, followed by the output of commit() or tree() for
    those types (blobs get the heading only), followed by a separator line.
    Files that are not zlib streams and objects of any other type are
    skipped.
    """
    # Loose objects are stored as <2 hex chars>/<remaining hex chars>.
    # Restricting the directory part keeps pack/ and info/ out of the
    # listing; their files are not plain zlib streams.
    pattern = os.path.join(BASE_PATH, '[0-9a-f][0-9a-f]', '*')
    for path in glob.glob(pattern):
        # Object name = directory name + file name, independent of the
        # platform's path separator.
        name = os.path.basename(os.path.dirname(path)) + os.path.basename(path)

        # Objects are binary data; text mode would corrupt them on some
        # platforms and yields str instead of bytes on Python 3.
        with open(path, 'rb') as f:
            raw = f.read()
        try:
            message = zlib.decompress(raw)
        except zlib.error:
            # Not a loose object (e.g. a leftover temporary file).
            continue

        for obj_type in TYPES:
            if message.startswith(obj_type.encode('ascii') + b' '):
                break
        else:
            # A type this script does not display (e.g. an annotated tag).
            continue

        print('> {}\t{}'.format(obj_type.upper(), name))
        if obj_type == 'commit':
            commit(message)
        elif obj_type == 'tree':
            tree(message)
        print('-----------------------------------------------')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment