Last active
March 23, 2016 04:04
-
-
Save thcipriani/f9c19c548d423ef4fbcd to your computer and use it in GitHub Desktop.
A Dumb Git Object Parser.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
# git-obj-parser.py
# ~~~~~~~~~~~~~~~~~
# Run from a git work dir, creates a list of all objects with their
# contents in a format like:
#
# > BLOB 1b9f426a8407ffee551ad2993c5d7d3780296353
# -----------------------------------------------
# > TREE 098e6de29daf4e55f83406b49f5768df9bc7d624
# BLOB 1b9f426a8407ffee551ad2993c5d7d3780296353 README
# -----------------------------------------------
#
import os | |
import glob | |
import re | |
import zlib | |
# Location of git's object store, relative to the directory the script is
# run from.
BASE_PATH = os.path.join('.git', 'objects')

# Object type names that main() looks for at the start of each decompressed
# object.
TYPES = ['blob', 'tree', 'commit']
def tree(obj):
    """Print one line per entry of a raw git tree object.

    ``obj`` is the decompressed object as bytes: a ``tree <size>`` header
    terminated by a NUL byte, followed by entries laid out as
    ``<octal mode> <name>``, a NUL byte, and the 20-byte binary SHA-1.

    Each entry is printed as ``TYPE<TAB>hex-sha<TAB>name`` where TYPE is
    ``TREE`` for mode 40000, ``COMMIT`` for mode 160000 (a gitlink, i.e. a
    submodule) and ``BLOB`` for everything else.
    """
    # A bytes pattern, so it works on the raw object under Python 2 and 3.
    # DOTALL is required: the SHA-1 is arbitrary binary data and may
    # contain a newline byte, which '.' would otherwise refuse to match.
    tree_pattern = re.compile(br'''
        (?P<octal>[0-9]+)   # Octal file mode
        [ ]                 # Single space
        (?P<file>[^\0]+)    # Filename
        \0                  # Null
        (?P<sha>.{20})      # Raw 20-byte SHA-1
        ''', re.VERBOSE | re.DOTALL)

    # Strip the header at the FIRST NUL only; the binary SHA-1 bytes that
    # follow may themselves contain NUL bytes.
    body = obj.partition(b'\0')[2]

    for match in tree_pattern.finditer(body):
        mode = match.group('octal')
        if mode == b'40000':
            obj_type = 'TREE'
        elif mode == b'160000':
            obj_type = 'COMMIT'
        else:
            obj_type = 'BLOB'

        name = match.group('file')
        if not isinstance(name, str):
            # Python 3: turn the raw bytes into text for printing.
            name = name.decode('utf-8', 'replace')

        # bytearray yields integers on both Python 2 and Python 3.
        sha = ''.join(
            '{:02x}'.format(byte) for byte in bytearray(match.group('sha')))

        print('{type}\t{sha}\t{name}'.format(
            type=obj_type, name=name, sha=sha))
def commit(obj):
    """Print the tree and parent references of a raw git commit object.

    ``obj`` is the decompressed object as bytes: a ``commit <size>`` header
    terminated by a NUL byte, then header lines, a blank line, and the
    commit message.

    Only the header lines are inspected, so text in the commit message that
    happens to look like ``parent <sha>`` is not reported. Each reference is
    printed as ``TYPE<TAB>sha`` with TYPE being ``TREE`` or ``PARENT``; a
    commit with several parents produces one ``PARENT`` line per parent.
    """
    commit_pattern = re.compile(br'''
        (?P<type>tree|parent)   # Either a tree or a parent object
        [ ]                     # Single space
        (?P<sha>[a-f0-9]{40})   # SHA1
        $                       # Nothing else on the line
        ''', re.VERBOSE)

    def as_text(raw):
        # Python 2 already has a native str; Python 3 needs decoding.
        # The pattern guarantees the matched groups are plain ASCII.
        return raw if isinstance(raw, str) else raw.decode('ascii')

    # Drop the object header, then keep only what precedes the first blank
    # line: everything after it is the free-form commit message.
    body = obj.partition(b'\0')[2]
    headers = body.partition(b'\n\n')[0]

    for line in headers.split(b'\n'):
        match = commit_pattern.match(line)
        if match is None:
            continue
        print('{type}\t{sha}'.format(
            type=as_text(match.group('type')).upper(),
            sha=as_text(match.group('sha'))))
def blob(obj):
    """Handle blob objects.

    The content of blobs doesn't really matter for the listing, so ``obj``
    is neither parsed nor printed. Always returns ``None``.
    """
    return None
def main():
    """Print a listing of every loose object under ``BASE_PATH``.

    For each object whose type is listed in ``TYPES`` this prints a
    ``> TYPE<TAB>sha`` header, then the references found by ``commit()`` or
    ``tree()`` for those types, then a separator line. Objects are visited
    in sorted path order so the output is reproducible.

    Files that are not zlib streams, and objects of a type not listed in
    ``TYPES`` (for example annotated tags), are skipped silently.
    """
    # Loose objects live in two-hex-digit fan-out directories. Matching only
    # those keeps the ``pack`` and ``info`` directories out of the listing;
    # their files are not single zlib streams and cannot be decompressed.
    pattern = os.path.join(BASE_PATH, '[0-9a-f][0-9a-f]', '*')

    for path in sorted(glob.glob(pattern)):
        if not os.path.isfile(path):
            continue

        # The object name is the fan-out directory plus the file name.
        fanout_dir, rest = os.path.split(path)
        name = os.path.basename(fanout_dir) + rest

        # Objects are compressed binary data: read bytes, never text.
        with open(path, 'rb') as f:
            raw = f.read()
        try:
            message = zlib.decompress(raw)
        except zlib.error:
            # Not a loose object after all (e.g. a leftover temporary file).
            continue

        # The object starts with "<type> <size>" followed by a NUL byte;
        # the type is plain ASCII.
        obj_type = message.split(b' ', 1)[0].decode('ascii', 'replace')
        if obj_type not in TYPES:
            continue

        print('> {}\t{}'.format(obj_type.upper(), name))
        if obj_type == 'commit':
            commit(message)
        elif obj_type == 'tree':
            tree(message)
        print('-----------------------------------------------')
# Run the listing only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment