Skip to content

Instantly share code, notes, and snippets.

@ikaro1192
Created December 19, 2020 14:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikaro1192/933c380635220da7471b422ec1fb60a2 to your computer and use it in GitHub Desktop.
Save ikaro1192/933c380635220da7471b422ec1fb60a2 to your computer and use it in GitHub Desktop.
CCS †裏† Advent Calendar 2020の20日目の記事用に書いたgitのパーサー
import zlib
import os
import hashlib
def ls_obj():
    """Print the full hex ID of every loose object under ``.git/objects``.

    Each loose object is stored as ``.git/objects/<2 hex chars>/<rest>``;
    the printed ID is the directory name followed by the file name.
    Entries whose name is not two characters long (such as ``pack`` or
    ``info``) are skipped. Paths are relative to the current working
    directory.
    """
    objects_root = '.git/objects'
    # Sorted so that the listing is deterministic across file systems.
    for object_dir in sorted(os.listdir(objects_root)):
        if len(object_dir) != 2:
            continue
        # A prefix directory can hold any number of objects (or none), so
        # list all of them instead of only the first entry.
        for object_file in sorted(os.listdir(objects_root + '/' + object_dir)):
            print(object_dir + object_file)
def ls_ref():
    """Print the name of each entry in ``.git/refs/heads``, one per line.

    The path is relative to the current working directory.
    """
    heads_dir = '.git/refs/heads'
    for branch_name in os.listdir(path=heads_dir):
        print(branch_name)
def cat_ref(name: str):
    """Print the contents of the ref file ``.git/refs/heads/<name>``.

    Args:
        name: File name of the ref, appended to ``.git/refs/heads/``.
    """
    ref_path = '.git/refs/heads/' + name
    with open(ref_path, 'r') as handle:
        contents = handle.read()
    print(contents)
def cat_head():
    """Print the contents of ``.git/HEAD`` (relative to the working directory)."""
    with open('.git/HEAD', 'r') as handle:
        contents = handle.read()
    print(contents)
class GitObject:
    """Base class for parsed git objects.

    Used directly as the fallback for object types that have no dedicated
    subclass; subclasses override ``object_type`` and ``print``.
    """

    def print(self):
        """Write the object's content to stdout (placeholder in the base class)."""
        print("not implemented")

    @property
    def object_type(self) -> str:
        """Name of the object type; ``"unknown"`` for the base class."""
        return "unknown"
class Blob(GitObject):
    """A git blob: the raw content of a file, held as text."""

    def __init__(self, data: bytes):
        """Store the blob body as text.

        Args:
            data: Blob body, i.e. the object content after the header.
        """
        # Blobs can hold arbitrary binary content (images, archives, ...).
        # A strict UTF-8 decode would raise UnicodeDecodeError for those, so
        # undecodable bytes are replaced with U+FFFD instead.
        self.data = data.decode(errors='replace')

    def print(self):
        """Write the decoded blob content to stdout."""
        print(self.data)

    @property
    def object_type(self) -> str:
        """Name of the object type, always ``"blob"``."""
        return "blob"
class TreeElement:
    """One entry of a git tree object: mode, file name and object hash."""

    def __init__(self, mode: str, filename: str, sha1_hash: str):
        """Store the three fields of a tree entry.

        Args:
            mode: File mode as the text found in the tree (e.g. ``"100644"``).
            filename: Name of the entry.
            sha1_hash: Hex-encoded SHA-1 of the referenced object. The
                parser in this file passes the result of ``bytes.hex()``,
                so this is a string rather than an integer.
        """
        self.mode = mode
        self.filename = filename
        self.sha1_hash = sha1_hash
def get_tree_elements(data: bytes):
    """Parse the body of a git tree object into a list of ``TreeElement``.

    Each entry is laid out as ``<mode> SP <filename> NUL <20-byte SHA-1>``
    and entries follow each other without any separator.

    The body is walked iteratively with an offset: recursing once per entry
    would exceed Python's recursion limit on large trees, and re-slicing the
    remaining data for every entry copies it a quadratic number of times.

    Args:
        data: Tree object body, i.e. the object content after the header.

    Returns:
        The entries in the order they appear; an empty list for empty data.

    Raises:
        ValueError: If an entry lacks the space or NUL delimiter.
    """
    elements = []
    pos = 0
    end = len(data)
    while pos < end:
        mode_end = data.index(b' ', pos)
        name_end = data.index(b'\0', mode_end + 1)
        hash_end = name_end + 1 + 20  # the SHA-1 is stored as 20 raw bytes
        elements.append(TreeElement(
            data[pos:mode_end].decode(),
            data[mode_end + 1:name_end].decode(),
            data[name_end + 1:hash_end].hex(),
        ))
        pos = hash_end
    return elements
class Tree(GitObject):
    """A git tree object, parsed into its list of entries."""

    def __init__(self, data: bytes):
        """Parse the tree body into ``self.elements`` via ``get_tree_elements``."""
        self.elements = get_tree_elements(data)

    def print(self):
        """Write one ``<mode> <filename> <sha1>`` line per entry to stdout."""
        row_template = "{} {} {}"
        for entry in self.elements:
            row = row_template.format(entry.mode, entry.filename, entry.sha1_hash)
            print(row)

    @property
    def object_type(self) -> str:
        """Name of the object type, always ``"tree"``."""
        return "tree"
class Commit(GitObject):
    """A git commit object, kept as its decoded text."""

    def __init__(self, data):
        """Decode the commit body and keep it in ``self.data``."""
        text = data.decode()
        self.data = text

    @property
    def object_type(self) -> str:
        """Name of the object type, always ``"commit"``."""
        return "commit"

    def print(self):
        """Write the commit text to stdout."""
        print(self.data)
# This could be done more elegantly with a metaclass, but...
def create_git_object(obj_type: str, data: bytes):
    """Build the object wrapper that matches ``obj_type``.

    Args:
        obj_type: Type name from the object header.
        data: Object body handed to the wrapper's constructor.

    Returns:
        A ``Blob``, ``Tree`` or ``Commit`` for the corresponding type name,
        or a plain ``GitObject`` for any other type.
    """
    constructors = {
        "blob": Blob,
        "tree": Tree,
        "commit": Commit,
    }
    constructor = constructors.get(obj_type)
    if constructor is None:
        return GitObject()
    return constructor(data)
def cat_obj(hash_val):
    """Print the hash, type and content of one loose git object.

    The object is read from ``.git/objects/<first 2 chars>/<rest>``,
    zlib-decompressed, and split at the first NUL byte into a
    ``<type> <size>`` header and the body.

    Args:
        hash_val: Hex object ID used to locate the object file.
    """
    object_path = '.git/objects/' + hash_val[:2] + "/" + hash_val[2:]
    with open(object_path, 'rb') as gitobj:
        compressed = gitobj.read()
    raw = zlib.decompress(compressed)

    # The SHA-1 is computed over the whole decompressed content, header included.
    digest = hashlib.sha1(raw).hexdigest()
    print("hash:{}".format(digest))

    header, body = raw.split(b'\0', 1)
    # The size field is unpacked only to require the two-field header shape.
    obj_type, _obj_size = header.decode().split()

    parsed = create_git_object(obj_type, body)
    print("type:{}".format(parsed.object_type))
    parsed.print()
def calc_padding(n: int) -> int:
    """Return the distance from ``n`` to the next value congruent to 2 mod 8.

    The result is always between 1 and 8: when ``n`` itself is already
    congruent to 2 modulo 8, a full 8 is returned rather than 0.
    ``cat_index`` uses this as the number of padding bytes that follow an
    entry name of length ``n``.
    """
    return 8 - (n - 2) % 8
def cat_index():
    """Print the version and the entries of ``.git/index``.

    Output is a ``version:<n>`` line, a ``---`` separator, then one
    ``<octal mode> <name> <sha1>`` line per index entry.

    Fixed-size part of each entry (all integers big-endian):

    ========  ======  =====================================
    offset    size    field
    ========  ======  =====================================
    0         8       ctime (seconds, nanoseconds)
    8         8       mtime (seconds, nanoseconds)
    16        4       dev
    20        4       inode
    24        4       mode
    28        4       uid
    32        4       gid
    36        4       file size
    40        20      SHA-1
    60        2       flags
    ========  ======  =====================================

    The 16-bit flags word is: 1 bit assume-valid (0x8000), 1 bit extended
    (0x4000), 2 bits stage (0x3000) and 12 bits name length (0x0FFF).

    NOTE: index version 4 stores prefix-compressed path names, which this
    parser does not handle.
    """
    with open('.git/index', 'rb') as index_file:
        index_file_data = index_file.read()

    def read_uint(start: int, length: int) -> int:
        # Decode a big-endian unsigned integer from the index data.
        return int.from_bytes(index_file_data[start:start + length], byteorder='big')

    # Header: 4-byte signature, 4-byte version, 4-byte entry count.
    version = read_uint(4, 4)
    entry_count = read_uint(8, 4)
    print("version:{}".format(version))
    print("---")

    offset = 12
    for _ in range(entry_count):
        mode = read_uint(offset + 24, 4)
        sha1 = index_file_data[offset + 40:offset + 60].hex()
        flags = read_uint(offset + 60, 2)

        # From version 3 on, a set extended bit means another 16-bit flags
        # word sits between the flags and the name.
        extra = 2 if version >= 3 and flags & 0x4000 else 0
        name_start = offset + 62 + extra

        name_length = flags & 0x0FFF
        if name_length == 0x0FFF:
            # The field saturates at 0xFFF; the real name then runs up to
            # its terminating NUL byte.
            name_length = index_file_data.index(b'\0', name_start) - name_start
        name = index_file_data[name_start:name_start + name_length].decode()

        # Entries are NUL-padded to a multiple of 8 bytes. calc_padding
        # assumes a 62-byte fixed part, so the extra flag bytes are counted
        # together with the name.
        padding = calc_padding(name_length + extra)
        offset = name_start + name_length + padding
        print("{} {} {}".format(oct(mode), name, sha1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment