Created
December 19, 2020 14:59
-
-
Save ikaro1192/933c380635220da7471b422ec1fb60a2 to your computer and use it in GitHub Desktop.
CCS †裏† Advent Calendar 2020の20日目の記事用に書いたgitのパーサー
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib
import os
import struct
import zlib
def ls_obj():
    """Print the full hash of every object file found under ``.git/objects``.

    Only sub-directories whose name is exactly two characters long are
    scanned (this skips entries such as ``info`` and ``pack``).  Each printed
    hash is the directory name followed by the file name.  Every file in a
    directory is listed, and empty directories are simply skipped.

    Paths are relative to the current working directory.

    Raises:
        FileNotFoundError: if ``.git/objects`` does not exist.
    """
    objects_root = '.git/objects'
    for object_dir in sorted(os.listdir(objects_root)):
        if len(object_dir) != 2:
            continue
        # A fan-out directory can hold any number of objects (or none),
        # so iterate over all of its entries rather than taking the first.
        for object_file in sorted(os.listdir(os.path.join(objects_root, object_dir))):
            print(object_dir + object_file)
def ls_ref():
    """Print the name of every branch ref file under ``.git/refs/heads``.

    Branch names may contain slashes (e.g. ``feature/x``), in which case the
    ref file lives in a sub-directory.  Sub-directories are therefore
    traversed and the full slash-separated name is printed, one per line, in
    sorted order.

    Only files below ``.git/refs/heads`` are considered; nothing else is
    consulted.  Paths are relative to the current working directory.

    Raises:
        FileNotFoundError: if ``.git/refs/heads`` does not exist.
    """
    heads_root = '.git/refs/heads'
    branch_names = []
    # Stack of slash-terminated prefixes still to scan ('' is the root).
    pending = ['']
    while pending:
        prefix = pending.pop()
        for entry in os.listdir(os.path.join(heads_root, prefix)):
            relative_name = prefix + entry
            if os.path.isdir(os.path.join(heads_root, relative_name)):
                pending.append(relative_name + '/')
            else:
                branch_names.append(relative_name)
    for branch_name in sorted(branch_names):
        print(branch_name)
def cat_ref(name: str):
    """Print the contents of the ref file ``.git/refs/heads/<name>``.

    The file is read as text and passed to ``print`` unchanged, so the
    output ends with one newline more than the file itself contains.

    Args:
        name: file name (branch name) below ``.git/refs/heads/``.
    """
    ref_path = '.git/refs/heads/' + name
    with open(ref_path, 'r') as handle:
        contents = handle.read()
    print(contents)
def cat_head():
    """Print the contents of ``.git/HEAD``.

    The file is read as text and passed to ``print`` unchanged, so the
    output ends with one newline more than the file itself contains.
    """
    head_path = '.git/HEAD'
    with open(head_path, 'r') as handle:
        contents = handle.read()
    print(contents)
class GitObject:
    """Base class for parsed git objects.

    Instantiated directly, it acts as a placeholder: it reports the type
    ``"unknown"`` and its ``print`` method only emits a notice.  Subclasses
    override both members.
    """

    def print(self):
        """Write a human-readable dump of the object to standard output."""
        # Inside a method body the bare name still resolves to the builtin.
        print("not implemented")

    @property
    def object_type(self) -> str:
        """Name of the object type represented by this instance."""
        return "unknown"
class Blob(GitObject):
    """A blob object: file content held as text for display."""

    def __init__(self, data: bytes):
        """Store the blob body as text.

        Args:
            data: the object body (the bytes following the header).
        """
        # Blob content is arbitrary bytes and need not be valid UTF-8.
        # Undecodable bytes become U+FFFD instead of raising
        # UnicodeDecodeError, so binary files can still be inspected.
        self.data = data.decode(errors="replace")

    def print(self):
        """Print the decoded blob content."""
        print(self.data)

    @property
    def object_type(self) -> str:
        """Always ``"blob"``."""
        return "blob"
class TreeElement:
    """One entry of a tree object: file mode, file name and object hash."""

    def __init__(self, mode: str, filename: str, sha1_hash: str):
        """Create a tree entry.

        Args:
            mode: the entry's mode field as text (e.g. ``"100644"``).
            filename: the entry's name.
            sha1_hash: hexadecimal string form of the entry's SHA-1.
        """
        self.mode = mode
        self.filename = filename
        self.sha1_hash = sha1_hash

    def __repr__(self) -> str:
        return "{}(mode={!r}, filename={!r}, sha1_hash={!r})".format(
            type(self).__name__, self.mode, self.filename, self.sha1_hash
        )
def get_tree_elements(data: bytes):
    """Parse the body of a tree object into a list of ``TreeElement``.

    Each entry is laid out as ``<mode> SP <filename> NUL <20-byte SHA-1>``
    and entries follow one another with no separator.

    The buffer is walked iteratively with an offset, so the number of
    entries is not limited by the interpreter's recursion depth and the
    remaining data is not re-copied for every entry.

    Args:
        data: the tree object body (the bytes following the header).

    Returns:
        The entries in the order they appear; an empty list for empty input.

    Raises:
        ValueError: if an entry lacks the space or NUL delimiter.
        UnicodeDecodeError: if a mode or file name is not valid UTF-8.
    """
    elements = []
    offset = 0
    total = len(data)
    while offset < total:
        mode_end = data.index(b' ', offset)
        name_end = data.index(b'\0', mode_end + 1)
        # The raw (binary) SHA-1 occupies the 20 bytes after the NUL.
        hash_end = name_end + 1 + 20
        elements.append(
            TreeElement(
                data[offset:mode_end].decode(),
                data[mode_end + 1:name_end].decode(),
                data[name_end + 1:hash_end].hex(),
            )
        )
        offset = hash_end
    return elements
class Tree(GitObject):
    """A tree object: a list of entries parsed from the object body."""

    def __init__(self, data: bytes):
        """Parse ``data`` (the tree object body) into ``self.elements``."""
        self.elements = get_tree_elements(data)

    def print(self):
        """Print one ``<mode> <filename> <sha1>`` line per entry."""
        for entry in self.elements:
            print(f"{entry.mode} {entry.filename} {entry.sha1_hash}")

    @property
    def object_type(self) -> str:
        """Always ``"tree"``."""
        return "tree"
class Commit(GitObject):
    """A commit object, kept as its raw text for display."""

    def __init__(self, data: bytes):
        """Store the commit body as text.

        Args:
            data: the object body (the bytes following the header).
        """
        # Commit text is not guaranteed to be UTF-8 (a commit may be
        # written in another encoding), so undecodable bytes are replaced
        # with U+FFFD rather than raising UnicodeDecodeError.
        self.data = data.decode(errors="replace")

    def print(self):
        """Print the decoded commit text."""
        print(self.data)

    @property
    def object_type(self) -> str:
        """Always ``"commit"``."""
        return "commit"
# This could be done more neatly with a metaclass, but...
def create_git_object(obj_type: str, data: bytes):
    """Build the object wrapper matching ``obj_type``.

    Args:
        obj_type: type name taken from the object header.
        data: the object body (the bytes following the header).

    Returns:
        A ``Blob``, ``Tree`` or ``Commit`` built from ``data``; for any
        other type name, a plain ``GitObject`` placeholder (``data`` is
        then ignored).
    """
    constructors = {
        "blob": Blob,
        "tree": Tree,
        "commit": Commit,
    }
    constructor = constructors.get(obj_type)
    if constructor is None:
        return GitObject()
    return constructor(data)
def cat_obj(hash_val):
    """Decompress the object file for ``hash_val`` and print its contents.

    The file is looked up at ``.git/objects/<first 2 chars>/<rest>``,
    inflated with zlib, and three things are printed: the SHA-1 of the
    inflated bytes, the object type, and the object's own dump.

    Args:
        hash_val: hexadecimal object hash as a string.
    """
    object_path = '.git/objects/' + hash_val[:2] + "/" + hash_val[2:]
    with open(object_path, 'rb') as gitobj:
        raw = zlib.decompress(gitobj.read())

    # The digest covers header and body together.
    digest = hashlib.sha1(raw).hexdigest()
    print("hash:{}".format(digest))

    # The header ("<type> <size>") is separated from the body by one NUL.
    header, body = raw.split(b'\0', 1)
    obj_type, _obj_size = header.decode().split()

    obj = create_git_object(obj_type, body)
    print("type:{}".format(obj.object_type))
    obj.print()
def calc_padding(n: int) -> int:
    """Return the distance from ``n`` to the next value congruent to 2 mod 8.

    The result is always in the range 1..8: when ``n`` is itself congruent
    to 2 modulo 8 the answer is 8, never 0.

    Args:
        n: a length in bytes.
    """
    return 8 - (n - 2) % 8
def cat_index():
    """Print the version and the entries of ``.git/index``.

    Output is ``version:<n>``, a ``---`` separator, then one
    ``<octal mode> <path> <sha1>`` line per index entry.

    Entry layout (all integers big-endian), as parsed here:

    * ten 32-bit fields: ctime sec/nsec, mtime sec/nsec, dev, ino, mode,
      uid, gid, file size
    * 20-byte SHA-1
    * 16-bit flags: ``0x8000`` assume-valid, ``0x4000`` extended,
      ``0x3000`` merge stage, ``0x0FFF`` path length
    * in version 3 and later, when the extended bit is set, a further
      16-bit flags field
    * the path, then 1-8 NUL bytes padding the entry to a multiple of 8

    Limitation: version 4 indexes (prefix-compressed paths, no padding)
    are not supported.

    Raises:
        struct.error: if the file is shorter than the header or than an
            entry announced by the header.
    """
    with open('.git/index', 'rb') as index_file:
        index_file_data = index_file.read()

    # Header: 4-byte signature, 4-byte version, 4-byte entry count.
    _signature, version, entry_count = struct.unpack_from('>4sII', index_file_data, 0)
    print("version:{}".format(version))
    print("---")

    fixed_entry = struct.Struct('>10I20sH')  # 62 bytes of fixed-size fields
    offset = 12
    for _ in range(entry_count):
        fields = fixed_entry.unpack_from(index_file_data, offset)
        mode = fields[6]
        sha1 = fields[10].hex()
        flags = fields[11]

        name_start = offset + fixed_entry.size
        if version >= 3 and flags & 0x4000:
            # Extended entries carry one more 16-bit flags field.
            name_start += 2

        name_length = flags & 0x0FFF
        if name_length == 0x0FFF:
            # The length field is saturated; the real path runs up to the
            # first NUL byte.
            name_end = index_file_data.index(b'\0', name_start)
        else:
            name_end = name_start + name_length
        name = index_file_data[name_start:name_end].decode()

        # calc_padding() assumes 62 fixed bytes before the path, so pass it
        # everything that lies beyond those 62 bytes in this entry.
        offset = name_end + calc_padding(name_end - offset - fixed_entry.size)
        print("{} {} {}".format(oct(mode), name, sha1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment