# alexander-hanel/struct_creator.py

## struct_creator.py
__author__ = 'Alexander Hanel'
__date__ = '2018/02/28'
__version__ = "2.0"
__title__ = "struct creator"
import re

"""

Example:

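ARGS:
    run(buffer, bit, class_name)
        buffer     - string holding the struct member definitions, one per line
        bit        - 32 or 64, the width used for pointer-sized types
        class_name - name given to the generated Python class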

INPUT:
    run('''
    BYTE bType;
    BYTE bVersion;
    WORD Reserved;
    DWORD aiKeyAlg''', 32, "BLOBHEADER")

OUTPUT (printed to stdout):

class BLOBHEADER:
	def __init__(self, data):
		self.bType = None #  BYTE
		self.bVersion = None #  BYTE
		self.Reserved = None #  WORD
		self.aiKeyAlg = None #  DWORD
		self._parse_data(data)

	def _parse_data(self, data):
		# Struct unpacking code. Initiate cur_addr to 0
		cur_addr = 0
		temp_data = data[cur_addr: cur_addr + 1]
		self.bType = struct.unpack("<B", temp_data)[0]
		cur_addr += 1
		temp_data = data[cur_addr: cur_addr + 1]
		self.bVersion = struct.unpack("<B", temp_data)[0]
		cur_addr += 1
		temp_data = data[cur_addr: cur_addr + 2]
		self.Reserved = struct.unpack("<H", temp_data)[0]
		cur_addr += 2
		temp_data = data[cur_addr: cur_addr + 4]
		self.aiKeyAlg = struct.unpack("<I", temp_data)[0]
		cur_addr += 4


Type value sources:
    https://docs.google.com/spreadsheet/ccc?key=0ArV2XEi4eBdpcEs1Q0VjZEc5R1lHZU83SzJkbUVjQmc#gid=0
    [MS-DTYP].pdf - Microsoft "Windows Data Types" specification (search for it online)
"""

def open_file(f):
    """
    Open the file at path f and hand back its first line only.

    The line is returned exactly as readline() produced it, so a trailing
    newline is kept; an empty file gives an empty string.
    """
    # TODO - add open file option
    with open(f) as source:
        first_line = source.readline()
    return first_line


def strip_line(line):
    """
    Split a line into its whitespace-separated tokens.

    Runs of spaces, tabs and a trailing carriage return are all treated as
    separators, so tab-indented input and CRLF line endings tokenise the same
    way as space-indented input.  A blank line gives an empty list.
    """
    # split() with no argument splits on any whitespace and never yields
    # empty strings, which split(" ") only managed for plain spaces.
    return line.split()


def get_size(line):
    """
    Split an optional array suffix off a member name.

    "Name[16]" returns ("Name", 16); a name without "[" is returned unchanged
    with a count of 1.  The count may be decimal or 0x-prefixed hexadecimal.

    Raises ValueError when the bracket is never closed or the count is not a
    number.
    """
    if "[" not in line:
        return line, 1

    # Raw string: "\[" is a regex escape, not a Python string escape.
    found = re.search(r"\[(.*?)\]", line)
    if found is None:
        raise ValueError("unterminated array size in %r" % line)

    value = found.group(1).strip()
    temp_name = line[:line.find("[")]
    if value.lower().startswith("0x"):
        return temp_name, int(value, 16)
    return temp_name, int(value)


def parse_line(line, bit):
    """
    Classify one tokenised struct member definition.

    line is a list of tokens with the type first and the member name second.
    bit selects the width of pointer-sized types: 32 gives 4 bytes, any other
    value gives 8 bytes.

    Returns the tuple (signed, size, type_name, name, is_bit):
        signed    - True for signed types, False for unsigned ones
        size      - total bytes: element size multiplied by the array count
                    reported by get_size
        type_name - upper-cased type with "*" and "_" trimmed from both ends
        name      - member name without the trailing ";" or "[N]" suffix
        is_bit    - True only for the BIT type

    Lines with fewer than two tokens and lines whose first token contains
    "//" return five Nones.  An unrecognised type returns
    (None, None, type_name, name, None).
    """
    # TODO: BSTR, LMCSTR, LMSTR, LPCSTR, LPCWSTR,
    # TODO: Bit value is returned but not implemented. No use cases or need to yet.

    # A member needs both a type and a name; shorter lines (blank, a lone
    # brace) and commented-out lines are skipped instead of raising IndexError.
    if len(line) < 2 or "//" in line[0]:
        return None, None, None, None, None

    name, multi = get_size(line[1].strip(";"))
    line_type = line[0].strip("*").upper().strip("_")

    # Size: single bit (currently sized as one byte per element)
    if line_type == "BIT":
        return False, 1 * multi, line_type, name, True

    # (type names, signed, element size in bytes)
    fixed_width = (
        (("BOOLEAN", "PBOOLEAN", "BYTE", "PBYTE", "LPBYTE", "UCHAR", "PUCHAR",
          "UINT8", "CHAR", "PCHAR", "OCTET"), False, 1),
        (("INT8",), True, 1),
        (("UINT16", "USHORT", "WORD", "PWORD", "LPWORD"), False, 2),
        (("INT16", "SHORT"), True, 2),
        # ULONG_PTR used to be listed here as well, which made its
        # pointer-sized entry below unreachable.
        (("DWORD", "PDWORD", "LPDWORD", "DWORD32", "LPCSTR", "LPCVOID",
          "LPCWSTR", "PSTR", "LPSTR", "LPWSTR", "PWSTR", "UINT", "UINT32",
          "PULONG", "ULONG32", "NTSTATUS", "BOOL", "PBOOL", "LPBOOL",
          "DWORD_PTR", "FLOAT", "HRESULT", "RPC_BINDING_HANDLE"), False, 4),
        (("LPINT", "INT", "INT32", "LONG", "PLONG", "LPLONG", "LONG32"),
         True, 4),
        (("DWORD64", "PDWORD64", "DWORDLONG", "PDWORDLONG", "QWORD", "UINT64",
          "ULONG64", "ULONGLONG", "DOUBLE"), False, 8),
        # Signed 64-bit integers; previously reported as unsigned.
        (("INT64", "LONGLONG", "LONG64"), True, 8),
    )
    for type_names, signed, width in fixed_width:
        if line_type in type_names:
            return signed, width * multi, line_type, name, False

    # can be either 32 bit or 64
    pointer_sized = ("ULONG_PTR", "ULONG", "HANDLE_T", "ADCONNECTION_HANDLE",
                     "HANDLE", "LDAP_UDP_HANDLE", "PCONTEXT_HANDLE",
                     "PPCONTEXT_HANDLE")
    if line_type in pointer_sized:
        width = 4 if bit == 32 else 8
        return False, width * multi, line_type, name, False

    # TODO: UINT128 (sixteen bytes unsigned) is not handled yet.
    # signed, size in bytes, type, name, bool for bits
    return None, None, line_type, name, None


def get_code(signed, length, bit, class_name, name):
    """
    Build the Python source lines that unpack one struct member.

    signed     - truthy to use the signed struct format character
    length     - total size of the member in bytes
    bit        - bit-field flag; accepted but not used yet
    class_name - object the attribute is assigned on, e.g. "self"
    name       - attribute name to assign

    Lengths of 1, 2, 4 and 8 are decoded with a little-endian struct.unpack.
    Any other non-zero length, signed or not, is copied out as a raw slice of
    data.  Every snippet advances cur_addr by length.  A falsy length returns
    an empty string, so the result can always be concatenated.
    """
    if not length:
        return ""

    # Unsigned struct format characters by byte width; the signed variants
    # are the same letters in lower case.
    unsigned_formats = {1: "B", 2: "H", 4: "I", 8: "Q"}
    fmt = unsigned_formats.get(length)

    if fmt is None:
        # No integer format of this width: keep the bytes as they are.
        return (
            "\t\t%s.%s = data[cur_addr: cur_addr + %s]\n"
            "\t\tcur_addr += %s\n"
        ) % (class_name, name, length, length)

    if signed:
        fmt = fmt.lower()
    return (
        "\t\ttemp_data = data[cur_addr: cur_addr + %d]\n"
        "\t\t%s.%s = struct.unpack(\"<%s\", temp_data)[0]\n"
        "\t\tcur_addr += %d\n"
    ) % (length, class_name, name, fmt, length)

def create_class_header(name):
    """ Build the opening of the generated class: the class statement and the __init__ signature """
    template = "class %s:\n\tdef __init__(self, data):\n"
    return template % name

def create_parser_function_header():
    """ Build the first lines of the generated _parse_data method """
    header_lines = [
        "",
        "\tdef _parse_data(self, data):",
        "\t\t# Struct unpacking code. Initiate cur_addr to 0",
        "\t\tcur_addr = 0",
    ]
    return "\n".join(header_lines)

def append_parser_function():
    """ Build the line of generated code that calls _parse_data on the input """
    call_line = "\t\t" + "self._parse_data(data)"
    return call_line

def run(buffer, bit, class_name=None):
    """
    Print a Python class that parses the struct described in buffer.

    buffer     - struct member definitions, one per line
    bit        - forwarded to parse_line to size pointer-width types
    class_name - name of the generated class.  NOTE: the default of None is
                 formatted literally into the class header, so pass a name.

    The generated source is written to stdout; nothing is returned.
    """
    str_class = create_class_header(class_name)
    parse_parts = []
    for line in buffer.split("\n"):
        signed, size, type_name, name, b = parse_line(strip_line(line), bit)
        # if the line is blank or begins with a comment type_name is None
        if type_name is None:
            continue
        # create the definition for the class variables
        str_class += "\t\tself.%s = None #  %s \n" % (name, type_name)
        if size is None:
            # Unknown type: leave a placeholder for a nested struct.
            # TODO - fix struct of structs
            parse_parts.append("\t\tself.%s = %s()   # TODO:\n" % (name, type_name))
            parse_parts.append("\t\t# cur_addr = UNKNOWN # Update cur_addr\n")
            continue
        code = get_code(signed, size, b, "self", name)
        if not code:
            # get_code produced nothing for this member; mark it and still
            # advance the offset so the members after it stay aligned.
            code = ("\t\t# TODO: no unpack code for %s (%s bytes)\n"
                    "\t\tcur_addr += %s\n") % (name, size, size)
        parse_parts.append(code)

    # A parenthesised single argument prints identically whether print is
    # the Python 2 statement or the Python 3 function.
    print(str_class + append_parser_function())
    print(create_parser_function_header())
    print("".join(parse_parts))
	__author__ = 'Alexander Hanel'
	__date__ = '2018/02/28'
	__version__ = "2.0"
	__title__ = "struct creator"
	import re

	"""

	Example:

	ARGS:
	run(STRING, 32 or 64 (bit), NAMED)

	INPUT:
	run('''
	BYTE bType;
	BYTE bVersion;
	WORD Reserved;
	DWORD aiKeyAlg''', 32, "BLOBHEADER")

	class BLOBHEADER:
	def __init__(self, data):
	self.bType = None # BYTE
	self.bVersion = None # BYTE
	self.Reserved = None # WORD
	self.aiKeyAlg = None # DWORD
	self._parse_data(data)

	def _parse_data(self, data):
	# Struct unpacking code. Initiate cur_addr to 0
	cur_addr = 0
	temp_data = data[cur_addr: cur_addr + 1]
	self.bType = struct.unpack("<B", temp_data)[0]
	cur_addr += 1
	temp_data = data[cur_addr: cur_addr + 1]
	self.bVersion = struct.unpack("<B", temp_data)[0]
	cur_addr += 1
	temp_data = data[cur_addr: cur_addr + 2]
	self.Reserved = struct.unpack("<H", temp_data)[0]
	cur_addr += 2
	temp_data = data[cur_addr: cur_addr + 4]
	self.aiKeyAlg = struct.unpack("<I", temp_data)[0]
	cur_addr += 4


	Type value sources:
	https://docs.google.com/spreadsheet/ccc?key=0ArV2XEi4eBdpcEs1Q0VjZEc5R1lHZU83SzJkbUVjQmc#gid=0
	[MS-DTYP].pdf (duck duck go it)
	"""

	def open_file(f):
		"""
		Open the file at path f and return its first line only.

		The line is returned exactly as readline() produced it, so a trailing
		newline is kept; an empty file gives an empty string.
		"""
		# TODO - add open file option
		with open(f) as file_handle:
			return file_handle.readline()


	def strip_line(line):
		"""
		Split a line into its whitespace-separated tokens.

		Runs of spaces, tabs and a trailing carriage return are all treated as
		separators.  A blank line gives an empty list.
		"""
		# split() with no argument splits on any whitespace and never yields
		# empty strings, which split(" ") only managed for plain spaces.
		return line.split()


	def get_size(line):
		"""
		Split an optional array suffix off a member name.

		"Name[16]" returns ("Name", 16); a name without "[" is returned
		unchanged with a count of 1.  The count may be decimal or 0x-prefixed
		hexadecimal.

		Raises ValueError when the bracket is never closed or the count is not
		a number.
		"""
		if "[" not in line:
			return line, 1

		# Raw string: "\[" is a regex escape, not a Python string escape.
		found = re.search(r"\[(.*?)\]", line)
		if found is None:
			raise ValueError("unterminated array size in %r" % line)

		value = found.group(1).strip()
		temp_name = line[:line.find("[")]
		if value.lower().startswith("0x"):
			return temp_name, int(value, 16)
		return temp_name, int(value)


	def parse_line(line, bit):
		"""
		Classify one tokenised struct member definition.

		line is a list of tokens with the type first and the member name
		second.  bit selects the width of pointer-sized types: 32 gives 4
		bytes, any other value gives 8 bytes.

		Returns the tuple (signed, size, type_name, name, is_bit):
			signed    - True for signed types, False for unsigned ones
			size      - total bytes: element size multiplied by the array
			            count reported by get_size
			type_name - upper-cased type with "*" and "_" trimmed from both ends
			name      - member name without the trailing ";" or "[N]" suffix
			is_bit    - True only for the BIT type

		Lines with fewer than two tokens and lines whose first token contains
		"//" return five Nones.  An unrecognised type returns
		(None, None, type_name, name, None).
		"""
		# TODO: BSTR, LMCSTR, LMSTR, LPCSTR, LPCWSTR,
		# TODO: Bit value is returned but not implemented. No use cases or need to yet.

		# A member needs both a type and a name; shorter lines (blank, a lone
		# brace) and commented-out lines are skipped instead of raising
		# IndexError.
		if len(line) < 2 or "//" in line[0]:
			return None, None, None, None, None

		name, multi = get_size(line[1].strip(";"))
		line_type = line[0].strip("*").upper().strip("_")

		# Size: single bit (currently sized as one byte per element)
		if line_type == "BIT":
			return False, 1 * multi, line_type, name, True

		# (type names, signed, element size in bytes)
		fixed_width = (
			(("BOOLEAN", "PBOOLEAN", "BYTE", "PBYTE", "LPBYTE", "UCHAR", "PUCHAR",
			  "UINT8", "CHAR", "PCHAR", "OCTET"), False, 1),
			(("INT8",), True, 1),
			(("UINT16", "USHORT", "WORD", "PWORD", "LPWORD"), False, 2),
			(("INT16", "SHORT"), True, 2),
			# ULONG_PTR used to be listed here as well, which made its
			# pointer-sized entry below unreachable.
			(("DWORD", "PDWORD", "LPDWORD", "DWORD32", "LPCSTR", "LPCVOID",
			  "LPCWSTR", "PSTR", "LPSTR", "LPWSTR", "PWSTR", "UINT", "UINT32",
			  "PULONG", "ULONG32", "NTSTATUS", "BOOL", "PBOOL", "LPBOOL",
			  "DWORD_PTR", "FLOAT", "HRESULT", "RPC_BINDING_HANDLE"), False, 4),
			(("LPINT", "INT", "INT32", "LONG", "PLONG", "LPLONG", "LONG32"),
			 True, 4),
			(("DWORD64", "PDWORD64", "DWORDLONG", "PDWORDLONG", "QWORD", "UINT64",
			  "ULONG64", "ULONGLONG", "DOUBLE"), False, 8),
			# Signed 64-bit integers; previously reported as unsigned.
			(("INT64", "LONGLONG", "LONG64"), True, 8),
		)
		for type_names, signed, width in fixed_width:
			if line_type in type_names:
				return signed, width * multi, line_type, name, False

		# can be either 32 bit or 64
		pointer_sized = ("ULONG_PTR", "ULONG", "HANDLE_T", "ADCONNECTION_HANDLE",
		                 "HANDLE", "LDAP_UDP_HANDLE", "PCONTEXT_HANDLE",
		                 "PPCONTEXT_HANDLE")
		if line_type in pointer_sized:
			width = 4 if bit == 32 else 8
			return False, width * multi, line_type, name, False

		# TODO: UINT128 (sixteen bytes unsigned) is not handled yet.
		# signed, size in bytes, type, name, bool for bits
		return None, None, line_type, name, None


	def get_code(signed, length, bit, class_name, name):
		"""
		Build the Python source lines that unpack one struct member.

		signed     - truthy to use the signed struct format character
		length     - total size of the member in bytes
		bit        - bit-field flag; accepted but not used yet
		class_name - object the attribute is assigned on, e.g. "self"
		name       - attribute name to assign

		Lengths of 1, 2, 4 and 8 are decoded with a little-endian
		struct.unpack.  Any other non-zero length, signed or not, is copied
		out as a raw slice of data.  Every snippet advances cur_addr by
		length.  A falsy length returns an empty string, so the result can
		always be concatenated.
		"""
		if not length:
			return ""

		# Unsigned struct format characters by byte width; the signed
		# variants are the same letters in lower case.
		unsigned_formats = {1: "B", 2: "H", 4: "I", 8: "Q"}
		fmt = unsigned_formats.get(length)

		# The templates are single-line literals so that no source
		# indentation can leak into the generated code.
		if fmt is None:
			# No integer format of this width: keep the bytes as they are.
			return (
				"\t\t%s.%s = data[cur_addr: cur_addr + %s]\n"
				"\t\tcur_addr += %s\n"
			) % (class_name, name, length, length)

		if signed:
			fmt = fmt.lower()
		return (
			"\t\ttemp_data = data[cur_addr: cur_addr + %d]\n"
			"\t\t%s.%s = struct.unpack(\"<%s\", temp_data)[0]\n"
			"\t\tcur_addr += %d\n"
		) % (length, class_name, name, fmt, length)

	def create_class_header(name):
		""" Create the class statement and __init__ signature of the generated class """
		# Single-line literal: a multi-line string here would carry the
		# source indentation into the generated code.
		return "class %s:\n\tdef __init__(self, data):\n" % name

	def create_parser_function_header():
		""" Creates the start of the generated _parse_data function """
		# Built from single-line literals so that no source indentation ends
		# up inside the generated code.
		return (
			"\n\tdef _parse_data(self, data):\n"
			"\t\t# Struct unpacking code. Initiate cur_addr to 0\n"
			"\t\tcur_addr = 0"
		)

	def append_parser_function():
		""" Create the line of generated code that calls _parse_data on the input """
		return "\t\tself._parse_data(data)"

	def run(buffer, bit, class_name=None):
		"""
		Print a Python class that parses the struct described in buffer.

		buffer     - struct member definitions, one per line
		bit        - forwarded to parse_line to size pointer-width types
		class_name - name of the generated class; it is passed to
		             create_class_header as given, so supply a real name

		The generated source is written to stdout; nothing is returned.
		"""
		str_class = create_class_header(class_name)
		parse_parts = []
		for line in buffer.split("\n"):
			signed, size, type_name, name, b = parse_line(strip_line(line), bit)
			# if the line is blank or begins with a comment type_name is None
			if type_name is None:
				continue
			# create the definition for the class variables
			str_class += "\t\tself.%s = None # %s \n" % (name, type_name)
			if size is None:
				# Unknown type: leave a placeholder for a nested struct.
				# TODO - fix struct of structs
				parse_parts.append("\t\tself.%s = %s() # TODO:\n" % (name, type_name))
				parse_parts.append("\t\t# cur_addr = UNKNOWN # Update cur_addr\n")
				continue
			code = get_code(signed, size, b, "self", name)
			if not code:
				# get_code produced nothing for this member; mark it and
				# still advance the offset so later members stay aligned.
				code = ("\t\t# TODO: no unpack code for %s (%s bytes)\n"
				        "\t\tcur_addr += %s\n") % (name, size, size)
			parse_parts.append(code)

		# A parenthesised single argument prints identically whether print
		# is the Python 2 statement or the Python 3 function.
		print(str_class + append_parser_function())
		print(create_parser_function_header())
		print("".join(parse_parts))