Skip to content

Instantly share code, notes, and snippets.

@alexander-hanel
Last active August 13, 2018 10:26
Show Gist options
  • Save alexander-hanel/1e33611ea310e3be34e5804b9ad6f85c to your computer and use it in GitHub Desktop.
Save alexander-hanel/1e33611ea310e3be34e5804b9ad6f85c to your computer and use it in GitHub Desktop.
# Module metadata: author, release date, version and short title of the tool.
__author__ = 'Alexander Hanel'
__date__ = '2018/02/28'
__version__ = "2.0"
__title__ = "struct creator"
import re
"""
Example:
ARGS:
run(STRING, 32 or 64 (bit), NAMED)
INPUT:
run('''
BYTE bType;
BYTE bVersion;
WORD Reserved;
DWORD aiKeyAlg''', 32, "BLOBHEADER")
OUTPUT:
class BLOBHEADER:
def __init__(self, data):
self.bType = None # BYTE
self.bVersion = None # BYTE
self.Reserved = None # WORD
self.aiKeyAlg = None # DWORD
self._parse_data(data)
def _parse_data(self, data):
# Struct unpacking code. Initiate cur_addr to 0
cur_addr = 0
temp_data = data[cur_addr: cur_addr + 1]
self.bType = struct.unpack("<B", temp_data)[0]
cur_addr += 1
temp_data = data[cur_addr: cur_addr + 1]
self.bVersion = struct.unpack("<B", temp_data)[0]
cur_addr += 1
temp_data = data[cur_addr: cur_addr + 2]
self.Reserved = struct.unpack("<H", temp_data)[0]
cur_addr += 2
temp_data = data[cur_addr: cur_addr + 4]
self.aiKeyAlg = struct.unpack("<I", temp_data)[0]
cur_addr += 4
Type value sources:
https://docs.google.com/spreadsheet/ccc?key=0ArV2XEi4eBdpcEs1Q0VjZEc5R1lHZU83SzJkbUVjQmc#gid=0
[MS-DTYP].pdf (duck duck go it)
"""
def open_file(f):
    """
    Read a struct definition from a text file.

    The whole file is returned as one string so that a multi-line
    definition can be handed to run(), which splits its buffer on newlines.
    (Returning only the first line would drop every field after the first.)

    f: path of the file to read.
    Returns the complete file contents as a string.
    """
    # TODO - add open file option
    with open(f) as file_handle:
        return file_handle.read()
def strip_line(line):
    """
    Split a line into its whitespace-separated tokens.

    Splitting on any whitespace (not only the space character) keeps tabs
    used for indentation and a trailing carriage return from sticking to
    the type or field name. Empty elements are never produced.

    line: one line of a struct definition.
    Returns a list of tokens; an empty or blank line gives [].
    """
    return line.split()
def get_size(line):
    """
    Split a field name from its optional array size.

    Example: "Name[16]" returns ("Name", 16); a name without brackets
    returns (name, 1). The size may be decimal or 0x-prefixed hexadecimal.

    line: the field-name token, e.g. "Name" or "Name[16]".
    Returns a (name, element_count) tuple.
    Raises ValueError if a "[" has no closing "]" or the size is not a number.
    """
    if "[" not in line:
        return line, 1
    # Raw string: "\[" in a normal string literal is an invalid escape.
    match = re.search(r"\[(.*?)\]", line)
    if match is None:
        raise ValueError("unterminated array size in %r" % line)
    text = match.group(1).strip()
    base = 16 if text.lower().startswith("0x") else 10
    return line[:line.find("[")], int(text, base)
# Fixed-width types as (signed, bytes per element, upper-cased type names).
# NOTE(review): pointer typedefs such as PDWORD/LPCSTR are kept at four bytes
# as in the original table, even for 64-bit targets - confirm this is intended.
_FIXED_WIDTH_TYPES = (
    (False, 1, ("BOOLEAN", "PBOOLEAN", "BYTE", "PBYTE", "LPBYTE", "UCHAR",
                "PUCHAR", "UINT8", "CHAR", "PCHAR", "OCTET")),
    (True, 1, ("INT8",)),
    (False, 2, ("UINT16", "USHORT", "WORD", "PWORD", "LPWORD")),
    (True, 2, ("INT16", "SHORT")),
    (False, 4, ("DWORD", "PDWORD", "LPDWORD", "DWORD32", "LPCSTR", "LPCVOID",
                "LPCWSTR", "PSTR", "LPSTR", "LPWSTR", "PWSTR", "UINT",
                "UINT32", "PULONG", "ULONG32", "NTSTATUS", "BOOL", "PBOOL",
                "LPBOOL", "FLOAT", "HRESULT", "RPC_BINDING_HANDLE")),
    (True, 4, ("LPINT", "INT", "INT32", "LONG", "PLONG", "LPLONG", "LONG32")),
    (False, 8, ("DWORD64", "PDWORD64", "DWORDLONG", "PDWORDLONG", "QWORD",
                "UINT64", "ULONG64", "ULONGLONG", "DOUBLE")),
    (True, 8, ("INT64", "LONGLONG", "LONG64")),
)

# Types whose width follows the target: four bytes for 32 bit, eight otherwise.
_POINTER_SIZED_TYPES = ("ULONG_PTR", "DWORD_PTR", "ULONG", "HANDLE_T",
                        "ADCONNECTION_HANDLE", "HANDLE", "LDAP_UDP_HANDLE",
                        "PCONTEXT_HANDLE", "PPCONTEXT_HANDLE")


def parse_line(line, bit):
    """
    Extract the type, size, name and signedness of one struct member.

    line: list of tokens for one definition line, e.g. ["DWORD", "Name[4];"].
    bit:  32 or 64; selects the width of pointer-sized types.

    Returns a 5-tuple (signed, size, type_name, name, is_bit):
      * all None when the line is empty, starts with a "//" comment, or has
        no second token to use as the member name (e.g. a lone "}");
      * (None, None, type_name, name, None) when the type is not recognised;
      * otherwise size is the total byte count (element size times the
        array length parsed from the name).
    """
    # TODO: BSTR, LMCSTR, LMSTR, LPCSTR, LPCWSTR,
    # TODO: Bit value is returned but not implemented. No use cases or need to yet.
    if len(line) == 0 or "//" in line[0]:
        return None, None, None, None, None
    if len(line) < 2:
        # A single token carries no member name; indexing line[1] would fail.
        return None, None, None, None, None
    name, multi = get_size(line[1].strip(";"))
    # Drop pointer stars and leading/trailing underscores (e.g. "__int64").
    line_type = line[0].strip("*").upper().strip("_")
    # Size: single bit
    if line_type == "BIT":
        return False, 1 * multi, line_type, name, True
    for signed, width, type_names in _FIXED_WIDTH_TYPES:
        if line_type in type_names:
            return signed, width * multi, line_type, name, False
    # can be either 32 bit or 64
    if line_type in _POINTER_SIZED_TYPES:
        width = 4 if bit == 32 else 8
        return False, width * multi, line_type, name, False
    # TODO: sixteen byte unsigned types (UINT128) are not handled yet.
    # signed, size in bytes, type, name, bool for bits
    return None, None, line_type, name, None
def get_code(signed, length, bit, class_name, name):
    """
    Format the Python source that unpacks one struct member from `data`.

    signed:     truthy to use the signed struct format character.
    length:     total size of the member in bytes.
    bit:        accepted for interface compatibility; not used.
    class_name: object the attribute is assigned on (e.g. "self").
    name:       attribute name of the member.

    Lengths of 1, 2, 4 and 8 bytes are unpacked as one little-endian integer
    ("<b/h/i/q" when signed, "<B/H/I/Q" otherwise). Any other non-zero
    length, signed or not, is emitted as a raw slice of `data`. A zero or
    missing length yields an empty string, so the result can always be
    concatenated by the caller.
    """
    if not length:
        return ""
    # Compare by value (dict lookup), never by identity, for integer sizes.
    format_char = {1: "b", 2: "h", 4: "i", 8: "q"}.get(length)
    if format_char is None:
        # No single struct format fits: keep the bytes as they are.
        return ("\t\t%s.%s = data[cur_addr: cur_addr + %s]\n"
                "\t\tcur_addr += %s\n") % (class_name, name, length, length)
    if not signed:
        format_char = format_char.upper()
    return ("\t\ttemp_data = data[cur_addr: cur_addr + %s]\n"
            "\t\t%s.%s = struct.unpack(\"<%s\", temp_data)[0]\n"
            "\t\tcur_addr += %s\n") % (length, class_name, name, format_char, length)
def create_class_header(name):
    """Return the `class` line and the `__init__` signature for class `name`."""
    header_lines = ("class %s:", "\tdef __init__(self, data):", "")
    template = "\n".join(header_lines)
    return template % name
def create_parser_function_header():
    """Return the opening lines of the generated _parse_data method."""
    opening = [
        "",
        "\tdef _parse_data(self, data):",
        "\t\t# Struct unpacking code. Initiate cur_addr to 0",
        "\t\tcur_addr = 0",
    ]
    return "\n".join(opening)
def append_parser_function():
    """Return the line that calls _parse_data at the end of __init__."""
    call_line = "\t\t" + "self._parse_data(data)"
    return call_line
def run(buffer, bit, class_name=None):
    """
    Print a Python class that unpacks the struct described by `buffer`.

    buffer:     struct definition text, one member per line.
    bit:        32 or 64; forwarded to parse_line for pointer-sized types.
    class_name: name of the generated class.

    Three pieces are printed in order: the class header with one
    `self.<member> = None` line per member plus the call to _parse_data,
    then the _parse_data header, then the unpacking code. Nothing is
    returned. print() is called with a single argument so the same source
    runs on both Python 2 and Python 3.
    """
    init_code = create_class_header(class_name)
    parse_code = ""
    for line in buffer.split("\n"):
        tokens = strip_line(line)
        signed, size, type_name, name, is_bit = parse_line(tokens, bit)
        # if the line is empty or begins with a comment type_name is None
        if type_name is None:
            continue
        # create the definition for the class variables
        init_code += "\t\tself.%s = None # %s \n" % (name, type_name)
        if size is not None:
            member_code = get_code(signed, size, is_bit, "self", name)
            # Guard against a helper that yields nothing for this size.
            if member_code:
                parse_code += member_code
        else:
            # Unknown type: emit a placeholder the user has to complete.
            # TODO - fix struct of structs
            parse_code += """\t\tself.%s = %s() # TODO:\n""" % (name, type_name)
            parse_code += "\t\t# cur_addr = UNKNOWN # Update cur_addr\n"
    print(init_code + append_parser_function())
    print(create_parser_function_header())
    print(parse_code)
@alexander-hanel
Copy link
Author

alexander-hanel commented Feb 28, 2018

To execute the code, I typically append the struct definition to the end of the script above. For example:

run('''
BYTE bType;
BYTE bVersion;
WORD Reserved;
DWORD aiKeyAlg''', 32, "BLOBHEADER ")

If a structure contains an embedded structure, manually calculate its size and declare it as BYTE STRUCT_NAME[INTEGER_SIZE]; (for example, BYTE Name[8];). This reads the embedded struct as a raw block of n bytes. The snippet above outputs Python code that can be used to parse the binary structure. Make sure to add import struct to the generated code.

class BLOBHEADER :
	def __init__(self, data):
		self.bType = None #  BYTE 
		self.bVersion = None #  BYTE 
		self.Reserved = None #  WORD 
		self.aiKeyAlg = None #  DWORD 
		self._parse_data(data)

	def _parse_data(self, data):
		# Struct unpacking code. Initiate cur_addr to 0
		cur_addr = 0
		temp_data = data[cur_addr: cur_addr + 1]
		self.bType = struct.unpack("<B", temp_data)[0]
		cur_addr += 1
		temp_data = data[cur_addr: cur_addr + 1]
		self.bVersion = struct.unpack("<B", temp_data)[0]
		cur_addr += 1
		temp_data = data[cur_addr: cur_addr + 2]
		self.Reserved = struct.unpack("<H", temp_data)[0]
		cur_addr += 2
		temp_data = data[cur_addr: cur_addr + 4]
		self.aiKeyAlg = struct.unpack("<I", temp_data)[0]
		cur_addr += 4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment