Created
May 29, 2017 20:45
-
-
Save mzpqnxow/a23e0e3621e778dada9c4bb96ab02ff1 to your computer and use it in GitHub Desktop.
Ingesting ASCII hex dumps into Python strings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from re import ( | |
sub as regex_sub, | |
match as regex_match) | |
from binascii import a2b_hex as ascii_hex_to_binary | |
def asciiHexToBuffer(inbuf,
                     skip_bin_encode=False,
                     is_xxd=False,
                     is_od=False,
                     is_tcpdump=False,
                     is_wireshark=False):
    """
    Convert an ASCII hex dump into a hex string or the bytes it encodes.

    Take as input inbuf, a newline delimited string consisting of the
    ASCII representation (in hex) of raw bytes in the od, xxd, tcpdump or
    wireshark output format. Offsets and other row prefixes are stripped
    and only the byte values are kept.

    Input:
        inbuf (str or bytes): Newline delimited ASCII hex dump. Bytes input
            is decoded before parsing.
        skip_bin_encode (bool): If True, don't perform the final conversion
            to binary
        is_xxd (bool): If True, handle as an xxd string
        is_od (bool): If True, handle as an od string
        is_tcpdump (bool): If True, handle as a tcpdump string
        is_wireshark (bool): If True, handle as a wireshark string

    Output:
        If skip_bin_encode is True, returns a stream of hex digits, i.e.
            '414243444546'
        Otherwise returns the decoded raw bytes, i.e. 'ABCDEF'

    Raises:
        RuntimeError: unless exactly one of the is_* flags is True.

    Notes:
        Lines that do not match the selected format are reported on stderr
        and skipped; they do not abort the conversion.

        At most 16 bytes are read per line. The hex column is matched as
        "up to 16 hex pairs separated by optional whitespace", so on a row
        holding fewer than 16 bytes a trailing ASCII column that happens to
        begin with hex digit pairs (e.g. "AB") may be consumed as data.
        Full 16-byte rows and od's '>...<' delimited column are unaffected.

        To produce od output compatible with this function, use:
            $ od -A x -t x1z -v <filename>
        To produce xxd output compatible with this function, use:
            $ xxd <filename>
        To produce tcpdump output compatible with this function use:
            $ tcpdump -X -vvv
            ... copy and paste the buffer you want ...
        To produce wireshark output compatible with this function use:
            $ wireshark ...
            1. Choose a packet in the top pane, choose follow TCP stream.
            2. Select one direction of the stream from the dropdown box and
               do NOT choose the "full" conversation, just one side
            3. Check 'hex' and save as

        This is most useful for:
            xxd, od: running on third party hosts, a cheap way to copy
                     paste files into Python when you don't want to use
                     base64 or uuencode/uudecode
            tcpdump: when running on a remote host or even a local host but
                     you don't feel like parsing the cap file and you just
                     want to get a packet into Python quickly to manipulate
            wireshark: exporting raw bytes is usually easier, but the regex
                       is similar so it is supported as well
    """
    import sys

    # Every format shares the same hex column; only the row prefix differs.
    hex_pairs = r'(?P<data>(([0-9a-fA-F]{2})\s*){1,16})'
    line_patterns = (
        # tcpdump -X indents each hex row (with a tab), so accept any
        # leading whitespace before the 0x offset, not only spaces.
        (is_tcpdump, r'^\s*0x([0-9a-fA-F]){1,8}(:){0,1}(\s)*' + hex_pairs),
        # od -A x -t x1z -v <filename>
        (is_od, r'^([0-9a-fA-F]){1,8} ' + hex_pairs),
        # Follow stream, hex dump, save as (one side of conversation only)
        (is_wireshark, r'^([0-9a-fA-F]){1,8}(:){0,1}(\s)*' + hex_pairs),
        # xxd <filename>
        (is_xxd, r'^([0-9a-fA-F]){1,8}: *' + hex_pairs),
    )

    # Build a real list: on Python 3 filter() returns an iterator, which has
    # no len(). The flags must be the literal True, as before.
    selected = [pattern for flag, pattern in line_patterns if flag is True]
    if len(selected) != 1:
        raise RuntimeError('must choose one format')
    line_pattern = selected[0]

    # Files opened in binary mode yield bytes on Python 3; a hex dump is
    # plain ASCII so a byte-preserving decode is safe. On Python 2 bytes is
    # str, so this is skipped.
    if isinstance(inbuf, bytes) and not isinstance(inbuf, str):
        inbuf = inbuf.decode('latin-1')

    hex_chunks = []
    for ascii_line in inbuf.split('\n'):
        matched = regex_match(line_pattern, ascii_line)
        if matched is None:
            sys.stderr.write('NO match on line: "%s"' % ascii_line + '\n')
            continue
        hex_chunks.append(matched.group('data'))

    # The data group still contains the spacing between byte pairs.
    hex_stream = regex_sub(r'\s+', '', ''.join(hex_chunks))
    if skip_bin_encode is True:
        return hex_stream
    return ascii_hex_to_binary(hex_stream)
def test():
    """
    Manual driver: parse the hex dump file named on the command line as
    tcpdump output and print the decoded result.

    Expects exactly one command line argument, the path of the input file.
    With any other argument count a usage message is printed and the
    process exits with status 0. The input format is fixed to tcpdump here;
    edit the flag passed to asciiHexToBuffer to try another format.
    """
    from sys import argv, exit as sys_exit

    if len(argv) != 2:
        print('Test suite for hex function ...')
        print('  Usage: %s <input file>' % (argv[0]))
        print('You will need to set flags like is_tcpdump yourself in code')
        # sys.exit rather than the bare exit() builtin, which is only
        # injected by the site module.
        sys_exit(0)

    # Hex dumps are text: read as str so the parser can split on '\n' on
    # both Python 2 and Python 3.
    with open(argv[1], 'r') as fd:
        # print() with a single argument behaves the same on Python 2 and 3,
        # matching the print(...) calls above.
        print(asciiHexToBuffer(fd.read(), is_tcpdump=True))
# Run the manual test driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment