Created
March 10, 2024 02:18
-
-
Save Joeccp/f8acb8774fe7691e0b0a3c3b8a08fbb2 to your computer and use it in GitHub Desktop.
My hexdump in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Hex dump of a file, written in Python.

Run `python hexdump.py -h` to see usage.
"""
from typing import *
# Maps each ASCII control code (0-31 and 127) to the character from the
# Unicode "Control Pictures" block that visually represents it.
# Code 32 (space) is intentionally absent: it is treated as printable.
CONTROL_CHARACTERS_REPRESENTATION_IN_CONTROL_PICTURE: Final[dict[int, str]] = {
    0: '\N{Symbol For Null}',
    1: '\N{Symbol For Start of Heading}',
    2: '\N{Symbol For Start of Text}',
    3: '\N{Symbol For End of Text}',
    4: '\N{Symbol For End of Transmission}',
    5: '\N{Symbol For Enquiry}',
    6: '\N{Symbol For Acknowledge}',
    7: '\N{Symbol For Bell}',
    8: '\N{Symbol For Backspace}',
    9: '\N{Symbol For Horizontal Tabulation}',
    10: '\N{Symbol For Line Feed}',
    11: '\N{Symbol For Vertical Tabulation}',
    12: '\N{Symbol For Form Feed}',
    13: '\N{Symbol For Carriage Return}',
    14: '\N{Symbol For Shift Out}',
    15: '\N{Symbol For Shift In}',
    16: '\N{Symbol For Data Link Escape}',
    17: '\N{Symbol For Device Control One}',
    18: '\N{Symbol For Device Control Two}',
    19: '\N{Symbol For Device Control Three}',
    20: '\N{Symbol For Device Control Four}',
    21: '\N{Symbol For Negative Acknowledge}',
    22: '\N{Symbol For Synchronous Idle}',
    23: '\N{Symbol For End of Transmission Block}',
    24: '\N{Symbol For Cancel}',
    25: '\N{Symbol For End of Medium}',
    26: '\N{Symbol For Substitute}',
    27: '\N{Symbol For Escape}',
    28: '\N{Symbol For File Separator}',
    29: '\N{Symbol For Group Separator}',
    30: '\N{Symbol For Record Separator}',
    31: '\N{Symbol For Unit Separator}',
    127: '\N{Symbol For Delete}',
}
_BYTES_PER_ROW = 16
# 16 two-character cells + 14 single-space gaps + a 4-space gap between the
# two groups of eight + a 2-space tail before the ASCII column.
_HEX_COLUMN_WIDTH = 52


def _offset_label(offset: int) -> str:
    """Return the `[decimal] HEX:` label that starts the row at *offset*."""
    return f"[{offset:0>5}] {offset:0>4X}:"


def _hex_column(cells: list[str]) -> str:
    """Join up to 16 two-character cells into the fixed-width middle column."""
    groups = (' '.join(cells[:8]), ' '.join(cells[8:]))
    # A short (final) row is padded so the ASCII column still lines up.
    return (' ' * 4).join(groups).ljust(_HEX_COLUMN_WIDTH)


def _ascii_column(chunk: bytes, use_control_picture: bool) -> str:
    """Return the text shown between the `|` bars for *chunk*."""
    shown = []
    for value in chunk:
        if 32 <= value <= 126:  # printable ASCII
            shown.append(chr(value))
        elif use_control_picture and value <= 127:  # control code 0-31 or DEL
            shown.append(CONTROL_CHARACTERS_REPRESENTATION_IN_CONTROL_PICTURE[value])
        else:  # bytes >= 128, or control codes when pictures are disabled
            shown.append('.')
    return ''.join(shown)


def hexdump(filepath: str, use_control_picture: bool = False) -> None:
    """
    Print a hex dump of a file to standard output.

    The file is opened with `open(filepath, 'rb')` and printed 16 bytes per
    row. Each row shows the offset of its first byte (decimal and hex), the
    bytes in hex, and their ASCII rendering. After the last row, one more
    offset label is printed with the total number of bytes read.

    Example row:
    [00000] 0000:  48 65 6C 6C 6F 2C 20 57    6F 72 6C 64 21 0A 00 7F  |Hello, World!...|

    :param filepath: Anything accepted by `open(filepath, 'rb')`.
    :param use_control_picture: Show ASCII control characters (0-31 and 127)
        as Unicode Control Pictures instead of `.`. Bytes >= 128 are always
        shown as `.`.
    :return: None, it just prints.
    :raises OSError: If the file cannot be opened or read.
    """
    with open(filepath, 'rb') as file:
        # The header labels are built from the same column widths as the data
        # rows, so they stay aligned with them.
        digit_labels = _hex_column([f"{digit:>2}" for digit in '0123456789ABCDEF'])
        print()
        print(f"{'Offset':^13}  {'Byte':^50}  {'ASCII':^18}".rstrip())
        print(f"{'Dec':^7} {'Hex':<7}{digit_labels} 0123456789ABCDEF")
        print('-' * 85)

        byte_count = 0
        # Read one whole row per call; iteration stops at the first empty
        # read, i.e. at end of file.
        for chunk in iter(lambda: file.read(_BYTES_PER_ROW), b''):
            # Iterating over bytes yields ints directly, so no int.from_bytes
            # call (and no Python-version-specific default) is needed.
            cells = [f"{value:02X}" for value in chunk]
            text = _ascii_column(chunk, use_control_picture)
            print(f"{_offset_label(byte_count)}  {_hex_column(cells)}|{text}|")
            byte_count += len(chunk)

        # Closing label: the offset one past the last byte (the file size).
        print(_offset_label(byte_count))
        print()
if __name__ == '__main__':
    import argparse

    # Command-line entry point: `python hexdump.py [-c] FILE`.
    parser = argparse.ArgumentParser(description='Generate hex dump of a file')
    parser.add_argument('file', help='file path')
    parser.add_argument(
        '-c', '--control-picture',
        action='store_true',
        help='use Unicode control pictures to represent ASCII control characters',
    )
    args = parser.parse_args()
    try:
        hexdump(filepath=args.file, use_control_picture=args.control_picture)
    except OSError as error:
        # Report a missing or unreadable file as an ordinary command-line
        # error (usage message, exit status 2) instead of a traceback.
        parser.error(str(error))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment