Skip to content

Instantly share code, notes, and snippets.

@Joeccp
Created March 10, 2024 02:18
Show Gist options
  • Save Joeccp/f8acb8774fe7691e0b0a3c3b8a08fbb2 to your computer and use it in GitHub Desktop.
Save Joeccp/f8acb8774fe7691e0b0a3c3b8a08fbb2 to your computer and use it in GitHub Desktop.
My hexdump in Python
"""
A hexdump utility written in Python.
Run `python hexdump.py -h` to see usage.
"""
from typing import *
# Maps each ASCII control code to its Unicode "Control Pictures" glyph.
# The pictures for codes 0..31 sit at U+2400 + code, in code order
# (SYMBOL FOR NULL .. SYMBOL FOR UNIT SEPARATOR); DELETE (127) lives apart
# at U+2421, so it is listed explicitly.
CONTROL_CHARACTERS_REPRESENTATION_IN_CONTROL_PICTURE: Final[dict[int, str]] = {
    **{code: chr(0x2400 + code) for code in range(0x20)},
    0x7F: '\N{SYMBOL FOR DELETE}',
}
# Row layout. The gap widths are the ones implied by the padding table this
# function used to hard-code (52, 49, ..., 31, 25, ..., 4): one column between
# neighbouring bytes, four after the 8th byte and two after the 16th.
_BYTES_PER_ROW = 16
_GAP_AFTER_BYTE = ' '
_GAP_AFTER_HALF_ROW = ' ' * 4
_GAP_AFTER_FULL_ROW = ' ' * 2


def _hex_field(chunk: bytes) -> str:
    """Return the hex column for one row: two upper-case digits per byte, each followed by its gap."""
    pieces = []
    for position, value in enumerate(chunk, start=1):
        if position % _BYTES_PER_ROW == 0:
            gap = _GAP_AFTER_FULL_ROW
        elif position % (_BYTES_PER_ROW // 2) == 0:
            gap = _GAP_AFTER_HALF_ROW
        else:
            gap = _GAP_AFTER_BYTE
        pieces.append(f"{value:02X}{gap}")
    return ''.join(pieces)


def _ascii_field(chunk: bytes, use_control_picture: bool) -> str:
    """Return the text column for one row: one character per byte."""
    chars = []
    for value in chunk:
        if 32 <= value <= 126:  # Printable ASCII is shown as itself
            chars.append(chr(value))
        elif value <= 127 and use_control_picture:  # 0..31 and 127
            chars.append(CONTROL_CHARACTERS_REPRESENTATION_IN_CONTROL_PICTURE[value])
        else:  # Control characters without pictures, and every byte >= 128
            chars.append('.')
    return ''.join(chars)


def hexdump(filepath: str, use_control_picture: bool = False) -> None:
    """
    Print the content of a file as a hex dump, 16 bytes per row.

    Each row shows the offset of its first byte in decimal and in hexadecimal,
    the bytes as upper-case hex pairs, and the same bytes as text between `|`.
    A short final row is padded so that its text column starts where the text
    column of a full row starts. The dump always ends with a line holding only
    the offset just past the last byte (i.e. the file size), then a blank line.

    The file is opened with `open(filepath, 'rb')`.

    :param filepath: Anything accepted by `open(filepath, 'rb')`.
    :param use_control_picture: Show ASCII control characters (0-31 and 127) as
        Unicode Control Pictures instead of `.`.
    :return: None, it just prints.
    :raises OSError: If the file cannot be opened or read.
    """
    # The padding target is measured from the formatter itself, so the row
    # layout is defined in exactly one place.
    hex_field_width = len(_hex_field(bytes(_BYTES_PER_ROW)))
    offset = 0
    with open(filepath, 'rb') as file:
        print()
        # NOTE(review): the header text is kept verbatim and is not derived
        # from the row layout -- check that it lines up with the columns.
        print(' Offset Byte ASCII')
        print(' Dec Hex 0 1 2 3 4 5 6 7 8 9 A B C D E F 0123456789ABCDEF')
        print('-' * 85)
        # Read a whole row at a time; iterating a bytes object yields ints,
        # so no per-byte int.from_bytes() call (whose byteorder argument is
        # mandatory before Python 3.11) is needed.
        while chunk := file.read(_BYTES_PER_ROW):
            hex_field = _hex_field(chunk).ljust(hex_field_width)
            ascii_field = _ascii_field(chunk, use_control_picture)
            print(f"[{offset:0>5}] {offset:0>4X}: {hex_field}|{ascii_field}|")
            offset += len(chunk)
    # Closing line: same shape whether or not the size is a multiple of 16.
    print(f"[{offset:0>5}] {offset:0>4X}:")
    print()
if __name__ == '__main__':
    # Command-line entry point: one positional file path plus an optional
    # switch that turns on Unicode control pictures in the text column.
    import argparse

    cli = argparse.ArgumentParser(description='Generate hex dump of a file')
    cli.add_argument('file', help='file path')
    cli.add_argument(
        '-c', '--control-picture',
        action='store_true',
        help='use Unicode control pictures to represent ASCII control characters',
    )
    options = cli.parse_args()
    hexdump(options.file, use_control_picture=options.control_picture)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment