Skip to content

Instantly share code, notes, and snippets.

@jakekara
Created December 12, 2016 00:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jakekara/8e667b7d31350692f7439cb5d0324d05 to your computer and use it in GitHub Desktop.
Save jakekara/8e667b7d31350692f7439cb5d0324d05 to your computer and use it in GitHub Desktop.
Just some idle poking around inside the PNG file format with python. Ugly code, maybe-useful comments.
# Exploration in PNG
#
# Just poking through the PNG file structure.
#
# Found weird output when examining MacOS screenshots. They contain an
# "iDOT" chunk, the purpose of which I couldn't determine. This chunk type
# is not part of the PNG standard, and seems to break some programs'
# ability to process these files, according to complaints I'm finding on
# Google. See sample output below. If no one's already done it, maybe there
# is some value in writing a script to strip out these invalid iDOT chunks.
import sys, struct
# The PNG file to inspect is the first (and only) command-line argument.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s FILE.png\n" % sys.argv[0])
    sys.exit(-1)
filename = sys.argv[1]
fh = open(filename, 'rb')

# The first eight bytes of a PNG are always these:
first_eight = [137, 80, 78, 71, 13, 10, 26, 10]

# Read the whole signature in one call and compare it against the expected
# bytes. A file shorter than eight bytes just fails the comparison instead
# of crashing inside struct.unpack.
signature = fh.read(len(first_eight))
if signature != struct.pack('%dB' % len(first_eight), *first_eight):
    print("ERROR: Invalid PNG signature")
    fh.close()
    sys.exit(-1)
print("Valid PNG signature")
# Process the image header chunk to output some useful file info.
# I didn't make special functions for any other chunk type
def process_ihdr(stream=None):
    """Print the fields of a PNG IHDR chunk read from a binary stream.

    Reads the 13 bytes of IHDR data and then the 4-byte CRC that follows
    them. The CRC is discarded without being verified, so on return the
    stream is positioned at the start of the next chunk.

    Args:
        stream: Binary file-like object positioned immediately after the
            "IHDR" chunk type. Defaults to the module-level ``fh``.

    Raises:
        ValueError: If fewer than 13 bytes of IHDR data can be read.
    """
    if stream is None:
        stream = fh

    # The ! is necessary because PNGs being Portable "NETWORK" Graphics use
    # network byte ordering (which is big-endian). The layout is two
    # unsigned 32-bit integers followed by five unsigned bytes.
    layout = '!IIBBBBB'
    expected = struct.calcsize(layout)
    data = stream.read(expected)
    if len(data) != expected:
        raise ValueError(
            "Truncated IHDR chunk: expected %d bytes, got %d"
            % (expected, len(data)))

    labels = (
        "width",
        "height",
        "bit_depth",
        "colour_type",
        "compression_method",
        "filter_method",
        "interlace_method",
    )
    for label, value in zip(labels, struct.unpack(layout, data)):
        # Two spaces after the colon keep the output identical to the
        # Python 2 statement form `print "label: ", value`.
        print("%s:  %s" % (label, value))

    # Skip over the chunk's CRC; it is not checked.
    stream.read(4)
# Process all the chunks and print a summary of each.
# Walk every chunk after the signature. Each chunk is laid out as a 4-byte
# big-endian length, a 4-byte type, `length` bytes of data and a 4-byte CRC.
idats_found = 0
idats_bytes = 0
try:
    while True:
        header = fh.read(8)
        if not header:
            # Clean end of file: nothing left to read.
            break
        if len(header) < 8:
            print("ERROR: Truncated chunk header")
            break
        chunk_length, raw_type = struct.unpack('!I4s', header)
        # Keep the type as a native str on both Python 2 (where bytes is
        # str) and Python 3 (where it must be decoded).
        if isinstance(raw_type, str):
            chunk_type = raw_type
        else:
            chunk_type = raw_type.decode('latin-1')

        # Instead of printing the same report for each IDAT chunk, just
        # print a summary at the end, to reduce flooding your console
        if chunk_type != "IDAT":
            print("=" * 20)
            print("chunk_length: " + str(chunk_length))
            # Two spaces match the Python 2 `print "chunk_type: ", x` form.
            print("chunk_type:  " + chunk_type)
        else:
            idats_found += 1
            idats_bytes += chunk_length

        if chunk_type == "IHDR":
            # process_ihdr consumes the IHDR data and its CRC itself.
            process_ihdr()
        else:
            # Skip the chunk's data and CRC; neither is inspected.
            skipped = fh.read(chunk_length + 4)
            if len(skipped) < chunk_length + 4:
                print("ERROR: Truncated chunk data")
                break

    print("=" * 20)
    print("Found a total of " + str(idats_found) + " IDAT chunks consuming " + str(idats_bytes) + " bytes.")
finally:
    # Close the file even if parsing failed part-way through.
    fh.close()
# Sample weird output from MacOS screenshot containing non-standard iDOT
# chunk. Don't know what it's there for.
# $ python png_structure.py MY_SCREENSHOT.png
# Valid PNG signature
# ====================
# chunk_length: 13
# chunk_type: IHDR
# width: 2818
# height: 2328
# bit_depth: 8
# colour_type: 6
# compression_method: 0
# filter_method: 0
# interlace_method: 0
# ====================
# chunk_length: 2728
# chunk_type: iCCP
# ====================
# chunk_length: 9
# chunk_type: pHYs
# ====================
# chunk_length: 415
# chunk_type: iTXt
# ====================
# chunk_length: 28
# chunk_type: iDOT
# ====================
# chunk_length: 0
# chunk_type: IEND
# ====================
# Found a total of 642 IDAT chunks consuming 10512756 bytes.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment