jakekara/png_structure.py

## png_structure.py
# Exploration in PNG
#
# Just poking through the PNG file structure.
#
# Found weird output when examining MacOS screenshots. They contain an
# "iDOT" chunk, the purpose of which I couldn't determine. This chunk type
# is not part of the PNG standard, and seems to break some programs'
# ability to process these files, according to complaints I'm finding on
# Google. See sample output below. If no one's already done it, maybe there
# is some value in writing a script to strip out these invalid iDOT chunks.

import sys, struct

# A file name is required; fail with a usage hint instead of an IndexError.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s FILE.png\n" % sys.argv[0])
    sys.exit(-1)

filename = sys.argv[1]
fh = open(filename, 'rb')

# The first eight bytes of a PNG are always these:
first_eight = [137, 80, 78, 71, 13, 10, 26, 10]

# Read the whole signature in one call. bytearray() yields integers on both
# Python 2 and Python 3, and a file shorter than eight bytes simply fails
# the comparison instead of crashing struct.unpack on an empty read.
signature = list(bytearray(fh.read(len(first_eight))))
if signature != first_eight:
    print("ERROR: Invalid PNG signature")
    # Nothing else will read the file, so release it before exiting.
    fh.close()
    sys.exit(-1)

print("Valid PNG signature")

# Process the image header chunk to output some useful file info.
# I didn't make special functions for any other chunk type
def process_ihdr(stream=None):
    """Read an IHDR chunk body, print its fields, and skip the trailing CRC.

    The stream must be positioned just after the chunk type, i.e. at the
    first byte of the 13-byte IHDR data.

    Args:
        stream: Binary file-like object to read from. Defaults to the
            module-level ``fh`` so existing ``process_ihdr()`` calls keep
            working.

    Returns:
        A tuple ``(width, height, bit_depth, colour_type,
        compression_method, filter_method, interlace_method)``.

    Raises:
        struct.error: If fewer than 13 bytes of header data are available.
    """
    if stream is None:
        stream = fh

    # The ! is necessary because PNGs being Portable "NETWORK" Graphics use
    # network byte ordering (which is big-endian). Two 4-byte unsigned
    # integers followed by five single bytes make up the 13-byte body.
    fields = struct.unpack('!IIBBBBB', stream.read(13))
    (width, height, bit_depth, colour_type,
     compression_method, filter_method, interlace_method) = fields

    # Two spaces after each colon reproduce what the Python 2 statement
    # `print "label: ", value` emits, so the report format is unchanged.
    print("width:  %d" % width)
    print("height:  %d" % height)
    print("bit_depth:  %d" % bit_depth)
    print("colour_type:  %d" % colour_type)
    print("compression_method:  %d" % compression_method)
    print("filter_method:  %d" % filter_method)
    print("interlace_method:  %d" % interlace_method)

    # Skip the 4-byte CRC that follows the chunk data.
    stream.read(4)
    return fields

# Process all the chunks and print a summary of each.
# Every chunk is laid out as: 4-byte big-endian length, 4-byte type,
# `length` bytes of data, 4-byte CRC.
idats_found = 0
idats_bytes = 0
try:
    while True:
        # Length and type are read together as one 8-byte chunk header.
        header = fh.read(8)

        # An empty read means a clean end of file.
        if not header:
            break

        # A partial header means the file was cut short; report it rather
        # than letting struct.unpack raise on the short buffer.
        if len(header) < 8:
            print("ERROR: Truncated chunk header")
            break

        chunk_length, chunk_type = struct.unpack('!I4s', header)

        # On Python 3 the type arrives as bytes; turn it into text so the
        # comparisons and the report below behave the same as on Python 2.
        if not isinstance(chunk_type, str):
            chunk_type = chunk_type.decode('latin-1')

        # Instead of printing the same report for each IDAT chunk, just
        # print a summary at the end, to reduce flooding your console
        if chunk_type != "IDAT":
            print("=" * 20)
            print("chunk_length: %d" % chunk_length)
            print("chunk_type:  %s" % chunk_type)
        else:
            idats_found += 1
            idats_bytes += chunk_length

        if chunk_type == "IHDR":
            # process_ihdr() consumes the chunk data and its CRC itself.
            process_ihdr()
        else:
            # Skip over the chunk data and the 4-byte CRC that follows it.
            fh.read(chunk_length + 4)

    print("=" * 20)
    print("Found a total of %d IDAT chunks consuming %d bytes."
          % (idats_found, idats_bytes))
finally:
    # Close the file, even if parsing failed part-way through.
    fh.close()

# Sample weird output from MacOS screenshot containing non-standard iDOT
# chunk. Don't know what it's there for.

# $ python png_structure.py MY_SCREENSHOT.png
# Valid PNG signature
# ====================
# chunk_length: 13
# chunk_type:  IHDR
# width:  2818
# height:  2328
# bit_depth:  8
# colour_type:  6
# compression_method:  0
# filter_method:  0
# interlace_method:  0
# ====================
# chunk_length: 2728
# chunk_type:  iCCP
# ====================
# chunk_length: 9
# chunk_type:  pHYs
# ====================
# chunk_length: 415
# chunk_type:  iTXt
# ====================
# chunk_length: 28
# chunk_type:  iDOT
# ====================
# chunk_length: 0
# chunk_type:  IEND
# ====================
# Found a total of 642 IDAT chunks consuming 10512756 bytes.
	# Exploration in PNG
	#
	# Just poking through the PNG file structure.
	#
	# Found weird output when examining MacOS screenshots. They contain an
	# "iDOT" chunk, the purpose of which I couldn't determine. This chunk type
	# is not part of the PNG standard, and seems to break some programs'
	# ability to process these files, according to complaints I'm finding on
	# Google. See sample output below. If no one's already done it, maybe there
	# is some value in writing a script to strip out these invalid iDOT chunks.

	import sys, struct

	# A file name is required; fail with a usage hint instead of an IndexError.
	if len(sys.argv) < 2:
	    sys.stderr.write("usage: %s FILE.png\n" % sys.argv[0])
	    sys.exit(-1)

	filename = sys.argv[1]
	fh = open(filename, 'rb')

	# The first eight bytes of a PNG are always these:
	first_eight = [137, 80, 78, 71, 13, 10, 26, 10]

	# Read the whole signature in one call. bytearray() yields integers on both
	# Python 2 and Python 3, and a file shorter than eight bytes simply fails
	# the comparison instead of crashing struct.unpack on an empty read.
	signature = list(bytearray(fh.read(len(first_eight))))
	if signature != first_eight:
	    print("ERROR: Invalid PNG signature")
	    # Nothing else will read the file, so release it before exiting.
	    fh.close()
	    sys.exit(-1)

	print("Valid PNG signature")

	# Process the image header chunk to output some useful file info.
	# I didn't make special functions for any other chunk type
	def process_ihdr(stream=None):
	    """Read an IHDR chunk body, print its fields, and skip the trailing CRC.

	    The stream must be positioned just after the chunk type, i.e. at the
	    first byte of the 13-byte IHDR data.

	    Args:
	        stream: Binary file-like object to read from. Defaults to the
	            module-level ``fh`` so existing ``process_ihdr()`` calls keep
	            working.

	    Returns:
	        A tuple ``(width, height, bit_depth, colour_type,
	        compression_method, filter_method, interlace_method)``.

	    Raises:
	        struct.error: If fewer than 13 bytes of header data are available.
	    """
	    if stream is None:
	        stream = fh

	    # The ! is necessary because PNGs being Portable "NETWORK" Graphics use
	    # network byte ordering (which is big-endian). Two 4-byte unsigned
	    # integers followed by five single bytes make up the 13-byte body.
	    fields = struct.unpack('!IIBBBBB', stream.read(13))
	    (width, height, bit_depth, colour_type,
	     compression_method, filter_method, interlace_method) = fields

	    # Two spaces after each colon reproduce what the Python 2 statement
	    # `print "label: ", value` emits, so the report format is unchanged.
	    print("width:  %d" % width)
	    print("height:  %d" % height)
	    print("bit_depth:  %d" % bit_depth)
	    print("colour_type:  %d" % colour_type)
	    print("compression_method:  %d" % compression_method)
	    print("filter_method:  %d" % filter_method)
	    print("interlace_method:  %d" % interlace_method)

	    # Skip the 4-byte CRC that follows the chunk data.
	    stream.read(4)
	    return fields

	# Process all the chunks and print a summary of each.
	# Every chunk is laid out as: 4-byte big-endian length, 4-byte type,
	# `length` bytes of data, 4-byte CRC.
	idats_found = 0
	idats_bytes = 0
	try:
	    while True:
	        # Length and type are read together as one 8-byte chunk header.
	        header = fh.read(8)

	        # An empty read means a clean end of file.
	        if not header:
	            break

	        # A partial header means the file was cut short; report it rather
	        # than letting struct.unpack raise on the short buffer.
	        if len(header) < 8:
	            print("ERROR: Truncated chunk header")
	            break

	        chunk_length, chunk_type = struct.unpack('!I4s', header)

	        # On Python 3 the type arrives as bytes; turn it into text so the
	        # comparisons and the report below behave the same as on Python 2.
	        if not isinstance(chunk_type, str):
	            chunk_type = chunk_type.decode('latin-1')

	        # Instead of printing the same report for each IDAT chunk, just
	        # print a summary at the end, to reduce flooding your console
	        if chunk_type != "IDAT":
	            print("=" * 20)
	            print("chunk_length: %d" % chunk_length)
	            print("chunk_type:  %s" % chunk_type)
	        else:
	            idats_found += 1
	            idats_bytes += chunk_length

	        if chunk_type == "IHDR":
	            # process_ihdr() consumes the chunk data and its CRC itself.
	            process_ihdr()
	        else:
	            # Skip over the chunk data and the 4-byte CRC that follows it.
	            fh.read(chunk_length + 4)

	    print("=" * 20)
	    print("Found a total of %d IDAT chunks consuming %d bytes."
	          % (idats_found, idats_bytes))
	finally:
	    # Close the file, even if parsing failed part-way through.
	    fh.close()

	# Sample weird output from MacOS screenshot containing non-standard iDOT
	# chunk. Don't know what it's there for.

	# $ python png_structure.py MY_SCREENSHOT.png
	# Valid PNG signature
	# ====================
	# chunk_length: 13
	# chunk_type: IHDR
	# width: 2818
	# height: 2328
	# bit_depth: 8
	# colour_type: 6
	# compression_method: 0
	# filter_method: 0
	# interlace_method: 0
	# ====================
	# chunk_length: 2728
	# chunk_type: iCCP
	# ====================
	# chunk_length: 9
	# chunk_type: pHYs
	# ====================
	# chunk_length: 415
	# chunk_type: iTXt
	# ====================
	# chunk_length: 28
	# chunk_type: iDOT
	# ====================
	# chunk_length: 0
	# chunk_type: IEND
	# ====================
	# Found a total of 642 IDAT chunks consuming 10512756 bytes.