-
-
Save anthwlock/781d1ef20b215d4c5d09650348e27912 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# https://stackoverflow.com/a/18552833 | |
# ISO/IEC 14496-12 - ISO base media file format | |
# from: https://standards.iso.org/ittf/PubliclyAvailableStandards/index.html | |
from sys import argv | |
import heapq | |
import os | |
# Abort with usage info when no input file is given.
if len(argv) < 2:
    print("usage: <file.mp4>")
    exit(0)
# Container atoms whose children are walked into during the scan.
to_enter = ['moov', 'trak', 'mdia', 'minf', 'stbl', 'edts']
#target = 'stsc'
#target = 'stts'
#target = 'stsz'
#target = 'elst'
# Atom type to locate and dump (switch by editing the lines above).
target = 'ctts'
#trak_idx = 1
# Number of 'trak' atoms to skip before descending into one, i.e. the
# 0-based index of the track to inspect; overridable via the 2nd CLI arg.
trak_idx = 1
if len(argv) > 2:
    trak_idx = int(argv[2])
print(f"using track_idx = {trak_idx}")
print()
# Human-readable names for the dumpable table atoms (used in the final banner).
to_name = {
    'stts' : 'Time-to-sample',
    'stsc' : 'Sample-to-chunk',
    'stco' : 'Chunk offset',
    'stss' : 'Sync sample',
    'stsz' : 'Sample size',
    'elst' : 'Edit List',
    'ctts' : 'Composition Offset',
}
# Global binary handle for the input file, read by every helper below.
# Never closed explicitly; released at interpreter exit (fine for a one-shot script).
f = open(argv[1], 'rb')
def read_utf(subj, n):
    """Read *n* bytes from the global file handle and decode them as UTF-8.

    On a decode failure, print a diagnostic naming *subj* and abort the
    script with exit code 1.
    """
    raw = f.read(n)
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError as e:
        print(f"bad {subj}: {e}")
        exit(1)
def read_len_and_name():
    """Read one atom header and return (payload_length, atom_name).

    The returned length excludes the 8 header bytes. A 32-bit size field of
    1 (i.e. length == 1 - 8 == -7) signals that a 64-bit extended size
    follows the name; in that case the large size is read instead.
    """
    size32 = int.from_bytes(f.read(4), byteorder='big')
    name = read_utf('atom name', 4)
    length = size32 - 8
    if length == -7:  # size field was 1 -> 64-bit "largesize" follows
        length = int.from_bytes(f.read(8), byteorder='big') - 8
    return length, name
def skip(n):
    """Advance the global file handle *n* bytes from the current position."""
    f.seek(n, os.SEEK_CUR)
# Codec/format name of the selected track; "?" until parse_stsd() fills it in.
cn = "?"
def parse_stsd(n):
    """Parse an 'stsd' (sample description) atom with payload length *n*.

    Stores the 4-char codec/format name in the global ``cn`` and leaves the
    file positioned just past the atom's payload.
    """
    global cn
    # 12 bytes = version/flags (4) + entry count (4) + first entry size (4).
    skip(12)
    cn = read_utf('codec name', 4)
    # 16 bytes consumed so far (12 skipped + 4 read); skip the remainder.
    skip(n-16)
def find_cn(cur_len):
    """Resolve the codec name, then restore the file position.

    Called once the target atom's header has been read: remembers the
    current offset, skips past the target's payload, and keeps consuming
    atoms until parse_stsd() has set the global ``cn``. Finally seeks back
    so the target atom's payload can be dumped.
    """
    resume_at = f.tell()
    skip(cur_len)
    while cn == "?":
        proceed_atom()
    f.seek(resume_at)
# Min-heap of file offsets where currently-open container atoms end; used to
# detect when the walk leaves a container so the print indent shrinks.
# NOTE(review): `parent_ends` aliases the very same list as `lends` and is
# never referenced again — looks vestigial.
lends = parent_ends = []
level = 0  # current print indentation, 2 spaces per nesting level
def proceed_atom():
    """Read the next atom header, print it, and decide how to continue.

    Containers listed in `to_enter` are descended into; the first `trak_idx`
    'trak' atoms are skipped so the requested track is the one entered;
    'stsd' is parsed for the codec name; everything else except the target
    atom is skipped wholesale. Returns (payload_length, atom_name).
    """
    global trak_idx, level
    # If we've reached the end of the innermost open container, close it.
    # NOTE(review): pops at most one container per call — if several
    # containers end at the same offset the remaining levels close on later
    # calls, so the printed indent can lag by one atom.
    if lends and lends[0] <= f.tell():
        heapq.heappop(lends)
        level -= 2
    l, n = read_len_and_name()
    print(f"{' '*level}{n} {l}")
    if l < 0:
        print(f"bad length: {l}")
        exit(1)
    if n == 'trak' and trak_idx:
        # Not the requested track yet: skip its payload and count down.
        trak_idx -= 1
        skip(l)
    elif n == 'stsd':
        parse_stsd(l)
    elif n in to_enter:
        # Container atom: descend into it and remember where it ends.
        level += 2
        heapq.heappush(lends, f.tell() + l)
    elif n != target:
        skip(l)
    # When n == target, the payload is neither skipped nor entered: the file
    # stays positioned at the payload start for the dump_* functions.
    return l, n
# Walk atoms until the target is found, then resolve the codec name and stop
# with the file positioned at the start of the target atom's payload.
while True:
    l, n = proceed_atom()
    if n == target:
        find_cn(l)
        break
def dump_stsc():
    """Dump the sample-to-chunk ('stsc') table (at most 50 entries)."""
    skip(4)  # version + flags
    num_entries = int.from_bytes(f.read(4), byteorder='big')
    print(f"num_entries: {num_entries}")
    print(f" i first_chunk sample_per_chunk sample_descr_idx")
    for i in range(min(num_entries, 50)):
        # Each entry is three consecutive big-endian u32 fields.
        first_chunk, sample_per_chunk, sample_descr_idx = (
            int.from_bytes(f.read(4), byteorder='big') for _ in range(3)
        )
        print(f"{i:>4} {first_chunk:>9} {sample_per_chunk:>15} {sample_descr_idx:>15}")
def dump_stts():
    """Dump the time-to-sample ('stts') table (at most 50 entries)."""
    skip(4)  # version + flags
    num_entries = int.from_bytes(f.read(4), byteorder='big')
    print(f"num_entries: {num_entries}")
    print(f" i n_samples time/duration")
    for i in range(min(num_entries, 50)):
        count = int.from_bytes(f.read(4), byteorder='big')
        delta = int.from_bytes(f.read(4), byteorder='big')
        print(f"{i:>4} {count:>9} {delta:>17}")
def dump_stsz():
    """Dump the sample-size ('stsz') table.

    When the box advertises a single constant sample size, no per-sample
    entries exist and only the constant is printed; otherwise the first
    50 sample sizes are shown.
    """
    skip(4)  # version + flags
    const_sz = int.from_bytes(f.read(4), byteorder='big')
    num_entries = int.from_bytes(f.read(4), byteorder='big')
    print(f"const_sz: {const_sz}")
    print(f"num_entries: {num_entries}")
    if const_sz:
        return  # constant size: the table body is absent
    print(f" i size")
    for i in range(min(num_entries, 50)):
        size = int.from_bytes(f.read(4), byteorder='big')
        print(f"{i:>4} {size:>9}")
def dump_elst():
    """Dump the edit-list ('elst') entries (at most 50).

    Version-0 layout per ISO/IEC 14496-12: segment_duration (u32),
    media_time (SIGNED s32, where -1 marks an empty edit), media_rate
    (16.16 fixed point, printed here as the raw 32-bit value).
    """
    skip(4)  # version + flags
    num_entries = int.from_bytes(f.read(4), byteorder='big')
    print(f"num_entries: {num_entries}")
    print(f" i track-dur media-time media-rate")
    for i in range(min(num_entries, 50)):
        tr_dur = int.from_bytes(f.read(4), byteorder='big')
        # BUGFIX: media_time is signed; the previous unsigned read rendered
        # the common "-1 = empty edit" sentinel as 4294967295.
        me_time = int.from_bytes(f.read(4), byteorder='big', signed=True)
        me_rate = int.from_bytes(f.read(4), byteorder='big')
        print(f"{i:>3} {tr_dur:>9} {me_time:>13} {me_rate:>9}")
def dump_ctts():
    """Dump the composition-offset ('ctts') table (at most 500 entries).

    Per ISO/IEC 14496-12, sample_offset is unsigned in a version-0 box and
    SIGNED int(32) in a version-1 box; the version byte is honoured here.
    Also prints the running sample total and collects (count, offset) pairs.
    """
    ver = int.from_bytes(f.read(1), byteorder='big')
    print(f"ver: {ver}")
    skip(3)  # flags
    num_entries = int.from_bytes(f.read(4), byteorder='big')
    print(f"num_entries: {num_entries}")
    print(f" i SampleIdx SampleCount compositionOffset")
    total_cnt = 0
    pairs = []
    # BUGFIX: the version byte was read but ignored; version-1 boxes carry
    # signed offsets (negative values are normal with B-frames) and were
    # previously dumped as huge unsigned numbers.
    signed = ver == 1
    for i in range(min(num_entries, 500)):
        sample_cnt = int.from_bytes(f.read(4), byteorder='big')
        comp_off = int.from_bytes(f.read(4), byteorder='big', signed=signed)
        total_cnt += sample_cnt
        pairs.append((sample_cnt, comp_off))
        print(f"{i:>3} {total_cnt:>9} {sample_cnt:>9} {comp_off:>13}")
    print(f"{total_cnt = }")
    print(pairs)
print(f"found '{n}' (= {to_name[n]}) in '{cn}'-track // at {f.tell()}\n")
# Dispatch to the dumper for the selected table atom; unknown targets fall
# through silently, exactly like the original if/elif chain.
_dumpers = {
    "stsc": dump_stsc,
    "stts": dump_stts,
    "stsz": dump_stsz,
    "elst": dump_elst,
    "ctts": dump_ctts,
}
_dump = _dumpers.get(target)
if _dump is not None:
    _dump()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment