Skip to content

Instantly share code, notes, and snippets.

@Steve-Tech
Last active February 12, 2024 12:35
Show Gist options
  • Save Steve-Tech/0f0e37785f3148b03fdb4ab96e4d27ff to your computer and use it in GitHub Desktop.
Save Steve-Tech/0f0e37785f3148b03fdb4ab96e4d27ff to your computer and use it in GitHub Desktop.
Codec Reader for HTML Video Tags: gets the correct codecs parameter for AV1 and H.264 videos.
import json
import mimetypes
import subprocess
import sys
# Codec Reader for HTML Video Tags by Steve-Tech
# Usage: python3 codec-reader.py [-d] file_name
# Requires ffmpeg and ffprobe to be installed.
#
# Supported Codecs:
# Video:
# AV1
# H.264
# Audio:
# AAC
#
# Verbose-output flag. main() turns it on when "-d" is on the command line;
# get_codecs() and read_ffmpeg() print what they find while it is set.
debug = False
def main():
    """
    Command-line entry point: prints the mime type (including codecs) of a file.

    Usage: python3 codec-reader.py [-d] file_name
    "-d" enables debug output and may appear before or after the file name.
    If no file name is given on the command line, it is asked for on stdin.
    """
    global debug
    args = sys.argv[1:]
    if args:
        debug = "-d" in args
    # Drop the flag before picking the file name, so that "-d" on its own
    # (or placed last) is never mistaken for the file to inspect.
    positional = [arg for arg in args if arg != "-d"]
    if positional:
        file_name = positional[-1]
    else:
        file_name = input("File name: ")
    print(get_type(file_name))
def get_codecs(file_name: str) -> tuple[str, ...]:
    """
    Returns a tuple with the codec name of every stream in the file, in the
    order ffprobe lists the streams.
    Requires ffprobe to be installed.

    A stream for which ffprobe reports no "codec_name" is listed as
    "unknown" rather than aborting the whole lookup with a KeyError.

    Raises subprocess.CalledProcessError if ffprobe exits with a non-zero
    status (its stderr is discarded).
    """
    output = subprocess.check_output(
        ["ffprobe", "-of", "json", "-show_streams", file_name],
        stderr=subprocess.DEVNULL,
    )
    streams = json.loads(output)["streams"]
    # Not every stream entry is guaranteed to carry a codec name, so fall
    # back to a placeholder instead of indexing the key directly.
    codecs = tuple(stream.get("codec_name", "unknown") for stream in streams)
    if debug:
        print("Found codecs:", codecs)
    return codecs
def get_ffmpeg_headers(file_name: str) -> bytes:
    """
    Runs ffmpeg over the file with the trace_headers bitstream filter applied
    to the copied video stream and returns everything ffmpeg wrote to stderr.
    Requires ffmpeg to be installed.

    The exit status of ffmpeg is not checked.
    """
    command = ["ffmpeg", "-i", file_name]
    # Copy the video stream unchanged and pass it through trace_headers.
    command += ["-c:v", "copy", "-bsf:v", "trace_headers"]
    # Send the result to the null muxer.
    command += ["-f", "null", "/dev/null"]
    completed = subprocess.run(
        command,
        stdin=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    return completed.stderr
def get_type(file_name: str) -> str:
    """
    Returns the mime type of the file including its codecs parameter,
    e.g. 'video/mp4; codecs="av01.0.04M.08.0.110.01.01.01.0,mp4a.40.2"'.

    The mime type is guessed from the file name; if it cannot be guessed the
    mime part of the result is the text "None". Streams whose codec is not
    supported are reported on stdout and left out of the codecs list.

    A list of more than one codec is wrapped in double quotes, because a
    comma-separated codecs value is only valid in its quoted form
    (RFC 6381); a single codec is emitted unquoted.
    """
    mime = mimetypes.guess_type(file_name)[0]
    codecs = get_codecs(file_name)
    headers = get_ffmpeg_headers(file_name)
    # ffprobe codec name -> function building that codec's parameter string.
    readers = {
        "av1": get_type_av1,
        "h264": get_type_h264,
        "aac": get_type_aac,
    }
    type_codecs = []
    for codec in codecs:
        reader = readers.get(codec)
        if reader is None:
            print(f"Unknown codec: {codec}")
            continue
        type_codecs.append(reader(headers))
    codecs_value = ",".join(type_codecs)
    if len(type_codecs) > 1:
        codecs_value = f'"{codecs_value}"'
    return f"{mime}; codecs={codecs_value}"
def str_chk(s, r: str | None = None) -> str:
"""
Returns a string if it is not None, otherwise returns the replacement or raises an error.
"""
if s is not None:
return str(s)
elif r is not None:
return r
else:
raise ValueError("Missing value")
def read_ffmpeg(headers: bytes, item: bytes) -> int | None:
"""
Returns the value of an item from the headers in ffmpeg.
None if the item is not found.
"""
index = headers.find(item)
if index == -1:
return None
stop = headers.index(b"\n", index)
start = headers.rindex(b" ", index, stop)
if debug:
print(headers[index:stop].decode())
return int(headers[start + 1 : stop])
# -------- Video Codecs --------
def get_type_av1(headers: bytes) -> str:
    """
    Returns the codecs parameter value for an AV1 stream, built from the
    sequence header fields found in the ffmpeg trace_headers output.

    Format: av01.P.LLT.DD.M.CCC.cp.tc.mc.F
    Based on this: https://jakearchibald.com/2022/html-codecs-parameter-for-av1/

    Raises ValueError if seq_profile or seq_level_idx cannot be found.
    """
    # P: profile
    profile = str_chk(read_ffmpeg(headers, b"seq_profile"))

    # LLT: two-digit level followed by the tier letter (M = main, H = high)
    level = str_chk(read_ffmpeg(headers, b"seq_level_idx")).rjust(2, "0")
    tier = "H" if read_ffmpeg(headers, b"seq_tier") else "M"

    # DD: bit depth
    if read_ffmpeg(headers, b"high_bitdepth") == 1:
        depth = "12" if read_ffmpeg(headers, b"twelve_bit") == 1 else "10"
    else:
        depth = "08"

    # M: monochrome flag
    mono = "1" if read_ffmpeg(headers, b"mono_chrome") == 1 else "0"

    # CCC: subsampling_x, subsampling_y, chroma sample position.
    # The third digit carries chroma_sample_position only when both
    # subsampling digits are 1 (4:2:0); otherwise it is 0.
    if mono == "1":
        # NOTE(review): the AV1 bitstream infers subsampling 1/1 with an
        # unknown sample position for monochrome, which would read "110";
        # kept as "111" pending confirmation against the codecs-string spec.
        chroma = "111"
    elif profile == "0":
        # Profile 0 is always 4:2:0.
        position = read_ffmpeg(headers, b"chroma_sample_position") or 0
        chroma = f"11{position}"
    elif profile == "1":
        # Profile 1 is always 4:4:4.
        chroma = "000"
    elif depth != "12":
        # Profile 2 below 12 bits is 4:2:2.
        chroma = "100"
    else:
        chroma_x = read_ffmpeg(headers, b"subsampling_x") or 0
        chroma_y = read_ffmpeg(headers, b"subsampling_y") or 0
        position = 0
        if chroma_x == 1 and chroma_y == 1:
            position = read_ffmpeg(headers, b"chroma_sample_position") or 0
        chroma = f"{chroma_x}{chroma_y}{position}"

    # cp, tc, mc: colour description, each defaulting to "01" when absent
    primaries = str_chk(read_ffmpeg(headers, b"color_primaries"), "01")
    transfer = str_chk(read_ffmpeg(headers, b"transfer_characteristics"), "01")
    matrix = str_chk(read_ffmpeg(headers, b"matrix_coefficients"), "01")

    # F: colour range flag, defaulting to "0" when absent
    color_range = str_chk(read_ffmpeg(headers, b"color_range"), "0")

    return ".".join(
        [
            "av01",
            profile,
            level + tier,
            depth,
            mono,
            chroma,
            primaries.rjust(2, "0"),
            transfer.rjust(2, "0"),
            matrix.rjust(2, "0"),
            color_range,
        ]
    )
def get_type_h264(headers: bytes) -> str:
    """
    Returns the codecs parameter value for an H.264 stream, built from the
    SPS fields found in the ffmpeg trace_headers output.

    Format: avc1.PPCCLL, three two-digit hex bytes with no separators:
    PP = profile_idc, CC = constraint flags byte, LL = level_idc.
    Based on this: https://blog.pearce.org.nz/2013/11/what-does-h264avc1-codecs-parameters.html

    Raises ValueError if profile_idc or level_idc cannot be found.
    """
    # PP
    profile_idc = read_ffmpeg(headers, b"profile_idc")
    if profile_idc is None:
        raise ValueError("Missing value")

    # CC: constraint_set0_flag is the most significant bit of the byte
    # (bit 7), constraint_set5_flag is bit 2; the two low bits are reserved.
    constraints = 0
    for i in range(6):
        flag = read_ffmpeg(headers, f"constraint_set{i}_flag".encode()) or 0
        constraints |= flag << (7 - i)

    # LL
    level_idc = read_ffmpeg(headers, b"level_idc")
    if level_idc is None:
        raise ValueError("Missing value")

    return f"avc1.{profile_idc:02x}{constraints:02x}{level_idc:02x}"
# -------- Audio Codecs --------
def get_type_aac(headers: bytes) -> str:
    """
    Returns the codecs parameter value used for AAC audio.
    The headers are not inspected; the same value is returned every time.

    Format: mp4a.oo[.A]
    Source: https://developer.mozilla.org/en-US/docs/Web/Media/Formats/codecs_parameter
    """
    # oo = 40 Audio Object
    object_type = "40"
    # A = 2 AAC LC (Profiles: Main, Scalable, HQ, LD v2, AAC, HE-AAC, HE-AAC v2)
    audio_type = "2"
    return ".".join(("mp4a", object_type, audio_type))
# -------- Main --------
# Run the command-line interface only when executed as a script, so the
# functions above can be imported without side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment