Skip to content

Instantly share code, notes, and snippets.

@Steve-Tech
Last active November 12, 2024 21:32
Show Gist options
  • Save Steve-Tech/0f0e37785f3148b03fdb4ab96e4d27ff to your computer and use it in GitHub Desktop.
Save Steve-Tech/0f0e37785f3148b03fdb4ab96e4d27ff to your computer and use it in GitHub Desktop.
Codec Reader for HTML Video Tags: gets the correct codecs parameter for AV1 and H.264 videos.
import json
import mimetypes
import subprocess
import sys
# Codec Reader for HTML Video Tags by Steve-Tech
# Usage: python3 codec-reader.py [-d] file_name
# Requires ffmpeg and ffprobe to be installed.
#
# Supported Codecs:
# Video:
# AV1
# H.264
# Audio:
# AAC
#
# Module-wide switch: when True, the helper functions print diagnostic output.
debug = False


def main():
    """
    Command-line entry point.

    Usage: python3 codec-reader.py [-d] file_name

    The "-d" switch turns on debug output. When no file name is given on the
    command line the user is prompted for one. The result of get_type() for
    that file is printed to stdout.
    """
    global debug
    args = sys.argv[1:]
    if args:
        debug = "-d" in args
    # The debug switch is never a file name: without this filter, running
    # `script.py -d` or `script.py file -d` would try to probe a file
    # called "-d".
    file_args = [arg for arg in args if arg != "-d"]
    if file_args:
        file_name = file_args[-1]
    else:
        file_name = input("File name: ")
    print(get_type(file_name))
def get_codecs(file_name: str) -> tuple[str, ...]:
    """
    Returns a tuple of codecs found in the file.
    Requires ffprobe to be installed.

    The tuple holds one codec name per stream, in the order ffprobe lists
    the streams. A stream for which ffprobe's JSON output carries no
    "codec_name" field is reported as "unknown" instead of raising KeyError.

    Raises subprocess.CalledProcessError if ffprobe exits with a non-zero
    status.
    """
    output = subprocess.check_output(
        ["ffprobe", "-of", "json", "-show_streams", file_name],
        stderr=subprocess.DEVNULL,
    )
    streams = json.loads(output).get("streams", [])
    codecs = tuple(stream.get("codec_name", "unknown") for stream in streams)
    if debug:
        print("Found codecs:", codecs)
    return codecs
def get_ffmpeg_headers(file_name: str) -> bytes:
    """
    Runs ffmpeg over the file with the trace_headers video bitstream filter
    and returns everything ffmpeg wrote to stderr.
    Requires ffmpeg to be installed.

    The exit status of ffmpeg is not checked.
    """
    command = ["ffmpeg", "-i", file_name]
    # Pass the video stream through unchanged, via the trace_headers filter.
    command += ["-c:v", "copy", "-bsf:v", "trace_headers"]
    # Output goes to the "null" format, so no media file is written.
    command += ["-f", "null", "/dev/null"]
    completed = subprocess.run(
        command,
        stdin=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    return completed.stderr
def get_type(file_name: str) -> str:
    """
    Builds the "<mime>; codecs=<list>" string for the file.

    The mime part is guessed from the file name; the codecs list has one
    comma-separated entry per recognised stream (av1, h264, aac). Any other
    codec is reported on stdout and left out of the list.
    """
    # NOTE(review): guess_type() yields None for unrecognised extensions, in
    # which case the result starts with the literal text "None".
    mime = mimetypes.guess_type(file_name)[0]
    codecs = get_codecs(file_name)
    headers = get_ffmpeg_headers(file_name)

    entries = []
    for name in codecs:
        if name == "av1":
            entries.append(get_type_av1(headers))
        elif name == "h264":
            entries.append(get_type_h264(headers))
        elif name == "aac":
            entries.append(get_type_aac(headers))
        else:
            print(f"Unknown codec: {name}")

    codec_list = ",".join(entries)
    return f"{mime}; codecs={codec_list}"
def str_chk(s, r: str | None = None) -> str:
"""
Returns a string if it is not None, otherwise returns the replacement or raises an error.
"""
if s is not None:
return str(s)
elif r is not None:
return r
else:
raise ValueError("Missing value")
def read_ffmpeg(headers: bytes, item: bytes) -> int | None:
"""
Returns the value of an item from the headers in ffmpeg.
None if the item is not found.
"""
index = headers.find(item)
if index == -1:
return None
stop = headers.index(b"\n", index)
start = headers.rindex(b" ", index, stop)
if debug:
print(headers[index:stop].decode())
return int(headers[start + 1 : stop])
# -------- Video Codecs --------
def get_type_av1(headers: bytes) -> str:
    """
    Builds the AV1 codecs parameter from the traced headers.

    The result has the form av01.P.LLT.DD.M.CCC.cp.tc.mc.F.
    Raises ValueError if seq_profile or seq_level_idx is not present.
    """
    # Based on this: https://jakearchibald.com/2022/html-codecs-parameter-for-av1/

    # P: profile
    profile = str_chk(read_ffmpeg(headers, b"seq_profile"))

    # LLT: two-digit level followed by the tier letter
    level = str_chk(read_ffmpeg(headers, b"seq_level_idx")).rjust(2, "0")
    tier = "H" if read_ffmpeg(headers, b"seq_tier") else "M"

    # DD: bit depth
    if read_ffmpeg(headers, b"high_bitdepth") == 1:
        bit_depth = "12" if read_ffmpeg(headers, b"twelve_bit") == 1 else "10"
    else:
        bit_depth = "08"

    # M: monochrome flag
    monochrome = "1" if read_ffmpeg(headers, b"mono_chrome") == 1 else "0"

    # CCC: chroma subsampling
    # NOTE(review): chroma_sample_position is never read from the headers, so
    # the third digit is a constant in every branch - confirm this is intended.
    if monochrome == "1":
        chroma = "111"
    elif profile == "0":
        chroma = "110"
    elif profile == "1":
        chroma = "000"
    elif bit_depth != "12":
        chroma = "100"
    else:
        sub_x = read_ffmpeg(headers, b"subsampling_x")
        sub_y = read_ffmpeg(headers, b"subsampling_y")
        chroma = f"{sub_x or 0}{sub_y or 0}0"

    # cp, tc, mc: each defaults to "01" when absent from the headers
    colour_keys = (
        b"color_primaries",
        b"transfer_characteristics",
        b"matrix_coefficients",
    )
    colour = [
        str_chk(read_ffmpeg(headers, key), "01").rjust(2, "0")
        for key in colour_keys
    ]

    # F: defaults to "0" when absent
    full_range = str_chk(read_ffmpeg(headers, b"color_range"), "0")

    fields = [
        "av01",
        profile,
        level + tier,
        bit_depth,
        monochrome,
        chroma,
        *colour,
        full_range,
    ]
    return ".".join(fields)
def get_type_h264(headers: bytes) -> str:
    """
    Builds the H.264 codecs parameter from the traced headers.

    The result has the form avc1.PPCCLL, where PP, CC and LL are the
    two-digit lowercase hex values of profile_idc, the constraint flags byte
    and level_idc, written back to back with no separators between them.

    Raises ValueError if profile_idc or level_idc is not present.
    """
    # Based on this: https://blog.pearce.org.nz/2013/11/what-does-h264avc1-codecs-parameters.html with knowledge from AV1
    # PP
    profile_idc = read_ffmpeg(headers, b"profile_idc")
    # CC: constraint_set0_flag is the most significant bit (bit 7) of the
    # byte and constraint_set5_flag is bit 2; the two lowest bits stay zero.
    constraints = 0
    for i in range(6):
        flag = read_ffmpeg(headers, f"constraint_set{i}_flag".encode()) or 0
        constraints |= flag << (7 - i)
    # LL
    level_idc = read_ffmpeg(headers, b"level_idc")
    if profile_idc is None or level_idc is None:
        raise ValueError("Missing value")
    return "avc1.{:0>2x}{:0>2x}{:0>2x}".format(profile_idc, constraints, level_idc)
# -------- Audio Codecs --------
def get_type_aac(headers: bytes) -> str:
    """
    Returns the codecs parameter used for AAC audio.

    The headers argument is not inspected; the same value is returned for
    every input.
    """
    # Source: https://developer.mozilla.org/en-US/docs/Web/Media/Formats/codecs_parameter
    # mp4a.oo[.A]
    object_type = "40"  # oo = 40 Audio Object
    audio_object = "2"  # A = 2 AAC LC (Profiles: Main, Scalable, HQ, LD v2, AAC, HE-AAC, HE-AAC v2)
    return ".".join(("mp4a", object_type, audio_object))
# -------- Main --------
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
@greg-randall
Copy link

greg-randall commented Oct 26, 2024

I really appreciate you creating this! I forked a version that has h265 hevc code. Maybe not as well tested as yours, but seems to work. https://gist.github.com/greg-randall/798fde2376f8dd632ffffb8a7aa4bdb8

@Steve-Tech
Copy link
Author

@greg-randall Oh wow thanks! You can copy my av1 & h264 code too, I pretty much consider most of my gists public domain, and I don't really intend on maintaining most of them.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment