Created
July 20, 2022 20:16
-
-
Save himynamesdave/6b7261ecfe389e54789f221a7375b0dd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Copyright 2016 Google Inc. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""Utilities for examining/injecting spatial media metadata in MP4/MOV files.""" | |
import collections | |
import os | |
import re | |
import struct | |
import traceback | |
import xml.etree | |
import xml.etree.ElementTree | |
from spatialmedia import mpeg | |
MPEG_FILE_EXTENSIONS = [".mp4", ".mov"] | |
SPHERICAL_UUID_ID = ( | |
b"\xff\xcc\x82\x63\xf8\x55\x4a\x93\x88\x14\x58\x7a\x02\x52\x1f\xdd") | |
# XML contents. | |
RDF_PREFIX = " xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" " | |
SPHERICAL_XML_HEADER = \ | |
"<?xml version=\"1.0\"?>"\ | |
"<rdf:SphericalVideo\n"\ | |
"xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n"\ | |
"xmlns:GSpherical=\"http://ns.google.com/videos/1.0/spherical/\">" | |
SPHERICAL_XML_CONTENTS = \ | |
"<GSpherical:Spherical>true</GSpherical:Spherical>"\ | |
"<GSpherical:Stitched>true</GSpherical:Stitched>"\ | |
"<GSpherical:StitchingSoftware>"\ | |
"Spherical Metadata Tool"\ | |
"</GSpherical:StitchingSoftware>"\ | |
"<GSpherical:ProjectionType>equirectangular</GSpherical:ProjectionType>"\ | |
"<GSpherical:InitialViewHeadingDegrees>180</GSpherical:InitialViewHeadingDegrees>" | |
SPHERICAL_XML_CONTENTS_TOP_BOTTOM = \ | |
"<GSpherical:StereoMode>top-bottom</GSpherical:StereoMode>" | |
SPHERICAL_XML_CONTENTS_LEFT_RIGHT = \ | |
"<GSpherical:StereoMode>left-right</GSpherical:StereoMode>" | |
# Parameter order matches that of the crop option. | |
SPHERICAL_XML_CONTENTS_CROP_FORMAT = \ | |
"<GSpherical:CroppedAreaImageWidthPixels>{0}"\ | |
"</GSpherical:CroppedAreaImageWidthPixels>"\ | |
"<GSpherical:CroppedAreaImageHeightPixels>{1}"\ | |
"</GSpherical:CroppedAreaImageHeightPixels>"\ | |
"<GSpherical:FullPanoWidthPixels>{2}</GSpherical:FullPanoWidthPixels>"\ | |
"<GSpherical:FullPanoHeightPixels>{3}</GSpherical:FullPanoHeightPixels>"\ | |
"<GSpherical:CroppedAreaLeftPixels>{4}</GSpherical:CroppedAreaLeftPixels>"\ | |
"<GSpherical:CroppedAreaTopPixels>{5}</GSpherical:CroppedAreaTopPixels>" | |
SPHERICAL_XML_FOOTER = "</rdf:SphericalVideo>" | |
SPHERICAL_TAGS_LIST = [ | |
"Spherical", | |
"Stitched", | |
"StitchingSoftware", | |
"ProjectionType", | |
"SourceCount", | |
"StereoMode", | |
"InitialViewHeadingDegrees", | |
"InitialViewPitchDegrees", | |
"InitialViewRollDegrees", | |
"Timestamp", | |
"CroppedAreaImageWidthPixels", | |
"CroppedAreaImageHeightPixels", | |
"FullPanoWidthPixels", | |
"FullPanoHeightPixels", | |
"CroppedAreaLeftPixels", | |
"CroppedAreaTopPixels", | |
] | |
class Metadata(object): | |
def __init__(self): | |
self.video = None | |
self.audio = None | |
class ParsedMetadata(object): | |
def __init__(self): | |
self.video = dict() | |
self.audio = None | |
self.num_audio_channels = 0 | |
SPHERICAL_PREFIX = "{http://ns.google.com/videos/1.0/spherical/}" | |
SPHERICAL_TAGS = dict() | |
for tag in SPHERICAL_TAGS_LIST: | |
SPHERICAL_TAGS[SPHERICAL_PREFIX + tag] = tag | |
integer_regex_group = "(\d+)" | |
crop_regex = "^{0}$".format(":".join([integer_regex_group] * 6)) | |
MAX_SUPPORTED_AMBIX_ORDER = 1 | |
SpatialAudioDescription = collections.namedtuple( | |
'SpatialAudioDescription', | |
'order is_supported has_head_locked_stereo') | |
def get_spatial_audio_description(num_channels): | |
for i in range(1, MAX_SUPPORTED_AMBIX_ORDER+1): | |
if (i + 1)*(i + 1) == num_channels: | |
return SpatialAudioDescription( | |
order=i, is_supported=True, has_head_locked_stereo=False) | |
elif ((i + 1)*(i + 1) + 2) == num_channels: | |
return SpatialAudioDescription( | |
order=i, is_supported=True, has_head_locked_stereo=True) | |
return SpatialAudioDescription( | |
order=-1, is_supported=False, has_head_locked_stereo=True) | |
def spherical_uuid(metadata): | |
"""Constructs a uuid containing spherical metadata. | |
Args: | |
metadata: String, xml to inject in spherical tag. | |
Returns: | |
uuid_leaf: a box containing spherical metadata. | |
""" | |
uuid_leaf = mpeg.Box() | |
assert(len(SPHERICAL_UUID_ID) == 16) | |
uuid_leaf.name = mpeg.constants.TAG_UUID | |
uuid_leaf.header_size = 8 | |
uuid_leaf.content_size = 0 | |
uuid_leaf.contents = SPHERICAL_UUID_ID + metadata.encode("utf-8") | |
uuid_leaf.content_size = len(uuid_leaf.contents) | |
return uuid_leaf | |
def mpeg4_add_spherical(mpeg4_file, in_fh, metadata): | |
"""Adds a spherical uuid box to an mpeg4 file for all video tracks. | |
Args: | |
mpeg4_file: mpeg4, Mpeg4 file structure to add metadata. | |
in_fh: file handle, Source for uncached file contents. | |
metadata: string, xml metadata to inject into spherical tag. | |
""" | |
for element in mpeg4_file.moov_box.contents: | |
if element.name == mpeg.constants.TAG_TRAK: | |
added = False | |
element.remove(mpeg.constants.TAG_UUID) | |
for sub_element in element.contents: | |
if sub_element.name != mpeg.constants.TAG_MDIA: | |
continue | |
for mdia_sub_element in sub_element.contents: | |
if mdia_sub_element.name != mpeg.constants.TAG_HDLR: | |
continue | |
position = mdia_sub_element.content_start() + 8 | |
in_fh.seek(position) | |
if in_fh.read(4) == mpeg.constants.TRAK_TYPE_VIDE: | |
added = True | |
break | |
if added: | |
if not element.add(spherical_uuid(metadata)): | |
return False | |
break | |
mpeg4_file.resize() | |
return True | |
def mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console): | |
"""Adds spatial audio metadata to the first audio track of the input | |
mpeg4_file. Returns False on failure. | |
Args: | |
mpeg4_file: mpeg4, Mpeg4 file structure to add metadata. | |
in_fh: file handle, Source for uncached file contents. | |
audio_metadata: dictionary ('ambisonic_type': string, | |
'ambisonic_order': int, 'head_locked_stereo': Bool), | |
Supports 'periphonic' ambisonic type only. | |
""" | |
for element in mpeg4_file.moov_box.contents: | |
if element.name == mpeg.constants.TAG_TRAK: | |
for sub_element in element.contents: | |
if sub_element.name != mpeg.constants.TAG_MDIA: | |
continue | |
for mdia_sub_element in sub_element.contents: | |
if mdia_sub_element.name != mpeg.constants.TAG_HDLR: | |
continue | |
position = mdia_sub_element.content_start() + 8 | |
in_fh.seek(position) | |
if in_fh.read(4) == mpeg.constants.TAG_SOUN: | |
return inject_spatial_audio_atom( | |
in_fh, sub_element, audio_metadata, console) | |
return True | |
def mpeg4_add_audio_metadata(mpeg4_file, in_fh, audio_metadata, console): | |
num_audio_tracks = get_num_audio_tracks(mpeg4_file, in_fh) | |
if num_audio_tracks > 1: | |
console("Error: Expected 1 audio track. Found %d" % num_audio_tracks) | |
return False | |
return mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console) | |
def inject_spatial_audio_atom( | |
in_fh, audio_media_atom, audio_metadata, console): | |
for atom in audio_media_atom.contents: | |
if atom.name != mpeg.constants.TAG_MINF: | |
continue | |
for element in atom.contents: | |
if element.name != mpeg.constants.TAG_STBL: | |
continue | |
for sub_element in element.contents: | |
if sub_element.name != mpeg.constants.TAG_STSD: | |
continue | |
for sample_description in sub_element.contents: | |
if sample_description.name in\ | |
mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS: | |
in_fh.seek(sample_description.position + | |
sample_description.header_size + 16) | |
num_channels = get_num_audio_channels( | |
sub_element, in_fh) | |
expected_num_channels = \ | |
get_expected_num_audio_channels( | |
audio_metadata["ambisonic_type"], | |
audio_metadata["ambisonic_order"], | |
audio_metadata["head_locked_stereo"]) | |
if num_channels != expected_num_channels: | |
head_locked_stereo_msg = (" with head-locked stereo" if | |
audio_metadata["head_locked_stereo"] else "") | |
err_msg = "Error: Found %d audio channel(s). "\ | |
"Expected %d channel(s) for %s ambisonics "\ | |
"of order %d%s."\ | |
% (num_channels, | |
expected_num_channels, | |
audio_metadata["ambisonic_type"], | |
audio_metadata["ambisonic_order"], | |
head_locked_stereo_msg) | |
console(err_msg) | |
return False | |
sa3d_atom = mpeg.SA3DBox.create( | |
num_channels, audio_metadata) | |
sample_description.contents.append(sa3d_atom) | |
return True | |
def parse_spherical_xml(contents, console): | |
"""Returns spherical metadata for a set of xml data. | |
Args: | |
contents: string, spherical metadata xml contents. | |
Returns: | |
dictionary containing the parsed spherical metadata values. | |
""" | |
try: | |
parsed_xml = xml.etree.ElementTree.XML(contents) | |
except xml.etree.ElementTree.ParseError: | |
try: | |
console(traceback.format_exc()) | |
console(contents) | |
index = contents.find("<rdf:SphericalVideo") | |
if index != -1: | |
index += len("<rdf:SphericalVideo") | |
contents = contents[:index] + RDF_PREFIX + contents[index:] | |
parsed_xml = xml.etree.ElementTree.XML(contents) | |
console("\t\tWarning missing rdf prefix:", RDF_PREFIX) | |
except xml.etree.ElementTree.ParseError as e: | |
console("\t\tParser Error on XML") | |
console(traceback.format_exc()) | |
console(contents) | |
return | |
sphericalDictionary = dict() | |
for child in parsed_xml.getchildren(): | |
if child.tag in SPHERICAL_TAGS.keys(): | |
console("\t\t" + SPHERICAL_TAGS[child.tag] | |
+ " = " + child.text) | |
sphericalDictionary[SPHERICAL_TAGS[child.tag]] = child.text | |
else: | |
tag = child.tag | |
if child.tag[:len(spherical_prefix)] == spherical_prefix: | |
tag = child.tag[len(spherical_prefix):] | |
console("\t\tUnknown: " + tag + " = " + child.text) | |
return sphericalDictionary | |
def parse_spherical_mpeg4(mpeg4_file, fh, console): | |
"""Returns spherical metadata for a loaded mpeg4 file. | |
Args: | |
mpeg4_file: mpeg4, loaded mpeg4 file contents. | |
fh: file handle, file handle for uncached file contents. | |
Returns: | |
Dictionary stored as (trackName, metadataDictionary) | |
""" | |
metadata = ParsedMetadata() | |
track_num = 0 | |
for element in mpeg4_file.moov_box.contents: | |
if element.name == mpeg.constants.TAG_TRAK: | |
trackName = "Track %d" % track_num | |
console("\t%s" % trackName) | |
track_num += 1 | |
for sub_element in element.contents: | |
if sub_element.name == mpeg.constants.TAG_UUID: | |
if sub_element.contents: | |
sub_element_id = sub_element.contents[:16] | |
else: | |
fh.seek(sub_element.content_start()) | |
sub_element_id = fh.read(16) | |
if sub_element_id == SPHERICAL_UUID_ID: | |
if sub_element.contents: | |
contents = sub_element.contents[16:] | |
else: | |
contents = fh.read(sub_element.content_size - 16) | |
metadata.video[trackName] = \ | |
parse_spherical_xml(contents.decode("utf-8"), console) | |
if sub_element.name == mpeg.constants.TAG_MDIA: | |
for mdia_sub_element in sub_element.contents: | |
if mdia_sub_element.name != mpeg.constants.TAG_MINF: | |
continue | |
for stbl_elem in mdia_sub_element.contents: | |
if stbl_elem.name != mpeg.constants.TAG_STBL: | |
continue | |
for stsd_elem in stbl_elem.contents: | |
if stsd_elem.name != mpeg.constants.TAG_STSD: | |
continue | |
for sa3d_container_elem in stsd_elem.contents: | |
if sa3d_container_elem.name not in \ | |
mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS: | |
continue | |
metadata.num_audio_channels = \ | |
get_num_audio_channels(stsd_elem, fh) | |
for sa3d_elem in sa3d_container_elem.contents: | |
if sa3d_elem.name == mpeg.constants.TAG_SA3D: | |
sa3d_elem.print_box(console) | |
metadata.audio = sa3d_elem | |
return metadata | |
def parse_mpeg4(input_file, console): | |
with open(input_file, "rb") as in_fh: | |
mpeg4_file = mpeg.load(in_fh) | |
if mpeg4_file is None: | |
console("Error, file could not be opened.") | |
return | |
console("Loaded file...") | |
return parse_spherical_mpeg4(mpeg4_file, in_fh, console) | |
console("Error \"" + input_file + "\" does not exist or do not have " | |
"permission.") | |
def inject_mpeg4(input_file, output_file, metadata, console): | |
with open(input_file, "rb") as in_fh: | |
mpeg4_file = mpeg.load(in_fh) | |
if mpeg4_file is None: | |
console("Error file could not be opened.") | |
if not mpeg4_add_spherical(mpeg4_file, in_fh, metadata.video): | |
console("Error failed to insert spherical data") | |
if metadata.audio: | |
if not mpeg4_add_audio_metadata( | |
mpeg4_file, in_fh, metadata.audio, console): | |
console("Error failed to insert spatial audio data") | |
console("Saved file settings") | |
parse_spherical_mpeg4(mpeg4_file, in_fh, console) | |
with open(output_file, "wb") as out_fh: | |
mpeg4_file.save(in_fh, out_fh) | |
return | |
console("Error file: \"" + input_file + "\" does not exist or do not have " | |
"permission.") | |
def parse_metadata(src, console): | |
infile = os.path.abspath(src) | |
try: | |
in_fh = open(infile, "rb") | |
in_fh.close() | |
except: | |
console("Error: " + infile + | |
" does not exist or we do not have permission") | |
console("Processing: " + infile) | |
extension = os.path.splitext(infile)[1].lower() | |
if extension in MPEG_FILE_EXTENSIONS: | |
return parse_mpeg4(infile, console) | |
console("Unknown file type") | |
return None | |
def inject_metadata(src, dest, metadata, console): | |
infile = os.path.abspath(src) | |
outfile = os.path.abspath(dest) | |
if infile == outfile: | |
return "Input and output cannot be the same" | |
try: | |
in_fh = open(infile, "rb") | |
in_fh.close() | |
except: | |
console("Error: " + infile + | |
" does not exist or we do not have permission") | |
return | |
console("Processing: " + infile) | |
extension = os.path.splitext(infile)[1].lower() | |
if (extension in MPEG_FILE_EXTENSIONS): | |
inject_mpeg4(infile, outfile, metadata, console) | |
return | |
console("Unknown file type") | |
def generate_spherical_xml(stereo=None, crop=None): | |
# Configure inject xml. | |
additional_xml = "" | |
if stereo == "top-bottom": | |
additional_xml += SPHERICAL_XML_CONTENTS_TOP_BOTTOM | |
if stereo == "left-right": | |
additional_xml += SPHERICAL_XML_CONTENTS_LEFT_RIGHT | |
if crop: | |
crop_match = re.match(crop_regex, crop) | |
if not crop_match: | |
print("Error: Invalid crop params: {crop}".format(crop=crop)) | |
return False | |
else: | |
cropped_width_pixels = int(crop_match.group(1)) | |
cropped_height_pixels = int(crop_match.group(2)) | |
full_width_pixels = int(crop_match.group(3)) | |
full_height_pixels = int(crop_match.group(4)) | |
cropped_offset_left_pixels = int(crop_match.group(5)) | |
cropped_offset_top_pixels = int(crop_match.group(6)) | |
# This should never happen based on the crop regex. | |
if full_width_pixels <= 0 or full_height_pixels <= 0: | |
print("Error with crop params: full pano dimensions are "\ | |
"invalid: width = {width} height = {height}".format( | |
width=full_width_pixels, | |
height=full_height_pixels)) | |
return False | |
if (cropped_width_pixels <= 0 or | |
cropped_height_pixels <= 0 or | |
cropped_width_pixels > full_width_pixels or | |
cropped_height_pixels > full_height_pixels): | |
print("Error with crop params: cropped area dimensions are "\ | |
"invalid: width = {width} height = {height}".format( | |
width=cropped_width_pixels, | |
height=cropped_height_pixels)) | |
return False | |
# We are pretty restrictive and don't allow anything strange. There | |
# could be use-cases for a horizontal offset that essentially | |
# translates the domain, but we don't support this (so that no | |
# extra work has to be done on the client). | |
total_width = cropped_offset_left_pixels + cropped_width_pixels | |
total_height = cropped_offset_top_pixels + cropped_height_pixels | |
if (cropped_offset_left_pixels < 0 or | |
cropped_offset_top_pixels < 0 or | |
total_width > full_width_pixels or | |
total_height > full_height_pixels): | |
print("Error with crop params: cropped area offsets are "\ | |
"invalid: left = {left} top = {top} "\ | |
"left+cropped width: {total_width} "\ | |
"top+cropped height: {total_height}".format( | |
left=cropped_offset_left_pixels, | |
top=cropped_offset_top_pixels, | |
total_width=total_width, | |
total_height=total_height)) | |
return False | |
additional_xml += SPHERICAL_XML_CONTENTS_CROP_FORMAT.format( | |
cropped_width_pixels, cropped_height_pixels, | |
full_width_pixels, full_height_pixels, | |
cropped_offset_left_pixels, cropped_offset_top_pixels) | |
spherical_xml = (SPHERICAL_XML_HEADER + | |
SPHERICAL_XML_CONTENTS + | |
additional_xml + | |
SPHERICAL_XML_FOOTER) | |
return spherical_xml | |
def get_descriptor_length(in_fh): | |
"""Derives the length of the MP4 elementary stream descriptor at the | |
current position in the input file. | |
""" | |
descriptor_length = 0 | |
for i in range(4): | |
size_byte = struct.unpack(">c", in_fh.read(1))[0] | |
descriptor_length = (descriptor_length << 7 | | |
ord(size_byte) & int("0x7f", 0)) | |
if (ord(size_byte) != int("0x80", 0)): | |
break | |
return descriptor_length | |
def get_expected_num_audio_channels( | |
ambisonics_type, ambisonics_order, head_locked_stereo): | |
""" Returns the expected number of ambisonic components for a given | |
ambisonic type and ambisonic order. | |
""" | |
head_locked_stereo_channels = 2 if head_locked_stereo == True else 0 | |
if (ambisonics_type == 'periphonic'): | |
return (((ambisonics_order + 1) * (ambisonics_order + 1)) + | |
head_locked_stereo_channels) | |
else: | |
return -1 | |
def get_num_audio_channels(stsd, in_fh): | |
if stsd.name != mpeg.constants.TAG_STSD: | |
print("get_num_audio_channels should be given a STSD box") | |
return -1 | |
for sample_description in stsd.contents: | |
if sample_description.name == mpeg.constants.TAG_MP4A: | |
return get_aac_num_channels(sample_description, in_fh) | |
elif sample_description.name in mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS: | |
return get_sample_description_num_channels(sample_description, in_fh) | |
return -1 | |
def get_sample_description_num_channels(sample_description, in_fh): | |
"""Reads the number of audio channels from a sound sample description. | |
""" | |
p = in_fh.tell() | |
in_fh.seek(sample_description.content_start() + 8) | |
version = struct.unpack(">h", in_fh.read(2))[0] | |
revision_level = struct.unpack(">h", in_fh.read(2))[0] | |
vendor = struct.unpack(">i", in_fh.read(4))[0] | |
if version == 0: | |
num_audio_channels = struct.unpack(">h", in_fh.read(2))[0] | |
sample_size_bytes = struct.unpack(">h", in_fh.read(2))[0] | |
elif version == 1: | |
num_audio_channels = struct.unpack(">h", in_fh.read(2))[0] | |
sample_size_bytes = struct.unpack(">h", in_fh.read(2))[0] | |
samples_per_packet = struct.unpack(">i", in_fh.read(4))[0] | |
bytes_per_packet = struct.unpack(">i", in_fh.read(4))[0] | |
bytes_per_frame = struct.unpack(">i", in_fh.read(4))[0] | |
bytes_per_sample = struct.unpack(">i", in_fh.read(4))[0] | |
elif version == 2: | |
always_3 = struct.unpack(">h", in_fh.read(2))[0] | |
always_16 = struct.unpack(">h", in_fh.read(2))[0] | |
always_minus_2 = struct.unpack(">h", in_fh.read(2))[0] | |
always_0 = struct.unpack(">h", in_fh.read(2))[0] | |
always_65536 = struct.unpack(">i", in_fh.read(4))[0] | |
size_of_struct_only = struct.unpack(">i", in_fh.read(4))[0] | |
audio_sample_rate = struct.unpack(">d", in_fh.read(8))[0] | |
num_audio_channels = struct.unpack(">i", in_fh.read(4))[0] | |
else: | |
print("Unsupported version for " + sample_description.name + " box") | |
return -1 | |
in_fh.seek(p) | |
return num_audio_channels | |
def get_aac_num_channels(box, in_fh): | |
"""Reads the number of audio channels from AAC's AudioSpecificConfig | |
descriptor within the esds child box of the input mp4a or wave box. | |
""" | |
p = in_fh.tell() | |
if box.name not in [mpeg.constants.TAG_MP4A, mpeg.constants.TAG_WAVE]: | |
return -1 | |
for element in box.contents: | |
if element.name == mpeg.constants.TAG_WAVE: | |
# Handle .mov with AAC audio, where the structure is: | |
# stsd -> mp4a -> wave -> esds | |
channel_configuration = get_aac_num_channels(element, in_fh) | |
break | |
if element.name != mpeg.constants.TAG_ESDS: | |
continue | |
in_fh.seek(element.content_start() + 4) | |
descriptor_tag = struct.unpack(">c", in_fh.read(1))[0] | |
# Verify the read descriptor is an elementary stream descriptor | |
if ord(descriptor_tag) != 3: # Not an MP4 elementary stream. | |
print("Error: failed to read elementary stream descriptor.") | |
return -1 | |
get_descriptor_length(in_fh) | |
in_fh.seek(3, 1) # Seek to the decoder configuration descriptor | |
config_descriptor_tag = struct.unpack(">c", in_fh.read(1))[0] | |
# Verify the read descriptor is a decoder config. descriptor. | |
if ord(config_descriptor_tag) != 4: | |
print("Error: failed to read decoder config. descriptor.") | |
return -1 | |
get_descriptor_length(in_fh) | |
in_fh.seek(13, 1) # offset to the decoder specific config descriptor. | |
decoder_specific_descriptor_tag = struct.unpack(">c", in_fh.read(1))[0] | |
# Verify the read descriptor is a decoder specific info descriptor | |
if ord(decoder_specific_descriptor_tag) != 5: | |
print("Error: failed to read MP4 audio decoder specific config.") | |
return -1 | |
audio_specific_descriptor_size = get_descriptor_length(in_fh) | |
assert audio_specific_descriptor_size >= 2 | |
decoder_descriptor = struct.unpack(">h", in_fh.read(2))[0] | |
object_type = (int("F800", 16) & decoder_descriptor) >> 11 | |
sampling_frequency_index = (int("0780", 16) & decoder_descriptor) >> 7 | |
if sampling_frequency_index == 0: | |
# TODO: If the sample rate is 96kHz an additional 24 bit offset | |
# value here specifies the actual sample rate. | |
print("Error: Greater than 48khz audio is currently not supported.") | |
return -1 | |
channel_configuration = (int("0078", 16) & decoder_descriptor) >> 3 | |
in_fh.seek(p) | |
return channel_configuration | |
def get_num_audio_tracks(mpeg4_file, in_fh): | |
""" Returns the number of audio track in the input mpeg4 file. """ | |
num_audio_tracks = 0 | |
for element in mpeg4_file.moov_box.contents: | |
if (element.name == mpeg.constants.TAG_TRAK): | |
for sub_element in element.contents: | |
if (sub_element.name != mpeg.constants.TAG_MDIA): | |
continue | |
for mdia_sub_element in sub_element.contents: | |
if (mdia_sub_element.name != mpeg.constants.TAG_HDLR): | |
continue | |
position = mdia_sub_element.content_start() + 8 | |
in_fh.seek(position) | |
if (in_fh.read(4) == mpeg.constants.TAG_SOUN): | |
num_audio_tracks += 1 | |
return num_audio_tracks | |
def get_spatial_audio_metadata(ambisonic_order, head_locked_stereo): | |
num_channels = get_expected_num_audio_channels( | |
"periphonic", ambisonic_order, head_locked_stereo) | |
metadata = { | |
"ambisonic_order": 0, | |
"head_locked_stereo": False, | |
"ambisonic_type": "periphonic", | |
"ambisonic_channel_ordering": "ACN", | |
"ambisonic_normalization": "SN3D", | |
"channel_map": [], | |
} | |
metadata['ambisonic_order'] = ambisonic_order | |
metadata['head_locked_stereo'] = head_locked_stereo | |
metadata['channel_map'] = range(0, num_channels) | |
return metadata |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I played with the initial View settings with the help of your description. At this time, change in Metadata injected, has no (longer) effect on any of the players I tested with or the upload to YouTube. I remain questioning what the players use in the videofile to know the initial viewpoint or the start orientation, if it is not the Metadata?