-
-
Save Plazik/e950267cc580bcde371ea7879aaa3d5d to your computer and use it in GitHub Desktop.
import argparse
import dataclasses
import pathlib
import subprocess
import sys
# Maps a speaker-layout name (the value accepted by the -c/--channels CLI
# option) to the Dolby decoder configuration used during extraction:
#   'id'    - value passed to the dlbaudiodecbin 'out-ch-config' property
#   'names' - channel labels in decoder output order; a name's position is the
#             deinterleave pad index used to extract that channel, and the
#             name itself becomes part of the output file suffix
CHANNELS = {
    '2.0': {
        'id': 0,
        'names': ['L', 'R'],
    },
    '3.1': {
        'id': 3,
        'names': ['L', 'R', 'C', 'LFE'],
    },
    '5.1': {
        'id': 7,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs'],
    },
    '7.1': {
        'id': 11,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs'],
    },
    '9.1': {
        'id': 12,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Lw', 'Rw'],
    },
    '5.1.2': {
        'id': 13,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Ltm', 'Rtm'],
    },
    '5.1.4': {
        'id': 14,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Ltf', 'Rtf', 'Ltr', 'Rtr'],
    },
    '7.1.2': {
        'id': 15,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Ltm', 'Rtm'],
    },
    '7.1.4': {
        'id': 16,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Ltf', 'Rtf', 'Ltr', 'Rtr'],
    },
    '7.1.6': {
        'id': 17,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Ltf', 'Rtf', 'Ltm', 'Rtm', 'Ltr', 'Rtr'],
    },
    '9.1.2': {
        'id': 18,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Lw', 'Rw', 'Ltm', 'Rtm'],
    },
    '9.1.4': {
        'id': 19,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Lw', 'Rw', 'Ltf', 'Rtf', 'Ltr', 'Rtr'],
    },
    '9.1.6': {
        'id': 20,
        'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Lw', 'Rw', 'Ltf', 'Rtf', 'Ltm', 'Rtm', 'Ltr', 'Rtr'],
    },
}
@dataclasses.dataclass
class Config:
    """Runtime options for AtmosDecode, built from the CLI arguments in main()."""

    # Path to the gst-launch-1.0 executable from the Dolby Reference Player.
    gst_launch: pathlib.Path
    # Output channel layout name; must be a key of CHANNELS.
    channels: str
    # When True, output names use '<base>.<name>.wav' without a channel number.
    no_numbers: bool
    # When True, channels are decoded one at a time instead of in parallel.
    single: bool
class AtmosDecode: | |
def __init__(self, config: Config): | |
if not config.gst_launch.is_file(): | |
raise RuntimeError(f'Incorrect path to gst-launch-1.0.exe: {config.gst_launch.absolute()}') | |
self.config: Config = config | |
def decode(self, input_file: pathlib.Path, out_file: pathlib.Path | None = None): | |
if not input_file.is_file(): | |
raise RuntimeError(f'Input file {input_file.absolute()} is not a file') | |
with input_file.open('rb') as f: | |
first_bytes = f.read(10) | |
eac3_sync_word = 0x0B77.to_bytes(2, 'big') | |
truehd_sync_word = 0xF8726FBA.to_bytes(4, 'big') | |
if first_bytes.startswith(eac3_sync_word): | |
command_fun = self.prepare_eac3_decode_command | |
elif truehd_sync_word in first_bytes: | |
command_fun = self.prepare_truehd_decode_command | |
else: | |
raise RuntimeError(f'Source file must be in E-AC3 or TrueHD format') | |
channel_layout = CHANNELS[self.config.channels] | |
out_channel_config_id, channel_names = channel_layout['id'], channel_layout['names'] | |
processes = [] | |
for channel_id, channel_name in enumerate(channel_names): | |
if self.config.no_numbers: | |
suffix = f'.{channel_name}.wav' | |
else: | |
suffix = f'.{str(channel_id + 1).zfill(2)}_{channel_name}.wav' | |
out_file_path = out_file.with_suffix(suffix) if out_file is not None else input_file.with_suffix(suffix) | |
command = command_fun(input_file, out_file_path, channel_id, out_channel_config_id) | |
if self.config.single: | |
print(f'Decoding "{out_file_path}"') | |
subprocess.run(command) | |
else: | |
processes.append(subprocess.Popen(command)) | |
if not self.config.single: | |
for process in processes: | |
process.wait() | |
def prepare_eac3_decode_command( | |
self, | |
input_file: pathlib.Path, | |
out_file: pathlib.Path, | |
channel_id: int, | |
out_channel_config_id: int | |
) -> list[str]: | |
return [ | |
str(self.config.gst_launch.absolute()), | |
'--gst-plugin-path', f'{self.config.gst_launch.parent.absolute()}/gst-plugins', | |
'filesrc', f'location={self._prepare_file_path(input_file)}', '!', | |
'dlbac3parse', '!', | |
'dlbaudiodecbin', f'out-ch-config={out_channel_config_id}', '!', | |
'deinterleave', 'name=d', f'd.src_{channel_id}', '!', | |
'wavenc', '!', | |
'filesink', f'location={self._prepare_file_path(out_file)}' | |
] | |
def prepare_truehd_decode_command( | |
self, | |
input_file: pathlib.Path, | |
out_file: pathlib.Path, | |
channel_id: int, | |
out_channel_config_id: int | |
) -> list[str]: | |
return [ | |
str(self.config.gst_launch.absolute()), | |
'--gst-plugin-path', f'{self.config.gst_launch.parent.absolute()}/gst-plugins', | |
'filesrc', f'location={self._prepare_file_path(input_file)}', '!', | |
'dlbtruehdparse', 'align-major-sync=false', '!', | |
'dlbaudiodecbin', 'truehddec-presentation=16', f'out-ch-config={out_channel_config_id}', '!', | |
'deinterleave', 'name=d', f'd.src_{channel_id}', '!', | |
'wavenc', '!', | |
'filesink', f'location={self._prepare_file_path(out_file)}' | |
] | |
def _prepare_file_path(self, source: pathlib.Path) -> str: | |
return str(source.absolute()).replace('\\', '\\\\') | |
def main():
    """Parse the command line, build a Config, and run the decoder."""
    arg_parser = argparse.ArgumentParser()
    # One (flags, options) spec per CLI argument, registered in a single loop.
    argument_specs = [
        (('-i', '--input'), dict(
            help='Path to source file',
            type=pathlib.Path,
            required=True,
        )),
        (('-o', '--output'), dict(
            help='Path to output base file',
            type=pathlib.Path,
        )),
        (('--gst_launch',), dict(
            help='Path to gst-launch file from Dolby Reference Player',
            type=pathlib.Path,
            default=pathlib.Path(r'C:\Program Files\Dolby\Dolby Reference Player\gst-launch-1.0.exe'),
        )),
        (('-c', '--channels'), dict(
            help='Output channel configuration',
            type=str,
            default='9.1.6',
            choices=CHANNELS.keys(),
        )),
        (('-nn', '--no_numbers'), dict(
            help='Do not use numbers in output channel names',
            action='store_true',
        )),
        (('-s', '--single'), dict(
            help='Decode one channel at a time',
            action='store_true',
        )),
    ]
    for flags, options in argument_specs:
        arg_parser.add_argument(*flags, **options)

    cli_args = arg_parser.parse_args()
    config = Config(
        gst_launch=cli_args.gst_launch,
        channels=cli_args.channels,
        no_numbers=cli_args.no_numbers,
        single=cli_args.single,
    )
    AtmosDecode(config).decode(cli_args.input, cli_args.output)
if __name__ == '__main__':
    try:
        main()
    except RuntimeError as e:
        # Expected failures (bad paths, unknown format, decoder errors) go to
        # stderr with a non-zero exit code, instead of being printed to stdout
        # while the process still exits with success status.
        print(e, file=sys.stderr)
        sys.exit(1)
None of the bitstream documentations of either format (E-AC-3 base, E-AC-3 JOC, AC-4 base, AC-4 extensions) mention any field metadata
https://dolbyprofessionalsupport.zendesk.com/hc/en-us/articles/4406297948180-Module-7-4-Binaural-Render-Mode-
The documentation you referenced probably doesn't, but the one I referenced mentions the use of field metadata in the AC4-IMS encoding process.
https://professionalsupport.dolby.com/s/question/0D54u00007k0lOeCAI/ac4ims-a-2channel-binaural-audio-signal-without-metadata-without-any-rendering-during-playback?language=en_US
And this was also mentioned in Dolby Labs.
use of field metadata in the AC4-IMS encoding process.
Yes, they create the mix there, and nowhere else. The file will not have this metadata (or any for that matter), just a final rendered mix. For an E-AC-3 mix, near-field just means an object that's closer to the center and is handled by the renderer correctly.
The file will not have this metadata (or any for that matter), just a final rendered mix.
Then it would be impossible to apply PHRTF to AC4-IMS? Currently Dolby Access interacts with EAC3-JOC, and the mechanism allowing PHRTF is probably due to the object metadata contained in the bitstream, right? On the other hand, if AC4-IMS is just a final mix with no metadata, applying PHRTF would be impossible, wouldn't it?
Then it would be impossible to apply PHRTF to AC4-IMS?
No, ambisonics can be transformed freely, just look at the 360 degree videos on YouTube. They also use this technology to further transform an already existing stereo-encoded soundspace with no metadata at all, just a regular AAC file.
They also use this technology to further transform an already existing stereo-encoded soundspace with no metadata at all, just a regular AAC file.
I didn't know about this. I thought YouTube only spatialized ambisonics and surround uploads.
Unless I'm missing something, stereo content like AC4-IMS is usually not spatialized by youtube. But do you have any examples of what you mean in that case or how the tech works?
like did they introduce some sort of stereo virtualization, upmixer and virtualizer or AI sound splitting to spatialize them as objects?
That'd be really interesting. I know at one point youtube even offered an option to convert video uploads to 3D, so it'd be cool if they did something like that for sound now that spatial audio is picking up some traction 🤔
I didn't know about this. I thought YouTube only spatialized ambisonics and surround uploads.
I was talking about ambisonics, but it can be transferred to any HRTF set or rotated, which they do when you rotate a 360 video.
Unless I'm missing something, stereo content like AC4-IMS is usually not spatialized by youtube.
If it's the only track of a 360 video, it will.
But do you have any examples of what you mean in that case or how the tech works?
The file will not have this metadata (or any for that matter), just a final rendered mix.
Then it would be impossible to apply PHRTF to AC4-IMS? Currently Dolby Access interacts with EAC3-JOC, and the mechanism allowing PHRTF is probably due to the object metadata contained in the bitstream, right? On the other hand, if AC4-IMS is just a final mix with no metadata, applying PHRTF would be impossible, Isn't it?
I believe AC4-IMS is already encoded with the near, mid, and far room reflections per object, and the HRTF too. (It's like the lossy form of what you hear from the Dolby Atmos Renderer's binaural render output in headphones.) When you play back test files, you can hear that the near, mid, and far metadata is already in the bitstream, I assume?
right. This is why I think it is impossible to apply PHRTF to AC4-IMS. In the case of DD+JOC, binaural rendering is done in real time by Dolby access, so PHRTF can be incorporated, but AC4-IMS is already a binaural rendered finished audio stream, so PHRTF will not be able to interfere.
I always wondered if that's why Apple chose to binauralise EAC3-JOC to Apple Spatial rather than AC4-IMS was for headtracking, but I assume it's because the licence is cheaper.
It's free as it's an open standard. But it's also perfectly doable with IMS, like what I just showed before, even YouTube did it.
Is there any script I can somehow merge those 16 wav files into a single 9.1.6 PCM?
something like:
ffmpeg -i a0-ec3-L.wav -i a0-ec3-R.wav -i a0-ec3-C.wav -i a0-ec3-LFE.wav -i a0-ec3-Ls.wav -i a0-ec3-Rs.wav -i a0-ec3-Lrs.wav -i a0-ec3-Rrs.wav -i a0-ec3-Lw.wav -i a0-ec3-Rw.wav -i a0-ec3-Ltf.wav -i a0-ec3-Rtf.wav -i a0-ec3-Ltm.wav -i a0-ec3-Rtm.wav -i a0-ec3-Ltr.wav -i a0-ec3-Rtr.wav filterpar="[0:a][1:a][2:a][3:a][4:a][5:a][6:a][7:a][8:a][9:a][10:a][11:a][12:a][13:a][14:a][15:a]amerge=inputs=16[a]" -map "[a]" out.wav
Is there any script I can somehow merge those 16 wav files into a single 9.1.6 PCM?
something like:
ffmpeg -i a0-ec3-L.wav -i a0-ec3-R.wav -i a0-ec3-C.wav -i a0-ec3-LFE.wav -i a0-ec3-Ls.wav -i a0-ec3-Rs.wav -i a0-ec3-Lrs.wav -i a0-ec3-Rrs.wav -i a0-ec3-Lw.wav -i a0-ec3-Rw.wav -i a0-ec3-Ltf.wav -i a0-ec3-Rtf.wav -i a0-ec3-Ltm.wav -i a0-ec3-Rtm.wav -i a0-ec3-Ltr.wav -i a0-ec3-Rtr.wav filterpar="[0:a][1:a][2:a][3:a][4:a][5:a][6:a][7:a][8:a][9:a][10:a][11:a][12:a][13:a][14:a][15:a]amerge=inputs=16[a]" -map "[a]" out.wav
Thanks!
Plazik, could you add to the script the ability to set the “ac4dec-out-ref-level: Set output reference level” parameter, as well as the channels?
Is there any script I can somehow merge those 16 wav files into a single 9.1.6 PCM?
something like:
ffmpeg -i a0-ec3-L.wav -i a0-ec3-R.wav -i a0-ec3-C.wav -i a0-ec3-LFE.wav -i a0-ec3-Ls.wav -i a0-ec3-Rs.wav -i a0-ec3-Lrs.wav -i a0-ec3-Rrs.wav -i a0-ec3-Lw.wav -i a0-ec3-Rw.wav -i a0-ec3-Ltf.wav -i a0-ec3-Rtf.wav -i a0-ec3-Ltm.wav -i a0-ec3-Rtm.wav -i a0-ec3-Ltr.wav -i a0-ec3-Rtr.wav filterpar="[0:a][1:a][2:a][3:a][4:a][5:a][6:a][7:a][8:a][9:a][10:a][11:a][12:a][13:a][14:a][15:a]amerge=inputs=16[a]" -map "[a]" out.wav
Thanks!
Music Media Helper 7 now does what this script does and has options to output multi-mono or multichannel.
You can get the AC4-IMS version using the tidal-dl GUI with an Android token from a device with the Atmos feature, but I agree it's only a format Android users can hear. For headphone playback it's much closer to the binaural render from the Dolby Atmos Renderer than binauralized EAC3-JOC is, imo.