Skip to content

Instantly share code, notes, and snippets.

@Plazik
Last active April 21, 2024 15:19
Show Gist options
  • Star 27 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Plazik/e950267cc580bcde371ea7879aaa3d5d to your computer and use it in GitHub Desktop.
Decode E-AC3 and TrueHD with Atmos to WAVs (up to 16 channels)
from __future__ import annotations

import argparse
import dataclasses
import pathlib
import subprocess
# Output channel configurations understood by the Dolby decoder.
# 'id' is the value passed to dlbaudiodecbin's out-ch-config property;
# 'names' lists the channels in the order deinterleave exposes them
# (d.src_0, d.src_1, ...), so it also fixes the output file naming order.
CHANNELS = {
    '2.0': {'id': 0, 'names': ['L', 'R']},
    '3.1': {'id': 3, 'names': ['L', 'R', 'C', 'LFE']},
    '5.1': {'id': 7, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs']},
    '7.1': {'id': 11, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs']},
    '9.1': {'id': 12, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Lw', 'Rw']},
    '5.1.2': {'id': 13, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Ltm', 'Rtm']},
    '5.1.4': {'id': 14, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Ltf', 'Rtf', 'Ltr', 'Rtr']},
    '7.1.2': {'id': 15, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Ltm', 'Rtm']},
    '7.1.4': {'id': 16, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Ltf', 'Rtf', 'Ltr', 'Rtr']},
    '7.1.6': {'id': 17, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Ltf', 'Rtf', 'Ltm', 'Rtm', 'Ltr', 'Rtr']},
    '9.1.2': {'id': 18, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Lw', 'Rw', 'Ltm', 'Rtm']},
    '9.1.4': {'id': 19, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Lw', 'Rw', 'Ltf', 'Rtf', 'Ltr', 'Rtr']},
    '9.1.6': {'id': 20, 'names': ['L', 'R', 'C', 'LFE', 'Ls', 'Rs', 'Lrs', 'Rrs', 'Lw', 'Rw', 'Ltf', 'Rtf', 'Ltm', 'Rtm', 'Ltr', 'Rtr']},
}
@dataclasses.dataclass
class Config:
    """Runtime configuration for the Atmos decoder."""

    # Path to gst-launch-1.0.exe from the Dolby Reference Player install.
    gst_launch: pathlib.Path
    # Key into CHANNELS selecting the output channel configuration, e.g. '9.1.6'.
    channels: str
    # When True, output files are named '<base>.<channel>.wav' (no numeric prefix).
    no_numbers: bool
    # When True, decode one channel at a time instead of spawning all pipelines in parallel.
    single: bool
class AtmosDecode:
    """Decode an E-AC3 or TrueHD (Atmos) bitstream into per-channel mono WAVs.

    Drives the gst-launch-1.0 pipeline shipped with the Dolby Reference
    Player, running one pipeline per output channel.
    """

    def __init__(self, config: Config):
        """Validate the configured gst-launch path and store the config.

        Raises:
            RuntimeError: if config.gst_launch does not point to an existing file.
        """
        if not config.gst_launch.is_file():
            raise RuntimeError(f'Incorrect path to gst-launch-1.0.exe: {config.gst_launch.absolute()}')
        self.config: Config = config

    def decode(self, input_file: pathlib.Path, out_file: pathlib.Path | None = None):
        """Decode input_file into one mono WAV file per output channel.

        Args:
            input_file: path to a raw E-AC3 or TrueHD bitstream.
            out_file: optional base path for the outputs; a per-channel suffix
                replaces its extension. Defaults to input_file.

        Raises:
            RuntimeError: if input_file is missing, or its first bytes match
                neither the E-AC3 nor the TrueHD sync word.
        """
        if not input_file.is_file():
            raise RuntimeError(f'Input file {input_file.absolute()} is not a file')
        # Sniff the stream format from the first few bytes.
        with input_file.open('rb') as f:
            first_bytes = f.read(10)
        eac3_sync_word = 0x0B77.to_bytes(2, 'big')
        truehd_sync_word = 0xF8726FBA.to_bytes(4, 'big')
        if first_bytes.startswith(eac3_sync_word):
            command_fun = self.prepare_eac3_decode_command
        elif truehd_sync_word in first_bytes:
            # The TrueHD major sync word is not necessarily at offset 0,
            # so search within the sniffed prefix instead of startswith.
            command_fun = self.prepare_truehd_decode_command
        else:
            raise RuntimeError('Source file must be in E-AC3 or TrueHD format')
        channel_layout = CHANNELS[self.config.channels]
        out_channel_config_id, channel_names = channel_layout['id'], channel_layout['names']
        processes = []
        for channel_id, channel_name in enumerate(channel_names):
            if self.config.no_numbers:
                suffix = f'.{channel_name}.wav'
            else:
                # Zero-padded 1-based channel number keeps files sorted in layout order.
                suffix = f'.{str(channel_id + 1).zfill(2)}_{channel_name}.wav'
            out_file_path = out_file.with_suffix(suffix) if out_file is not None else input_file.with_suffix(suffix)
            command = command_fun(input_file, out_file_path, channel_id, out_channel_config_id)
            if self.config.single:
                # Sequential mode: run one pipeline at a time, blocking.
                print(f'Decoding "{out_file_path}"')
                subprocess.run(command)
            else:
                # Parallel mode: launch every pipeline, wait for all below.
                processes.append(subprocess.Popen(command))
        if not self.config.single:
            for process in processes:
                process.wait()

    def prepare_eac3_decode_command(
        self,
        input_file: pathlib.Path,
        out_file: pathlib.Path,
        channel_id: int,
        out_channel_config_id: int
    ) -> list[str]:
        """Build the gst-launch argv extracting one channel from an E-AC3 stream."""
        return self._build_pipeline_command(
            ['dlbac3parse', '!',
             'dlbaudiodecbin', f'out-ch-config={out_channel_config_id}'],
            input_file, out_file, channel_id,
        )

    def prepare_truehd_decode_command(
        self,
        input_file: pathlib.Path,
        out_file: pathlib.Path,
        channel_id: int,
        out_channel_config_id: int
    ) -> list[str]:
        """Build the gst-launch argv extracting one channel from a TrueHD stream."""
        return self._build_pipeline_command(
            ['dlbtruehdparse', 'align-major-sync=false', '!',
             'dlbaudiodecbin', 'truehddec-presentation=16', f'out-ch-config={out_channel_config_id}'],
            input_file, out_file, channel_id,
        )

    def _build_pipeline_command(
        self,
        decode_elements: list[str],
        input_file: pathlib.Path,
        out_file: pathlib.Path,
        channel_id: int
    ) -> list[str]:
        """Assemble the common pipeline shared by both formats.

        filesrc -> <format-specific parse/decode> -> deinterleave (pick one
        channel pad) -> wavenc -> filesink.
        """
        return [
            str(self.config.gst_launch.absolute()),
            '--gst-plugin-path', f'{self.config.gst_launch.parent.absolute()}/gst-plugins',
            'filesrc', f'location={self._prepare_file_path(input_file)}', '!',
            *decode_elements, '!',
            'deinterleave', 'name=d', f'd.src_{channel_id}', '!',
            'wavenc', '!',
            'filesink', f'location={self._prepare_file_path(out_file)}'
        ]

    def _prepare_file_path(self, source: pathlib.Path) -> str:
        """Escape backslashes so Windows paths survive gst-launch property parsing."""
        return str(source.absolute()).replace('\\', '\\\\')
def main():
    """Command-line entry point: parse arguments, build a Config, run the decoder."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-i', '--input',
        help='Path to source file',
        type=pathlib.Path,
        required=True,
    )
    arg_parser.add_argument(
        '-o', '--output',
        help='Path to output base file',
        type=pathlib.Path,
    )
    arg_parser.add_argument(
        '--gst_launch',
        help='Path to gst-launch file from Dolby Reference Player',
        type=pathlib.Path,
        default=pathlib.Path(r'C:\Program Files\Dolby\Dolby Reference Player\gst-launch-1.0.exe')
    )
    arg_parser.add_argument(
        '-c', '--channels',
        help='Output channel configuration',
        type=str,
        default='9.1.6',
        choices=CHANNELS.keys(),
    )
    arg_parser.add_argument(
        '-nn', '--no_numbers',
        help='Do not use numbers in output channel names',
        action='store_true',
    )
    arg_parser.add_argument(
        '-s', '--single',
        help='Decode one channel at a time',
        action='store_true',
    )
    options = arg_parser.parse_args()
    decoder_config = Config(
        gst_launch=options.gst_launch,
        channels=options.channels,
        no_numbers=options.no_numbers,
        single=options.single,
    )
    # Decode the input against the parsed options; output base path may be None.
    AtmosDecode(decoder_config).decode(options.input, options.output)
if __name__ == '__main__':
    # Print expected failures (bad paths, unsupported formats) without a traceback.
    try:
        main()
    except RuntimeError as error:
        print(error)
@chris027
Copy link

I'd love to use this script if it put the channels into a single WAV file, in the right order obviously.
Is there a reason the channels are split into separate tracks?

@VoidXH
Copy link

VoidXH commented Jan 30, 2023

Is there any script I can somehow merge those 16 wav files into a single 9.1.6 PCM?

Open a feature request issue in VoidXH/Cavern. There is a WIP merger tool, it's in development for E-AC-3, but a WAV export could be added in a day.

@sw5163
Copy link

sw5163 commented Jan 31, 2023

There is no need for cavern, ffmpeg supports merging 16 wavs in one wav no problem. See https://trac.ffmpeg.org/wiki/AudioChannelManipulation

Thanks! I saw that and tried this command
ffmpeg -i "12_Channel_ID 1.wav" -i "12_Channel_ID 2.wav" -i "12_Channel_ID 3.wav" -i "12_Channel_ID 4.wav" -i "12_Channel_ID 5.wav" -i "12_Channel_ID 6.wav" -i "12_Channel_ID 7.wav" -i "12_Channel_ID 8.wav" -i "12_Channel_ID 9.wav" -i "12_Channel_ID 10.wav" -i "12_Channel_ID 11.wav" -i "12_Channel_ID 12.wav" -c pcm_s32le -filter_complex "join=inputs=12:channel_layout=7.1+TFL+TFR+TBL+TBR:map=0.0-FL|1.0-FR|2.0-FC|3.0-LFE|4.0-BL|5.0-BR|6.0-SL|7.0-SR|8.0-TFL|9.0-TFR|10.0-TBL|11.0-TBR" 7.1.4.wav
End up with this 7.1.4.wav, but I have no idea if it's correct.

The source file is the 8_Channel_ID.wav from https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html, I modified it.
If above command can somehow simplify, please let me know, much appreciate!

@sw5163
Copy link

sw5163 commented Feb 1, 2023

Use -c pcm_s24le since source is 24 bit integer.

Thanks!

@hockinsk
Copy link

hockinsk commented Jun 11, 2023

Can you create a plug-in to create a two channel WAV by applying headphone virtualization from EAC3 and TrueHD with Atmos? The reason I want this is because about Dolby AC4, it is already implemented in the CLI environment of the DRP, but the EAC3 and TrueHD bitstream are not implemented.

Headphone and speaker virtualization requires the binaural metadata which isn't included in the eac3. You can decode the ac4-ims to .wav in Reference Player already.

@VoidXH
Copy link

VoidXH commented Jun 11, 2023

Headphone and speaker virtualization requires the binaural metadata which isn't included in the eac3.

There is no such thing as binaural metadata, there are just objects/channels with movement. Virtualizing just requires any channel-based render (same as for speakers) convolved with the corresponding HRIRs, this is how everyone's doing it, otherwise the performance would be unusable. Cavern can do the rest if the channels are either in correct order in a 7.1.4 WAV file (L, R, C, LFE, RL, RR, SL, SR, TFL, TFR, TRL, TRR) (top sides are not in that standard), or just annotated in any of the available channel masks. E-AC-3 is supported completely from reading to binaural export, but TrueHD is a closed standard only DRP can read as of now.

@hockinsk
Copy link

hockinsk commented Jun 11, 2023

My poor explanation. I was really meaning the binaural metadata set in the ADM that is used to binauralise the off,near,mid or far binaural room per object ends up encoded into AC4-IMS and not EAC3-JOC, so there's no per-object binaural 'room' because EAC3-JOC is really a speaker playback format (even though Apple do binauralise it to Apple Spatial).

@VoidXH
Copy link

VoidXH commented Jun 11, 2023

binauralise the off,near,mid or far

Atmos has no such thing. It's rendered in a balance-based way, which has perfect near- and mid-field object perception for a single far-field channel-based render, which is similar to what Dolby's own renderer does. Putting a basic HRTF on that render results in the correct wavefield, without synthesis. Also, research shows that raw human distance perception dies off after about 3-4 meters, further sounds are only felt based on volume-to-distance experience.

AC4-IMS

This is a common Dolby practice, it's a buzzword. It's literally basic pre-rendered ambisonics as a separate substream. You can just rip it out from the bitstream without rendering. You could create the same exact stream by putting the original mix for speakers through Atmos for Headphones, or any other virtualizer for that matter.

@hockinsk
Copy link

I would have to see that, because off, near, mid and far is definitely set per object when mixing Dolby Atmos, and its metadata is stored per object and bed in the ADM and is only observed in AC4-IMS playback in my testing. Dolby state AC4-IMS bypasses spatial coding — it's a per-object render; eac3-joc is a downmix to 5.1 containing all object and bed audio, but with JOC to rebuild the 16 spatial groups/object collections.

@VoidXH
Copy link

VoidXH commented Jun 11, 2023

I would have to see that, because off.near,mid and far is definitely set per object when mixing Dolby Atmos and its metadata is stored per object and bed in the ADM and is only observed in AC4-IMS playback in my testing.

None of the bitstream documentations of either format (E-AC-3 base, E-AC-3 JOC, AC-4 base, AC-4 extensions) mention any field metadata, this will be translated to a modified object placement. Current encoders only use object placement, not even scaling (it only exists in the DAW/ADM, even if the codecs have the field for it - it turned out to be a waste of space and resources), everything else is already baked in the resulting objects.

Dolby state AC4-IMS bypasses spatial coding, it's a per-object render

It's a complete bake once the file is assembled. But yes, if it's present, it's bypassing the object renderer.

eac3-joc is a dowmix to 5.1 containing all object and bed audio but with JOC to rebuild the 16 spatial groups/object collections
AC-4 is exactly the same:

  • E-AC-3 Atmos = AC-3 5.1 downmix + channel extension substream (7.1, 5.1.2 front in the industry, but my encoder can do 9.1.6, which is the codec limit) + JOC up to 63 objects (LFE is bypassed - but could be done for any channel, the current 15 object limit is to fit in ARC's bitrate limit)
  • AC-4 Atmos = Same 5.1 downmix + same extension substream + A-JOC (Advanced JOC) + optional ambisonic downmix stream (which can be hacked into E-AC-3 too, just needs a special decoder, which, for a lack of interest, only exists in the standard)

@hockinsk
Copy link

hockinsk commented Jun 12, 2023

I understand at the consumer side that's the case, but bluewidy was talking about headphone virtualization aka Dolby Atmos Reference Player Speaker Virtualization to 2.0 .wav. This doesn't exist for eac3-joc only ac4-ims in DRP CLI. I think we're talking from the same understanding, I'm just looking at it from the ADM encoding onwards whereas you're looking at it from the consumer end backwards. All good, although an AC4-IMS player within Cavern would be pretty cool nonetheless.

@bluewidy
Copy link

I just wanted to fuse the Dolby Access features into the Dolby Reference Player. That's all.

@hockinsk
Copy link

hockinsk commented Jun 12, 2023

That's why I was mentioning AC4-IMS, because Dolby Reference Player has no headphone virtualization for eac3, only ac4. Dolby Access is a bit of an anomaly really in placing hrtf onto eac3-joc as a channel-based process. ac4-ims bakes in the near/mid/far/off binaural for each object and bed individually with no speakers/channels assumed, just the objects' positions in the room it seems.

@VoidXH
Copy link

VoidXH commented Jun 12, 2023

This doesn't exist for eac3-joc only ac4-ims in DRP CLI

Yes, it needs some hacking, but a very easy one.

All good, although an AC4-IMS player within Cavern would be pretty cool nonetheless.

Once it's actually used in the industry, I'll start implementing AC-4. There's currently no need for it because of the lack of content.

@bluewidy
Copy link

Dolby Reference Player has no headphone virtualization for eac3 ony ac4

This is why I wrote a request for a new script.

@hockinsk
Copy link

You can get the ac4-ims version using tidal-dl gui with an Android token from a device with the Atmos feature, but I agree it's only a format Android users can hear. For headphone playback it's much closer to the binaural render from Dolby Atmos Renderer than eac3-joc binauralized is imo.

@bluewidy
Copy link

bluewidy commented Jun 12, 2023

None of the bitstream documentations of either format (E-AC-3 base, E-AC-3 JOC, AC-4 base, AC-4 extensions) mention any field metadata

https://dolbyprofessionalsupport.zendesk.com/hc/en-us/articles/4406297948180-Module-7-4-Binaural-Render-Mode-
The documentation you referenced probably doesn't, but the one I referenced mentions the use of field metadata in the AC4-IMS encoding process.

https://professionalsupport.dolby.com/s/question/0D54u00007k0lOeCAI/ac4ims-a-2channel-binaural-audio-signal-without-metadata-without-any-rendering-during-playback?language=en_US
And this was also mentioned in Dolby Labs.

@VoidXH
Copy link

VoidXH commented Jun 12, 2023

use of field metadata in the AC4-IMS encoding process.

Yes, they create the mix there, and nowhere else. The file will not have this metadata (or any for that matter), just a final rendered mix. For an E-AC-3 mix, near-field just means an object that's closer to the center and is handled by the renderer correctly.

@bluewidy
Copy link

bluewidy commented Jun 12, 2023

The file will not have this metadata (or any for that matter), just a final rendered mix.

Then it would be impossible to apply PHRTF to AC4-IMS? Currently Dolby Access interacts with EAC3-JOC, and the mechanism allowing PHRTF is probably due to the object metadata contained in the bitstream, right? On the other hand, if AC4-IMS is just a final mix with no metadata, applying PHRTF would be impossible, Isn't it?

@VoidXH
Copy link

VoidXH commented Jun 12, 2023

Then it would be impossible to apply PHRTF to AC4-IMS?

No, ambisonics can be transformed freely, just look at the 360 degree videos on YouTube. They also use this technology to further transform an already existing stereo-encoded soundspace with no metadata at all, just a regular AAC file.

@ThreeDeeJay
Copy link

They also use this technology to further transform an already existing stereo-encoded soundspace with no metadata at all, just a regular AAC file.

I didn't know about this. I thought YouTube only spatialized ambisonics and surround uploads.
Unless I'm missing something, stereo content like AC4-IMS is usually not spatialized by youtube. But do you have any examples of what you mean in that case or how the tech works?
like did they introduce some sort of stereo virtualization, upmixer and virtualizer or AI sound splitting to spatialize them as objects?
That'd be really interesting. I know at one point youtube even offered an option to convert video uploads to 3D, so it'd be cool if they did something like that for sound now that spatial audio is picking up some traction 🤔

@VoidXH
Copy link

VoidXH commented Jun 12, 2023

I didn't know about this. I thought YouTube only spatialized ambisonics and surround uploads.

I was talking about ambisonics, but it can be transferred to any HRTF set or rotated, which they do when you rotate a 360 video.

Unless I'm missing something, stereo content like AC4-IMS is usually not spatialized by youtube.

If it's the only track of a 360 video, it will.

But do you have any examples of what you mean in that case or how the tech works?

https://youtu.be/bb5eETSspVI

@hockinsk
Copy link

The file will not have this metadata (or any for that matter), just a final rendered mix.

Then it would be impossible to apply PHRTF to AC4-IMS? Currently Dolby Access interacts with EAC3-JOC, and the mechanism allowing PHRTF is probably due to the object metadata contained in the bitstream, right? On the other hand, if AC4-IMS is just a final mix with no metadata, applying PHRTF would be impossible, Isn't it?

I believe the AC4-IMS is already encoded with the near, mid and far room reflection per object, and the hrtf too. (It's like the lossy form of what you hear from the Dolby Atmos Renderer binaural render output in headphones.) When you play back test files, you hear the near, mid and far metadata is already in the bit stream, I assume?

@bluewidy
Copy link

right. This is why I think it is impossible to apply PHRTF to AC4-IMS. In the case of DD+JOC, binaural rendering is done in real time by Dolby access, so PHRTF can be incorporated, but AC4-IMS is already a binaural rendered finished audio stream, so PHRTF will not be able to interfere.

@hockinsk
Copy link

I always wondered if that's why Apple chose to binauralise EAC3-JOC to Apple Spatial rather than AC4-IMS was for headtracking, but I assume it's because the licence is cheaper.

@VoidXH
Copy link

VoidXH commented Jun 12, 2023

It's free as it's an open standard. But it's also perfectly doable with IMS, like what I just showed before, even YouTube did it.

@eviluess
Copy link

eviluess commented Aug 3, 2023

Is there any script I can somehow merge those 16 wav files into a single 9.1.6 PCM?
something like:
ffmpeg -i a0-ec3-L.wav -i a0-ec3-R.wav -i a0-ec3-C.wav -i a0-ec3-LFE.wav -i a0-ec3-Ls.wav -i a0-ec3-Rs.wav -i a0-ec3-Lrs.wav -i a0-ec3-Rrs.wav -i a0-ec3-Lw.wav -i a0-ec3-Rw.wav -i a0-ec3-Ltf.wav -i a0-ec3-Rtf.wav -i a0-ec3-Ltm.wav -i a0-ec3-Rtm.wav -i a0-ec3-Ltr.wav -i a0-ec3-Rtr.wav filterpar="[0:a][1:a][2:a][3:a][4:a][5:a][6:a][7:a][8:a][9:a][10:a][11:a][12:a][13:a][14:a][15:a]amerge=inputs=16[a]" -map "[a]" out.wav

@sw5163
Copy link

sw5163 commented Sep 7, 2023

Is there any script I can somehow merge those 16 wav files into a single 9.1.6 PCM?
something like:
ffmpeg -i a0-ec3-L.wav -i a0-ec3-R.wav -i a0-ec3-C.wav -i a0-ec3-LFE.wav -i a0-ec3-Ls.wav -i a0-ec3-Rs.wav -i a0-ec3-Lrs.wav -i a0-ec3-Rrs.wav -i a0-ec3-Lw.wav -i a0-ec3-Rw.wav -i a0-ec3-Ltf.wav -i a0-ec3-Rtf.wav -i a0-ec3-Ltm.wav -i a0-ec3-Rtm.wav -i a0-ec3-Ltr.wav -i a0-ec3-Rtr.wav filterpar="[0:a][1:a][2:a][3:a][4:a][5:a][6:a][7:a][8:a][9:a][10:a][11:a][12:a][13:a][14:a][15:a]amerge=inputs=16[a]" -map "[a]" out.wav

Thanks!

@Sashakg
Copy link

Sashakg commented Jan 8, 2024

Plazik, can you add to the script the ability to set the “ac4dec-out-ref-level: Set output reference level” parameter as well as channels?

@hockinsk
Copy link

hockinsk commented Jan 8, 2024

Is there any script I can somehow merge those 16 wav files into a single 9.1.6 PCM?
something like:
ffmpeg -i a0-ec3-L.wav -i a0-ec3-R.wav -i a0-ec3-C.wav -i a0-ec3-LFE.wav -i a0-ec3-Ls.wav -i a0-ec3-Rs.wav -i a0-ec3-Lrs.wav -i a0-ec3-Rrs.wav -i a0-ec3-Lw.wav -i a0-ec3-Rw.wav -i a0-ec3-Ltf.wav -i a0-ec3-Rtf.wav -i a0-ec3-Ltm.wav -i a0-ec3-Rtm.wav -i a0-ec3-Ltr.wav -i a0-ec3-Rtr.wav filterpar="[0:a][1:a][2:a][3:a][4:a][5:a][6:a][7:a][8:a][9:a][10:a][11:a][12:a][13:a][14:a][15:a]amerge=inputs=16[a]" -map "[a]" out.wav

Thanks!

Music Media Helper 7 now does what this script does and has options to output multi-mono or multichannel.
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment