"""Dumps an Apple HLS stream, including alternative audio tracks and all subtitles.

Requirements:
- m3u8
- ffmpy (and ffmpeg)
- pycountry

The `mov_text` format is used for the WebVTT subtitle conversion, so the output
container should support `mov_text` (e.g., `mp4`).
"""
import sys

import ffmpy
import m3u8
import pycountry
def parse_main_m3u8(main_m3u8_url):
    """Load the master playlist and pick out the streams to download.

    Args:
        main_m3u8_url: Location of the master playlist, passed to ``m3u8.load``.

    Returns:
        A 3-tuple ``(video_uri, audio_media, subtitle_media)``: the result of
        ``get_main_video_stream``, ``get_audio_streams`` and
        ``get_sub_streams`` applied to the loaded playlist, in that order.
    """
    main_playlist = m3u8.load(main_m3u8_url)
    return (
        get_main_video_stream(main_playlist),
        get_audio_streams(main_playlist),
        get_sub_streams(main_playlist),
    )
def get_main_video_stream(main_playlist):
    """Return the URI of the variant playlist with the highest bandwidth.

    Args:
        main_playlist: Object exposing ``playlists``; each entry must provide
            ``stream_info.bandwidth`` and ``uri``.

    Returns:
        The ``uri`` of the highest-bandwidth variant (the first one listed
        when several tie), or ``None`` when there are no variants.
    """
    def bandwidth_of(variant):
        # A variant without a declared bandwidth ranks below every real one
        # instead of raising TypeError when compared against an int.
        bandwidth = variant.stream_info.bandwidth
        return -1 if bandwidth is None else bandwidth

    # max() keeps the first maximal element, so ties resolve to the earliest
    # variant in the playlist.
    best = max(main_playlist.playlists, key=bandwidth_of, default=None)
    return None if best is None else best.uri
def get_audio_streams(main_playlist):
    """Return the entries of ``main_playlist.media`` whose type is ``"AUDIO"``.

    The entries are returned in playlist order as a new list; the list is
    empty when the playlist declares no audio media.
    """
    return [media for media in main_playlist.media if media.type == "AUDIO"]
def get_sub_streams(main_playlist):
    """Return the entries of ``main_playlist.media`` whose type is ``"SUBTITLES"``.

    The entries are returned in playlist order as a new list; the list is
    empty when the playlist declares no subtitle media.
    """
    return [media for media in main_playlist.media if media.type == "SUBTITLES"]
def _language_alpha_3(language_tag):
    """Translate a playlist language tag into a three-letter code for ffmpeg.

    Only the primary subtag is looked up (``en-US`` -> ``en``). Returns
    ``"und"`` when the tag is missing or a two-letter code is unknown to
    pycountry, rather than failing on ``None.alpha_3``. An unrecognised
    three-letter subtag is passed through unchanged.
    """
    if not language_tag:
        return "und"
    primary = language_tag.replace("_", "-").split("-")[0].lower()
    if len(primary) not in (2, 3):
        return "und"
    try:
        if len(primary) == 2:
            language = pycountry.languages.get(alpha_2=primary)
        else:
            language = pycountry.languages.get(alpha_3=primary)
    except LookupError:
        # Some pycountry releases raise KeyError for unknown codes instead
        # of returning None.
        language = None
    if language is not None:
        return language.alpha_3
    return primary if len(primary) == 3 else "und"


def _renditions_with_unique_uri(media_list, seen_uris):
    """Return media entries that have a URI not yet in *seen_uris* (updated in place)."""
    kept = []
    for media in media_list:
        if media.uri and media.uri not in seen_uris:
            seen_uris.add(media.uri)
            kept.append(media)
    return kept


def _metadata_options(stream_type, media_list):
    """Build ffmpeg ``-metadata`` arguments (name, language) for each output stream."""
    options = []
    for idx, media in enumerate(media_list):
        target = "-metadata:s:{}:{}".format(stream_type, idx)
        if media.name:
            options += [target, "name={}".format(media.name)]
        options += [target, "language={}".format(_language_alpha_3(media.language))]
    return options


def main():
    """Dump an HLS stream with all audio and subtitle renditions into one file.

    Expects two command-line arguments: the master playlist location and the
    output file name. Prints a usage line and exits with status -1 otherwise.
    The best video variant plus every audio and subtitle rendition is handed
    to ffmpeg as a separate input; audio and video are copied, subtitles are
    converted to ``mov_text``.
    """
    if len(sys.argv) != 3:
        print("Usage: ", sys.argv[0], " (stream file name) (output_file_name)")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(-1)

    video_m3u8_url, audio_playlist_media_list, sub_playlist_media_list = parse_main_m3u8(sys.argv[1])
    if not video_m3u8_url:
        sys.exit("No video variant found in " + sys.argv[1])

    # ffmpy takes inputs as a dict keyed by path, so a repeated URI collapses
    # into a single input and a rendition without a URI cannot be an input at
    # all. Filter first so the -map and -metadata indices below match the
    # inputs ffmpeg really receives.
    seen_uris = {video_m3u8_url}
    audio_media_list = _renditions_with_unique_uri(audio_playlist_media_list, seen_uris)
    sub_media_list = _renditions_with_unique_uri(sub_playlist_media_list, seen_uris)

    # Input 0 is the video variant, followed by audio, then subtitles.
    # Relies on dict insertion order (Python 3.7+) when handed to ffmpy.
    input_uris = [video_m3u8_url]
    input_uris += [media.uri for media in audio_media_list]
    input_uris += [media.uri for media in sub_media_list]

    # Options are passed as an argument list, not a quoted string, so names
    # containing quotes or spaces need no escaping.
    output_options = []
    for input_index in range(len(input_uris)):
        output_options += ["-map", str(input_index)]
    output_options += ["-c:a", "copy", "-c:v", "copy", "-c:s", "mov_text"]
    # NOTE(review): the per-type stream indices assume the video variant
    # carries no audio/subtitle streams of its own - confirm for muxed streams.
    output_options += _metadata_options("a", audio_media_list)
    output_options += _metadata_options("s", sub_media_list)

    ffmpy.FFmpeg(
        inputs={uri: None for uri in input_uris},
        outputs={sys.argv[2]: output_options},
    ).run()
# Run the dump only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()