-
-
Save juanuys/3d144bb01d9e3ce2519660b6a17cca52 to your computer and use it in GitHub Desktop.
download hls segments from m3u8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Http Live Streaming -- fetcher/downloader | |
# A simple script that downloads .ts segments and .m3u8 playlists from a given URL, including the variant streams referenced by a master m3u8
# require: python3 | |
import os
import re
import sys
import urllib.parse
import urllib.request
# Sample master-playlist URL; in this file it is only referenced by the
# commented-out override in the __main__ block.
TEST_URL = 'https://devimages.apple.com.edgekey.net/streaming/examples/bipbop_4x3/bipbop_4x3_variant.m3u8'
def isValidUrl(url: str) -> bool:
    """Return True when url is an http(s) URL whose path ends in '.m3u8'.

    The URL is parsed first so that the scheme is compared exactly (a prefix
    test would accept e.g. 'httpfoo://') and so that the extension is taken
    from the path component only; a query string or fragment neither hides
    nor fakes the '.m3u8' suffix.

    A short reason is printed for every rejected URL.
    """
    if url == '':
        print('Invalid URL: empty url')
        return False

    try:
        parts = urllib.parse.urlsplit(url)
    except ValueError:
        # urlsplit rejects some malformed authorities (e.g. an unbalanced '[').
        print('Invalid URL: malformed url')
        return False

    # URL schemes are case-insensitive, so normalise before comparing.
    if parts.scheme.lower() not in ('http', 'https'):
        print('Invalid URL: require \'http/https\' url')
        return False

    if os.path.splitext(parts.path)[1].lower() != '.m3u8':
        print('Invalid URL: not hls source')
        return False

    return True
def readDataFromUrl(url: str, timeout: float = 30.0) -> bytes:
    """Fetch url and return the complete response body as bytes.

    Args:
        url: Address to open with urllib.request.urlopen.
        timeout: Seconds to wait on blocking network operations before
            giving up. Without a timeout a stalled server would block the
            whole download run indefinitely.

    Errors raised by urlopen (e.g. urllib.error.URLError) are not caught
    here and propagate to the caller.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read()
def writeFile(path: str, filename: str, data: bytes) -> None:
    """Write data to path/filename, creating missing parent directories.

    Args:
        path: Directory the file is written under.
        filename: File name, optionally with leading sub-directories
            (e.g. 'gear1/seg0.ts'); those are created as needed.
        data: Raw bytes to store. An existing file is overwritten.
    """
    fullPath = os.path.join(path, filename)
    parentDir = os.path.dirname(fullPath)
    # dirname is '' when the target lives in the current directory, and
    # os.makedirs('') raises FileNotFoundError, so only create real paths.
    if parentDir:
        os.makedirs(parentDir, exist_ok=True)
    with open(fullPath, 'wb') as file:
        file.write(data)
# Quoted URI attribute of a playlist tag, e.g. #EXT-X-MEDIA:...,URI="audio.m3u8"
_URI_ATTRIBUTE = re.compile(rb'[:,]URI="([^"]*)"')
_SEGMENT_EXTENSIONS = (b'.ts', b'.aac')
_PLAYLIST_EXTENSION = b'.m3u8'


def _uriFromPlaylistLine(line: bytes) -> bytes:
    """Return the URI carried by one stripped playlist line, or b'' if none."""
    if not line.startswith(b'#'):
        return line
    if line.startswith(b'#EXT'):
        found = _URI_ATTRIBUTE.search(line)
        if found:
            return found.group(1).strip()
    # Plain comments and tags without a URI attribute reference nothing.
    return b''


def _localSegmentPath(uri: str) -> str:
    """Map a playlist URI to a path that stays inside the download directory."""
    parts = urllib.parse.urlsplit(uri)
    names = [name for name in parts.path.split('/') if name not in ('', '.', '..')]
    # Absolute and root-relative URIs have no meaningful position relative
    # to the playlist, so only their file name is kept.
    if parts.scheme or parts.netloc or parts.path.startswith('/'):
        names = names[-1:]
    return os.path.join(*names) if names else ''


def parseM3U8(baseDir: str, baseUrl: str, data: bytes) -> None:
    """Download everything a playlist references, recursing into playlists.

    Args:
        baseDir: Local directory the referenced files are stored under.
        baseUrl: URL of the directory containing the playlist; relative
            URIs are resolved against it.
        data: Raw playlist content.

    URI lines and URI="..." tag attributes (e.g. EXT-X-MEDIA) are both
    considered. '.ts' / '.aac' resources are downloaded into baseDir;
    '.m3u8' resources are saved into a sub-directory named after the last
    directory of their URL and then parsed the same way. The extension is
    judged without query string or fragment. If a sub-directory cannot be
    created, a message is printed and parsing of this playlist stops.
    """
    for rawLine in data.splitlines():
        uriBytes = _uriFromPlaylistLine(rawLine.strip())
        uriPath = uriBytes.split(b'?', 1)[0].split(b'#', 1)[0]
        extension = os.path.splitext(uriPath)[1].lower()
        if extension != _PLAYLIST_EXTENSION and extension not in _SEGMENT_EXTENSIONS:
            continue

        uri = uriBytes.decode()
        # urljoin copes with relative, root-relative and absolute URIs alike;
        # the trailing '/' makes baseUrl count as a directory.
        resourceUrl = urllib.parse.urljoin(baseUrl.rstrip('/') + '/', uri)

        if extension in _SEGMENT_EXTENSIONS:
            localName = _localSegmentPath(uri)
            if not localName:
                continue
            print('downloading ', resourceUrl)
            tsData = readDataFromUrl(resourceUrl)
            writeFile(baseDir, localName, tsData)
            continue

        # Nested playlist: derive names from the URL without query/fragment,
        # but keep the full URL for the request itself.
        split = urllib.parse.urlsplit(resourceUrl)
        cleanUrl = urllib.parse.urlunsplit((split.scheme, split.netloc, split.path, '', ''))
        parentUrl, _, m3u8Name = cleanUrl.rpartition('/')
        subDir = parentUrl.rpartition('/')[2]
        if subDir in ('.', '..'):
            # Never let a playlist steer the output outside baseDir.
            subDir = ''
        binDir = os.path.join(baseDir, subDir)
        print('In master m3u8, processing ', resourceUrl)
        try:
            os.makedirs(binDir, exist_ok=True)
        except OSError as e:
            print(e, ' Create ', binDir, ' failed, exit.')
            return
        m3u8Data = readDataFromUrl(resourceUrl)
        writeFile(binDir, m3u8Name, m3u8Data)
        parseM3U8(binDir, parentUrl, m3u8Data)
def fetchData(url: str) -> bool:
    """Download the playlist at url and everything it references.

    Files are stored below the current working directory, in a directory
    named after the last directory component of the playlist URL. Names are
    derived from the URL without its query string or fragment, while the
    request itself uses url unchanged.

    Returns:
        False when the target directory cannot be created (a message is
        printed), True once the playlist has been processed.
    """
    curPath = os.path.abspath(os.curdir)
    parts = urllib.parse.urlsplit(url)
    cleanUrl = urllib.parse.urlunsplit((parts.scheme, parts.netloc, parts.path, '', ''))
    baseUrl, _, m3u8Name = cleanUrl.rpartition('/')
    binDir = os.path.join(curPath, baseUrl.rpartition('/')[2])
    print(curPath, baseUrl, binDir)
    try:
        # exist_ok avoids the check-then-create race and still fails early
        # when the name is already taken by something that is not a directory.
        os.makedirs(binDir, exist_ok=True)
    except OSError as e:
        print(e, ' Create ', binDir, ' failed, exit.')
        return False
    m3u8Data = readDataFromUrl(url)
    writeFile(binDir, m3u8Name, m3u8Data)
    parseM3U8(binDir, baseUrl, m3u8Data)
    return True
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the playlist URL.
    if len(sys.argv) != 2:
        print('Invalid arguments: require 1 parameter, but you gave ', len(sys.argv) - 1)
        sys.exit(-1)
    # For a quick manual run, TEST_URL can be substituted here.
    url = sys.argv[1]
    if not isValidUrl(url):
        sys.exit(-1)
    # Report success only when the download actually went through.
    if not fetchData(url):
        sys.exit(-1)
    print('Done')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This still misses URIs in
EXT-X-MEDIA
tags, e.g. 105560_audio_1_stereo_128000.m3u8
in