Skip to content

Instantly share code, notes, and snippets.

@juanuys
Forked from anonymouss/hlsFetcher.py
Created June 15, 2021 15:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save juanuys/3d144bb01d9e3ce2519660b6a17cca52 to your computer and use it in GitHub Desktop.
Save juanuys/3d144bb01d9e3ce2519660b6a17cca52 to your computer and use it in GitHub Desktop.
download hls segments from m3u8
#!/usr/bin/env python3
# Http Live Streaming -- fetcher/downloader
# A simple script that downloads the .ts segments and .m3u8 playlists reachable from a given URL,
# including the variant streams listed in a master m3u8.
# Requires: python3
import sys
import os
import urllib.request
# Example playlist URL (presumably a master/variant playlist, judging by its
# file name). It is not referenced by any active code in this file.
TEST_URL = 'https://devimages.apple.com.edgekey.net/streaming/examples/bipbop_4x3/bipbop_4x3_variant.m3u8'
def isValidUrl(url: str) -> bool:
    """Return True when *url* looks like an http(s) URL of an HLS playlist.

    Prints the reason and returns False when the URL is empty, does not start
    with ``http://`` or ``https://``, or does not end in the ``.m3u8``
    extension (compared case-insensitively).
    """
    if not url:
        print('Invalid URL: empty url')
        return False
    # Require the complete "scheme://" prefix: a bare startswith('http') test
    # would also accept strings such as 'httpfoo://...'. The prefix is
    # compared case-insensitively.
    if not url.lower().startswith(('http://', 'https://')):
        print('Invalid URL: require \'http/https\' url')
        return False
    if os.path.splitext(url)[1].lower() != '.m3u8':
        print('Invalid URL: not hls source')
        return False
    return True
def readDataFromUrl(url: str) -> bytes:
    """Open *url* and return the whole response body as bytes."""
    # The with-statement closes the response even when read() raises.
    with urllib.request.urlopen(url) as resp:
        return resp.read()
def writeFile(path: str, filename: str, data: bytes) -> None:
    """Write *data* to ``path/filename``, creating missing directories.

    *filename* may itself contain directory components; every missing parent
    directory of the joined path is created before the file is opened. An
    existing file at that path is overwritten.
    """
    fullPath = os.path.join(path, filename)
    parentDir = os.path.dirname(fullPath)
    # dirname is '' when the target has no directory component at all;
    # os.makedirs('') raises FileNotFoundError, so only create when needed.
    if parentDir:
        os.makedirs(parentDir, exist_ok=True)
    with open(fullPath, 'wb') as file:
        file.write(data)
def parseM3U8(baseDir: str, baseUrl: str, data: bytes) -> None:
    """Download everything referenced by the playlist content *data*.

    Every line is classified by its file extension (case-insensitive):

    * ``.ts`` / ``.aac`` entries are fetched from ``baseUrl + '/' + entry``
      and stored under *baseDir*, using the entry as the relative file name.
    * ``.m3u8`` entries are fetched, stored in a sub-directory of *baseDir*
      named after the last directory component of their URL, and then parsed
      recursively with that sub-directory and URL directory as the new base.

    For ``#EXT-X-MEDIA`` tag lines the value of the ``URI="..."`` attribute
    is used as the entry, so playlists that are only referenced from such a
    tag are downloaded as well. All other lines are ignored.

    Entries are appended to *baseUrl* verbatim, i.e. they are treated as
    paths relative to the playlist's directory.

    Returns None. Stops early (after printing a message) if a sub-directory
    cannot be created; download and write errors propagate to the caller.
    """
    import re

    for rawLine in data.splitlines():
        entry = rawLine.strip()

        if entry.startswith(b'#EXT-X-MEDIA:'):
            # The playlist location is carried in a quoted attribute rather
            # than on a line of its own; tags without a URI reference nothing.
            found = re.search(rb'[:,]URI="([^"]*)"', entry)
            if found is None:
                continue
            entry = found.group(1)

        extension = os.path.splitext(entry)[1].lower()

        if extension in (b'.ts', b'.aac'):
            name = entry.decode()
            tsUrl = baseUrl + '/' + name
            print('downloading ', tsUrl)
            writeFile(baseDir, name, readDataFromUrl(tsUrl))
        elif extension == b'.m3u8':
            simpleUrl = baseUrl + '/' + entry.decode()
            binDir = os.path.join(baseDir, simpleUrl.split('/')[-2])
            print('In master m3u8, processing ', simpleUrl)
            # Create-and-handle instead of check-then-create: no race between
            # the existence test and the mkdir, and an existing directory is
            # simply reused.
            try:
                os.makedirs(binDir, exist_ok=True)
            except OSError as e:
                print(e, ' Create ', binDir, ' failed, exit.')
                return
            m3u8Data = readDataFromUrl(simpleUrl)
            writeFile(binDir, os.path.basename(simpleUrl), m3u8Data)
            parseM3U8(binDir, os.path.dirname(simpleUrl), m3u8Data)
def fetchData(url: str) -> bool:
    """Download the playlist at *url* together with everything it references.

    Output goes into a directory below the current working directory that is
    named after the second-to-last ``/``-separated component of *url* (the
    directory holding the playlist). The playlist itself is saved there
    under its own file name and then handed to ``parseM3U8``.

    Returns False (after printing a message) when the output directory cannot
    be created, True otherwise. Download and write errors are not caught
    here and propagate to the caller.
    """
    curPath = os.path.abspath(os.curdir)
    baseUrl = os.path.dirname(url)
    m3u8Name = os.path.basename(url)
    binDir = os.path.join(curPath, url.split('/')[-2])
    print(curPath, baseUrl, binDir)

    # Create-and-handle instead of check-then-create: avoids the race between
    # the existence test and the mkdir, and reuses an existing directory.
    try:
        os.makedirs(binDir, exist_ok=True)
    except OSError as e:
        print(e, ' Create ', binDir, ' failed, exit.')
        return False

    m3u8Data = readDataFromUrl(url)
    writeFile(binDir, m3u8Name, m3u8Data)
    parseM3U8(binDir, baseUrl, m3u8Data)
    return True
if __name__ == "__main__":
    # Usage: <script> <m3u8-url>
    # sys.exit is used rather than the bare `exit` helper, which is only
    # injected by the `site` module and is missing when Python runs with -S.
    if len(sys.argv) != 2:
        print('Invalid arguments: require 1 parameter, but you gave ', len(sys.argv) - 1)
        sys.exit(-1)
    url = sys.argv[1]
    # For a quick manual run, TEST_URL can be substituted here:
    #   url = TEST_URL
    if not isValidUrl(url):
        sys.exit(-1)
    # Only report success when the download actually succeeded; a False
    # result must not end with 'Done' and a zero exit status.
    if not fetchData(url):
        sys.exit(-1)
    print('Done')
@juanuys
Copy link
Author

juanuys commented Jun 17, 2021

This still misses URIs in EXT-X-MEDIA tags, e.g. 105560_audio_1_stereo_128000.m3u8 in

#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio_high",NAME="english stereo",LANGUAGE="en",AUTOSELECT=YES,URI="105560_audio_1_stereo_128000.m3u8"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment