Last active
January 31, 2016 16:22
-
-
Save saxicek/753008560a978a052e3e to your computer and use it in GitHub Desktop.
iVysilani RTMP -> HLS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
#
# Partially inspired by ctstream by petr_p
# Plugin metadata.
# NOTE(review): presumably read by the hosting downloader to label this engine
# and to decide which URLs it handles - confirm against the caller.
__author__ = "Jakub Lužný"
__desc__ = "ČT (iVysílání)"
# Matches pages under /porady/ or /ivysilani/ on www.ceskatelevize.cz.
__url__ = r"https?://www\.ceskatelevize\.cz/(porady|ivysilani)/.+"
import http.client
import http.cookiejar
import json
import logging
import os.path
import re
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ElementTree
from urllib.parse import urlparse, unquote
# Module-wide logger; getLogger() without a name returns the root logger.
log = logging.getLogger()
# Short alias. urllib.request.urlopen() sends requests through whichever
# opener was last registered with urllib.request.install_opener().
urlopen = urllib.request.urlopen
def flatten(obj, prefix=''):
    """Flatten nested dicts/lists into a list of ``(path, value)`` pairs.

    Each path is *prefix* followed by one ``[key]`` segment per dict key or
    ``[index]`` segment per list position that leads to the value, e.g.
    ``{'a': [1, 2]}`` becomes ``[('[a][0]', 1), ('[a][1]', 2)]``.

    Args:
        obj: A dict, a list, or any other value (treated as a leaf).
            Subclasses of dict/list (e.g. ``OrderedDict``) are descended
            into as well.
        prefix: Path text accumulated so far.

    Returns:
        A list of ``(path, leaf_value)`` tuples in traversal order. Empty
        containers contribute no entries.
    """
    if isinstance(obj, dict):
        children = obj.items()
    elif isinstance(obj, list):
        children = enumerate(obj)
    else:
        # Leaf: the path built so far identifies this value.
        return [(prefix, obj)]

    out = []
    for key, value in children:
        out.extend(flatten(value, '{}[{}]'.format(prefix, key)))
    return out
def srt_time(time):
    """Format a millisecond offset as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        time: Offset in milliseconds, as an int or anything ``int()``
            accepts (e.g. a string of digits).

    Returns:
        The timestamp string; hours are zero-padded to at least two digits
        and are not wrapped at 24.
    """
    # Pure integer arithmetic: no float round-trip, and no local named
    # ``min`` shadowing the builtin.
    total_seconds, msec = divmod(int(time), 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{:02}:{:02}:{:02},{:03}".format(hours, minutes, seconds, msec)
def txt_to_srt(txt):
    """Convert plain-text subtitles into SRT text.

    The input is scanned for cues of the form::

        <index>; <start_ms> <end_ms>
        <one or more lines of text>
        <blank line>

    Args:
        txt: The whole subtitle document as a string.

    Returns:
        The cues rendered as SRT (index line, ``start --> end`` line built
        with ``srt_time``, the text, and a blank line), concatenated. An
        empty string when no cue is found.
    """
    # Raw string avoids invalid-escape warnings for \s and \d. The trailing
    # alternative also accepts a final cue that ends at end-of-input without
    # a closing blank line, which would otherwise be dropped.
    cues = re.findall(
        r'\s*(\d+); (\d+) (\d+)\n(.+?)(?:\n\n|\n?\Z)', txt, re.DOTALL)
    return ''.join(
        "{}\n{} --> {}\n{}\n\n".format(
            index, srt_time(start), srt_time(end), text)
        for index, start, end, text in cues
    )
class CtEngine:
    """Engine for Česká televize (iVysílání) programme pages.

    Constructing an instance does all the network work: it downloads the
    programme page, asks the site's playlist endpoint for the client
    playlist, follows it to the HLS master playlist and picks one variant
    stream from it. The chosen stream URL is printed together with a ready
    ``ffmpeg`` command line and stored in ``self.videos``.

    Attributes:
        jar: Cookie jar shared by all requests made through ``opener``.
        opener: urllib opener installed globally by ``__init__``.
        b_page: Raw bytes of the programme page.
        videos: List of dicts describing available streams. Each dict has
            ``'label'`` and ``'src'`` keys (read by the other methods) plus
            a legacy ``'720p'`` key holding the same URL.
        playlist, movie, subtitles: State of the older RTMP/SMIL path.
            ``__init__`` leaves them as ``None``; ``getMovie`` fills
            ``movie`` and ``subtitles`` from ``playlist``.
    """

    _PLAYLIST_API = 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist'
    _BROWSER_UA = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36')
    # Position of the variant that gets labelled "720p" in the master playlist.
    # NOTE(review): the ordering of variants is assumed, not verified here.
    _INDEX_720P = 3

    def __init__(self, url):
        """Resolve *url* (a programme page) to an HLS stream URL.

        Raises:
            ValueError: If the page contains no playlist request or the
                master playlist does not list enough variants.
        """
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        self.jar = http.cookiejar.CookieJar()
        self.opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(self.jar))
        self.opener.addheaders = [
            ('User-agent', 'Mozilla/5.0'),
            ('x-addr', '127.0.0.1'),
            ('Referer', url),
        ]
        # Installed globally so the module-level ``urlopen`` alias uses it.
        urllib.request.install_opener(self.opener)

        # The RTMP/SMIL path is not run here; initialise its state so that
        # qualities()/download() fail cleanly instead of with AttributeError.
        self.playlist = None
        self.movie = None
        self.subtitles = None

        self.b_page = urlopen(url).read()

        playlist_url = self._request_playlist_url(url)
        m3u_text = self._fetch_master_playlist(playlist_url)

        stream_urls = self._find_stream_urls(m3u_text)
        if len(stream_urls) <= self._INDEX_720P:
            raise ValueError('Není k dispozici zadaná kvalita videa.')
        stream_url = stream_urls[self._INDEX_720P]

        print('720p stream url=' + stream_url)
        print('ffmpeg -i ' + stream_url + ' -c copy ceske.stoleti.S01E0.mkv')
        # 'label'/'src' are what the other methods read; the '720p' key is
        # kept so code relying on the earlier dict shape keeps working.
        self.videos = [{'720p': stream_url, 'label': '720p', 'src': stream_url}]

    def _request_playlist_url(self, url):
        """Return the client-playlist URL announced for the page at *url*."""
        found = re.findall(rb"getPlaylistUrl\((.+?]), request", self.b_page)
        if not found:
            raise ValueError('Na stránce nebyl nalezen playlist videa.')
        item = json.loads(found[0].decode('utf-8'))[0]

        form = {
            'playlist[0][type]': item['type'],
            'playlist[0][id]': item['id'],
            'requestUrl': urlparse(url).path,
            'requestSource': 'iVysilani',
        }
        request = urllib.request.Request(
            self._PLAYLIST_API,
            urllib.parse.urlencode(form).encode('utf-8'),
            {"Content-type": "application/x-www-form-urlencoded"},
        )
        return json.loads(urlopen(request).read().decode('utf-8'))['url']

    def _fetch_master_playlist(self, playlist_url):
        """Download the client config and return the master playlist text."""
        config_headers = {
            'Connection': 'keep-alive',
            'User-Agent': self._BROWSER_UA,
            'Accept-Encoding': 'identity',
            'Accept-Language': 'en-US,en;q=0.8,cs;q=0.6',
        }
        _, raw_config = self._http_get(playlist_url, config_headers)
        config = json.loads(raw_config.decode())
        m3u_url = config["playlist"][0]["streamUrls"]["main"]

        m3u_headers = {
            'Connection': 'close',
            'User-Agent': self._BROWSER_UA,
        }
        location, body = self._http_get(m3u_url, m3u_headers)
        if location:
            # The first request answers with a redirect; fetch its target.
            # urljoin also copes with a relative Location value.
            target = urllib.parse.urljoin(m3u_url, location)
            _, body = self._http_get(target, m3u_headers)
        return body.decode()

    @staticmethod
    def _http_get(url, headers):
        """GET *url* without following redirects.

        Returns:
            ``(location, body)``: the ``Location`` response header (or
            ``None``) and the response body as bytes.
        """
        parts = urlparse(url)
        target = urllib.parse.urlunparse(
            ('', '', parts.path, parts.params, parts.query, parts.fragment))
        if parts.scheme == 'https':
            conn = http.client.HTTPSConnection(parts.netloc)
        else:
            conn = http.client.HTTPConnection(parts.netloc)
        try:
            conn.request('GET', target, '', headers)
            response = conn.getresponse()
            return response.getheader('Location'), response.read()
        finally:
            # Close the socket even when the request or read fails.
            conn.close()

    @staticmethod
    def _find_stream_urls(m3u_text):
        """Return the lines of *m3u_text* that are absolute ``.m3u8`` URLs."""
        return re.findall(r"^https?://.*\.m3u8$", m3u_text, re.MULTILINE)

    def getMovie(self):
        """Parse ``self.playlist`` (SMIL XML) into ``movie`` and ``subtitles``.

        ``movie`` becomes the first ``switchItem`` whose id contains neither
        ``'AD'`` nor ``'BO'`` (``None`` when there is none); ``subtitles``
        becomes the text of the ``SubtitlesURL`` element of the playlist
        item with the same id, or ``None``.
        """
        xml = ElementTree.fromstring(self.playlist)
        self.movie = None
        self.subtitles = None

        for item in xml.findall('smilRoot/body/switchItem'):
            item_id = item.get('id')
            if not ('AD' in item_id or 'BO' in item_id):
                self.movie = item
                break
        if self.movie is None:
            return

        for meta in xml.findall('metaDataRoot/Playlist/PlaylistItem'):
            if meta.get('id') == self.movie.get('id'):
                sub = meta.find('SubtitlesURL')
                if sub is not None:
                    self.subtitles = sub.text

    def qualities(self):
        """Return ``(id, title)`` pairs for each video, plus subtitles if any."""
        found = [(v.get('label'), v.get('label')) for v in self.videos]
        if self.subtitles is not None:
            found.append(('srt', 'Titulky'))
        return found

    def movies(self):
        """Return ``[('0', title)]`` with the title taken from the page.

        The title is the text of ``<title>`` up to the first " —"
        separator, written either as the character itself or as the
        ``&mdash;`` entity.

        Raises:
            IndexError: If the page has no such title.
        """
        # Match on decoded text: a bytes literal cannot contain the dash.
        page = self.b_page.decode('utf-8', 'replace')
        title = re.findall(r'<title>(.+?) (?:—|&mdash;)', page)[0]
        return [('0', title)]

    def get_video(self, quality):
        """Return the entry of ``self.videos`` whose label equals *quality*.

        Raises:
            ValueError: If no entry has that label.
        """
        for video in self.videos:
            if video.get('label') == quality:
                log.info('Vybraná kvalita: {}'.format(quality))
                return video
        raise ValueError('Není k dispozici zadaná kvalita videa.')

    def download(self, quality, movie):
        """Describe how to download the requested item.

        Args:
            quality: ``'srt'`` for subtitles, a video label, or a falsy
                value to take the first video.
            movie: Unused by this method.

        Returns:
            For ``'srt'`` the result of ``download_srt``; otherwise a tuple
            ``('rtmp', filename, params)`` with the rtmpdump parameters.

        Raises:
            ValueError: If the quality is unknown, or if no RTMP metadata
                is loaded (``self.movie`` is ``None``).
        """
        if quality == 'srt':
            return self.download_srt()

        if quality:
            video = self.get_video(quality)
        else:
            video = self.videos[0]
            log.info('Automaticky vybraná kvalita: {}'.format(video.get('label')))

        if self.movie is None:
            # Only getMovie() provides the RTMP base URL needed below.
            raise ValueError('RTMP stream není k dispozici.')

        base = self.movie.get('base')
        src = video.get('src')
        filename = os.path.basename(src)[:-3] + 'flv'
        parsed = urlparse(base)
        app = parsed.path[1:] + '?' + parsed.query
        # rtmpdump --live because of restarts - see
        # http://www.abclinuxu.cz/blog/pb/2011/5/televize-9-ctstream-3#18
        return ('rtmp', filename, {'url': base, 'playpath': src, 'app': app, 'rtmpdump_args': '--live'})

    def download_srt(self):
        """Download the subtitles and return them converted to SRT.

        Returns:
            ``('text', 'subtitles.srt', data)`` where *data* is the SRT text
            encoded as cp1250; characters that cp1250 cannot represent are
            replaced instead of aborting the download.

        Raises:
            ValueError: If no subtitles URL is known.
        """
        if self.subtitles is None:
            raise ValueError('Titulky nejsou k dispozici.')
        txt = urllib.request.urlopen(self.subtitles).read().decode('utf8')
        srt = txt_to_srt(txt)
        return ('text', 'subtitles.srt', srt.encode('cp1250', 'replace'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python tv-dl http://www.ceskatelevize.cz/porady/10362011008-ceske-stoleti/21251212024-kulka-pro-heydricha-1941/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment