Skip to content

Instantly share code, notes, and snippets.

@christophlsa
Last active May 25, 2018 06:42
Show Gist options
  • Save christophlsa/5736722 to your computer and use it in GitHub Desktop.
Save christophlsa/5736722 to your computer and use it in GitHub Desktop.
Ausführbefehl und encoding
#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib.request, re, argparse, math, sys
import xml.etree.ElementTree as ET
from operator import itemgetter
# Command-line interface: one or more page URLs are required; -d/--download
# is an optional boolean switch. Parsing happens at module level so that the
# resulting ``args`` namespace is available to the code further down.
parser = argparse.ArgumentParser(
    description='Extrahiert die URLs der Dateien eines Videos aus der MDR Mediathek',
)
parser.add_argument(
    'urls',
    nargs='+',
    help='Eine Liste von URLs zu den Videos in der MDR Mediathek',
)
parser.add_argument(
    '-d',
    '--download',
    action='store_true',
    help='größtes Video herunterladen',
)
args = parser.parse_args()
class MDRVideoFetcher(object):
    """Locate the media files that belong to one MDR Mediathek page.

    Constructing an instance performs two HTTP requests: the page itself is
    fetched and searched for a ``dataURL:'...xml'`` snippet, then the XML
    descriptor that snippet points to is fetched and parsed.

    Attributes:
        MDRURL: The page URL handed to the constructor.
        XMLURL: Absolute URL of the XML descriptor, or ``None`` when the page
            did not contain a ``dataURL`` snippet.
        Title: Text of the descriptor's ``<title>`` element ('' if absent).
        Discription: Text of ``<teaserText>`` ('' if absent).  The spelling
            is kept as-is because it is part of the public attribute set.
        Duration: Text of ``<duration>`` ('' if absent).
        Type: Text of ``<type>`` ('' if absent); assets are only collected
            for ``'audio'`` and ``'video'``.
        Assets: List of dicts, best first.  Every dict has ``audio_bitrate``
            and ``url``; video assets additionally have ``width``,
            ``height`` and ``video_bitrate``.  Video assets are ordered by
            descending width, audio assets by descending audio bitrate.
    """

    # Prefix that is put in front of the path captured from the page.
    _BASE_URL = "http://www.mdr.de"
    # Raw string avoids the invalid "\." escape warning; [^']+ keeps the match
    # inside the quoted value even when the whole page is a single line.
    _DATA_URL_RE = re.compile(r"dataURL:'([^']+\.xml)")
    # Number of bytes read per iteration while downloading.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self, mdrurl):
        """Fetch the page at *mdrurl* and parse its XML descriptor.

        Raises:
            ValueError: If the page carries no ``dataURL`` snippet.
            urllib.error.URLError: If one of the two requests fails.
        """
        self.MDRURL = mdrurl
        self.fetchXMLURL()
        self.fetchInfosFromXML()

    def fetchXMLURL(self):
        """Download the page and store the descriptor URL in ``self.XMLURL``.

        ``self.XMLURL`` is set to ``None`` when no ``dataURL`` snippet is
        found in the page.
        """
        with urllib.request.urlopen(self.MDRURL) as response:
            # Only the ASCII snippet matters, so undecodable bytes elsewhere
            # in the page must not abort the lookup.
            html = response.read().decode("utf8", errors="replace")
        match = self._DATA_URL_RE.search(html)
        self.XMLURL = (self._BASE_URL + match.group(1)) if match else None

    def fetchInfosFromXML(self):
        """Download ``self.XMLURL`` and fill the metadata and ``Assets``.

        Assets lacking a required field (or carrying a non-numeric value in
        a numeric field) are skipped.

        Raises:
            ValueError: If ``self.XMLURL`` is ``None``.
        """
        if self.XMLURL is None:
            raise ValueError(
                'no dataURL found in page, cannot load video description')
        with urllib.request.urlopen(self.XMLURL) as response:
            root = ET.fromstring(response.read())

        self.Title = self._childText(root, 'title')
        self.Discription = self._childText(root, 'teaserText')
        self.Duration = self._childText(root, 'duration')
        self.Type = self._childText(root, 'type')

        self.Assets = []
        if self.Type not in ('audio', 'video'):
            return
        for node in root.iter('asset'):
            asset = self._parseAsset(node)
            if asset is not None:
                self.Assets.append(asset)
        sort_key = 'width' if self.Type == 'video' else 'audio_bitrate'
        self.Assets.sort(key=itemgetter(sort_key), reverse=True)

    def _parseAsset(self, node):
        """Turn one ``<asset>`` element into a dict, or ``None`` to skip it."""
        asset = {}
        if self.Type == 'video':
            video_fields = (
                ('width', 'frameWidth'),
                ('height', 'frameHeight'),
                ('video_bitrate', 'bitrateVideo'),
            )
            for key, tag in video_fields:
                value = self._childInt(node, tag)
                if value is None:
                    return None
                asset[key] = value
        audio_bitrate = self._childInt(node, 'bitrateAudio')
        if audio_bitrate is None:
            return None
        asset['audio_bitrate'] = audio_bitrate
        url = self._childText(node, 'progressiveDownloadUrl')
        if not url:
            return None
        asset['url'] = url
        return asset

    @staticmethod
    def _childText(node, tag):
        """Return the text of direct child *tag*, '' if missing or empty."""
        child = node.find(tag)
        if child is None or child.text is None:
            return ''
        return child.text

    @staticmethod
    def _childInt(node, tag):
        """Return direct child *tag* as int, ``None`` if missing or invalid."""
        child = node.find(tag)
        if child is None or child.text is None:
            return None
        try:
            return int(child.text)
        except ValueError:
            return None

    @staticmethod
    def calcAspectRatio(width, height):
        """Return the aspect ratio of *width* x *height* as a 2-tuple.

        The height is first expressed in units of ``width / 16``; when that
        value floors to 9 or 10 the result is ``(16, <rounded value>)``.
        Otherwise the height is expressed in units of ``width / 4`` and
        ``(4, <rounded value>)`` is returned.

        Raises:
            ZeroDivisionError: If *width* is 0.
        """
        per_sixteen = height / (width / 16)
        if math.floor(per_sixteen) in (9, 10):
            return (16, round(per_sixteen))
        per_four = height / (width / 4)
        return (4, round(per_four))

    def printInfos(self):
        """Print title, description and duration to stdout."""
        print('Titel: ' + self.Title)
        print('Beschreibung: ' + self.Discription)
        print('Dauer: ' + self.Duration)

    def printAssets(self):
        """Print one line per asset: size, aspect ratio and URL for video,
        audio bitrate and URL for audio."""
        for asset in self.Assets:
            if self.Type == 'video':
                print('{}x{} {}: {}'.format(
                    asset['width'], asset['height'],
                    MDRVideoFetcher.calcAspectRatio(
                        int(asset['width']), int(asset['height'])),
                    asset['url']))
            elif self.Type == 'audio':
                print('{}: {}'.format(asset['audio_bitrate'], asset['url']))

    def download(self):
        """Download the first (best) asset into the current directory.

        The file is named after the last path segment of the asset URL and
        a progress indicator is written to stdout while data arrives.

        Raises:
            IndexError: If there is no asset to download.
        """
        from urllib.parse import urlsplit

        if not self.Assets:
            raise IndexError('no downloadable assets available')
        video = self.Assets[0]
        # Use only the URL path so a query string does not end up in the
        # file name; fall back to a fixed name when the path ends in '/'.
        filename = urlsplit(video['url']).path.split('/')[-1] or 'download'

        # Open the connection first: a failed request must not leave an
        # empty file behind.
        with urllib.request.urlopen(video['url']) as response:
            try:
                file_size = int(response.headers.get('Content-Length') or 0)
            except ValueError:
                file_size = 0
            file_size_dl = 0
            with open(filename, 'wb') as handle:
                while True:
                    buf = response.read(self._CHUNK_SIZE)
                    if not buf:
                        break
                    handle.write(buf)
                    file_size_dl += len(buf)
                    self._printProgress(file_size_dl, file_size)
        print('\ndone')

    @staticmethod
    def _printProgress(done, total):
        """Redraw the progress line; *total* <= 0 means the size is unknown."""
        if total > 0:
            # Clamp so a wrong Content-Length cannot overflow the bar.
            status = min(done * 100 / total, 100)
            line = "\r[{0: <100}] {1:3.0f}%".format('#' * int(status), status)
        else:
            line = "\r{} bytes".format(done)
        sys.stdout.write(line)
        sys.stdout.flush()
# Handle every URL given on the command line: show the metadata, then either
# download the first asset or list all assets, depending on --download.
for url in args.urls:
    mvf = MDRVideoFetcher(url)
    mvf.printInfos()
    print()
    action = mvf.download if args.download else mvf.printAssets
    action()
    # Blank line separates the output of consecutive URLs.
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment