Created October 29, 2012 16:26
-
-
Save FiloSottile/3974635 to your computer and use it in GitHub Desktop.
youtube-dl archives diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -r own/FileDownloader.py his/FileDownloader.py | |
477d476 | |
< video['extractor'] = ie.IE_NAME | |
diff -r own/InfoExtractors.py his/InfoExtractors.py | |
16,17d15 | |
< import random | |
< import math | |
100,118c98 | |
< _VALID_URL = r"""^ | |
< ( | |
< (?:https?://)? # http(s):// (optional) | |
< (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/| | |
< tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains | |
< (?!view_play_list|my_playlists|artist|playlist) # ignore playlist URLs | |
< (?: # the various things that can precede the ID: | |
< (?:(?:v|embed|e)/) # v/ or embed/ or e/ | |
< |(?: # or the v= param in all its forms | |
< (?:watch(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | |
< (?:\?|\#!?) # the params delimiter ? or # or #! | |
< (?:.+&)? # any other preceding param (like /?s=tuff&v=xxxx) | |
< v= | |
< ) | |
< )? # optional -> youtube.com/xxxx is OK | |
< )? # all until now is optional -> you can pass the naked ID | |
< ([0-9A-Za-z_-]+) # here is it! the YouTube video ID | |
< (?(1).+)? # if we found the ID, everything can follow | |
< $""" | |
--- | |
> _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|tube.majestyc.net/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' | |
157,160d136 | |
< def suitable(self, url): | |
< """Receives a URL and returns True if suitable for this IE.""" | |
< return re.match(self._VALID_URL, url, re.VERBOSE) is not None | |
< | |
295c271 | |
< mobj = re.match(self._VALID_URL, url, re.VERBOSE) | |
--- | |
> mobj = re.match(self._VALID_URL, url) | |
619c595 | |
< _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' | |
--- | |
> _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' | |
640c616 | |
< video_id = mobj.group(1).split('_')[0].split('?')[0] | |
--- | |
> video_id = mobj.group(1) | |
642c618 | |
< video_extension = 'mp4' | |
--- | |
> video_extension = 'flv' | |
656c632 | |
< mobj = re.search(r'\s*var flashvars = (.*)', webpage) | |
--- | |
> mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage) | |
660,671c636,637 | |
< flashvars = urllib.unquote(mobj.group(1)) | |
< | |
< for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']: | |
< if key in flashvars: | |
< max_quality = key | |
< self._downloader.to_screen(u'[dailymotion] Using %s' % key) | |
< break | |
< else: | |
< self._downloader.trouble(u'ERROR: unable to extract video URL') | |
< return | |
< | |
< mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars) | |
--- | |
> sequence = urllib.unquote(mobj.group(1)) | |
> mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence) | |
673c639 | |
< self._downloader.trouble(u'ERROR: unable to extract video URL') | |
--- | |
> self._downloader.trouble(u'ERROR: unable to extract media URL') | |
674a641 | |
> mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '') | |
676c643 | |
< video_url = urllib.unquote(mobj.group(1)).replace('\\/', '/') | |
--- | |
> # if needed add http://www.dailymotion.com/ if relative URL | |
678c645 | |
< # TODO: support choosing qualities | |
--- | |
> video_url = mediaURL | |
686d652 | |
< video_uploader = u'NA' | |
689,696c655,657 | |
< self._downloader.trouble(u'WARNING: unable to extract uploader nickname') | |
< else: | |
< video_uploader = mobj.group(1) | |
< | |
< video_upload_date = u'NA' | |
< mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) | |
< if mobj is not None: | |
< video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) | |
--- | |
> self._downloader.trouble(u'ERROR: unable to extract uploader nickname') | |
> return | |
> video_uploader = mobj.group(1) | |
702c663 | |
< 'upload_date': video_upload_date, | |
--- | |
> 'upload_date': u'NA', | |
1513c1474 | |
< _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=.*?%s' | |
--- | |
> _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=(PL)?%s&' | |
2997,3307d2957 | |
< | |
< | |
< class YoukuIE(InfoExtractor): | |
< | |
< _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html' | |
< IE_NAME = u'Youku' | |
< | |
< def __init__(self, downloader=None): | |
< InfoExtractor.__init__(self, downloader) | |
< | |
< def report_download_webpage(self, file_id): | |
< """Report webpage download.""" | |
< self._downloader.to_screen(u'[Youku] %s: Downloading webpage' % file_id) | |
< | |
< def report_extraction(self, file_id): | |
< """Report information extraction.""" | |
< self._downloader.to_screen(u'[Youku] %s: Extracting information' % file_id) | |
< | |
< def _gen_sid(self): | |
< nowTime = int(time.time() * 1000) | |
< random1 = random.randint(1000,1998) | |
< random2 = random.randint(1000,9999) | |
< | |
< return "%d%d%d" %(nowTime,random1,random2) | |
< | |
< def _get_file_ID_mix_string(self, seed): | |
< mixed = [] | |
< source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890") | |
< seed = float(seed) | |
< for i in range(len(source)): | |
< seed = (seed * 211 + 30031 ) % 65536 | |
< index = math.floor(seed / 65536 * len(source) ) | |
< mixed.append(source[int(index)]) | |
< source.remove(source[int(index)]) | |
< #return ''.join(mixed) | |
< return mixed | |
< | |
< def _get_file_id(self, fileId, seed): | |
< mixed = self._get_file_ID_mix_string(seed) | |
< ids = fileId.split('*') | |
< realId = [] | |
< for ch in ids: | |
< if ch: | |
< realId.append(mixed[int(ch)]) | |
< return ''.join(realId) | |
< | |
< def _real_extract(self, url): | |
< mobj = re.match(self._VALID_URL, url) | |
< if mobj is None: | |
< self._downloader.trouble(u'ERROR: invalid URL: %s' % url) | |
< return | |
< video_id = mobj.group('ID') | |
< | |
< info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id | |
< | |
< request = urllib2.Request(info_url, None, std_headers) | |
< try: | |
< self.report_download_webpage(video_id) | |
< jsondata = urllib2.urlopen(request).read() | |
< except (urllib2.URLError, httplib.HTTPException, socket.error) as err: | |
< self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) | |
< return | |
< | |
< self.report_extraction(video_id) | |
< try: | |
< config = json.loads(jsondata) | |
< | |
< video_title = config['data'][0]['title'] | |
< seed = config['data'][0]['seed'] | |
< | |
< format = self._downloader.params.get('format', None) | |
< supported_format = config['data'][0]['streamfileids'].keys() | |
< | |
< if format is None or format == 'best': | |
< if 'hd2' in supported_format: | |
< format = 'hd2' | |
< else: | |
< format = 'flv' | |
< ext = u'flv' | |
< elif format == 'worst': | |
< format = 'mp4' | |
< ext = u'mp4' | |
< else: | |
< format = 'flv' | |
< ext = u'flv' | |
< | |
< | |
< fileid = config['data'][0]['streamfileids'][format] | |
< seg_number = len(config['data'][0]['segs'][format]) | |
< | |
< keys=[] | |
< for i in xrange(seg_number): | |
< keys.append(config['data'][0]['segs'][format][i]['k']) | |
< | |
< #TODO check error | |
< #youku only could be viewed from mainland china | |
< except: | |
< self._downloader.trouble(u'ERROR: unable to extract info section') | |
< return | |
< | |
< files_info=[] | |
< sid = self._gen_sid() | |
< fileid = self._get_file_id(fileid, seed) | |
< | |
< #column 8,9 of fileid represent the segment number | |
< #fileid[7:9] should be changed | |
< for index, key in enumerate(keys): | |
< | |
< temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) | |
< download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) | |
< | |
< info = { | |
< 'id': '%s_part%02d' % (video_id, index), | |
< 'url': download_url, | |
< 'uploader': None, | |
< 'title': video_title, | |
< 'ext': ext, | |
< 'format': u'NA' | |
< } | |
< files_info.append(info) | |
< | |
< return files_info | |
< | |
< | |
< class XNXXIE(InfoExtractor): | |
< """Information extractor for xnxx.com""" | |
< | |
< _VALID_URL = r'^http://video\.xnxx\.com/video([0-9]+)/(.*)' | |
< IE_NAME = u'xnxx' | |
< VIDEO_URL_RE = r'flv_url=(.*?)&' | |
< VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' | |
< VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&' | |
< | |
< def report_webpage(self, video_id): | |
< """Report information extraction""" | |
< self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) | |
< | |
< def report_extraction(self, video_id): | |
< """Report information extraction""" | |
< self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) | |
< | |
< def _real_extract(self, url): | |
< mobj = re.match(self._VALID_URL, url) | |
< if mobj is None: | |
< self._downloader.trouble(u'ERROR: invalid URL: %s' % url) | |
< return | |
< video_id = mobj.group(1).decode('utf-8') | |
< | |
< self.report_webpage(video_id) | |
< | |
< # Get webpage content | |
< try: | |
< webpage = urllib2.urlopen(url).read() | |
< except (urllib2.URLError, httplib.HTTPException, socket.error), err: | |
< self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err) | |
< return | |
< | |
< result = re.search(self.VIDEO_URL_RE, webpage) | |
< if result is None: | |
< self._downloader.trouble(u'ERROR: unable to extract video url') | |
< return | |
< video_url = urllib.unquote(result.group(1).decode('utf-8')) | |
< | |
< result = re.search(self.VIDEO_TITLE_RE, webpage) | |
< if result is None: | |
< self._downloader.trouble(u'ERROR: unable to extract video title') | |
< return | |
< video_title = result.group(1).decode('utf-8') | |
< | |
< result = re.search(self.VIDEO_THUMB_RE, webpage) | |
< if result is None: | |
< self._downloader.trouble(u'ERROR: unable to extract video thumbnail') | |
< return | |
< video_thumbnail = result.group(1).decode('utf-8') | |
< | |
< info = {'id': video_id, | |
< 'url': video_url, | |
< 'uploader': None, | |
< 'upload_date': None, | |
< 'title': video_title, | |
< 'ext': 'flv', | |
< 'format': 'flv', | |
< 'thumbnail': video_thumbnail, | |
< 'description': None, | |
< 'player_url': None} | |
< | |
< return [info] | |
< | |
< | |
< class GooglePlusIE(InfoExtractor): | |
< """Information extractor for plus.google.com.""" | |
< | |
< _VALID_URL = r'(?:https://)?plus\.google\.com/(?:\w+/)*?(\d+)/posts/(\w+)' | |
< IE_NAME = u'plus.google' | |
< | |
< def __init__(self, downloader=None): | |
< InfoExtractor.__init__(self, downloader) | |
< | |
< def report_extract_entry(self, url): | |
< """Report downloading extry""" | |
< self._downloader.to_screen(u'[plus.google] Downloading entry: %s' % url.decode('utf-8')) | |
< | |
< def report_date(self, upload_date): | |
< """Report downloading extry""" | |
< self._downloader.to_screen(u'[plus.google] Entry date: %s' % upload_date) | |
< | |
< def report_uploader(self, uploader): | |
< """Report downloading extry""" | |
< self._downloader.to_screen(u'[plus.google] Uploader: %s' % uploader.decode('utf-8')) | |
< | |
< def report_title(self, video_title): | |
< """Report downloading extry""" | |
< self._downloader.to_screen(u'[plus.google] Title: %s' % video_title.decode('utf-8')) | |
< | |
< def report_extract_vid_page(self, video_page): | |
< """Report information extraction.""" | |
< self._downloader.to_screen(u'[plus.google] Extracting video page: %s' % video_page.decode('utf-8')) | |
< | |
< def _real_extract(self, url): | |
< # Extract id from URL | |
< mobj = re.match(self._VALID_URL, url) | |
< if mobj is None: | |
< self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) | |
< return | |
< | |
< post_url = mobj.group(0) | |
< video_id = mobj.group(2) | |
< | |
< video_extension = 'flv' | |
< | |
< # Step 1, Retrieve post webpage to extract further information | |
< self.report_extract_entry(post_url) | |
< request = urllib2.Request(post_url) | |
< try: | |
< webpage = urllib2.urlopen(request).read() | |
< except (urllib2.URLError, httplib.HTTPException, socket.error), err: | |
< self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % str(err)) | |
< return | |
< | |
< # Extract update date | |
< upload_date = u'NA' | |
< pattern = 'title="Timestamp">(.*?)</a>' | |
< mobj = re.search(pattern, webpage) | |
< if mobj: | |
< upload_date = mobj.group(1) | |
< # Convert timestring to a format suitable for filename | |
< upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d") | |
< upload_date = upload_date.strftime('%Y%m%d') | |
< self.report_date(upload_date) | |
< | |
< # Extract uploader | |
< uploader = u'NA' | |
< pattern = r'rel\="author".*?>(.*?)</a>' | |
< mobj = re.search(pattern, webpage) | |
< if mobj: | |
< uploader = mobj.group(1) | |
< self.report_uploader(uploader) | |
< | |
< # Extract title | |
< # Get the first line for title | |
< video_title = u'NA' | |
< pattern = r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]' | |
< mobj = re.search(pattern, webpage) | |
< if mobj: | |
< video_title = mobj.group(1) | |
< self.report_title(video_title) | |
< | |
< # Step 2, Stimulate clicking the image box to launch video | |
< pattern = '"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]' | |
< mobj = re.search(pattern, webpage) | |
< if mobj is None: | |
< self._downloader.trouble(u'ERROR: unable to extract video page URL') | |
< | |
< video_page = mobj.group(1) | |
< request = urllib2.Request(video_page) | |
< try: | |
< webpage = urllib2.urlopen(request).read() | |
< except (urllib2.URLError, httplib.HTTPException, socket.error), err: | |
< self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) | |
< return | |
< self.report_extract_vid_page(video_page) | |
< | |
< | |
< # Extract video links on video page | |
< """Extract video links of all sizes""" | |
< pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"' | |
< mobj = re.findall(pattern, webpage) | |
< if len(mobj) == 0: | |
< self._downloader.trouble(u'ERROR: unable to extract video links') | |
< | |
< # Sort in resolution | |
< links = sorted(mobj) | |
< | |
< # Choose the lowest of the sort, i.e. highest resolution | |
< video_url = links[-1] | |
< # Only get the url. The resolution part in the tuple has no use anymore | |
< video_url = video_url[-1] | |
< # Treat escaped \u0026 style hex | |
< video_url = unicode(video_url, "unicode_escape") | |
< | |
< | |
< return [{ | |
< 'id': video_id.decode('utf-8'), | |
< 'url': video_url, | |
< 'uploader': uploader.decode('utf-8'), | |
< 'upload_date': upload_date.decode('utf-8'), | |
< 'title': video_title.decode('utf-8'), | |
< 'ext': video_extension.decode('utf-8'), | |
< 'format': u'NA', | |
< 'player_url': None, | |
< }] | |
diff -r own/PostProcessor.py his/PostProcessor.py | |
74,81c74,80 | |
< def executable(exe): | |
< try: | |
< subprocess.check_output([exe, '-version']) | |
< except OSError: | |
< return False | |
< return exe | |
< programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] | |
< return dict((program, executable(program)) for program in programs) | |
--- | |
> available = {'avprobe' : False, 'avconv' : False, 'ffmpeg' : False, 'ffprobe' : False} | |
> for path in os.environ["PATH"].split(os.pathsep): | |
> for program in available.keys(): | |
> exe_file = os.path.join(path, program) | |
> if os.path.isfile(exe_file) and os.access(exe_file, os.X_OK): | |
> available[program] = exe_file | |
> return available | |
146,149c145 | |
< if int(self._preferredquality) < 10: | |
< more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] | |
< else: | |
< more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] | |
--- | |
> more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] | |
156,159c152 | |
< if int(self._preferredquality) < 10: | |
< more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] | |
< else: | |
< more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] | |
--- | |
> more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] | |
diff -r own/__init__.py his/__init__.py | |
22c22 | |
< __version__ = '2012.10.09' | |
--- | |
> __version__ = '2012.09.27' | |
189c189 | |
< dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) | |
--- | |
> dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) | |
193,194d192 | |
< general.add_option('--user-agent', | |
< dest='user_agent', help='specify a custom user agent', metavar='UA') | |
200c198 | |
< dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1) | |
--- | |
> dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) | |
272c270 | |
< dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') | |
--- | |
> dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.') | |
301,302c299,300 | |
< postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', | |
< help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') | |
--- | |
> postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', | |
> help='ffmpeg/avconv audio bitrate specification, 128k by default') | |
356,358d353 | |
< YoukuIE(), | |
< XNXXIE(), | |
< GooglePlusIE(), | |
376,378d370 | |
< # Set user agent | |
< if opts.user_agent is not None: | |
< std_headers['User-Agent'] = opts.user_agent | |
455,458d446 | |
< if opts.audioquality: | |
< opts.audioquality = opts.audioquality.strip('k').strip('K') | |
< if not opts.audioquality.isdigit(): | |
< parser.error(u'invalid audio quality specified') | |
diff -r own/utils.py his/utils.py | |
22c22 | |
< 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', | |
--- | |
> 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.