Created
February 1, 2021 23:35
-
-
Save Lesmiscore/4053d35cd3a6160021a5fe70cc92e84c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py | |
index b254ceced..074397dad 100644 | |
--- a/youtube_dl/extractor/youtube.py | |
+++ b/youtube_dl/extractor/youtube.py | |
@@ -13,7 +13,6 @@ import traceback | |
from .common import InfoExtractor, SearchInfoExtractor | |
from ..jsinterp import JSInterpreter | |
-from ..swfinterp import SWFInterpreter | |
from ..compat import ( | |
compat_chr, | |
compat_HTTPError, | |
@@ -280,7 +279,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | |
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' | |
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' | |
- _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)' | |
+ _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|if\s*\(window\.ytcsi\)|\n)' | |
def _call_api(self, ep, query, video_id): | |
data = self._DEFAULT_API_DATA.copy() | |
@@ -401,6 +400,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
youtu\.be| # just youtu.be/xxxx | |
vid\.plus| # or vid.plus/xxxx | |
zwearz\.com/watch| # or zwearz.com/watch/xxxx | |
+ i\.ytimg\.com/vi| # or i.ytimg.com/vi/xxx | |
+ y2u\.be| # y2u.be | |
)/ | |
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= | |
) | |
@@ -1161,10 +1162,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
"""Report extracted video URL.""" | |
self.to_screen('%s: Format %s not available' % (video_id, format)) | |
- def report_rtmp_download(self): | |
- """Indicate the download will use the RTMP protocol.""" | |
- self.to_screen('RTMP download detected') | |
- | |
def _signature_cache_id(self, example_sig): | |
""" Return a string representation of a signature """ | |
return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) | |
@@ -1202,13 +1199,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
note=download_note, | |
errnote='Download of %s failed' % player_url) | |
res = self._parse_sig_js(code) | |
- elif player_type == 'swf': | |
- urlh = self._request_webpage( | |
- player_url, video_id, | |
- note=download_note, | |
- errnote='Download of %s failed' % player_url) | |
- code = urlh.read() | |
- res = self._parse_sig_swf(code) | |
else: | |
assert False, 'Invalid player type %r' % player_type | |
@@ -1228,8 +1218,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
return 's[%s%s%s]' % (starts, ends, steps) | |
step = None | |
- # Quelch pyflakes warnings - start will be set when step is set | |
- start = '(Never used)' | |
+ # Squelch Pylance warnings - start will be set when step is set | |
+ start, i = '(Never used)', None | |
for i, prev in zip(idxs[1:], idxs[:-1]): | |
if step is not None: | |
if i - prev == step: | |
@@ -1279,13 +1269,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
initial_function = jsi.extract_function(funcname) | |
return lambda s: initial_function([s]) | |
- def _parse_sig_swf(self, file_contents): | |
- swfi = SWFInterpreter(file_contents) | |
- TARGET_CLASSNAME = 'SignatureDecipher' | |
- searched_class = swfi.extract_class(TARGET_CLASSNAME) | |
- initial_function = swfi.extract_function(searched_class, 'decipher') | |
- return lambda s: initial_function([s]) | |
- | |
def _decrypt_signature(self, s, video_id, player_url, age_gate=False): | |
"""Turn the encrypted s field into a working signature""" | |
@@ -1362,6 +1345,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
if config: | |
return self._parse_json( | |
uppercase_escape(config), video_id, fatal=False) | |
+ # The fallbacks below exist to extract the error reason | |
+ patterns = ( | |
+ r'(?m)window\["ytInitialPlayerResponse"\]\s*=\s*({.+});$', | |
+ r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE), | |
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, | |
+ ) | |
+ config = self._search_regex( | |
+ patterns, webpage, 'initial player response', default=None) | |
+ if config: | |
+ return {'args': {'player_response': config}} | |
+ embedded_config = self._search_regex( | |
+ r'setConfig\(({.*})\);', | |
+ webpage, 'ytInitialData', default=None) | |
+ if embedded_config: | |
+ return try_get( | |
+ embedded_config, | |
+ lambda x: {'args': {'player_response': x['PLAYER_VARS']['embedded_player_response']}}, | |
+ compat_str | |
+ ) | |
def _get_automatic_captions(self, video_id, player_response, player_config): | |
"""We need the webpage for getting the captions url, pass it as an | |
@@ -1647,12 +1649,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query) | |
video_id = qs.get('v', [None])[0] or video_id | |
- # Attempt to extract SWF player URL | |
- mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) | |
- if mobj is not None: | |
- player_url = re.sub(r'\\(.)', r'\1', mobj.group(1)) | |
- else: | |
- player_url = None | |
+ player_url = None | |
dash_mpds = [] | |
@@ -1688,21 +1685,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
# Get video info | |
video_info = {} | |
embed_webpage = None | |
- ytplayer_config = None | |
+ ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) | |
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None: | |
age_gate = True | |
# We simulate the access to the video from www.youtube.com/v/{video_id} | |
# this can be viewed without login into Youtube | |
- url = proto + '://www.youtube.com/embed/%s' % video_id | |
+ url = 'https://www.youtube.com/embed/%s' % video_id | |
embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage') | |
+ sts = self._search_regex( | |
+ r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default='') | |
data = compat_urllib_parse_urlencode({ | |
'video_id': video_id, | |
'eurl': 'https://youtube.googleapis.com/v/' + video_id, | |
- 'sts': self._search_regex( | |
- r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''), | |
+ 'sts': sts, | |
}) | |
- video_info_url = proto + '://www.youtube.com/get_video_info?' + data | |
+ video_info_url = 'https://www.youtube.com/get_video_info?' + data | |
try: | |
video_info_webpage = self._download_webpage( | |
video_info_url, video_id, | |
@@ -1716,39 +1714,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
player_response = extract_player_response(pl_response, video_id) | |
add_dash_mpd(video_info) | |
view_count = extract_view_count(video_info) | |
+ else: | |
+ ytplayer_config = self._get_ytplayer_config(video_id, embed_webpage) | |
else: | |
age_gate = False | |
- # Try looking directly into the video webpage | |
- ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) | |
- if ytplayer_config: | |
- args = ytplayer_config['args'] | |
- if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): | |
- # Convert to the same format returned by compat_parse_qs | |
- video_info = dict((k, [v]) for k, v in args.items()) | |
- add_dash_mpd(video_info) | |
- # Rental video is not rented but preview is available (e.g. | |
- # https://www.youtube.com/watch?v=yYr8q0y5Jfg, | |
- # https://github.com/ytdl-org/youtube-dl/issues/10532) | |
- if not video_info and args.get('ypc_vid'): | |
- return self.url_result( | |
- args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) | |
- if args.get('livestream') == '1' or args.get('live_playback') == 1: | |
- is_live = True | |
- if not player_response: | |
- player_response = extract_player_response(args.get('player_response'), video_id) | |
- if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): | |
- add_dash_mpd_pr(player_response) | |
- | |
- if not video_info and not player_response: | |
- player_response = extract_player_response( | |
- self._search_regex( | |
- (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE), | |
- self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage, | |
- 'initial player response', default='{}'), | |
- video_id) | |
+ # Try looking directly into the video webpage | |
+ if ytplayer_config: | |
+ args = ytplayer_config['args'] | |
+ if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): | |
+ # Convert to the same format returned by compat_parse_qs | |
+ video_info = dict((k, [v]) for k, v in args.items()) | |
+ add_dash_mpd(video_info) | |
+ # Rental video is not rented but preview is available (e.g. | |
+ # https://www.youtube.com/watch?v=yYr8q0y5Jfg, | |
+ # https://github.com/ytdl-org/youtube-dl/issues/10532) | |
+ if not video_info and args.get('ypc_vid'): | |
+ return self.url_result( | |
+ args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) | |
+ if args.get('livestream') == '1' or args.get('live_playback') == 1: | |
+ is_live = True | |
+ if not player_response: | |
+ player_response = extract_player_response(args.get('player_response'), video_id) | |
+ if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): | |
+ add_dash_mpd_pr(player_response) | |
def extract_unavailable_message(): | |
messages = [] | |
+ # NOTE: this no longer works | |
for tag, kind in (('h1', 'message'), ('div', 'submessage')): | |
msg = self._html_search_regex( | |
r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind), | |
@@ -1757,6 +1749,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
messages.append(msg) | |
if messages: | |
return '\n'.join(messages) | |
+ if player_response: | |
+ return try_get( | |
+ player_response, | |
+ (lambda x: clean_html(''.join(r['text'] for r in x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'])), | |
+ lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['simpleText'], | |
+ lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['reason']['simpleText'], | |
+ lambda x: x['playabilityStatus']['messages'][0], | |
+ lambda x: x['playabilityStatus']['reason']), compat_str) | |
if not video_info and not player_response: | |
unavailable_message = extract_unavailable_message() | |
@@ -1870,15 +1870,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or [] | |
streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or []) | |
- if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): | |
- self.report_rtmp_download() | |
- formats = [{ | |
- 'format_id': '_rtmp', | |
- 'protocol': 'rtmp', | |
- 'url': video_info['conn'][0], | |
- 'player_url': player_url, | |
- }] | |
- elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): | |
+ if not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): | |
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] | |
if 'rtmpe%3Dyes' in encoded_url_map: | |
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True) | |
@@ -1976,7 +1968,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
player_desc = 'unknown' | |
else: | |
player_type, player_version = self._extract_player_info(player_url) | |
- player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version) | |
+ player_desc = 'html5 player %s' % player_version | |
parts_sizes = self._signature_cache_id(encrypted_sig) | |
self.to_screen('{%s} signature length %s, %s' % | |
(format_id, parts_sizes, player_desc)) | |
@@ -2073,31 +2065,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
dct = self._formats[itag].copy() | |
dct.update(a_format) | |
a_format = dct | |
- a_format['player_url'] = player_url | |
+ # a_format['player_url'] = player_url | |
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming | |
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' | |
formats.append(a_format) | |
else: | |
error_message = extract_unavailable_message() | |
- if not error_message: | |
- reason_list = try_get( | |
- player_response, | |
- lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'], | |
- list) or [] | |
- for reason in reason_list: | |
- if not isinstance(reason, dict): | |
- continue | |
- reason_text = try_get(reason, lambda x: x['text'], compat_str) | |
- if reason_text: | |
- if not error_message: | |
- error_message = '' | |
- error_message += reason_text | |
- if error_message: | |
- error_message = clean_html(error_message) | |
- if not error_message: | |
- error_message = clean_html(try_get( | |
- player_response, lambda x: x['playabilityStatus']['reason'], | |
- compat_str)) | |
if not error_message: | |
error_message = clean_html( | |
try_get(video_info, lambda x: x['reason'][0], compat_str)) | |
@@ -2417,9 +2390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |
msg=video_info['reason'][0], countries=countries) | |
reason = video_info['reason'][0] | |
if 'Invalid parameters' in reason: | |
- unavailable_message = extract_unavailable_message() | |
- if unavailable_message: | |
- reason = unavailable_message | |
+ reason = extract_unavailable_message() or reason | |
raise ExtractorError( | |
'YouTube said: %s' % reason, | |
expected=True, video_id=video_id) | |
@@ -3027,6 +2998,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | |
break | |
count = 0 | |
retries = 3 | |
+ browse = None | |
while count <= retries: | |
try: | |
# Downloading page may result in intermittent 5xx HTTP error |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment