Skip to content

Instantly share code, notes, and snippets.

@Lesmiscore
Created February 1, 2021 23:35
Show Gist options
  • Save Lesmiscore/4053d35cd3a6160021a5fe70cc92e84c to your computer and use it in GitHub Desktop.
Save Lesmiscore/4053d35cd3a6160021a5fe70cc92e84c to your computer and use it in GitHub Desktop.
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b254ceced..074397dad 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -13,7 +13,6 @@ import traceback
from .common import InfoExtractor, SearchInfoExtractor
from ..jsinterp import JSInterpreter
-from ..swfinterp import SWFInterpreter
from ..compat import (
compat_chr,
compat_HTTPError,
@@ -280,7 +279,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
- _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
+ _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|if\s*\(window\.ytcsi\)|\n)'
def _call_api(self, ep, query, video_id):
data = self._DEFAULT_API_DATA.copy()
@@ -401,6 +400,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
youtu\.be| # just youtu.be/xxxx
vid\.plus| # or vid.plus/xxxx
zwearz\.com/watch| # or zwearz.com/watch/xxxx
+ i\.ytimg\.com/vi| # or i.ytimg.com/vi/xxx
+ y2u\.be| # y2u.be
)/
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
)
@@ -1161,10 +1162,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
"""Report extracted video URL."""
self.to_screen('%s: Format %s not available' % (video_id, format))
- def report_rtmp_download(self):
- """Indicate the download will use the RTMP protocol."""
- self.to_screen('RTMP download detected')
-
def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
@@ -1202,13 +1199,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
note=download_note,
errnote='Download of %s failed' % player_url)
res = self._parse_sig_js(code)
- elif player_type == 'swf':
- urlh = self._request_webpage(
- player_url, video_id,
- note=download_note,
- errnote='Download of %s failed' % player_url)
- code = urlh.read()
- res = self._parse_sig_swf(code)
else:
assert False, 'Invalid player type %r' % player_type
@@ -1228,8 +1218,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return 's[%s%s%s]' % (starts, ends, steps)
step = None
- # Quelch pyflakes warnings - start will be set when step is set
- start = '(Never used)'
+ # Quell Pylance warnings - start will be set when step is set
+ start, i = '(Never used)', None
for i, prev in zip(idxs[1:], idxs[:-1]):
if step is not None:
if i - prev == step:
@@ -1279,13 +1269,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_function = jsi.extract_function(funcname)
return lambda s: initial_function([s])
- def _parse_sig_swf(self, file_contents):
- swfi = SWFInterpreter(file_contents)
- TARGET_CLASSNAME = 'SignatureDecipher'
- searched_class = swfi.extract_class(TARGET_CLASSNAME)
- initial_function = swfi.extract_function(searched_class, 'decipher')
- return lambda s: initial_function([s])
-
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature"""
@@ -1362,6 +1345,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
+ # The fallbacks below look for the player response so that the error reason can be extracted
+ patterns = (
+ r'(?m)window\["ytInitialPlayerResponse"\]\s*=\s*({.+});$',
+ r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
+ self._YT_INITIAL_PLAYER_RESPONSE_RE,
+ )
+ config = self._search_regex(
+ patterns, webpage, 'initial player response', default=None)
+ if config:
+ return {'args': {'player_response': config}}
+ embedded_config = self._search_regex(
+ r'setConfig\(({.*})\);',
+ webpage, 'ytInitialData', default=None)
+ if embedded_config:
+ return try_get(
+ embedded_config,
+ lambda x: {'args': {'player_response': x['PLAYER_VARS']['embedded_player_response']}},
+ compat_str
+ )
def _get_automatic_captions(self, video_id, player_response, player_config):
"""We need the webpage for getting the captions url, pass it as an
@@ -1647,12 +1649,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
video_id = qs.get('v', [None])[0] or video_id
- # Attempt to extract SWF player URL
- mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
- if mobj is not None:
- player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
- else:
- player_url = None
+ player_url = None
dash_mpds = []
@@ -1688,21 +1685,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get video info
video_info = {}
embed_webpage = None
- ytplayer_config = None
+ ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
- url = proto + '://www.youtube.com/embed/%s' % video_id
+ url = 'https://www.youtube.com/embed/%s' % video_id
embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
+ sts = self._search_regex(
+ r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default='')
data = compat_urllib_parse_urlencode({
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'sts': self._search_regex(
- r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
+ 'sts': sts,
})
- video_info_url = proto + '://www.youtube.com/get_video_info?' + data
+ video_info_url = 'https://www.youtube.com/get_video_info?' + data
try:
video_info_webpage = self._download_webpage(
video_info_url, video_id,
@@ -1716,39 +1714,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response = extract_player_response(pl_response, video_id)
add_dash_mpd(video_info)
view_count = extract_view_count(video_info)
+ else:
+ ytplayer_config = self._get_ytplayer_config(video_id, embed_webpage)
else:
age_gate = False
- # Try looking directly into the video webpage
- ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
- if ytplayer_config:
- args = ytplayer_config['args']
- if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
- # Convert to the same format returned by compat_parse_qs
- video_info = dict((k, [v]) for k, v in args.items())
- add_dash_mpd(video_info)
- # Rental video is not rented but preview is available (e.g.
- # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
- # https://github.com/ytdl-org/youtube-dl/issues/10532)
- if not video_info and args.get('ypc_vid'):
- return self.url_result(
- args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
- if args.get('livestream') == '1' or args.get('live_playback') == 1:
- is_live = True
- if not player_response:
- player_response = extract_player_response(args.get('player_response'), video_id)
- if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
- add_dash_mpd_pr(player_response)
-
- if not video_info and not player_response:
- player_response = extract_player_response(
- self._search_regex(
- (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
- self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
- 'initial player response', default='{}'),
- video_id)
+ # Try looking directly into the video webpage
+ if ytplayer_config:
+ args = ytplayer_config['args']
+ if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
+ # Convert to the same format returned by compat_parse_qs
+ video_info = dict((k, [v]) for k, v in args.items())
+ add_dash_mpd(video_info)
+ # Rental video is not rented but preview is available (e.g.
+ # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
+ # https://github.com/ytdl-org/youtube-dl/issues/10532)
+ if not video_info and args.get('ypc_vid'):
+ return self.url_result(
+ args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
+ if args.get('livestream') == '1' or args.get('live_playback') == 1:
+ is_live = True
+ if not player_response:
+ player_response = extract_player_response(args.get('player_response'), video_id)
+ if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
+ add_dash_mpd_pr(player_response)
def extract_unavailable_message():
messages = []
+ # NOTE: this no longer works
for tag, kind in (('h1', 'message'), ('div', 'submessage')):
msg = self._html_search_regex(
r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
@@ -1757,6 +1749,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
messages.append(msg)
if messages:
return '\n'.join(messages)
+ if player_response:
+ return try_get(
+ player_response,
+ (lambda x: clean_html(''.join(r['text'] for r in x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'])),
+ lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['simpleText'],
+ lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['reason']['simpleText'],
+ lambda x: x['playabilityStatus']['messages'][0],
+ lambda x: x['playabilityStatus']['reason']), compat_str)
if not video_info and not player_response:
unavailable_message = extract_unavailable_message()
@@ -1870,15 +1870,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
- if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
- self.report_rtmp_download()
- formats = [{
- 'format_id': '_rtmp',
- 'protocol': 'rtmp',
- 'url': video_info['conn'][0],
- 'player_url': player_url,
- }]
- elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
+ if not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
@@ -1976,7 +1968,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_desc = 'unknown'
else:
player_type, player_version = self._extract_player_info(player_url)
- player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
+ player_desc = 'html5 player %s' % player_version
parts_sizes = self._signature_cache_id(encrypted_sig)
self.to_screen('{%s} signature length %s, %s' %
(format_id, parts_sizes, player_desc))
@@ -2073,31 +2065,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
dct = self._formats[itag].copy()
dct.update(a_format)
a_format = dct
- a_format['player_url'] = player_url
+ # a_format['player_url'] = player_url
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
formats.append(a_format)
else:
error_message = extract_unavailable_message()
- if not error_message:
- reason_list = try_get(
- player_response,
- lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
- list) or []
- for reason in reason_list:
- if not isinstance(reason, dict):
- continue
- reason_text = try_get(reason, lambda x: x['text'], compat_str)
- if reason_text:
- if not error_message:
- error_message = ''
- error_message += reason_text
- if error_message:
- error_message = clean_html(error_message)
- if not error_message:
- error_message = clean_html(try_get(
- player_response, lambda x: x['playabilityStatus']['reason'],
- compat_str))
if not error_message:
error_message = clean_html(
try_get(video_info, lambda x: x['reason'][0], compat_str))
@@ -2417,9 +2390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
msg=video_info['reason'][0], countries=countries)
reason = video_info['reason'][0]
if 'Invalid parameters' in reason:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- reason = unavailable_message
+ reason = extract_unavailable_message() or reason
raise ExtractorError(
'YouTube said: %s' % reason,
expected=True, video_id=video_id)
@@ -3027,6 +2998,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
break
count = 0
retries = 3
+ browse = None
while count <= retries:
try:
# Downloading page may result in intermittent 5xx HTTP error
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment