Created
August 22, 2021 06:50
-
-
Save czoins/841509cd43ee23e7286610aa01706734 to your computer and use it in GitHub Desktop.
Modified VLC youtube.lua that uses youtube-dl. The latest youtube.lua fails to load some videos, resulting in a 403 error. youtube-dl manages to get the direct link; however, many videos that previously failed to load with the original script do not play in high resolution, probably because of changes in YouTube itself.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--[[ | |
$Id$ | |
Copyright © 2007-2020 the VideoLAN team | |
This program is free software; you can redistribute it and/or modify | |
it under the terms of the GNU General Public License as published by | |
the Free Software Foundation; either version 2 of the License, or | |
(at your option) any later version. | |
This program is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU General Public License for more details. | |
You should have received a copy of the GNU General Public License | |
along with this program; if not, write to the Free Software | |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. | |
--]] | |
-- Look up the value of the query parameter `name` inside `url`.
-- Returns the raw text found after "name=" (up to the next "&"), or nil
-- when the parameter is not present.
-- NOTE: `name` is spliced into a Lua pattern as-is, so callers are expected
-- to pass plain identifiers.
function get_url_param( url, name )
    local pattern = "[&?]"..name.."=([^&]*)"
    -- Extra parentheses: hand back exactly one value
    return ( string.match( url, pattern ) )
end
-- Build the "&name=value" fragment needed to carry parameter `name` from
-- `url` over to a new URL. Returns an empty string when `url` does not
-- contain that parameter.
function copy_url_param( url, name )
    local value = get_url_param( url, name )
    if value then
        return "&"..name.."="..value
    end
    return ""
end
-- Work out an artwork URL for the current item from vlc.path.
-- An explicit "iurl" parameter wins; otherwise the default thumbnail URL is
-- derived from the "v" parameter. Returns nil when neither is available.
function get_arturl()
    local explicit = get_url_param( vlc.path, "iurl" )
    if explicit then
        return explicit
    end
    local id = get_url_param( vlc.path, "v" )
    if id then
        return vlc.access.."://img.youtube.com/vi/"..id.."/default.jpg"
    end
    return nil
end
-- Choose the best suited itag out of a classic "fmt_list" string
-- (comma-separated "itag/WIDTHxHEIGHT..." entries).
-- Returns the itag as a string, or nil when the "preferred-resolution"
-- setting is negative or the list holds no parsable entry.
function get_fmt( fmt_list )
    local max_height = vlc.var.inherit( nil, "preferred-resolution" )
    if max_height < 0 then
        return nil
    end

    local chosen = nil
    for itag, height in string.gmatch( fmt_list, "(%d+)/%d+x(%d+)[^,]*" ) do
        -- Entries appear to be sorted by decreasing quality: remember each
        -- one as a fallback and stop at the first that fits the limit
        chosen = itag
        if tonumber( height ) <= max_height then
            return chosen
        end
    end
    return chosen
end
-- Stand-in for vlc.readline() that copes with very long lines (see #24957).
-- Peeks at a window that doubles in size on every pass until a newline shows
-- up or the window reaches 1 MiB, then consumes the stream up to and
-- including that newline (or the whole window if none was found).
-- Returns nil when nothing can be peeked.
function read_long_line()
    local limit = 1024 * 1024
    local window = 32768
    local scanned = 0
    local newline_at

    while true do
        window = window * 2
        local chunk = vlc.peek( window )
        if not chunk then
            return nil
        end
        -- Only search the part not covered by the previous window
        newline_at = string.find( chunk, "\n", scanned + 1 )
        scanned = window
        -- There is no EOF detection: give up once the limit is reached
        if newline_at or window >= limit then
            break
        end
    end

    return vlc.read( newline_at or window )
end
-- Caching iterator step over a stream, so the same HTTP response can be
-- walked through several times without being requested again.
-- `s` is a state table with fields `stream` (object providing :readline()),
-- `lines` (cache of entries already read) and `i` (current position).
-- Each call advances `s.i` and returns the entry at that position, reading
-- from the stream on a cache miss. Returns nil once the stream is exhausted.
function buf_iter( s )
    local index = s.i + 1
    s.i = index

    local cached = s.lines[index]
    if cached then
        return cached
    end

    -- Glue together statements split across several physical lines,
    -- otherwise they could not be parsed: keep reading until the
    -- accumulated text ends with "};" or the stream runs dry
    local statement = nil
    while true do
        local piece = s.stream:readline()
        if not piece then
            break
        end
        if statement then
            statement = statement..piece
        else
            statement = piece
        end
        if string.match( statement, "};$" ) then
            break
        end
    end

    if statement then
        s.lines[index] = statement
    end
    return statement
end
-- Scan the buffered javascript stream `js` from its beginning and return
-- the first capture of `pattern` found in any entry.
-- Logs an error and returns nil when no entry matches.
function js_extract( js, pattern )
    js.i = 0 -- Always restart from the first buffered entry
    local line = buf_iter( js )
    while line do
        local found = string.match( line, pattern )
        if found then
            return found
        end
        line = buf_iter( js )
    end
    vlc.msg.err( "Couldn't process youtube video URL, please check for updates to this script" )
    return nil
end
-- Descramble the URL signature `sig` by interpreting the javascript code,
-- fetched from `js_url`, that the web page itself uses for that purpose.
-- Returns the descrambled signature. Whenever a step of the analysis fails,
-- the signature is returned as it stands at that point (untouched for the
-- early failures) after logging a message.
function js_descramble( sig, js_url )
    local failure = "Couldn't process youtube video URL, please check for updates to this script"

    -- Fetch javascript code
    local js = { stream = vlc.stream( js_url ), lines = {}, i = 0 }
    if not js.stream then
        vlc.msg.err( failure )
        return sig
    end

    -- Step 1: name of the descrambler function, found at its call site:
    -- if(k.s){var l=k.sp,m=pt(decodeURIComponent(k.s));f.set(l,encodeURIComponent(m))}
    -- k.s (stream map field "s") is the scrambled input signature;
    -- k.sp (stream map field "sp") names the parameter (normally
    -- "signature" or "sig") that receives the descrambled output
    local descrambler = js_extract( js, "[=%(,&|](..)%(decodeURIComponent%(.%.s%)%)" )
    if not descrambler then
        vlc.msg.dbg( "Couldn't extract youtube video URL signature descrambling function name" )
        return sig
    end

    -- Step 2: body of the descrambler function, i.e. the list of rules:
    -- Go=function(a){a=a.split("");Fo.sH(a,2);Fo.TU(a,28);Fo.TU(a,44);Fo.TU(a,26);Fo.TU(a,40);Fo.TU(a,64);Fo.TR(a,26);Fo.sH(a,1);return a.join("")};
    local rules = js_extract( js, "^"..descrambler.."=function%([^)]*%){(.-)};" )
    if not rules then
        vlc.msg.dbg( "Couldn't extract youtube video URL signature descrambling rules" )
        return sig
    end

    -- Step 3: name of the helper object that defines the transformations
    local helper = string.match( rules, ";(..)%...%(" )
    if not helper then
        vlc.msg.dbg( "Couldn't extract youtube video URL signature transformation helper name" )
        vlc.msg.err( failure )
        return sig
    end

    -- Step 4: code of the helper object:
    -- var Fo={TR:function(a){a.reverse()},TU:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c},sH:function(a,b){a.splice(0,b)}};
    local transformations = js_extract( js, "[ ,]"..helper.."={(.-)};" )
    if not transformations then
        vlc.msg.dbg( "Couldn't extract youtube video URL signature transformation code" )
        return sig
    end

    -- Step 5: classify each helper method by a telltale code fragment.
    -- Checked in this order; the first fragment found decides.
    local detectors = {
        { "%.reverse%(", "reverse" }, -- a=a.reverse()
        { "%.splice%(",  "slice" },   -- a.splice(0,b)
        { "var c=",      "swap" },    -- var c=a[0];a[0]=a[b%a.length];a[b]=c
    }
    local trans = {}
    for meth, code in string.gmatch( transformations, "(..):function%([^)]*%){([^}]*)}" ) do
        local kind = nil
        for _, detector in ipairs( detectors ) do
            if string.match( code, detector[1] ) then
                kind = detector[2]
                break
            end
        end
        if kind then
            trans[meth] = kind
        else
            vlc.msg.warn("Couldn't parse unknown youtube video URL signature transformation")
        end
    end

    -- Step 6: run the rules in order on the signature
    local appliers = {
        reverse = function( s, n )
            return string.reverse( s )
        end,
        slice = function( s, n )
            return string.sub( s, n + 1 )
        end,
        swap = function( s, n )
            -- Exchange the first character with the one at offset n;
            -- offset 0 leaves the signature untouched
            if n > 1 then
                return ( string.gsub( s, "^(.)("..string.rep( ".", n - 1 )..")(.)(.*)$", "%3%2%1%4" ) )
            elseif n == 1 then
                return ( string.gsub( s, "^(.)(.)", "%2%1" ) )
            end
            return s
        end,
    }
    local missing = false
    for meth, idx in string.gmatch( rules, "..%.(..)%([^,]+,(%d+)%)" ) do
        local apply = appliers[trans[meth]]
        if apply then
            sig = apply( sig, tonumber( idx ) )
        else
            vlc.msg.dbg("Couldn't apply unknown youtube video URL signature transformation")
            missing = true
        end
    end
    if missing then
        vlc.msg.err( failure )
    end
    return sig
end
-- Assemble a playable stream URL out of a "key=value&..." parameter string.
-- `params` must hold a "url" field; when it also holds a scrambled
-- signature field "s", that signature is descrambled with the javascript at
-- `js_url` (if given) and appended under the name found in field "sp"
-- (falling back to "signature").
-- Returns the URL, or nil when `params` has no "url" field.
function stream_url( params, js_url )
    local encoded = string.match( params, "url=([^&]+)" )
    if not encoded then
        return nil
    end
    local url = vlc.strings.decode_uri( encoded )

    local scrambled = string.match( params, "s=([^&]+)" )
    if not scrambled then
        -- No signature to deal with: the URL is usable as is
        return url
    end

    -- Descramble the scrambled signature and append it to the URL
    local signature = vlc.strings.decode_uri( scrambled )
    vlc.msg.dbg( "Found "..string.len( signature ).."-character scrambled signature for youtube video URL, attempting to descramble... " )
    if js_url then
        signature = js_descramble( signature, js_url )
    else
        vlc.msg.err( "Couldn't process youtube video URL, please check for updates to this script" )
    end

    local sp = string.match( params, "sp=([^&]+)" )
    if not sp then
        vlc.msg.warn( "Couldn't extract signature parameters for youtube video URL, guessing" )
        sp = "signature"
    end

    return url.."&"..sp.."="..vlc.strings.encode_uri_component( signature )
end
-- Pick a video stream URL out of a classic, comma-separated stream map.
-- The first entry is taken when no format `fmt` is requested or when the
-- entry carries no itag; otherwise the first entry whose itag equals `fmt`.
-- Returns the assembled URL (see stream_url), or nil when nothing qualifies.
function pick_url( url_map, fmt, js_url )
    for stream in string.gmatch( url_map, "[^,]+" ) do
        local itag = string.match( stream, "itag=(%d+)" )
        -- Without both a requested format and an itag there is nothing to
        -- compare, so the entry is accepted outright
        local accepted = not ( fmt and itag )
        if not accepted then
            accepted = ( tonumber( itag ) == tonumber( fmt ) )
        end
        if accepted then
            return stream_url( stream, js_url )
        end
    end
    return nil
end
-- Pick a video stream URL out of a new-style stream map (a sequence of
-- "{...}" JSON-like objects).
-- When vlc.path carries a numeric "fmt" parameter, the stream with that
-- itag is selected; otherwise streams are ranked by height against the
-- "preferred-resolution" setting.
-- Returns the stream URL, or nil when no stream could be selected.
function pick_stream( stream_map, js_url )
    local chosen = nil
    local wanted_itag = tonumber( get_url_param( vlc.path, "fmt" ) )

    if wanted_itag then
        -- Legacy match from URL parameter
        for stream in string.gmatch( stream_map, '{(.-)}' ) do
            local itag = tonumber( string.match( stream, '"itag":(%d+)' ) )
            if wanted_itag == itag then
                chosen = stream
                break
            end
        end
    else
        -- Compare the available formats against our quality target
        local limit = vlc.var.inherit( nil, "preferred-resolution" )
        local best = nil
        for stream in string.gmatch( stream_map, '{(.-)}' ) do
            local height = tonumber( string.match( stream, '"height":(%d+)' ) )

            -- Anything is better than nothing
            local better = not chosen
            if not better and height then
                if not best then
                    better = true
                elseif ( limit < 0 or height <= limit ) and height > best then
                    -- Better quality within limits
                    better = true
                elseif limit > -1 and best > limit and height < best then
                    -- Lower quality more suited to limits
                    better = true
                end
            end

            if better then
                best = height
                chosen = stream
            end
        end
    end

    if not chosen then
        return nil
    end

    -- Either the "url" or the "signatureCipher" parameter is present,
    -- depending on whether the URL signature is scrambled
    local cipher = string.match( chosen, '"signatureCipher":"(.-)"' )
    if not cipher then
        cipher = string.match( chosen, '"[a-zA-Z]*[Cc]ipher":"(.-)"' )
    end
    if cipher then
        -- Scrambled signature: some assembly required
        local url = stream_url( cipher, js_url )
        if url then
            return url
        end
    end

    -- Unscrambled signature, already included in a ready-to-use URL
    return string.match( chosen, '"url":"(.-)"' )
end
-- Probe function.
-- Yields a true value when the current item (vlc.access / vlc.path) is an
-- http(s) URL that this script handles: a supported page on one of the
-- known YouTube hosts, or anything on the cookie consent host.
function probe()
    local path = vlc.path

    -- First match of `path` against a list of patterns, nil if none matches
    local function first_match( patterns )
        for _, pattern in ipairs( patterns ) do
            local found = string.match( path, pattern )
            if found then
                return found
            end
        end
        return nil
    end

    local is_http = ( vlc.access == "http" or vlc.access == "https" )

    local host = first_match( {
        "^www%.youtube%.com/",
        "^music%.youtube%.com/",
        "^gaming%.youtube%.com/", -- out of use
    } )

    local page = first_match( {
        "/watch%?",          -- the html page
        "/live$",            -- user live stream html page
        "/live%?",           -- user live stream html page
        "/get_video_info%?", -- info API
        "/v/",               -- video in swf player
        "/embed/",           -- embedded player iframe
    } )

    local consent = string.match( path, "^consent%.youtube%.com/" )

    return ( is_http and ( ( host and page ) or consent ) )
end
-- Ask the external youtube-dl program for the direct media URL of the page
-- at `page_url`. Returns the first non-empty line it prints, or nil when the
-- program cannot be started or prints nothing.
local function youtube_dl_url( page_url )
    if not ( io and io.popen ) then
        return nil
    end
    -- The URL ends up on a shell command line. Percent-encode everything
    -- outside a conservative URL character set and wrap the result in double
    -- quotes, so that "&" no longer splits the command and quotes or other
    -- shell metacharacters cannot be used to inject commands.
    local safe_url = string.gsub( page_url, "[^%w%-%._~:/%?&=%%%+,;@#]", function( c )
        return string.format( "%%%02X", string.byte( c ) )
    end )
    local cmd = 'youtube-dl.exe -f best --get-url "'..safe_url..'"'
    local ok, pipe = pcall( io.popen, cmd, 'r' )
    if not ok or not pipe then
        return nil
    end
    local output = pipe:read( '*a' )
    pipe:close()
    if not output then
        return nil
    end
    -- Keep the first non-empty line only, without surrounding whitespace
    local url = string.match( output, "[^\r\n]+" )
    if url then
        url = string.match( url, "^%s*(.-)%s*$" )
    end
    if not url or url == "" then
        return nil
    end
    return url
end

-- Parse function.
-- Turns the current item (vlc.access / vlc.path) into a playlist, i.e. a
-- list of item tables, depending on the kind of URL:
--  * cookie consent redirection: follow the "continue" parameter;
--  * YouTube subdomain other than www: redirect to www.youtube.com;
--  * HTML watch/live page: scrape metadata from the page and obtain the
--    media URL from the external youtube-dl program;
--  * get_video_info API: extract the media URL and metadata from the reply;
--  * any other supported URL: redirect to the matching watch page.
-- Returns an empty list after logging an error on failure.
function parse()
    if string.match( vlc.path, "^consent%.youtube%.com/" ) then
        -- Cookie consent redirection
        -- Location: https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DXXXXXXXXXXX&gl=FR&m=0&pc=yt&uxe=23983172&hl=fr&src=1
        -- Set-Cookie: CONSENT=PENDING+355; expires=Fri, 01-Jan-2038 00:00:00 GMT; path=/; domain=.youtube.com
        local url = get_url_param( vlc.path, "continue" )
        if not url then
            vlc.msg.err( "Couldn't handle YouTube cookie consent redirection, please check for updates to this script or try disabling HTTP cookie forwarding" )
            return { }
        end
        return { { path = vlc.strings.decode_uri( url ), options = { ":no-http-forward-cookies" } } }
    elseif not string.match( vlc.path, "^www%.youtube%.com/" ) then
        -- Skin subdomain
        return { { path = vlc.access.."://"..string.gsub( vlc.path, "^([^/]*)/", "www.youtube.com/" ) } }
    elseif string.match( vlc.path, "/watch%?" )
        or string.match( vlc.path, "/live$" )
        or string.match( vlc.path, "/live%?" )
    then -- This is the HTML page's URL
        -- All of these are locals on purpose: nothing may leak into, or be
        -- picked up from, the global environment
        local title, description, arturl, artist
        local new_layout = false

        while true do
            -- The new HTML code layout has fewer and longer lines; always
            -- use the long line workaround once it has been detected.
            local line = new_layout and read_long_line() or vlc.readline()
            if not line then break end

            -- The next line is the major configuration line that we need.
            -- It is very long so we need this workaround (see #24957).
            if string.match( line, '^ *<div id="player%-api">' ) then
                line = read_long_line()
                if not line then break end
            end

            if not title then
                local meta = string.match( line, '<meta property="og:title"( .-)>' )
                if meta then
                    title = string.match( meta, ' content="(.-)"' )
                    if title then
                        title = vlc.strings.resolve_xml_special_chars( title )
                    end
                end
            end

            if not description then
                -- FIXME: there is another version of this available,
                -- without the double JSON string encoding, but we're
                -- unlikely to access it due to #24957
                description = string.match( line, '\\"shortDescription\\":\\"(.-[^\\])\\"')
                if description then
                    -- FIXME: do this properly (see #24958)
                    description = string.gsub( description, '\\(["\\/])', '%1' )
                else
                    description = string.match( line, '"shortDescription":"(.-[^\\])"')
                end
                if description then
                    if string.match( description, '^"' ) then
                        description = ""
                    end
                    -- FIXME: do this properly (see #24958)
                    -- This way of unescaping is technically wrong
                    -- so as little as possible of it should be done
                    description = string.gsub( description, '\\(["\\/])', '%1' )
                    description = string.gsub( description, '\\n', '\n' )
                    description = string.gsub( description, '\\r', '\r' )
                    description = string.gsub( description, "\\u0026", "&" )
                end
            end

            if not arturl then
                local meta = string.match( line, '<meta property="og:image"( .-)>' )
                if meta then
                    arturl = string.match( meta, ' content="(.-)"' )
                    if arturl then
                        arturl = vlc.strings.resolve_xml_special_chars( arturl )
                    end
                end
            end

            if not artist then
                artist = string.match(line, '\\"author\\":\\"(.-)\\"')
                if artist then
                    -- FIXME: do this properly (see #24958)
                    artist = string.gsub( artist, '\\(["\\/])', '%1' )
                else
                    artist = string.match( line, '"author":"(.-)"' )
                end
                if artist then
                    -- FIXME: do this properly (see #24958)
                    artist = string.gsub( artist, "\\u0026", "&" )
                end
            end

            if not new_layout then
                if string.match( line, '<script nonce="' ) then
                    vlc.msg.dbg( "Detected new YouTube HTML code layout" )
                    new_layout = true
                end
            end
        end

        -- The media URL itself comes from youtube-dl rather than from the page
        local path = youtube_dl_url( vlc.access.."://"..vlc.path )
        if not path then
            vlc.msg.err( "Couldn't extract youtube video URL, please check for updates to this script" )
            return { }
        end

        if not arturl then
            arturl = get_arturl()
        end

        return { { path = path, name = title, description = description, artist = artist, arturl = arturl } }

    elseif string.match( vlc.path, "/get_video_info%?" ) then -- video info API
        local line = vlc.read( 1024*1024 ) -- data is on one line only
        if not line then
            vlc.msg.err( "YouTube API output missing" )
            return { }
        end

        local js_url = get_url_param( vlc.path, "jsurl" )
        if js_url then
            js_url = vlc.strings.decode_uri( js_url )
        end

        local path = nil

        -- Classic parameters - out of use since early 2020
        local fmt = get_url_param( vlc.path, "fmt" )
        if not fmt then
            local fmt_list = string.match( line, "&fmt_list=([^&]*)" )
            if fmt_list then
                fmt_list = vlc.strings.decode_uri( fmt_list )
                fmt = get_fmt( fmt_list )
            end
        end

        local url_map = string.match( line, "&url_encoded_fmt_stream_map=([^&]*)" )
        if url_map then
            vlc.msg.dbg( "Found classic parameters for youtube video stream, parsing..." )
            url_map = vlc.strings.decode_uri( url_map )
            path = pick_url( url_map, fmt, js_url )
        end

        -- New-style parameters
        if not path then
            local stream_map = string.match( line, '%%22formats%%22%%3A%%5B(.-)%%5D' )
            if stream_map then
                vlc.msg.dbg( "Found new-style parameters for youtube video stream, parsing..." )
                stream_map = vlc.strings.decode_uri( stream_map )
                -- FIXME: do this properly (see #24958)
                stream_map = string.gsub( stream_map, "\\u0026", "&" )
                path = pick_stream( stream_map, js_url )
            end
        end

        if not path then
            -- If this is a live stream, the URL map will be empty
            -- and we get the URL from this field instead
            local hlsvp = string.match( line, "%%22hlsManifestUrl%%22%%3A%%22(.-)%%22" )
            if hlsvp then
                hlsvp = vlc.strings.decode_uri( hlsvp )
                path = hlsvp
            end
        end

        if not path and get_url_param( vlc.path, "el" ) ~= "detailpage" then
            -- Retry with the other known value for the "el" parameter;
            -- either value has historically been wrong and failed for
            -- some videos but not others.
            local video_id = get_url_param( vlc.path, "video_id" )
            if video_id then
                path = vlc.access.."://www.youtube.com/get_video_info?video_id="..video_id.."&el=detailpage"..copy_url_param( vlc.path, "fmt" )..copy_url_param( vlc.path, "jsurl" )
                vlc.msg.warn( "Couldn't extract video URL, retrying with alternate YouTube API parameters" )
            end
        end

        if not path then
            vlc.msg.err( "Couldn't extract youtube video URL, please check for updates to this script" )
            return { }
        end

        local title = string.match( line, "%%22title%%22%%3A%%22(.-)%%22" )
        if title then
            title = string.gsub( title, "%+", " " )
            title = vlc.strings.decode_uri( title )
            -- FIXME: do this properly (see #24958)
            title = string.gsub( title, "\\u0026", "&" )
        end

        -- FIXME: description gets truncated if it contains a double quote
        local description = string.match( line, "%%22shortDescription%%22%%3A%%22(.-)%%22" )
        if description then
            description = string.gsub( description, "%+", " " )
            description = vlc.strings.decode_uri( description )
            -- FIXME: do this properly (see #24958)
            description = string.gsub( description, '\\(["\\/])', '%1' )
            description = string.gsub( description, '\\n', '\n' )
            description = string.gsub( description, '\\r', '\r' )
            description = string.gsub( description, "\\u0026", "&" )
        end

        local artist = string.match( line, "%%22author%%22%%3A%%22(.-)%%22" )
        if artist then
            artist = string.gsub( artist, "%+", " " )
            artist = vlc.strings.decode_uri( artist )
            -- FIXME: do this properly (see #24958)
            artist = string.gsub( artist, "\\u0026", "&" )
        end

        local arturl = string.match( line, "%%22playerMicroformatRenderer%%22%%3A%%7B%%22thumbnail%%22%%3A%%7B%%22thumbnails%%22%%3A%%5B%%7B%%22url%%22%%3A%%22(.-)%%22" )
        if arturl then
            arturl = vlc.strings.decode_uri( arturl )
        end

        return { { path = path, name = title, description = description, artist = artist, arturl = arturl } }

    else -- Other supported URL formats
        local video_id = string.match( vlc.path, "/[^/]+/([^?]*)" )
        if not video_id then
            vlc.msg.err( "Couldn't extract youtube video URL" )
            return { }
        end
        return { { path = vlc.access.."://www.youtube.com/watch?v="..video_id..copy_url_param( vlc.path, "fmt" ) } }
    end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I still get 403 responses using this.