Created
November 16, 2014 20:38
-
-
Save cattode/462180b605163a7db1e6 to your computer and use it in GitHub Desktop.
Grab a YouTube video file's information from its YouTube URL in pure JS — Adapted from VLC's youtube.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* This program is free software: you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation, either version 2 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program. If not, see <http://www.gnu.org/licenses/>. | |
*/ | |
// Adapted from http://git.videolan.org/?p=vlc.git;f=share/lua/playlist/youtube.lua;hb=77f58c382703f93e899683844d1917cd7cb69d22 | |
/* | |
* Changelog: | |
* 11/16/2014: Converted the Lua code to vanilla JS | |
*/ | |
// Settings read by the functions in this file:
//   access     - scheme prepended to protocol-relative ("//host/...") player script URLs
//   resolution - largest video height that should be picked from the format list
var preferences = {
    access: "https",
    resolution: 720
};

// Demo invocation: resolve one watch page and print the resulting description object.
console.log(parse('https://www.youtube.com/watch?v=TIW1m3jbEsg'));
/**
 * Extract the raw (still percent-encoded) value of a query-string parameter.
 *
 * The parameter name is matched literally and only on a parameter boundary
 * (start of string, "?" or "&"), so asking for "v" never matches the tail of
 * another parameter such as "xv=". The value stops at the next "&" or at the
 * start of a "#fragment".
 *
 * @param {string} url  URL or bare query string to search.
 * @param {string} name Name of the parameter to look up.
 * @returns {?string} Value of the first occurrence, or null when absent.
 */
function get_url_param (url, name) {
    if (typeof url !== "string") {
        return null;
    }
    // Escape regex metacharacters so the name cannot alter the pattern
    var literal_name = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    var res = url.match(new RegExp("(?:^|[?&])" + literal_name + "=([^&#]*)"));
    return res ? res[1] : null;
}
/**
 * Determine the thumbnail URL for a video URL.
 *
 * An explicit "iurl" query parameter wins; otherwise the default thumbnail
 * address is built from the "v" (video id) parameter.
 *
 * @param {string} url Video URL to inspect.
 * @returns {?string} Thumbnail URL, or null when neither parameter is present.
 */
function get_arturl (url) {
    var art = get_url_param(url, "iurl");
    if (!art) {
        var id = get_url_param(url, "v");
        art = id ? "http://img.youtube.com/vi/" + id + "/default.jpg" : null;
    }
    return art;
}
/**
 * Read the preferred maximum video height from `preferences.resolution`.
 *
 * Numbers are returned unchanged and numeric strings are converted to
 * numbers. Anything else (missing, null, boolean, non-numeric text, NaN)
 * yields -1, which callers treat as "no preference".
 *
 * @returns {number} Preferred height in pixels, or -1 when none is usable.
 */
function get_prefres () {
    var raw = preferences.resolution;
    // The global isNaN() coerces its argument, so null/""/true would slip
    // through as a "valid" resolution; check the type explicitly instead.
    var prefres = (typeof raw === "string") ? parseFloat(raw) : raw;
    if (typeof prefres !== "number" || isNaN(prefres)) {
        return -1;
    }
    return prefres;
}
/**
 * Pick the most suited format (itag) from a "fmt_list" string.
 *
 * Entries look like "<itag>/<width>x<height>/..." and are comma separated.
 * They are apparently listed in descending quality order, so the first entry
 * whose height does not exceed the preferred resolution is taken; when none
 * qualifies, the last (lowest quality) entry is used as a fallback.
 *
 * @param {string} fmt_list Comma separated format list.
 * @returns {?string} Chosen itag, or null when there is no preferred
 *                    resolution or the list contains no parsable entry.
 */
function get_fmt (fmt_list) {
    var prefres = get_prefres();
    if (prefres < 0) {
        return null;
    }

    var fmt = null;
    var fmt_list_regexp = /(\d+)\/\d+x(\d+)\/[^,]+/g;
    var result;
    while ((result = fmt_list_regexp.exec(fmt_list)) !== null) {
        fmt = result[1];
        // Declared locally: assigning an undeclared name leaks a global and
        // throws a ReferenceError in strict mode / ES modules.
        var height = parseInt(result[2], 10);
        if (height <= prefres) {
            break;
        }
    }
    return fmt;
}
/**
 * Descramble a URL signature by interpreting the descrambling routine found
 * in the web page's player javascript.
 *
 * The player script is fetched synchronously, the name of the descrambler
 * function is located (from a statement such as `c&&(b.signature=ij(c));`),
 * and then the function body plus the helper object preceding it are parsed.
 * Example of the expected shape:
 *   var Fo={TR:function(a){a.reverse()},TU:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c},sH:function(a,b){a.splice(0,b)}};function Go(a){a=a.split("");Fo.sH(a,2);Fo.TU(a,28);Fo.TR(a,26);return a.join("")};
 * Each helper is classified as "reverse", "splice" or "swap" and the rules
 * are replayed locally; the remote code itself is never evaluated.
 *
 * @param {string} sig    Scrambled signature.
 * @param {string} js_url URL of the player javascript.
 * @returns {string} The descrambled signature. The input is returned
 *                   unchanged when the script cannot be fetched or parsed.
 */
function js_descramble (sig, js_url) {
    var failure_msg = "Couldn't process youtube video URL, please check for updates to this script";

    // Fetch javascript code (synchronously, as the rest of this file expects)
    var js;
    try {
        var req = new XMLHttpRequest();
        req.open('GET', js_url, false);
        req.send();
        if (req.readyState !== 4 || req.status !== 200) {
            return sig;
        }
        js = req.responseText;
    } catch (e) {
        // A synchronous request reports network failures by throwing
        console.error(failure_msg);
        return sig;
    }

    var pending = String(js).split('\n');
    // Lines consumed while looking for the name are buffered so they can be
    // searched again for the definition without a second HTTP request.
    var seen = [];
    var line;

    // Look for the descrambler function's name
    var descrambler = null;
    while (!descrambler) {
        line = pending.shift();
        if (line === undefined) {
            console.error(failure_msg);
            return sig;
        }
        seen.push(line);
        // c&&(b.signature=ij(c));
        var name_match = line.match(/\.signature=(.*?)\(/);
        if (name_match) {
            descrambler = name_match[1];
        }
    }

    // The name may contain regex metacharacters (e.g. "$"), so escape it
    var literal_name = descrambler.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    var descrambler_regexp = new RegExp("var ..=\\{(.*?)\\};function " + literal_name + "\\([^)]*\\)\\{(.*?)\\}");

    // Fetch the code of the descrambler function and of its helper object:
    // buffered lines first (most recent first), then the unread remainder.
    var transformations = null;
    var rules = null;
    while (transformations === null) {
        if (seen.length > 0) {
            line = seen.pop();
        } else {
            line = pending.shift();
            if (line === undefined) {
                console.error(failure_msg);
                return sig;
            }
        }
        var definition = line.match(descrambler_regexp);
        if (definition) {
            transformations = definition[1];
            rules = definition[2];
        }
    }

    // Parse the helper object to map available transformations
    var trans = {};
    var transformations_regexp = /(..):function\([^)]*\)\{([^}]*)\}/g;
    var result;
    var meth;
    while ((result = transformations_regexp.exec(transformations)) !== null) {
        meth = result[1];
        var code = result[2];
        if (/\.reverse\(/.test(code)) {
            // a.reverse()
            trans[meth] = "reverse";
        } else if (/\.splice\(/.test(code)) {
            // a.splice(0,b)
            trans[meth] = "splice";
        } else if (/var c=/.test(code)) {
            // var c=a[0];a[0]=a[b%a.length];a[b]=c
            trans[meth] = "swap";
        } else {
            console.warn("Couldn't parse unknown youtube video URL signature transformation");
        }
    }

    // Parse descrambling rules, map them to known transformations
    // and apply them on the signature
    var missing = false;
    var chars = String(sig).split('');
    var rules_regexp = /..\.(..)\([^,]+,(\d+)\)/g;
    while ((result = rules_regexp.exec(rules)) !== null) {
        meth = result[1];
        var idx = parseInt(result[2], 10);
        if (trans[meth] === "reverse") {
            chars.reverse();
        } else if (trans[meth] === "splice") {
            chars.splice(0, idx);
        } else if (trans[meth] === "swap") {
            // Mirrors the helper shown above: a[0]=a[b%a.length];a[b]=c
            var tmp = chars[0];
            chars[0] = chars[idx % chars.length];
            chars[idx] = tmp;
        } else {
            console.debug("Couldn't apply unknown youtube video URL signature transformation");
            missing = true;
        }
    }

    if (missing) {
        console.error(failure_msg);
    }
    // Hand back a string: callers concatenate the result into a URL
    return chars.join('');
}
/**
 * Parse a "url_encoded_fmt_stream_map" value and pick the video URL.
 *
 * The map is a comma separated list of streams, each a "&"-separated list of
 * key=value pairs. Streams are apparently listed in quality order, so the
 * first acceptable one is taken: any stream when no format is requested or
 * the stream carries no itag, otherwise the stream whose itag equals `fmt`.
 * A plain "sig" value, or a scrambled "s" value run through js_descramble(),
 * is appended to the URL as "&signature=...".
 *
 * @param {string}  url_map  Decoded stream map.
 * @param {?string} fmt      Wanted itag, or a falsy value for "first stream".
 * @param {?string} [js_url] Player javascript URL used to descramble "s".
 * @returns {?string} The playable URL, or null when no stream qualifies.
 */
function pick_url (url_map, fmt, js_url) {
    if (!url_map) {
        return null;
    }
    var streams = String(url_map).split(",");
    for (var i = 0; i < streams.length; i++) {
        var stream = streams[i];
        if (!stream) {
            continue;
        }

        // Keys are anchored on a parameter boundary so that e.g. "s=" is not
        // found inside "fps=" and "url=" is not found inside "conn_url=".
        var itag = stream.match(/(?:^|&)itag=(\d+)/);
        if (fmt && itag && parseInt(itag[1], 10) !== parseInt(fmt, 10)) {
            continue;
        }
        var url = stream.match(/(?:^|&)url=([^&,]+)/);
        if (!url) {
            continue;
        }

        var sig = null;
        var plain = stream.match(/(?:^|&)sig=([^&,]+)/);
        if (plain) {
            sig = plain[1];
        } else {
            // Scrambled signature
            var scrambled = stream.match(/(?:^|&)s=([^&,]+)/);
            if (scrambled) {
                sig = scrambled[1];
                console.debug("Found " + sig.length + "-character scrambled signature for youtube video URL, attempting to descramble ...");
                if (js_url) {
                    sig = js_descramble(sig, js_url);
                    // Tolerate a descrambler that hands back an array of characters
                    if (sig instanceof Array) {
                        sig = sig.join("");
                    }
                } else {
                    console.error("Couldn't process youtube video URL, please check for updates to this script");
                }
            }
        }

        var signature = sig ? "&signature=" + sig : "";
        return decodeURIComponent(url[1]) + signature;
    }
    return null;
}
/**
 * Probe function: tell whether a URL is one this script knows how to handle.
 *
 * The URL must use http or https, its host must be youtube.com or one of its
 * subdomains ("www.youtube.com", "fr.youtube.com", ...), and the part after
 * the host must look like one of the supported entry points.
 *
 * @param {string} url URL to test.
 * @returns {boolean} true when the URL is a supported youtube URL.
 */
function probe (url) {
    if (typeof url !== "string") {
        return false;
    }
    var parts = url.match(/^(.*?):\/\/([^\/?#]*)/);
    if (!parts || (parts[1] !== "http" && parts[1] !== "https")) {
        return false;
    }

    // Only the authority is inspected, so ".youtube.com" appearing in a path
    // or query string of some other site is not mistaken for the host.
    var host = parts[2].replace(/^.*@/, "").replace(/:\d*$/, "").toLowerCase();
    if (!/(^|\.)youtube\.com$/.test(host)) {
        return false;
    }

    var rest = url.slice(parts[0].length);
    return /\/watch\?/.test(rest)              // the html page
        || /\/get_video_info\?/.test(rest)     // info API
        || /\/v\//.test(rest)                  // video in swf player
        || /\/embed\//.test(rest)              // embedded player iframe
        || /\/player2\.swf/.test(rest);        // another player url
}
/**
 * Decode one level of the common XML/HTML character entities
 * (&amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos; &amp;#39;) in a string.
 *
 * Decoding is done in a single pass, so a doubly encoded sequence such as
 * "&amp;amp;lt;" becomes "&amp;lt;" and needs a second call to become "<".
 *
 * @param {string} str Text possibly containing entities.
 * @returns {string} The text with entities replaced; non-string input is
 *                   returned unchanged.
 */
function resolve_xml_special_chars (str) {
    if (typeof str !== "string") {
        return str;
    }
    var entities = { "amp": "&", "lt": "<", "gt": ">", "quot": '"', "apos": "'", "#39": "'" };
    return str.replace(/&(amp|lt|gt|quot|apos|#39);/g, function (whole, entity) {
        return entities[entity];
    });
}
/**
 * Parse function: resolve a youtube URL into a description of the video.
 *
 * Three kinds of URL are handled:
 *  - ".../watch?..." (the HTML page): the page is fetched and scanned for
 *    metadata and for the player configuration holding the stream map;
 *  - ".../get_video_info?..." (the info API): the one-line response is fetched
 *    and decoded;
 *  - anything else is treated as a flash/embedded player URL and is rewritten
 *    to the matching watch-page URL without any network access.
 *
 * Pages are fetched with a synchronous XMLHttpRequest.
 *
 * @param {string} url URL to resolve.
 * @returns {(Object|undefined)} An object with a "path" property plus whatever
 *          metadata was found; an empty object when no video URL could be
 *          extracted; undefined when the page could not be fetched.
 */
function parse (url) {
    var is_watch_page = /\/watch\?/.test(url);
    var is_video_info = !is_watch_page && /\/get_video_info\?/.test(url);
    if (!is_watch_page && !is_video_info) {
        // This is the flash player's URL: nothing needs to be downloaded
        return parse_player_url(url);
    }

    var data;
    try {
        var req = new XMLHttpRequest();
        req.open('GET', url, false);
        req.send();
        if (req.readyState === 4 && req.status === 200) {
            data = req.responseText;
        }
    } catch (e) {
        // A synchronous request reports network failures by throwing
        data = undefined;
    }
    if (data === undefined || data === null) {
        console.error("Couldn't process youtube video URL, please check for updates to this script");
        return;
    }

    var lines = String(data).split(/\n/);
    return is_watch_page ? parse_watch_page(url, lines) : parse_video_info(url, lines);
}

// Scan the lines of a watch page for the video URL and its metadata.
function parse_watch_page (url, lines) {
    var path, name, description, arturl;
    var artist = null;
    var content_regexp = /content="(.*?)"/;
    // fmt is the format of the video
    // (cf. http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs)
    var fmt = get_url_param(url, "fmt");

    var line;
    while ((line = lines.shift()) !== undefined) {
        var content;

        // Try to find the video's title
        if (line.match(/<meta name="title"/)) {
            content = line.match(content_regexp);
            if (content) {
                // Decoded twice because the page double encodes these values;
                // strings are immutable, so the result has to be kept.
                name = resolve_xml_special_chars(resolve_xml_special_chars(content[1]));
            }
        }
        if (line.match(/<meta name="description"/)) {
            content = line.match(content_regexp);
            if (content) {
                description = resolve_xml_special_chars(resolve_xml_special_chars(content[1]));
            }
        }
        if (line.match(/<meta property="og:image"/)) {
            content = line.match(content_regexp);
            if (content) {
                arturl = content[1];
            }
        }

        // This is not available in the video parameters (whereas it
        // is given by the get_video_info API as the "author" field)
        if (!artist) {
            var author = line.match(/yt-uix-sessionlink yt-user-name[^>]*>([^<]*)<\//);
            if (author) {
                artist = resolve_xml_special_chars(author[1]);
            }
        }

        // JSON parameters, also formerly known as "swfConfig",
        // "SWF_ARGS", "swfArgs", "PLAYER_CONFIG", "playerConfig" ...
        if (line.match(/ytplayer\.config/)) {
            var js_url = line.match(/"js": "(.*?)"/);
            if (js_url) {
                // Unescape every "\/" (a string pattern would only replace
                // the first one), then complete protocol-relative URLs.
                js_url = js_url[1].replace(/\\\//g, "/");
                js_url = js_url.replace(/^\/\//, preferences.access + "://");
            }

            if (!fmt) {
                var fmt_list = line.match(/"fmt_list": "(.*?)"/);
                if (fmt_list) {
                    fmt = get_fmt(fmt_list[1].replace(/\\\//g, "/"));
                }
            }

            var url_map = line.match(/"url_encoded_fmt_stream_map": "(.*?)"/);
            if (url_map) {
                // FIXME: do this properly
                path = pick_url(url_map[1].replace(/\\u0026/g, "&"), fmt, js_url);
            }

            if (!path) {
                // If this is a live stream, the URL map will be empty
                // and we get the URL from this field instead
                var hlsvp = line.match(/"hlsvp": "(.*?)"/);
                if (hlsvp) {
                    path = hlsvp[1].replace(/\\\//g, "/");
                }
            }
            // There is also another version of the parameters, encoded
            // differently, as an HTML attribute of an <object> or <embed>
            // tag; but we don't need it now
        }
    }

    if (!path) {
        var video_id = get_url_param(url, "v");
        if (video_id) {
            var format = fmt ? "&fmt=" + fmt : "";
            // Without "el=detailpage", /get_video_info fails for many
            // music videos with errors about copyrighted content being
            // "restricted from playback on certain sites"
            path = "http://www.youtube.com/get_video_info?video_id=" + video_id + format + "&el=detailpage";
            console.error("Couldn't extract video URL, falling back to alternate youtube API");
        }
    }

    if (!path) {
        console.error("Couldn't extract youtube video URL, please check for updates to this script");
        return {};
    }

    if (!arturl) {
        arturl = get_arturl(url);
    }

    return { "path": path, "name": name, "description": description, "artist": artist, "arturl": arturl };
}

// Decode a get_video_info response (all the data is on the first line).
function parse_video_info (url, lines) {
    var path;
    var line = lines.shift();

    var fmt = get_url_param(url, "fmt");
    if (!fmt) {
        var fmt_list = line.match(/&fmt_list=([^&]*)/);
        if (fmt_list) {
            fmt = get_fmt(decodeURIComponent(fmt_list[1]));
        }
    }

    var url_map = line.match(/&url_encoded_fmt_stream_map=([^&]*)/);
    if (url_map) {
        path = pick_url(decodeURIComponent(url_map[1]), fmt);
    }

    if (!path) {
        // If this is a live stream, the URL map will be empty
        // and we get the URL from this field instead
        var hlsvp = line.match(/&hlsvp=([^&]*)/);
        if (hlsvp) {
            path = decodeURIComponent(hlsvp[1]);
        }
    }

    if (!path) {
        console.error("Couldn't extract youtube video URL, please check for updates to this script");
        return {};
    }

    // "+" stands for a space in these form-encoded values
    var title = line.match(/&title=([^&]*)/);
    if (title) {
        title = decodeURIComponent(title[1].replace(/\+/g, " "));
    }
    var artist = line.match(/&author=([^&]*)/);
    if (artist) {
        artist = decodeURIComponent(artist[1].replace(/\+/g, " "));
    }
    var arturl = line.match(/&thumbnail_url=([^&]*)/);
    if (arturl) {
        arturl = decodeURIComponent(arturl[1]);
    }

    // "name" duplicates "title" so that both result shapes expose the same key
    return { path: path, name: title, title: title, artist: artist, arturl: arturl };
}

// Rewrite a flash/embedded player URL into the matching watch-page URL.
function parse_player_url (url) {
    var video_id = get_url_param(url, "video_id");
    if (!video_id) {
        var from_path = url.match(/\/v\/([^?]*)/) || url.match(/\/embed\/([^?]*)/);
        if (from_path) {
            video_id = from_path[1];
        }
    }
    if (!video_id) {
        console.error("Couldn't extract youtube video URL");
        return {};
    }

    var fmt = get_url_param(url, "fmt");
    var format = fmt ? "&fmt=" + fmt : "";
    return { "path": "http://www.youtube.com/watch?v=" + video_id + format };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment