Skip to content

Instantly share code, notes, and snippets.

@cattode
Created November 16, 2014 20:38
Show Gist options
  • Save cattode/462180b605163a7db1e6 to your computer and use it in GitHub Desktop.
Save cattode/462180b605163a7db1e6 to your computer and use it in GitHub Desktop.
Grab a YouTube video's file information from its YouTube URL in pure JS — Adapted from VLC's youtube.lua
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Adapted from http://git.videolan.org/?p=vlc.git;f=share/lua/playlist/youtube.lua;hb=77f58c382703f93e899683844d1917cd7cb69d22
/*
* Changelog:
* 11/16/2014: Converted the Lua code to vanilla JS
*/
// User preferences consulted by the functions below:
//  - "access": scheme that parse() substitutes when the player script URL
//    found in the page is protocol-relative ("//host/...")
//  - "resolution": value returned by get_prefres(); get_fmt() compares the
//    height of each available format against it
var preferences = {
"access": "https",
"resolution": 720
};
// Demo invocation: resolve one watch-page URL and log the result.
// parse() is a function declaration, so it is hoisted and callable here.
console.log(parse('https://www.youtube.com/watch?v=TIW1m3jbEsg'));
/**
 * Extract the raw (still URL-encoded) value of a query-string parameter.
 *
 * The parameter name has to start right after "?" or "&" (or at the very
 * beginning of the string), so asking for "id" does not match the tail of
 * another parameter such as "video_id=...". The value stops at the next
 * "&" or at the "#" that starts a URL fragment.
 *
 * @param {string} url  URL (or bare query string) to search.
 * @param {string} name Parameter name; matched literally.
 * @returns {?string} The parameter value (possibly empty), or null when
 *                    the parameter is absent.
 */
function get_url_param (url, name) {
    // Escape regex metacharacters so the name is matched literally
    var escaped_name = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    var res = url.match(new RegExp("(?:^|[?&])" + escaped_name + "=([^&#]*)"));
    return res ? res[1] : null;
}
// Work out a thumbnail URL for the video behind `url`.
// An explicit "iurl" parameter wins; otherwise the default thumbnail
// address is built from the "v" (video id) parameter. Returns null when
// neither parameter yields a usable value.
function get_arturl (url) {
    var explicit_art = get_url_param(url, "iurl");
    if (explicit_art) {
        return explicit_art;
    }
    var id = get_url_param(url, "v");
    return id ? "http://img.youtube.com/vi/" + id + "/default.jpg" : null;
}
// Return the preferred resolution from `preferences.resolution`, or -1
// when that setting is not usable as a number (isNaN reports true).
function get_prefres () {
    var wanted = preferences.resolution;
    if (isNaN(wanted)) {
        return -1;
    }
    return wanted;
}
// Pick the most suited format available.
//
// `fmt_list` is a comma-separated list of "<itag>/<width>x<height>/..."
// entries. Returns the itag (as a string) of the first entry whose height
// does not exceed the preferred resolution. If every entry is taller, the
// last entry seen is returned as a fallback. Returns null when there is no
// usable preferred resolution or when no entry could be parsed.
function get_fmt (fmt_list) {
    var prefres = get_prefres();
    if (prefres < 0) {
        return null;
    }
    var fmt = null;
    var fmt_list_regexp = /(\d+)\/\d+x(\d+)\/[^,]+/g;
    var result;
    while ((result = fmt_list_regexp.exec(fmt_list)) !== null) {
        fmt = result[1];
        // Kept local: an undeclared assignment would leak a global and
        // throws a ReferenceError in strict mode / ES modules
        var height = parseInt(result[2], 10);
        // Apparently formats are listed in quality
        // order, so we take the first one that works,
        // or fallback to the lowest quality
        if (height <= prefres) {
            break;
        }
    }
    return fmt;
}
// Descramble the URL signature by re-implementing the descrambling routine
// found in the player's javascript code.
//
// sig:    scrambled signature (string)
// js_url: URL of the player script that contains the descrambler
//
// Returns the descrambled signature as a string. If the script cannot be
// downloaded or its descrambler cannot be located, `sig` is returned
// unchanged. If some descrambling steps are not understood, an error is
// logged and the partially processed signature is returned.
//
// The downloaded script is only pattern-matched, never evaluated: running
// remote code through eval() would hand it full control over the page.
function js_descramble (sig, js_url) {
    var failure_msg = "Couldn't process youtube video URL, please check for updates to this script";

    // Fetch javascript code (synchronous request, so the result is
    // available as soon as send() returns)
    var js;
    try {
        var req = new XMLHttpRequest();
        req.open('GET', js_url, false);
        req.send();
        if (req.readyState !== 4 || req.status !== 200) {
            console.error(failure_msg);
            return sig;
        }
        js = req.responseText;
    } catch (e) {
        // A synchronous send() throws on network errors
        console.error(failure_msg);
        return sig;
    }

    // Look for the descrambler function's name
    var pending = js.split('\n');
    // Lines already consumed; buffered so we don't have to make a second
    // HTTP request when looking for the function body
    var seen = [];
    var descrambler = null;
    while (!descrambler) {
        var line = pending.shift();
        if (line === undefined) {
            console.error(failure_msg);
            return sig;
        }
        seen.push(line);
        // c&&(b.signature=ij(c));
        var name_match = line.match(/\.signature=(.*?)\(/);
        if (name_match) {
            descrambler = name_match[1];
        }
    }

    // Fetch the code of the descrambler function. The function is
    // conveniently preceded by the definition of a helper object
    // that it uses. Example:
    // var Fo={TR:function(a){a.reverse()},TU:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c},sH:function(a,b){a.splice(0,b)}};function Go(a){a=a.split("");Fo.sH(a,2);Fo.TU(a,28);Fo.TR(a,26);Fo.sH(a,1);return a.join("")};
    //
    // Minified names may contain "$", so the name is escaped before being
    // embedded in the pattern.
    var escaped_name = descrambler.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    var descrambler_regexp = new RegExp("var ..=\\{(.*?)\\};function " + escaped_name + "\\([^)]*\\)\\{(.*?)\\}");
    var definition = null;
    while (definition === null) {
        // Re-examine buffered lines first (most recent first), then carry
        // on with the lines that have not been read yet
        var candidate = seen.length > 0 ? seen.pop() : pending.shift();
        if (candidate === undefined) {
            console.error(failure_msg);
            return sig;
        }
        definition = candidate.match(descrambler_regexp);
    }
    var transformations = definition[1];
    var rules = definition[2];

    // Parse the helper object to map available transformations
    var trans = {};
    var transformations_regexp = /(..):function\([^)]*\)\{([^}]*)\}/g;
    var result;
    while ((result = transformations_regexp.exec(transformations)) !== null) {
        var helper_name = result[1];
        var code = result[2];
        if (/\.reverse\(/.test(code)) {
            // a=a.reverse()
            trans[helper_name] = "reverse";
        } else if (/\.splice\(/.test(code)) {
            // a.splice(0,b)
            trans[helper_name] = "splice";
        } else if (/var c=/.test(code)) {
            // var c=a[0];a[0]=a[b%a.length];a[b]=c
            trans[helper_name] = "swap";
        } else {
            console.warn("Couldn't parse unknown youtube video URL signature transformation");
        }
    }

    // Parse descrambling rules, map them to known transformations
    // and apply them on the signature
    var missing = false;
    var chars = sig.split('');
    var rules_regexp = /..\.(..)\([^,]+,(\d+)\)/g;
    while ((result = rules_regexp.exec(rules)) !== null) {
        var kind = trans[result[1]];
        var idx = parseInt(result[2], 10);
        if (kind === "reverse") {
            chars.reverse();
        } else if (kind === "splice") {
            chars.splice(0, idx);
        } else if (kind === "swap") {
            // Mirrors the helper's own code, including the un-wrapped
            // index on the second assignment
            var tmp = chars[0];
            chars[0] = chars[idx % chars.length];
            chars[idx] = tmp;
        } else {
            console.debug("Couldn't apply unknown youtube video URL signature transformation");
            missing = true;
        }
    }
    if (missing) {
        console.error(failure_msg);
    }
    // Callers concatenate the result into a URL, so hand back a string
    return chars.join('');
}
// Parse the stream map and pick our video URL.
//
// url_map: comma-separated list of streams, each a "key=value&key=value"
//          string whose "url" value is URI-encoded
// fmt:     wanted itag (string or number); when falsy the first stream
//          that has a URL is taken
// js_url:  optional URL of the player script, needed to descramble
//          signatures delivered in the "s" parameter
//
// Returns the decoded stream URL (with "&signature=..." appended when a
// signature was found), or null when no stream matches.
function pick_url (url_map, fmt, js_url) {
    var url_map_regexp = /[^,]+/g;
    var result;
    while ((result = url_map_regexp.exec(url_map)) !== null) {
        var stream = result[0];
        // Parameter names are anchored on the start of the stream or on a
        // "&" separator, so that e.g. "fps=30" is never mistaken for the
        // "s" (scrambled signature) parameter.
        var itag = stream.match(/(?:^|&)itag=(\d+)/);
        // Apparently formats are listed in quality order,
        // so we can afford to simply take the first one
        if (fmt && itag && parseInt(itag[1], 10) !== parseInt(fmt, 10)) {
            continue;
        }
        var url = stream.match(/(?:^|&)url=([^&,]+)/);
        if (!url) {
            continue;
        }
        var path = decodeURIComponent(url[1]);

        var sig = null;
        var plain_sig = stream.match(/(?:^|&)sig=([^&,]+)/);
        if (plain_sig) {
            sig = plain_sig[1];
        } else {
            // Scrambled signature
            var scrambled_sig = stream.match(/(?:^|&)s=([^&,]+)/);
            if (scrambled_sig) {
                sig = scrambled_sig[1];
                console.debug("Found " + sig.length + "-character scrambled signature for youtube video URL, attempting to descramble ...");
                if (js_url) {
                    sig = js_descramble(sig, js_url);
                } else {
                    // Without the player script the signature is passed
                    // along as-is
                    console.error("Couldn't process youtube video URL, please check for updates to this script");
                }
            }
        }
        return sig ? path + "&signature=" + sig : path;
    }
    return null;
}
// Probe function: tells whether `url` is a youtube URL that parse() knows
// how to handle.
//
// Returns true when the URL uses http or https, points at a youtube site
// and designates a watch page, the get_video_info API or one of the
// player/embed URLs; false otherwise.
function probe (url) {
    var access = url.match(/^(.*?):\/\//);
    if (!access || (access[1] != "http" && access[1] != "https")) {
        return false;
    }
    // Everything after "scheme://". The site check has to look at this
    // part: the first characters of the full URL are always the scheme.
    var location = url.slice(access[0].length);
    if (!/youtube/.test(location.slice(0, 8))) {
        // FIXME we should be using a builtin list of known youtube websites
        // like "fr.youtube.com", "uk.youtube.com" etc
        if (!/\.youtube\.com/.test(location)) {
            return false;
        }
    }
    return /\/watch\?/.test(url)              // the html page
        || /\/get_video_info\?/.test(url)     // info API
        || /\/v\//.test(url)                  // video in swf player
        || /\/embed\//.test(url)              // embedded player iframe
        || /\/player2\.swf/.test(url);        // another player url
}
// Decode XML character references in `str` and return the decoded string.
//
// Handles the five predefined XML entities (&amp; &lt; &gt; &quot; &apos;)
// as well as decimal (&#39;) and hexadecimal (&#x27;) character references.
// All references are decoded in a single pass, so the output of one
// replacement is never decoded again: "&amp;lt;" becomes "&lt;", not "<".
// Call the function twice to undo double encoding.
function resolve_xml_special_chars (str) {
    var named = { amp: "&", lt: "<", gt: ">", quot: "\"", apos: "'" };
    return str.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, function (entity, body) {
        if (body.charAt(0) !== "#") {
            return named[body];
        }
        var code = body.charAt(1) === "x"
            ? parseInt(body.slice(2), 16)
            : parseInt(body.slice(1), 10);
        if (code > 0x10FFFF) {
            // Not a valid code point: leave the text untouched
            return entity;
        }
        if (code <= 0xFFFF) {
            return String.fromCharCode(code);
        }
        // Outside the BMP: emit a surrogate pair
        code -= 0x10000;
        return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
    });
}
// Helper: synchronously download `url`. Returns the response body, or null
// when the request fails or is not answered with HTTP 200.
function fetch_page_sync (url) {
    try {
        var req = new XMLHttpRequest();
        req.open('GET', url, false);
        req.send();
        if (req.readyState === 4 && req.status === 200) {
            return req.responseText;
        }
    } catch (e) {
        // A synchronous send() throws on network errors; the caller
        // reports the failure, this only keeps the cause visible
        console.debug("Request for " + url + " failed: " + e);
    }
    return null;
}

// Helper: first capture group of `regexp` in `text`, or null without a match.
function match_group (text, regexp) {
    var found = text.match(regexp);
    return found ? found[1] : null;
}

// Helper: extract the video information from the HTML of a /watch? page.
function parse_watch_page (url, page) {
    var lines = page.split(/\n/);
    // fmt is the format of the video
    // (cf. http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs)
    var fmt = get_url_param(url, "fmt");
    var path, name, description, arturl;
    var artist = null;
    var js_url = null;

    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        var content;

        // Try to find the video's title
        if (/<meta name="title"/.test(line)) {
            content = match_group(line, /content="(.*?)"/);
            if (content !== null) {
                // Strings are immutable: keep the returned value.
                // Decoded twice because the page double encodes it.
                name = resolve_xml_special_chars(resolve_xml_special_chars(content));
            }
        }
        if (/<meta name="description"/.test(line)) {
            content = match_group(line, /content="(.*?)"/);
            if (content !== null) {
                // don't ask me why they double encode ...
                description = resolve_xml_special_chars(resolve_xml_special_chars(content));
            }
        }
        if (/<meta property="og:image"/.test(line)) {
            content = match_group(line, /content="(.*?)"/);
            if (content !== null) {
                arturl = content;
            }
        }
        // This is not available in the video parameters (whereas it
        // is given by the get_video_info API as the "author" field)
        if (!artist) {
            artist = match_group(line, /yt-uix-sessionlink yt-user-name[^>]*>([^<]*)<\//);
            if (artist) {
                artist = resolve_xml_special_chars(artist);
            }
        }
        // JSON parameters, also formerly known as "swfConfig",
        // "SWF_ARGS", "swfArgs", "PLAYER_CONFIG", "playerConfig" ...
        if (/ytplayer\.config/.test(line)) {
            var raw_js_url = match_group(line, /"js": "(.*?)"/);
            if (raw_js_url) {
                // Every "\/" of the JSON string has to be unescaped, not
                // just the first one; then a protocol-relative URL gets
                // the preferred scheme
                js_url = raw_js_url.replace(/\\\//g, "/")
                                   .replace(/^\/\//, preferences.access + "://");
            } else {
                js_url = raw_js_url;
            }
            if (!fmt) {
                var fmt_list = match_group(line, /"fmt_list": "(.*?)"/);
                if (fmt_list) {
                    fmt = get_fmt(fmt_list.replace(/\\\//g, "/"));
                }
            }
            var url_map = match_group(line, /"url_encoded_fmt_stream_map": "(.*?)"/);
            if (url_map) {
                // FIXME: do this properly
                url_map = url_map.replace(/\\u0026/g, "&");
                path = pick_url(url_map, fmt, js_url);
            }
            if (!path) {
                // If this is a live stream, the URL map will be empty
                // and we get the URL from this field instead
                var hlsvp = match_group(line, /"hlsvp": "(.*?)"/);
                if (hlsvp) {
                    path = hlsvp.replace(/\\\//g, "/");
                }
            }
            // There is also another version of the parameters, encoded
            // differently, as an HTML attribute of an <object> or <embed>
            // tag; but we don't need it now
        }
    }

    if (!path) {
        var video_id = get_url_param(url, "v");
        if (video_id) {
            var format = fmt ? "&fmt=" + fmt : "";
            // Without "el=detailpage", /get_video_info fails for many
            // music videos with errors about copyrighted content being
            // "restricted from playback on certain sites"
            path = "http://www.youtube.com/get_video_info?video_id=" + video_id + format + "&el=detailpage";
            console.error("Couldn't extract video URL, falling back to alternate youtube API");
        }
    }
    if (!path) {
        console.error("Couldn't extract youtube video URL, please check for updates to this script");
        return {};
    }
    if (!arturl) {
        arturl = get_arturl(url);
    }
    return { "path": path, "name": name, "description": description, "artist": artist, "arturl": arturl };
}

// Helper: extract the video information from a /get_video_info? response.
function parse_video_info (url, page) {
    var line = page.split(/\n/)[0]; // data is on one line only
    var path = null;
    var fmt = get_url_param(url, "fmt");
    if (!fmt) {
        var fmt_list = match_group(line, /&fmt_list=([^&]*)/);
        if (fmt_list) {
            fmt = get_fmt(decodeURIComponent(fmt_list));
        }
    }
    var url_map = match_group(line, /&url_encoded_fmt_stream_map=([^&]*)/);
    if (url_map) {
        path = pick_url(decodeURIComponent(url_map), fmt);
    }
    if (!path) {
        // If this is a live stream, the URL map will be empty
        // and we get the URL from this field instead
        var hlsvp = match_group(line, /&hlsvp=([^&]*)/);
        if (hlsvp) {
            path = decodeURIComponent(hlsvp);
        }
    }
    if (!path) {
        console.error("Couldn't extract youtube video URL, please check for updates to this script");
        return {};
    }
    // Form encoding: "+" stands for a space
    var title = match_group(line, /&title=([^&]*)/);
    if (title) {
        title = decodeURIComponent(title.replace(/\+/g, " "));
    }
    var artist = match_group(line, /&author=([^&]*)/);
    if (artist) {
        artist = decodeURIComponent(artist.replace(/\+/g, " "));
    }
    var arturl = match_group(line, /&thumbnail_url=([^&]*)/);
    if (arturl) {
        arturl = decodeURIComponent(arturl);
    }
    return { path: path, title: title, artist: artist, arturl: arturl };
}

// Helper: turn a flash player / embed URL into the matching watch page URL.
// Works on the URL alone, no download needed.
function parse_player_url (url) {
    var video_id = get_url_param(url, "video_id");
    if (!video_id) {
        video_id = match_group(url, /\/v\/([^?]*)/);
    }
    if (!video_id) {
        video_id = match_group(url, /\/embed\/([^?]*)/);
    }
    if (!video_id) {
        console.error("Couldn't extract youtube video URL");
        return {};
    }
    var fmt = get_url_param(url, "fmt");
    var format = fmt ? "&fmt=" + fmt : "";
    return { "path": "http://www.youtube.com/watch?v=" + video_id + format };
}

// Parse function.
//
// Resolves a youtube URL into a description of the video:
//  - watch page URL ("/watch?"): downloads the page and returns
//    { path, name, description, artist, arturl }; when no stream URL is
//    found in the page, path falls back to the get_video_info API URL
//  - info API URL ("/get_video_info?"): downloads the response and returns
//    { path, title, artist, arturl }
//  - any other (player/embed) URL: returns { path } pointing at the
//    corresponding watch page, without any download
//
// Returns {} when nothing could be extracted, and undefined when the
// download itself failed.
function parse (url) {
    var is_watch_page = /\/watch\?/.test(url);
    var is_video_info = /\/get_video_info\?/.test(url);
    if (!is_watch_page && !is_video_info) {
        // This is the flash player's URL
        return parse_player_url(url);
    }
    var data = fetch_page_sync(url);
    if (data === null) {
        console.error("Couldn't process youtube video URL, please check for updates to this script");
        return;
    }
    return is_watch_page ? parse_watch_page(url, data) : parse_video_info(url, data);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment