Skip to content

Instantly share code, notes, and snippets.

@wjmazza
Last active August 29, 2015 14:18
Show Gist options
  • Save wjmazza/20ee3160a151882b82e5 to your computer and use it in GitHub Desktop.
Save wjmazza/20ee3160a151882b82e5 to your computer and use it in GitHub Desktop.
[JavaScript] YouTube URL Parser
/**
 * Validates and parses a YouTube URL.
 *
 * Credits:
 *   https://gist.github.com/jlong/2428561
 *   http://stackoverflow.com/a/22763925/3819103
 *
 * TODO:
 *   Add "attribution_link" in type checking
 *   Add "playlist" in type checking
 *
 * Recognised hosts (optionally prefixed with "m." or "www."):
 *   youtube.com, youtu.be, youtube.googleapis.com
 *
 * Recognised video forms:
 *   /watch?v=ID, /v/ID, /video/ID, /embed/ID and the short link youtu.be/ID.
 *   When both a "v" query parameter and a path ID are present, the query
 *   parameter wins.
 *
 * Recognised channel forms:
 *   /channel/ID and /user/NAME (both are reported as type "channel").
 *
 * Requires a DOM (`document.createElement`) to do the URL parsing.
 *
 * @param {String} url YouTube Video or Channel URL (Playlists NYI). A missing
 *                     "http://" / "https://" prefix is added automatically.
 * @return {Object} Always contains:
 *                    url    - the (possibly prefixed) URL that was parsed
 *                    valid  - true only when a type AND an ID were found
 *                    parsed - host, protocol, hostname, port, pathname,
 *                             search, hash and params (query parameters,
 *                             only filled in for video URLs)
 *                  For recognised URLs additionally:
 *                    type       - "video" or "channel"
 *                    id         - the video/channel ID (may be undefined)
 *                    normalized - "http://youtube.com/<type>/[?v=]<id>",
 *                                 only set when valid; format kept unchanged
 *                                 so existing consumers keep working
 */
function parseYouTubeURL(url) {
    "use strict";

    // In order to use DOM parsing of a URL string, URLs need to be absolute,
    // otherwise they will be relative to the CURRENT location. The full
    // "scheme://" prefix is required so hosts that merely begin with "http"
    // (e.g. "httpfoo.com") still get prefixed.
    if ( !/^https?:\/\//i.test(url) ) {
        url = "http://" + url;
    }

    // Use the native DOM location parser by creating an <a> element.
    // The element is never attached to the document, so it is simply
    // garbage collected once the function returns.
    var parser = document.createElement("a");
    parser.href = url;

    // Setting up the return object
    var parsedUrl = {
        url: url,
        valid: false,
        parsed: {                       // Example: http://example.com:3000/pathname/?search=test#hash
            host: parser.host,          // "example.com:3000"
            protocol: parser.protocol,  // "http:"
            hostname: parser.hostname,  // "example.com"
            port: parser.port,          // "3000"
            pathname: parser.pathname,  // "/pathname/"
            search: parser.search,      // "?search=test"
            hash: parser.hash,          // "#hash"
            params: {}
        }
    };

    // Begin by verifying a valid YouTube domain. Dots are escaped and the
    // pattern is anchored at both ends so that look-alike hosts such as
    // "youtube.com.evil.example" or "youtubexcom.example" are rejected.
    var regexYouTubeHost = /^(?:(?:m|www)\.)?(?:youtube\.com|youtu\.be|youtube\.googleapis\.com)$/i;
    if ( !regexYouTubeHost.test(parser.hostname) ) {
        return parsedUrl;
    }

    var pathname = parser.pathname;
    var regexShortHost = /^(?:(?:m|www)\.)?youtu\.be$/i;
    var regexReservedSegment = /^\/(?:channel|user|playlist|watch|v|video|embed)(?:\/|$)/i;
    var pathIdCheck = null;

    if ( regexShortHost.test(parser.hostname) && !regexReservedSegment.test(pathname) ) {
        // Short links carry the video ID as the first path segment: youtu.be/ID
        parsedUrl.type = "video";
        pathIdCheck = /^\/([^\/]+)/.exec(pathname);
    }
    else {
        // Check the type based on the first part of the file path
        var regexYouTubeType = /^\/(channel|user|playlist|watch|v|video|embed)/i;
        var typeCheck = regexYouTubeType.exec(pathname);
        if ( !typeCheck ) {
            return parsedUrl;
        }

        // The match is case-insensitive, so normalise before comparing
        var typeName = typeCheck[1].toLowerCase();

        // Right now, we only care for videos and channels
        if ( ["watch","v","video","embed"].indexOf(typeName) > -1 ) {
            parsedUrl.type = "video";
            // /v/ID, /video/ID and /embed/ID carry the ID in the path
            pathIdCheck = /^\/(?:v|video|embed)\/([^\/]+)/i.exec(pathname);
        }
        else if ( ["channel","user"].indexOf(typeName) > -1 ) {
            parsedUrl.type = "channel";
            // May be null when the path has no second segment (e.g. "/channel")
            pathIdCheck = /^\/[^\/]*\/([^\/]*)/.exec(pathname);
        }
    }

    // If we got a valid type, get the ID
    if ( parsedUrl.type === "video" ) {
        parser.search.substring(1).split("&").forEach(function (pair) {
            // An empty query string (or "&&") yields empty pairs - skip them
            if ( !pair ) {
                return;
            }
            // Split on the FIRST "=" only, so values containing "=" survive
            var separatorIndex = pair.indexOf("=");
            if ( separatorIndex === -1 ) {
                parsedUrl.parsed.params[pair] = undefined;
            }
            else {
                parsedUrl.parsed.params[pair.substring(0, separatorIndex)] = pair.substring(separatorIndex + 1);
            }
        });
        // Prefer the query parameter, fall back to the ID found in the path
        parsedUrl.id = parsedUrl.parsed.params.v || ( pathIdCheck ? pathIdCheck[1] : undefined );
    }
    else if ( parsedUrl.type === "channel" ) {
        parsedUrl.id = pathIdCheck ? pathIdCheck[1] : undefined;
    }

    // If we got the ID, then we can mark this as valid
    if ( parsedUrl.id ) {
        parsedUrl.valid = true;

        // Create a normalized YouTube URL
        parsedUrl.normalized = "http://youtube.com/" + parsedUrl.type + "/";
        if ( parsedUrl.type === "video" ) {
            parsedUrl.normalized += "?v=";
        }
        parsedUrl.normalized += parsedUrl.id;
    }

    return parsedUrl;
}
@wjmazza
Copy link
Author

wjmazza commented Apr 6, 2015

Example/Performance Test:

  • using lodash 3.6.0
  • for performance testing, URLs array consisted of 100,451 valid YouTube "watch" and "channel" URLs in random order
  • 50% of the "watch" URLs are duplicates (meaning each URL in the video list appears exactly twice)
  • Of the 200 channel URLs, only 4 were unique
// Benchmark: time a full parse -> filter -> group -> de-duplicate pass.
console.time("parser");
var urls = [/* list of YouTube URLs */];
var groups = {};

// Parse every URL, then keep only the results flagged as valid.
var parsedUrls = urls.map(function (rawUrl) {
    return parseYouTubeURL(rawUrl);
});
var validUrls = _.filter(parsedUrls, { valid: true });

// Bucket the valid results by type and reduce each bucket to its distinct IDs.
var groupedUrls = _.groupBy(validUrls, "type");
_.forEach(groupedUrls, function (members, typeName) {
    var ids = _.map(members, function (entry) {
        return entry.id;
    });
    groups[typeName] = _.uniq(ids);
});

console.timeEnd("parser");
console.dir(groups);

Console Output Results:

2015-04-06 14:43:40.825 parser: 1949.579ms
2015-04-06 14:43:40.825 Object
                            channel: Array[4]
                            video: Array[50125]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment