Skip to content

Instantly share code, notes, and snippets.

@bellbind
Created February 29, 2012 07:49
Show Gist options
  • Save bellbind/1938958 to your computer and use it in GitHub Desktop.
Save bellbind/1938958 to your computer and use it in GitHub Desktop.
[nodejs][jsdom][xml][gdata]search and parse data from youtube by node.js
// parsegdata.js: read a saved YouTube search GData (Atom) feed and print
// selected fields of its first entry, using jsdom as the XML DOM.
// [usage]
//   npm install jsdom
//   node youtubesearch.js > result.xml
//   node parsegdata.js
var fs = require("fs");
var jsdom = require("jsdom");

// Print the text content of the first node under `scope` that matches
// the CSS `selector`.
function printText(scope, selector) {
  console.log(scope.querySelector(selector).textContent);
}

// Print the `href` property of the first node under `scope` that matches
// the CSS `selector`.
function printHref(scope, selector) {
  console.log(scope.querySelector(selector).href);
}

// This call is made only for its side effect: per the original author it
// is the "magic" that makes querySelector/querySelectorAll available.
jsdom.jsdom("", null, {features: {QuerySelector: true}});

// Read with an explicit encoding so we get a string rather than a Buffer.
var feedXml = fs.readFileSync("result.xml", "utf-8");

// [reference] XML namespaces that appear in a YouTube feed.
// Only referenced from the note near the media:group lookup below.
var ns = {
  atom: "http://www.w3.org/2005/Atom",
  media: "http://search.yahoo.com/mrss/",
  openSearch: "http://a9.com/-/spec/opensearchrss/1.0/",
  gd: "http://schemas.google.com/g/2005",
  gml: "http://www.opengis.net/gml",
  yt: "http://gdata.youtube.com/schemas/2007",
  georss: "http://www.georss.org/georss",
};

// Build a DOM level 3 core document with browser-style augmentation and
// load the feed into it through innerHTML.
var dom = jsdom.browserAugmentation(jsdom.dom.level3.core, {});
var feedDoc = new dom.Document();
feedDoc.innerHTML = feedXml;

// Feed layout reference:
// http://code.google.com/apis/youtube/developers_guide_protocol.html

// Title of the feed itself.
printText(feedDoc, "feed>title");

// Only the first entry of the result list is inspected.
var firstEntry = feedDoc.querySelectorAll("feed>entry")[0];

printText(firstEntry, "id");       // entry id
printText(firstEntry, "title");    // entry title
printText(firstEntry, "content");  // description
// Publication date, parsed into a Date before printing.
console.log(new Date(firstEntry.querySelector("published").textContent));
printText(firstEntry, "author>name");  // author name

// URL of the HTML page for the video.
printHref(firstEntry, "link[rel='alternate'][type='text/html']");
// GData URL (== id): the root of that content XML is just this atom entry.
printHref(firstEntry, "link[rel='self']");

// Miscellaneous media info for the entry. The prefixed tag name is used
// because getElementsByTagNameNS(ns.media, "group") did not work here
// (the original author suspected a jsdom bug).
var mediaGroup = firstEntry.getElementsByTagName("media:group")[0];

// URL of the Flash (swf) video content.
var flashContent = mediaGroup.querySelector(
  "[type='application/x-shockwave-flash'][medium='video']");
console.log(flashContent.getAttribute("url"));

// One line per thumbnail: "<url> <width>x<height> at <time>".
var thumbnails = mediaGroup.getElementsByTagName("media:thumbnail");
for (var idx = 0; idx < thumbnails.length; idx++) {
  var node = thumbnails[idx];
  var height = node.getAttribute("height");
  var width = node.getAttribute("width");
  var time = node.getAttribute("time");
  var location = node.getAttribute("url");
  var size = width + "x" + height;
  console.log(location + " " + size + " at " + time);
}
// youtubesearch.js: query the YouTube GData video search feed and print
// the raw response body to stdout.
// The output is meant to be redirected to a file and parsed afterwards;
// for parsing the result, see parsegdata.js.
// NOTE(review): this GData endpoint is believed to have been retired by
// Google -- confirm it still answers before relying on this script.
var http = require("http");
var url = require("url");

var keywords = "the beatles";

// see: http://code.google.com/apis/youtube/developers_guide_protocol.html
// url.format builds the query string from the `query` object, so the
// keywords do not have to be escaped by hand.
var searchUri = url.format({
  protocol: "http", hostname: "gdata.youtube.com",
  pathname: "/feeds/api/videos", query: {
    vq: keywords,
    orderby: "published",
  }});
var pSearchUri = url.parse(searchUri);

// Report a failed transfer on stderr (stdout is reserved for the feed)
// and exit with a non-zero status.
function failRequest(err) {
  console.error("youtube search request failed: " + err.message);
  process.exit(1);
}

var req = http.request({
  method: "GET",
  host: pSearchUri.host, path: pSearchUri.path,
}, function (res) {
  // Decode as UTF-8 in the stream itself. Appending raw Buffer chunks to
  // a string decodes every chunk on its own, which corrupts a multi-byte
  // character that happens to straddle a chunk boundary.
  res.setEncoding("utf8");
  if (res.statusCode !== 200) {
    // The body is still printed below, but it is probably not a feed.
    console.error("unexpected HTTP status: " + res.statusCode);
  }
  var body = "";
  res.on("data", function (chunk) {
    body += chunk;
  });
  res.on("error", failRequest);
  res.on("end", function () {
    console.log(body);
  });
});
// Without a listener, a connection/DNS failure surfaces as an uncaught
// 'error' event.
req.on("error", failRequest);
req.end();
@bellbind
Copy link
Author

bellbind commented Mar 1, 2012

This is an example of using jsdom as an XML DOM processor.
The processing style is similar to the browser's DOM API.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment