Created
February 29, 2012 07:49
-
-
Save bellbind/1938958 to your computer and use it in GitHub Desktop.
[nodejs][jsdom][xml][gdata]search and parse data from youtube by node.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Parse a saved YouTube search GData (Atom) feed with jsdom and print
// selected fields of the first result entry.
// [usage]
//   npm install jsdom
//   node youtubesearch.js > result.xml
//   node parsegdata.js
// NOTE(review): this relies on the jsdom API as it existed when the script
// was written (jsdom.jsdom, browserAugmentation, dom.level3.core);
// presumably it needs a matching old jsdom release - confirm before upgrading.
var fs = require("fs");
var jsdom = require("jsdom");

// magic for applying querySelector/querySelectorAll
jsdom.jsdom("", null, {features: {QuerySelector: true}});

var xml = fs.readFileSync("result.xml", "utf-8"); // as string, not buffer

// [reference] xmlns list for youtube feed (not used by the code below)
var ns = {
    atom: "http://www.w3.org/2005/Atom",
    media: "http://search.yahoo.com/mrss/",
    openSearch: "http://a9.com/-/spec/opensearchrss/1.0/",
    gd: "http://schemas.google.com/g/2005",
    gml: "http://www.opengis.net/gml",
    yt: "http://gdata.youtube.com/schemas/2007",
    georss: "http://www.georss.org/georss",
};

// Build a level3 core document and load the feed markup into it.
var browser = jsdom.browserAugmentation(jsdom.dom.level3.core, {});
var doc = new browser.Document();
doc.innerHTML = xml;

/**
 * Print the id, title, description, date, author, links, swf url and
 * thumbnails of a single feed entry to stdout.
 *
 * @param {Element} entry - an <entry> element taken from the feed
 */
function printEntry(entry) {
    // entry id
    console.log(entry.querySelector("id").textContent);
    // entry title
    console.log(entry.querySelector("title").textContent);
    // description
    console.log(entry.querySelector("content").textContent);
    // date
    console.log(new Date(entry.querySelector("published").textContent));
    // author name
    console.log(entry.querySelector("author>name").textContent);
    // html page url for the video
    console.log(
        entry.querySelector("link[rel='alternate'][type='text/html']").href);
    // gdata url (== id): a root of content xml is just this atom entry
    console.log(entry.querySelector("link[rel='self']").href);

    // misc info for the entry
    // The prefixed tag name is used because
    // entry.getElementsByTagNameNS(ns.media, "group") did not work here
    // (jsdom bug?).
    var group = entry.getElementsByTagName("media:group")[0];
    if (!group) {
        // Without a media:group there is no swf url or thumbnail to print.
        console.error("entry has no media:group element");
        return;
    }

    // swf url
    var swf = group.querySelector(
        "[type='application/x-shockwave-flash'][medium='video']");
    if (swf) {
        console.log(swf.getAttribute("url"));
    } else {
        // Report the missing element instead of throwing on null.
        console.error("entry has no flash video content");
    }

    // thumbnails
    var thumbs = group.getElementsByTagName("media:thumbnail");
    for (var i = 0; i < thumbs.length; i++) {
        var thumb = thumbs[i];
        var tHeight = thumb.getAttribute("height");
        var tWidth = thumb.getAttribute("width");
        var tTime = thumb.getAttribute("time");
        var tUrl = thumb.getAttribute("url");
        console.log(tUrl + " " + tWidth + "x" + tHeight + " at " + tTime);
    }
}

// parse feed
// see: http://code.google.com/apis/youtube/developers_guide_protocol.html
// gdata title
console.log(doc.querySelector("feed>title").textContent);

// get as result entry list
var entries = doc.querySelectorAll("feed>entry");
if (entries.length === 0) {
    // A search without hits yields a feed with no entries; say so
    // instead of failing on entries[0] being undefined.
    console.error("no entries found in result.xml");
} else {
    // pickup 1 entry
    printEntry(entries[0]);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Accessing youtube search gdata.
// This code just prints the downloaded search results to stdout;
// for parsing the result, see parsegdata.js
// NOTE(review): the gdata.youtube.com endpoint is presumably no longer
// served - confirm before relying on this script.
var http = require("http");
var url = require("url");

var keywords = "the beatles";

// see: http://code.google.com/apis/youtube/developers_guide_protocol.html
var searchUri = url.format({
    protocol: "http", hostname: "gdata.youtube.com",
    pathname: "/feeds/api/videos", query: {
        vq: keywords,
        orderby: "published",
    }});
var pSearchUri = url.parse(searchUri);

var req = http.request({
    method: "GET",
    host: pSearchUri.host, path: pSearchUri.path,
}, function (res) {
    // Decode as UTF-8 in the stream so a multi-byte character split across
    // two chunks is not corrupted by per-chunk Buffer-to-string conversion.
    res.setEncoding("utf8");
    if (res.statusCode !== 200) {
        // Warn on stderr so that stdout (usually redirected to result.xml)
        // still receives only the response body.
        console.error("unexpected HTTP status: " + res.statusCode);
    }
    var body = "";
    res.on("data", function (chunk) {
        body += chunk;
    });
    res.on("end", function () {
        console.log(body);
    });
});
// Without an "error" listener a connection failure is thrown as an
// uncaught exception.
req.on("error", function (err) {
    console.error("search request failed: " + err.message);
});
req.end();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is an example of using jsdom as an XML DOM processor.
The processing style is similar to the browser's DOM API.