Last active
September 5, 2017 15:24
-
-
Save JGarrido/7567954 to your computer and use it in GitHub Desktop.
Nodetube: A basic, updated version of the tutorial at http://net.tutsplus.com/tutorials/javascript-ajax/how-to-scrape-web-pages-with-node-js-and-jquery/. This gets you about halfway through Step 3 of the tutorial, and is updated to reflect the change in markup structure of youtube.com (this is parsing the 'non-authenticated' markup, which is dif…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Module dependencies. | |
*/ | |
var express = require('express') | |
, jsdom = require('jsdom') | |
, request = require('request') | |
, url = require('url') | |
, http = require('http') | |
, path = require('path') | |
; | |
// Create the Express application.
var app = express();

// Settings shared by every environment.
app.set('port', process.env.PORT || 3000);
app.set('views', __dirname + '/views');
app.set('view engine', 'jade');

// Middleware stack, registered in the order requests pass through it.
// The router is mounted before the static handler, so routes defined on
// `app` are tried before files under ./public.
app.use(express.favicon());
app.use(express.logger('dev'));
app.use(express.urlencoded());
app.use(express.methodOverride());
app.use(app.router);
app.use(express.static(path.join(__dirname, 'public')));

// Only install the error handler middleware when running in development.
if (app.get('env') === 'development') {
  app.use(express.errorHandler());
}
/**
 * GET /nodetube
 *
 * Fetches http://youtube.com, loads the returned HTML into jsdom with jQuery
 * attached, collects the `data-context-item-*` attributes of every
 * `.context-data-item` element into an array, logs that array to the console
 * and answers the client with "Done.".
 *
 * If the upstream request fails or returns a non-200 status, the client gets
 * a 502 "Request error." response; if jsdom cannot provide a window with
 * jQuery, the client gets a 500 "Parse error." response.
 */
app.get('/nodetube', function(req, res){
  // Tell the request that we want to fetch youtube.com and send the results
  // to a callback function.
  request({uri: 'http://youtube.com'}, function(err, response, body){
    // `response` is undefined when the request itself failed, so `err` has to
    // be tested first; either condition alone is enough to give up.
    if (err || !response || response.statusCode !== 200) {
      console.log('Request error.');
      res.statusCode = 502;
      res.end('Request error.');
      return;
    }

    // Send the body param as the HTML code we will parse in jsdom; also tell
    // jsdom to attach jQuery, loaded from code.jquery.com.
    jsdom.env({
      html: body,
      scripts: ['http://code.jquery.com/jquery-1.10.2.min.js'],
      done: function(errors, window){
        // Without a window, or without jQuery on it, nothing below can work.
        // NOTE(review): `errors` alone is not treated as fatal because it may
        // also report problems in the page's own scripts — confirm against the
        // jsdom version in use.
        if (!window || !window.jQuery) {
          console.log('Parse error.', errors);
          if (window) { window.close(); }
          res.statusCode = 500;
          res.end('Parse error.');
          return;
        }

        // Use jQuery just as in a regular HTML page.
        var $ = window.jQuery;
        var items = [];

        $('body').find('.context-data-item').each(function(i, item){
          var $item = $(item);
          items.push({
            time: $item.attr('data-context-item-time'),
            type: $item.attr('data-context-item-type'),
            id: $item.attr('data-context-item-id'),
            views: $item.attr('data-context-item-views'),
            title: $item.attr('data-context-item-title'),
            user: $item.attr('data-context-item-user')
          });
        });

        console.log( items );

        // Release the jsdom window now that the data has been extracted.
        window.close();

        res.end( "Done." );
      }
    });
  });
});
// Wrap the Express app in a plain HTTP server and start listening on the
// port stored in the app's 'port' setting.
var server = http.createServer(app);

server.listen(app.get('port'), function onListening(){
  console.log("Express server listening on port " + app.get('port'));
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment