Skip to content

Instantly share code, notes, and snippets.

@ryan-blunden
Last active December 20, 2015 16:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryan-blunden/6161903 to your computer and use it in GitHub Desktop.
Save ryan-blunden/6161903 to your computer and use it in GitHub Desktop.
Experimenting with jsdom and screen scraping. Scrape a user's tweets via a simple Node.js Express app.
#!/usr/bin/env node
// jsdom loads the remote page into a DOM that the route handler below queries.
var jsdom = require('jsdom');
// express provides the HTTP routing and the JSON response helper used below.
var express = require('express');
// Single application instance; the route and the listen call attach to it.
var app = express();
/**
 * GET /user/:username/timeline/
 *
 * Loads the Twitter profile page for `:username` with jsdom, injects jQuery,
 * and scrapes the profile details and the visible tweet texts.
 *
 * Responds with JSON of the form:
 *   { user: { name, username, avatar, bio, location }, tweets: [String] }
 * `user` is an empty object and `tweets` an empty array when the page
 * contains no `.tweet-text` nodes.
 *
 * When the page (or the injected jQuery) could not be loaded at all, responds
 * with HTTP 502 and the same shape plus an `error` message, instead of
 * throwing inside the jsdom callback and leaving the request hanging.
 */
app.get('/user/:username/timeline/', function (req, res) {
  var username = req.params.username;
  console.log('Get timeline for: ' + username);

  jsdom.env(
    // Encode the (already URL-decoded) route parameter so it cannot inject
    // extra path segments or a query string into the scraped URL.
    'http://www.twitter.com/' + encodeURIComponent(username),
    ['http://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js'],
    function (errors, window) {
      var response = {
        user: {},
        tweets: []
      };

      if (errors) {
        console.log(errors);
      }

      // Without a window or the injected jQuery there is nothing to scrape.
      if (!window || typeof window.$ !== 'function') {
        if (window) {
          window.close();
        }
        response.error = 'Unable to load timeline for ' + username;
        res.status(502).json(response);
        return;
      }

      try {
        var $ = window.$;
        var tweetNodes = $('.tweet-text');

        // Do we have tweets?
        if (tweetNodes.length > 0) {
          // The avatar element may be absent; fall back to an empty string
          // rather than dereferencing undefined.
          var avatar = $('.profile-picture .avatar')[0];

          response.user = {
            name: $('.fullname .profile-field').text(),
            username: $('.profile-card-inner .username .screen-name').text(),
            avatar: avatar ? avatar.src : '',
            bio: $('.bio-container .profile-field').text(),
            location: $('.location-and-url .location').text()
          };

          tweetNodes.each(function () {
            response.tweets.push($(this).text());
          });
        }
      } finally {
        // Release the DOM and its timers; otherwise every request leaks a window.
        window.close();
      }

      console.log(response.tweets.length + ' tweets returned\n');
      res.json(response);
    }
  );
});
// Start the HTTP server on port 3000. The message is logged from the listen
// callback so it only appears once the server is actually accepting
// connections.
app.listen(3000, function () {
  console.log('Server started...\n');
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment