Skip to content

Instantly share code, notes, and snippets.

@michaelbrooks
Last active August 29, 2015 14:00
Show Gist options
  • Save michaelbrooks/e45f348f3bd6fd426ebd to your computer and use it in GitHub Desktop.
Save michaelbrooks/e45f348f3bd6fd426ebd to your computer and use it in GitHub Desktop.
JavaScript that scrapes tweet data from a page on twitter.com
(function () {
//Extract latitude and longitude if possible.
//Matches a "<number>,<number>" pair where each number is an optionally
//negative integer or decimal. The decimal point is escaped so that it only
//matches a literal "." (an unescaped "." would accept any character, e.g. "5x").
var geo_reg = /(-?\d+(?:\.\d+)?),\s*(-?\d+(?:\.\d+)?)/;
//Reads the href of the tweet's geo link and pulls the coordinate pair out of it.
//  tweet: object exposing find(selector).attr(name), e.g. a jQuery-wrapped tweet.
//  returns: {lat, lon} holding the matched substrings (strings, not numbers),
//           or {lat: undefined, lon: undefined} when there is no link or no match.
var get_geo = function (tweet) {
    var href = tweet.find('.tweet-geo-text a').attr('href');
    if (href) {
        var decoded;
        try {
            decoded = decodeURIComponent(href);
        } catch (err) {
            //A malformed %-sequence makes decodeURIComponent throw; fall back to
            //the raw href so one bad link does not abort the whole scrape.
            decoded = href;
        }
        var match = geo_reg.exec(decoded);
        if (match) {
            return {
                'lat': match[1],
                'lon': match[2]
            };
        }
    }
    return {
        'lat': undefined,
        'lon': undefined
    };
};
//Convert an array of values into a single line of CSV.
//Every cell is wrapped in double quotes; embedded double quotes are doubled
//exactly once, and line breaks / tabs are flattened to a single space so a
//cell can never span lines.
//  row: array of cell values (strings, numbers, null or undefined).
//  returns: the comma-joined line, without a trailing newline.
var csvfy = function (row) {
    return row.map(function (cell) {
        //Only null/undefined count as "missing"; a numeric 0 is real data
        //and must not be turned into an empty cell.
        if (cell === null || cell === undefined) {
            return '""';
        }
        var text = String(cell)
            .replace(/"/g, '""')
            .replace(/\r\n|\r|\n|\t/g, ' ');
        return '"' + text + '"';
    }).join(',');
};
//Walk every .tweet element on the page and turn it into a plain record.
//The result stays a jQuery collection so it can be iterated with .each().
var tweets = $('.tweet').map(function (index, element) {
    var node = $(element);
    var coords = get_geo(node);
    var record = {};
    record.id = node.data('tweet-id');
    record.user_id = node.find('[data-user-id]').data('user-id');
    record.username = node.find('.username').text();
    record.text = node.find('.tweet-text').text();
    record.full_name = node.find('.fullname').text();
    record.created_at = node.find('[data-time]').data('time');
    //Both coordinates are undefined when the tweet has no usable geo link.
    record.latitude = coords.lat;
    record.longitude = coords.lon;
    return record;
});
//Build the CSV file: one header line followed by one line per tweet.
//The column names double as the property names of each tweet record, so the
//header and the row cells cannot drift out of order.
var columns = ["id", "user_id", "username", "full_name", "created_at", "latitude", "longitude", "text"];
var lines = [csvfy(columns)];
tweets.each(function (index, tweet) {
    lines.push(csvfy(columns.map(function (column) {
        return tweet[column];
    })));
});
//Add BOM at the start to make Excel read UTF-8
var csv = "\ufeff" + lines.join("\n");
//Download it.
//Current browsers block top-level navigation to data: URLs, so trigger the
//download through a temporary <a download> link instead; plain navigation is
//kept only as a fallback where the download attribute is unsupported.
var uri = "data:text/csv;charset=utf-8," + encodeURIComponent(csv);
var link = document.createElement('a');
if ('download' in link) {
    link.href = uri;
    link.download = 'tweets.csv';
    link.style.display = 'none';
    //The link is attached to the document before clicking because some
    //browsers ignore click() on detached elements.
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
} else {
    window.location.href = uri;
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment