Skip to content

Instantly share code, notes, and snippets.

@noomz
Created April 11, 2012 14:24
Show Gist options
  • Save noomz/2359624 to your computer and use it in GitHub Desktop.
Save noomz/2359624 to your computer and use it in GitHub Desktop.
var request = require('request'),
twitter = require('twitter'),
mysql = require("mysql");
// MySQL connection to the local `earthquake` database; fetch() writes every
// tweet it receives through this client and closes it when paging ends.
var client = mysql.createClient({
    database: 'earthquake',
    user: 'root'
});

// Twitter API client used by fetch() to run the search.
// Declared with `var` so it no longer leaks as an implicit global.
// NOTE(review): no credentials are passed here — presumably they were removed
// before publishing; confirm and supply them before running.
var twit = new twitter({
});
/**
 * Look up a single parameter in a URL query string.
 *
 * The first character of `query` is skipped (it is expected to be the leading
 * "?"), the remainder is split on "&", and the first pair whose name matches
 * `variable` wins.
 *
 * Compared with a naive `split("=")`, this keeps values that themselves
 * contain "=" intact and decodes percent-escapes and "+" (form-encoded space).
 *
 * @param {string} query    Query string including its one-character prefix,
 *                          e.g. "?page=2&max_id=123".
 * @param {string} variable Name of the parameter to find.
 * @returns {string|undefined} The decoded value, or undefined when the
 *          parameter is absent, has no "=value" part, or `query` is not a
 *          string.
 */
function getQueryVariable(query, variable) {
    // Decode one component; fall back to the raw text when the escapes are
    // malformed instead of letting URIError escape to the caller.
    function decode(component) {
        try {
            return decodeURIComponent(component.replace(/\+/g, " "));
        } catch (e) {
            return component;
        }
    }

    if (typeof query !== "string") {
        return undefined;
    }

    var vars = query.substr(1).split("&");
    for (var i = 0; i < vars.length; i++) {
        // Split on the FIRST "=" only so that "a=b=c" yields the value "b=c".
        var separator = vars[i].indexOf("=");
        var rawName = separator === -1 ? vars[i] : vars[i].substring(0, separator);
        if (rawName === variable || decode(rawName) === variable) {
            return separator === -1 ? undefined : decode(vars[i].substring(separator + 1));
        }
    }
    return undefined;
}
/**
 * Left-pad a value with "0" characters up to a minimum width.
 *
 * @param {*} val        Value to pad; it is converted with String().
 * @param {number} [len] Minimum width; any falsy value (including 0) means 2.
 * @returns {string} The padded string, unchanged if it is already wide enough.
 */
function pad(val, len) {
    var text = String(val);
    var width = len || 2;
    var missing = Math.ceil(width - text.length);
    if (missing <= 0) {
        return text;
    }
    // An array of (missing + 1) empty slots joined by "0" gives `missing` zeros.
    return new Array(missing + 1).join("0") + text;
}
// Paging state shared between successive fetch() calls.

// Result page to request; 0 means "first request, send no page parameter".
var page = 0;

// Upper bound on tweet ids for the next request; "" means "not set yet".
var max_id = "";

// id_str of the most recently processed tweet.
var last_id = "";

// Options object handed to the Twitter search call; fetch() adds `page` and
// `max_id` to it as paging progresses.
var params = {
    "include_entities": true,
    rpp: 100
};
/**
 * Run one search request against Twitter, store every returned tweet in the
 * `tweet2` table, and schedule the next request.
 *
 * Paging state lives in the module-level `page`, `max_id`, `last_id` and
 * `params` variables. After each batch the function restarts from page 1 with
 * `max_id` set to the id of the last tweet it processed, waits 10 seconds and
 * calls itself again. When a response carries no `next_page`, the MySQL
 * client is closed and no further request is scheduled.
 *
 * Takes no arguments and returns nothing; all results are side effects
 * (database rows and console output).
 */
var fetch = function () {
    var RETRY_DELAY_MS = 10000;
    var search_query = "earthquake OR tsunami OR น้ำลด OR สึนามิ OR แผ่นดินไหว since:2011-04-10";

    // Rewind paging to the last processed tweet and try again after a pause.
    function retryFromLastId() {
        params.max_id = max_id = last_id;
        params.page = page = 1;
        setTimeout(function () { fetch(); }, RETRY_DELAY_MS);
    }

    // Render a tweet's created_at as "YYYY-MM-DD HH:MM:SS" in local time.
    function formatTimestamp(created_at) {
        var date = new Date(created_at);
        // getFullYear() replaces the deprecated getYear() + 1900.
        return "" + date.getFullYear() + "-" + pad(date.getMonth() + 1, 2) + "-" + pad(date.getDate(), 2) +
            " " + pad(date.getHours(), 2) + ":" + pad(date.getMinutes(), 2) + ":" + pad(date.getSeconds(), 2);
    }

    // Insert one tweet (or leave an existing row untouched) and report the outcome.
    function storeTweet(tweet) {
        var geo = null,
            latitude = 0,
            longitude = 0;

        // Guard against both null and missing geo/coordinates; a bare
        // `!== null` check throws when the field is undefined.
        if (tweet.geo && tweet.geo.coordinates) {
            latitude = parseFloat(tweet.geo.coordinates[0]);
            longitude = parseFloat(tweet.geo.coordinates[1]);
            geo = 1;
        }

        // Tweet ids exceed the range a JavaScript Number can represent
        // exactly, so prefer the string form and only fall back to the
        // numeric id when id_str is absent.
        var id = tweet.id_str !== undefined ? tweet.id_str : tweet.id;

        client.query(
            "INSERT INTO tweet2 (created_at,from_user,from_user_name,geo,latitude,longitude,id,iso_language_code,text,original_data) VALUES (?,?,?,?,?,?,?,?,?,?) ON DUPLICATE KEY UPDATE id=id",
            [formatTimestamp(tweet.created_at), tweet.from_user, tweet.from_user_name, geo, latitude, longitude, id, tweet.iso_language_code, tweet.text, JSON.stringify(tweet)],
            function (err) {
                // Report success only once the database has confirmed it.
                if (err) {
                    console.log("-- Failed to insert [" + tweet.from_user_name + "] " + tweet.text);
                    console.log(err);
                    return;
                }
                console.log("-- Inserted [" + tweet.from_user_name + "] " + tweet.text);
            }
        );
    }

    if (page !== 0) {
        params.page = page;
    }
    if (max_id !== "") {
        params.max_id = max_id;
    }

    try {
        console.log(params);
        twit.search(search_query, params, function (data) {
            if (page === 0) {
                console.log("Start fetching...\n");
            }
            else {
                console.log("Start fetching page: " + page + "...\n");
            }

            // NOTE(review): page is only ever set to 0 or 1 by this script,
            // so this limit looks unreachable — confirm before relying on it.
            // If it does trigger, close the connection so the process can exit.
            if (page > 15) {
                client.end();
                return;
            }

            var results = data ? data.results : undefined;

            if (results && results.length === 0) {
                console.log("No results.\n");
                retryFromLastId();
                return;
            }

            if (results == undefined) {
                // No result list at all: dump whatever came back for inspection.
                console.log(data);
            }
            else {
                console.log("Found " + results.length + " results.");
                // Indexed loop: for-in on an array leaked a global `i`.
                for (var index = 0; index < results.length; index++) {
                    storeTweet(results[index]);
                    last_id = results[index].id_str;
                }
            }

            if (data && data.next_page) {
                // The page/max_id values embedded in next_page are deliberately
                // ignored: paging always restarts from the last processed id.
                retryFromLastId();
                console.log("Waiting ... Last id = " + max_id);
                return;
            }

            client.end();
        });
    }
    catch (e) {
        // Only synchronous failures of the search call land here; log them
        // instead of retrying silently.
        console.log(e);
        retryFromLastId();
        return;
    }
};
// Start the first fetch; later rounds are scheduled by fetch() itself via setTimeout.
fetch();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment