Skip to content

Instantly share code, notes, and snippets.

@andrewxhill
Created September 6, 2014 18:28
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewxhill/f906c9453ca35d93c998 to your computer and use it in GitHub Desktop.
Save andrewxhill/f906c9453ca35d93c998 to your computer and use it in GitHub Desktop.
Node.js app to harvest a georss feed into CartoDB
var express = require('express');
var app = express();
var http = require('http'),
https = require('https'),
querystring = require('querystring'),
Step = require('step'),
fs = require('fs'),
// jQuery = require('jquery');
parseString = require('xml2js').parseString;
var config = {}
try {
var config = require('./config')
} catch(e) { }
String.prototype.format = (function (i, safe, arg) {
function format() {
var str = this,
len = arguments.length + 1;
for (i = 0; i < len; arg = arguments[i++]) {
safe = typeof arg === 'object' ? JSON.stringify(arg) : arg;
str = str.replace(RegExp('\\{' + (i - 1) + '\\}', 'g'), safe);
}
return str;
}
//format.native = String.prototype.format;
return format;
})();
function sql_escape (str) {
return str.replace(/[\0\x08\x09\x1a\n\r"'\\\%]/g, function (char) {
switch (char) {
case "\0":
return "\\0";
case "\x08":
return "\\b";
case "\x09":
return "\\t";
case "\x1a":
return "\\z";
case "\n":
return "\\n";
case "\r":
return "\\r";
case "'":
return "''";
case "\"":
case "\\":
case "%":
return "\\"+char; // prepends a backslash to backslash, percent,
// and double/single quotes
}
});
}
function callCartoDB(sql) {
var post_data = querystring.stringify({
'api_key' : config.cartodb.api_key,
'q': sql
});
var post_options = {
host: config.cartodb.username+'.cartodb.com',
port: '80',
path: '/api/v2/sql',
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
'Content-Length': post_data.length
}
};
var post_req = http.request(post_options, function(res) {
res.setEncoding('utf8');
res.on('data', function (chunk) {
process.stdout.write(".");
});
});
// console.log(post_data)
post_req.write(post_data);
post_req.end();
}
function get_category_label(entry){
var label = null;
if (entry.category != undefined){
if (entry.category[0].$ != undefined){
if (entry.category[0].$.label != undefined){
label = entry.category[0].$.label;
}
}
}
return label
}
function get_category_term(entry){
var term = null;
if (entry.category != undefined){
if (entry.category[0].$ != undefined){
if (entry.category[0].$.term != undefined){
term = entry.category[0].$.term;
}
}
}
return term
}
function get_content(entry){
var content = null;
// console.log(entry.content[0]['_'])
if (entry.content != undefined){
if ("_" in entry.content[0]){
content = entry.content[0]['_'].replace(/\t/g, '').replace(/\n/g,'');
}
}
return content
}
function get_point(entry){
var point = null;
if ("georss:point" in entry){
point = entry["georss:point"][0].split(' ');
point = 'CDB_LatLng({0}, {1})'.format(point[0], point[1])
}
return point
}
function toSqlTime(val){
val = val.trim();
val = val == '' ? null : "'{0}'::timestamp".format(val);
return val
}
function toSqlString(val){
val = val.trim();
val = val == '' ? null : "'{0}'".format(val);
return val
}
var options = {
host: 'www.portlandonline.com',
path: '/scripts/911incidents.cfm'
};
var base_sql = "WITH n(id, title, updated, published, summary, label, term, the_geom, content) AS (VALUES {0}) INSERT INTO portland_911_feed (id, title, updated, published, summary, label, term, the_geom, content) SELECT n.id, n.title, n.updated, n.published, n.summary, n.label, n.term, n.the_geom, n.content FROM n WHERE n.id NOT IN (SELECT id FROM portland_911_feed)";
var total_inserts = 0;
var last_ids = [];
function run(){
var sql = [];
var req = http.get(options, function(res) {
// console.log('STATUS: ' + res.statusCode);
// console.log('HEADERS: ' + JSON.stringify(res.headers));
// Buffer the body entirely for processing as a whole.
var bodyChunks = [];
res.on('data', function(chunk) {
// You can process streamed parts here...
bodyChunks.push(chunk);
}).on('end', function() {
var xml = Buffer.concat(bodyChunks);
// var xml = "<root>Hello xml2js!</root>"
parseString(xml, function (err, result) {
for (i in result.feed.entry){
var id = toSqlString(result.feed.entry[i].id[0]);
if (last_ids.indexOf(id) == -1) {
var curr_sql =
'({0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8})'.format(
id,
toSqlString(result.feed.entry[i].title[0]),
toSqlTime(result.feed.entry[i].updated[0]),
toSqlTime(result.feed.entry[i].published[0]),
toSqlString(result.feed.entry[i].summary[0]),
toSqlString(get_category_label(result.feed.entry[i])),
toSqlString(get_category_term(result.feed.entry[i])),
get_point(result.feed.entry[i]),
toSqlString(get_content(result.feed.entry[i]))
);
total_inserts = total_inserts+1;
sql.push(curr_sql);
}
last_ids.push(id);
}
// console.log(sql.length)
callCartoDB(base_sql.format(sql.join()));
while (last_ids.length > 100){
last_ids.shift();
}
});
// console.log('BODY: ' + body);
// ...and/or process the entire body here.
})
});
req.on('error', function(e) {
// console.log('ERROR: ' + e.message);
});
}
function start(){
run()
setInterval(run, 1800000);
}
app.set('port', (process.env.PORT || 5000))
app.use(express.static(__dirname + '/public'))
app.get('/', function(request, response) {
response.send('Your app has made {0} total inserts'.format(total_inserts))
})
app.listen(app.get('port'), function() {
console.log("Node app is running at localhost:" + app.get('port'));
start();
})
var config = {};
config.cartodb = {
username : "andrew",
api_key : "mykey"
}
module.exports = config;
{
"name": "skylinepdx",
"version": "0.0.1",
"main": "index.js",
"description": "GeoRSS feed to CartoDB",
"licenses": [{ "type": "MIT" }],
"author": {
"name": "Andrew W. Hill, CartoDB",
"email": "andrew@cartodb.com"
},
"engines": {
"node": "0.10.x"
},
"scripts": {
"start": "node index.js"
},
"dependencies": {
"forever-monitor": "1.2.x",
"express": "4.x.x",
"xml2js": "0.4.x",
"http":"0.0.x",
"requestify":"0.1.x",
"step": "0.0.4"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment