Skip to content

Instantly share code, notes, and snippets.

@issacg
Created November 5, 2013 08:30
Show Gist options
  • Save issacg/7315642 to your computer and use it in GitHub Desktop.
Save issacg/7315642 to your computer and use it in GitHub Desktop.
Example to update GeoIP (node.js geoip-lite) from commercial DB
// Fetches MaxMind GeoIP CSV databases and converts them into the binary .dat files used by geoip-lite.
'use strict';
var cp = require('child_process');
var fs = require('fs');
var http = require('http');
var path = require('path');
var url = require('url');
var zlib = require('zlib');
fs.existsSync = fs.existsSync || path.existsSync;
var async = require('async');
var colors = require('colors');
var LineInputStream = require('line-input-stream');
var rimraf = require('rimraf').sync;
var unzip = require('unzip');
// Namespace object for the IP conversion/comparison helpers attached below.
var utils = {};
// Directory the generated .dat files are written to.
var dataPath = path.join(__dirname, '..', 'node_modules', 'geoip-lite', 'data');
// Scratch directory for downloads and extracted CSV files; main() removes it on startup.
var tmpPath = path.join(__dirname, '..', 'tmp');
// Databases to download and convert; main() walks this list in order.
//   type: converter selected by processData() ('country', 'city-extended', anything else = city)
//   url:  download location; fetch() uses the text after the last '/' as the local file name
//   src:  CSV file name(s) expected in tmpPath after download/extraction
//   dest: output file name(s) created in dataPath
var databases = [{
type: 'country',
// NOTE(review): XXXXXX presumably stands in for a real license key - confirm before running.
// The '#/GeoIP.zip' fragment makes the last '/'-separated segment 'GeoIP.zip', which is
// what fetch() picks up as the file name.
url: 'http://download.maxmind.com/app/geoip_download?edition_id=108&suffix=zip&license_key=XXXXXX#/GeoIP.zip',
src: 'GeoIP-108.csv',
dest: 'geoip-country.dat'
},{
type: 'country',
url: 'http://geolite.maxmind.com/download/geoip/database/GeoIPv6.csv.gz',
src: 'GeoIPv6.csv',
dest: 'geoip-country6.dat'
// The city databases below are currently disabled.
}/*,{
type: 'city-extended',
url: 'http://geolite.maxmind.com/download/geoip/database/GeoLiteCity_CSV/GeoLiteCity_20121204.zip',
src: [
'GeoLiteCity-Blocks.csv',
'GeoLiteCity-Location.csv'
],
dest: [
'geoip-city.dat',
'geoip-city-names.dat'
]
},{
type: 'city',
url: 'http://geolite.maxmind.com/download/geoip/database/GeoLiteCityv6-beta/GeoLiteCityv6.csv.gz',
src: 'GeoLiteCityv6.csv',
dest: 'geoip-city6.dat'
}*/];
/**
 * Converts a dotted-quad IPv4 string into an unsigned 32-bit number.
 *
 * @param {string} a - address text such as '1.2.3.4'
 * @returns {number} the address as an unsigned integer (first octet most significant)
 */
utils.aton4 = function(a) {
  var octets = a.split('.');
  var value = 0;
  for (var idx = 0; idx < 4; idx++) {
    // Octet 0 lands in the top byte, octet 3 in the bottom byte;
    // ">>> 0" reinterprets the shifted result as unsigned.
    var shift = 8 * (3 - idx);
    value += (parseInt(octets[idx], 10) << shift) >>> 0;
  }
  return value;
};
/**
 * Converts an IPv6 address string into four unsigned 32-bit words.
 *
 * Double quotes are stripped first, so quoted CSV fields can be passed as-is.
 * A single '::' is expanded to as many zero groups as needed to reach eight
 * groups. (The previous implementation copied the groups after '::' to the
 * end of the array without clearing their original slots, so addresses such
 * as '::1' or '2001:db8::1' came out with stale, duplicated groups.)
 *
 * Input that has fewer than eight groups and no '::' is padded with zero
 * groups on the right; groups that do not parse as hex count as 0.
 *
 * @param {string} a - IPv6 address text, optionally wrapped in double quotes
 * @returns {number[]} four unsigned 32-bit numbers, most significant word first
 */
utils.aton6 = function(a) {
  var text = a.replace(/"/g, '');
  var gap = text.indexOf('::');
  var head;
  var tail;

  if (gap === -1) {
    head = text.split(':');
    tail = [];
  } else {
    var before = text.slice(0, gap);
    var after = text.slice(gap + 2);
    head = before ? before.split(':') : [];
    tail = after ? after.split(':') : [];
  }

  // Zero groups go between the part before '::' and the part after it.
  var groups = head.slice();
  while (groups.length + tail.length < 8) {
    groups.push('0');
  }
  groups = groups.concat(tail).slice(0, 8);

  var r = [];
  for (var i = 0; i < 4; i++) {
    var hi = parseInt(groups[2 * i], 16) || 0;
    var lo = parseInt(groups[2 * i + 1], 16) || 0;
    // ">>> 0" turns the possibly negative int32 from the shift back into uint32.
    r.push(((hi << 16) + lo) >>> 0);
  }
  return r;
};
/**
 * Three-way comparison for two addresses of the same representation.
 *
 * @param {number|Array} a - left operand
 * @param {number|Array} b - right operand
 * @returns {?number} -1, 0 or 1 when both are numbers; the result of
 *   this.cmp6 when both are arrays; null for any other combination
 */
utils.cmp = function(a, b) {
  var bothNumbers = typeof a === 'number' && typeof b === 'number';
  if (bothNumbers) {
    if (a < b) {
      return -1;
    }
    if (a > b) {
      return 1;
    }
    return 0;
  }

  var bothArrays = a instanceof Array && b instanceof Array;
  return bothArrays ? this.cmp6(a, b) : null;
};
/**
 * Three-way comparison of two array-form addresses.
 *
 * Only the elements at index 0 and 1 take part in the comparison; anything
 * after that is ignored. (Presumably the arrays are the 4-word form produced
 * by utils.aton6, which would make this a comparison of the upper 64 bits -
 * confirm against callers.)
 *
 * @param {Array} a - left operand
 * @param {Array} b - right operand
 * @returns {number} -1, 0 or 1
 */
utils.cmp6 = function(a, b) {
  var word = 0;
  while (word < 2) {
    var left = a[word];
    var right = b[word];
    if (left < right) {
      return -1;
    }
    if (left > right) {
      return 1;
    }
    word += 1;
  }
  return 0;
};
/**
 * Tells whether an address string starts with a private, loopback or
 * link-local prefix.
 *
 * IPv4 prefixes checked: 10/8, 172.16/12, 192.168/16, 127/8, 169.254/16.
 * The 172 check now covers the whole 172.16.0.0 - 172.31.255.255 block
 * (previously only 172.16.x.x was recognised).
 * IPv6: only the literal textual prefixes 'fc00:' and 'fe80:' are matched.
 *
 * @param {*} addr - address; converted with toString() before matching
 * @returns {boolean} true when the address matches one of the prefixes
 */
utils.isPrivateIP = function(addr) {
  var text = addr.toString();
  var privatePrefixes = [
    /^10\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})/,
    /^192\.168\.([0-9]{1,3})\.([0-9]{1,3})/,
    // Second octet 16-31; the trailing "\." keeps 172.160.x.x etc. out.
    /^172\.(1[6-9]|2[0-9]|3[01])\.([0-9]{1,3})\.([0-9]{1,3})/,
    /^127\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})/,
    /^169\.254\.([0-9]{1,3})\.([0-9]{1,3})/,
    /^fc00:/,
    /^fe80:/
  ];
  for (var i = 0; i < privatePrefixes.length; i++) {
    if (privatePrefixes[i].test(text)) {
      return true;
    }
  }
  return false;
};
/**
 * Formats a 32-bit address value as dotted-quad text.
 *
 * @param {number} n - address value; it is passed through toString() and
 *   then coerced back to a number by the bit operators
 * @returns {string} text of the form 'a.b.c.d'
 */
utils.ntoa4 = function(n) {
  var value = n.toString();
  var octets = [
    value >>> 24 & 0xff,
    value >>> 16 & 0xff,
    value >>> 8 & 0xff,
    value & 0xff
  ];
  return octets.join('.');
};
/**
 * Formats an array of 32-bit words as bracketed, colon-separated hex text.
 *
 * Each word contributes two 16-bit groups. The final colon becomes ']',
 * zeros directly following a colon are dropped, and the first run of two or
 * more colons is collapsed to '::'.
 *
 * @param {number[]} n - words to format (array-like with a length)
 * @returns {string} text starting with '['
 */
utils.ntoa6 = function(n) {
  var pieces = ['['];
  var idx = 0;
  while (idx < n.length) {
    var word = n[idx];
    pieces.push((word >>> 16).toString(16), ':', (word & 0xffff).toString(16), ':');
    idx++;
  }
  return pieces.join('')
    .replace(/:$/, ']')
    .replace(/:0+/g, ':')
    .replace(/::+/, '::');
};
// __ main __ //
/**
 * Makes sure the directory that will contain `name` exists.
 *
 * Note that `name` is treated as a file path: it is path.dirname(name) that
 * gets created, not `name` itself. Missing ancestor directories are created
 * as well, and a directory that appears between the existence check and the
 * mkdir call (EEXIST) is not treated as an error.
 *
 * @param {string} name - path of a file whose parent directory is needed
 * @throws {Error} any fs.mkdirSync error other than EEXIST
 */
function mkdir(name) {
  var dir = path.dirname(name);
  if (fs.existsSync(dir)) {
    return;
  }
  // mkdir(dir) ensures dirname(dir) exists. The guard stops at the
  // filesystem root, where dirname() returns its own argument.
  if (path.dirname(dir) !== dir) {
    mkdir(dir);
  }
  try {
    fs.mkdirSync(dir);
  } catch (err) {
    if (err.code !== 'EEXIST') {
      throw err;
    }
  }
}
/**
 * Downloads `downloadUrl` into tmpPath, gunzipping on the fly when the file
 * name contains '.gz'.
 *
 * The local file name is the text after the last '/' of the URL (with '.gz'
 * removed). When the http_proxy environment variable is set, the request is
 * sent to that proxy with the full download URL as the request path.
 *
 * Any failure (non-200 response, request error, gunzip error, write error)
 * is reported and terminates the process with exit status 1, so that callers
 * of the script can tell the update did not happen.
 *
 * @param {string} downloadUrl - URL of the database archive
 * @param {function(string, string)} cb - called as cb(tmpFile, fileName)
 *   once the file has been completely written
 */
function fetch(downloadUrl, cb) {
  function getOptions() {
    if (process.env.http_proxy) {
      var options = url.parse(process.env.http_proxy);
      options.path = downloadUrl;
      options.headers = {
        Host: url.parse(downloadUrl).host
      };
      return options;
    } else {
      return url.parse(downloadUrl);
    }
  }

  // Shared handler for stream/request errors; there is no error channel in
  // cb, so the script stops here just like it does for a bad HTTP status.
  function fail(err) {
    console.log('ERROR'.red + ': %s', (err && err.message) || err);
    process.exit(1);
  }

  function onResponse(response) {
    var status = response.statusCode;
    if (status !== 200) {
      console.log('ERROR'.red + ': HTTP Request Failed [%d %s]', status, http.STATUS_CODES[status]);
      client.abort();
      process.exit(1);
    }

    var tmpFileStream = fs.createWriteStream(tmpFile);
    tmpFileStream.on('error', fail);

    var body = response;
    if (gzip) {
      var gunzip = zlib.createGunzip();
      gunzip.on('error', fail);
      body = response.pipe(gunzip);
    }

    // 'close' on the write stream means the data has reached the file.
    body.pipe(tmpFileStream).on('close', function() {
      console.log(' DONE'.green);
      cb(tmpFile, fileName);
    });
  }

  var fileName = downloadUrl.split('/').pop();
  var gzip = (fileName.indexOf('.gz') !== -1);
  if (gzip) {
    fileName = fileName.replace('.gz', '');
  }

  var tmpFile = path.join(tmpPath, fileName);
  mkdir(tmpFile);

  var client = http.get(getOptions(), onResponse);
  client.on('error', fail);
  process.stdout.write('Retrieving ' + fileName + ' ...');
}
/**
 * Extracts the wanted files from a downloaded zip archive into the
 * directory that holds the archive.
 *
 * Entries are matched on their base name (the part after the last '/');
 * everything else in the archive is drained and discarded. If `tmpFileName`
 * does not contain '.zip' nothing is extracted and `cb` is called right away.
 *
 * `cb` is called once, after the zip parser has finished AND every extracted
 * file's write stream has closed, so the files are complete on disk when the
 * next step opens them.
 *
 * @param {string|string[]} wantedFiles - base name(s) of the entries to keep
 * @param {string} tmpFile - path of the downloaded file
 * @param {string} tmpFileName - file name of the download, used for the
 *   '.zip' check and for progress output
 * @param {function()} cb - completion callback, called without arguments
 */
function extract(wantedFiles, tmpFile, tmpFileName, cb) {
  // Accept a single name as well as a list.
  if (typeof wantedFiles === 'string') {
    wantedFiles = [wantedFiles];
  }

  if (tmpFileName.indexOf('.zip') === -1) {
    cb();
    return;
  }

  process.stdout.write('Extracting ' + tmpFileName + ' ...');

  var destDir = path.dirname(tmpFile);
  var pendingWrites = 0;
  var parserFinished = false;
  var reported = false;

  function maybeDone() {
    if (reported || !parserFinished || pendingWrites > 0) {
      return;
    }
    reported = true;
    console.log(' DONE'.green);
    cb();
  }

  var zipStream = fs.createReadStream(tmpFile).pipe(unzip.Parse());
  zipStream.on('entry', function(entry) {
    var fileName = entry.path.toString().split('/').pop();
    if (wantedFiles.indexOf(fileName) === -1) {
      entry.autodrain();
      return;
    }
    pendingWrites++;
    entry.pipe(fs.createWriteStream(path.join(destDir, fileName))).on('close', function() {
      pendingWrites--;
      maybeDone();
    });
  });
  zipStream.on('finish', function() {
    parserFinished = true;
    maybeDone();
  });
}
/**
 * Converts a country CSV file from tmpPath into a binary .dat file in
 * dataPath.
 *
 * Record layout written per CSV line:
 *   IPv4 (10 bytes): start uint32 BE, end uint32 BE, 2-byte country code
 *   IPv6 (34 bytes): start 4 x uint32 BE, end 4 x uint32 BE, 2-byte country code
 * A line is treated as IPv6 when its first field contains ':'. For IPv4 the
 * numeric range is taken from fields 2 and 3. Lines starting with
 * 'Copyright', lines without any digit, and lines with fewer than 6 fields
 * are skipped.
 *
 * Any existing destination file is removed first. The output file is closed
 * before `cb` is called.
 *
 * @param {string} src - CSV file name inside tmpPath
 * @param {string} dest - output file name inside dataPath
 * @param {function()} cb - called without arguments when the CSV has ended
 */
function processCountryData(src, dest, cb) {
  function processLine(line) {
    if (line.match(/^Copyright/) || !line.match(/\d/)) {
      return;
    }

    var fields = line.split(/, */);
    if (fields.length < 6) {
      console.log("weird line: %s::", line);
      return;
    }

    var cc = fields[4].replace(/"/g, '');
    var sip;
    var eip;
    var b;
    var bsz;
    var i;

    if (fields[0].match(/:/)) {
      // IPv6
      bsz = 34;
      sip = utils.aton6(fields[0]);
      eip = utils.aton6(fields[1]);

      b = new Buffer(bsz);
      // Zero-fill so a short/empty country code cannot leave
      // uninitialised bytes in the record (the IPv4 branch already did this).
      b.fill(0);
      for (i = 0; i < sip.length; i++) {
        b.writeUInt32BE(sip[i], i * 4);
      }
      for (i = 0; i < eip.length; i++) {
        b.writeUInt32BE(eip[i], 16 + (i * 4));
      }
    } else {
      // IPv4
      bsz = 10;
      sip = parseInt(fields[2].replace(/"/g, ''), 10);
      eip = parseInt(fields[3].replace(/"/g, ''), 10);

      b = new Buffer(bsz);
      b.fill(0);
      b.writeUInt32BE(sip, 0);
      b.writeUInt32BE(eip, 4);
    }

    // The country code occupies the last two bytes of either record type.
    b.write(cc, bsz - 2);
    fs.writeSync(datFile, b, 0, bsz, null);
  }

  var dataFile = path.join(dataPath, dest);
  var tmpDataFile = path.join(tmpPath, src);

  rimraf(dataFile);
  mkdir(dataFile);

  process.stdout.write('Processing Data (may take a moment) ...');

  var datFile = fs.openSync(dataFile, "w");
  var csvStream = new LineInputStream(fs.createReadStream(tmpDataFile), /[\r\n]+/);
  csvStream.setEncoding('utf8');
  csvStream.on('line', processLine);
  csvStream.on('end', function() {
    // Release the descriptor; it was previously left open for the
    // lifetime of the process.
    fs.closeSync(datFile);
    console.log(' DONE'.green);
    cb();
  });
}
/**
 * Converts a city blocks CSV file from tmpPath into a binary .dat file in
 * dataPath.
 *
 * Record layout written per CSV line:
 *   IPv4 (12 bytes): start uint32 BE, end uint32 BE, location id uint32 BE
 *   IPv6 (58 bytes): start 4 x uint32 BE, end 4 x uint32 BE, country code at
 *     +32, region at +34, latitude * 10000 (int32 BE) at +36,
 *     longitude * 10000 (int32 BE) at +40, city name from +44
 * A line is treated as IPv6 when its first field contains ':'. Lines
 * starting with 'Copyright' and lines without any digit are skipped.
 *
 * Any existing destination file is removed first and the destination
 * directory is created if needed (matching processCountryData). The output
 * file is closed before `cb` is called.
 *
 * @param {string} src - CSV file name inside tmpPath
 * @param {string} dest - output file name inside dataPath
 * @param {function()} cb - called without arguments when the CSV has ended
 */
function processCityData(src, dest, cb) {
  function processLine(line) {
    if (line.match(/^Copyright/) || !line.match(/\d/)) {
      return;
    }

    var fields = line.replace(/"/g, '').split(/, */);
    var sip;
    var eip;
    var locId;
    var b;
    var bsz;
    var i;

    if (fields[0].match(/:/)) {
      // IPv6
      var offset = 0;
      var cc = fields[4];
      var city = fields[6];
      var lat = Math.round(parseFloat(fields[7]) * 10000);
      var lon = Math.round(parseFloat(fields[8]) * 10000);
      var rg = fields[5];

      bsz = 58;
      sip = utils.aton6(fields[0]);
      eip = utils.aton6(fields[1]);

      b = new Buffer(bsz);
      b.fill(0);

      for (i = 0; i < sip.length; i++) {
        b.writeUInt32BE(sip[i], offset);
        offset += 4;
      }
      for (i = 0; i < eip.length; i++) {
        b.writeUInt32BE(eip[i], offset);
        offset += 4;
      }

      b.write(cc, offset);
      b.write(rg, offset + 2);
      b.writeInt32BE(lat, offset + 4);
      b.writeInt32BE(lon, offset + 8);
      b.write(city, offset + 12);
    } else {
      // IPv4
      bsz = 12;
      sip = parseInt(fields[0], 10);
      eip = parseInt(fields[1], 10);
      locId = parseInt(fields[2], 10);

      b = new Buffer(bsz);
      b.fill(0);
      b.writeUInt32BE(sip >>> 0, 0);
      b.writeUInt32BE(eip >>> 0, 4);
      b.writeUInt32BE(locId >>> 0, 8);
    }

    fs.writeSync(datFile, b, 0, b.length, null);
  }

  var dataFile = path.join(dataPath, dest);
  var tmpDataFile = path.join(tmpPath, src);

  rimraf(dataFile);
  // Same preparation as processCountryData: make sure dataPath exists.
  mkdir(dataFile);

  process.stdout.write('Processing Data (may take a moment) ...');

  var datFile = fs.openSync(dataFile, "w");
  var csvStream = new LineInputStream(fs.createReadStream(tmpDataFile), /[\r\n]+/);
  csvStream.setEncoding('utf8');
  csvStream.on('line', processLine);
  csvStream.on('end', function() {
    // Release the descriptor; it was previously never closed.
    fs.closeSync(datFile);
    cb();
  });
}
/**
 * Converts a city location CSV file from tmpPath into a binary .dat file in
 * dataPath.
 *
 * One 32-byte record is appended per CSV line, in file order:
 *   country code (field 1) at 0, region (field 2) at 2,
 *   latitude * 10000 (field 5, int32 BE) at 4,
 *   longitude * 10000 (field 6, int32 BE) at 8,
 *   city name (field 3) from 12
 * Lines starting with 'Copyright' and lines without any digit are skipped.
 *
 * Any existing destination file is removed first and the destination
 * directory is created if needed. The output file is closed before `cb` is
 * called.
 *
 * @param {string} src - CSV file name inside tmpPath
 * @param {string} dest - output file name inside dataPath
 * @param {function()} cb - called without arguments when the CSV has ended
 */
function processCityDataNames(src, dest, cb) {
  function processLine(line) {
    if (line.match(/^Copyright/) || !line.match(/\d/)) {
      return;
    }

    var fields = line.replace(/"/g, '').split(/, */);
    var cc = fields[1];
    var rg = fields[2];
    var city = fields[3];
    var lat = Math.round(parseFloat(fields[5]) * 10000);
    var lon = Math.round(parseFloat(fields[6]) * 10000);

    var sz = 32;
    var b = new Buffer(sz);
    b.fill(0);
    b.write(cc, 0);
    b.write(rg, 2);
    b.writeInt32BE(lat, 4);
    b.writeInt32BE(lon, 8);
    b.write(city, 12);

    fs.writeSync(datFile, b, 0, b.length, null);
  }

  var dataFile = path.join(dataPath, dest);
  var tmpDataFile = path.join(tmpPath, src);

  rimraf(dataFile);
  // Make sure dataPath exists, as processCountryData does.
  mkdir(dataFile);

  var datFile = fs.openSync(dataFile, "w");
  var csvStream = new LineInputStream(fs.createReadStream(tmpDataFile), /[\r\n]+/);
  csvStream.setEncoding('utf8');
  csvStream.on('line', processLine);
  csvStream.on('end', function() {
    // Release the descriptor; it was previously never closed.
    fs.closeSync(datFile);
    cb();
  });
}
/**
 * Runs the converter that matches a database type.
 *
 *   'country'       -> processCountryData(src, dest)
 *   'city-extended' -> processCityData(src[0], dest[0]) followed by
 *                      processCityDataNames(src[1], dest[1])
 *   anything else   -> processCityData(src, dest)
 *
 * For the two city variants this function prints the ' DONE' marker itself
 * before calling `cb`; for 'country' the callback is handed straight to
 * processCountryData.
 *
 * @param {string} type - database type
 * @param {string|string[]} src - source CSV name(s)
 * @param {string|string[]} dest - destination file name(s)
 * @param {function()} cb - completion callback, called without arguments
 */
function processData(type, src, dest, cb) {
  function reportAndContinue() {
    console.log(' DONE'.green);
    cb();
  }

  switch (type) {
    case 'country':
      processCountryData(src, dest, cb);
      break;
    case 'city-extended':
      processCityData(src[0], dest[0], function() {
        processCityDataNames(src[1], dest[1], reportAndContinue);
      });
      break;
    default:
      processCityData(src, dest, reportAndContinue);
      break;
  }
}
/**
 * Updates every configured database, one after another:
 * fetch -> extract -> processData.
 *
 * The temporary directory is removed up front. If the series reports an
 * error, the failure message is printed and the process exits with status 1
 * (it used to exit with status 0, which made a failed update look
 * successful to the calling shell or build script).
 */
function main() {
  rimraf(tmpPath);
  // mkdir() creates the directory *containing* the given path, so this only
  // ensures tmpPath's parent; tmpPath itself is created by fetch().
  mkdir(tmpPath);

  async.forEachSeries(databases, function(database, nextDatabase) {
    fetch(database.url, function(tmpFile, tmpFileName) {
      extract(database.src, tmpFile, tmpFileName, function() {
        processData(database.type, database.src, database.dest, function() {
          console.log();
          nextDatabase();
        });
      });
    });
  }, function(err) {
    console.log();
    if (err) {
      console.log('Failed to Update Databases from MaxMind.'.red);
      process.exit(1);
    } else {
      console.log('Successfully Updated Databases from MaxMind.'.green);
    }
  });
}
// Run the update right away when this file is executed directly;
// when it is loaded via require(), export main so the caller decides when to run it.
if (require.main === module) {
main();
} else {
module.exports = main;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment