Skip to content

Instantly share code, notes, and snippets.

@moklick
Last active August 29, 2015 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moklick/d87f64651d879b49002d to your computer and use it in GitHub Desktop.
Save moklick/d87f64651d879b49002d to your computer and use it in GitHub Desktop.
Boilerplate for processing (read and write) large geojson/json files with streams
// Boilerplate for processing large GeoJSON/JSON files with streams.
// npm install --save through2 JSONStream
var through = require('through2');
var JSONStream = require('JSONStream');
var fs = require('fs');
// The source file is read, and the cleaned result written, through streams.
var inputStream = fs.createReadStream('./test.geojson');
var outputStream = fs.createWriteStream('./cleaned-geojson.json');

// Start timestamp (milliseconds since the epoch) used to report the total
// processing time once the run is over.
var startTime = Date.now();

// Literal text emitted around the streamed features array so that the output
// forms a FeatureCollection object.
var geoJsonStart = '{"type": "FeatureCollection", "features":';
var geoJsonEnd = '}';
// Object-mode transform stage: each feature object coming out of the parser is
// replaced by its cleaned counterpart from getCleanObj().
// This is also the place to filter features (geometry validity, inside a
// polygon, etc.): calling done() without a second argument emits nothing for
// the current feature.
var propManipulator = through.obj(function(feature, encoding, done){
  // if(isValid)
  done(null, getCleanObj(feature));
});
// Report the elapsed time on 'finish', i.e. after the pipe has ended
// outputStream and all data has been flushed, so the measurement covers the
// complete write rather than just the end of the stringify stage.
outputStream.on('finish', function(){
  console.log('Finished in', (Date.now() - startTime) / 1000, 'seconds.');
});

// Read -> parse each entry of "features" -> clean -> stringify -> write.
// The FeatureCollection wrapper is passed to the stringifier as part of its
// open/close strings, so the whole document travels through the pipe in order.
// Nothing is written to outputStream by hand, and the closing brace no longer
// depends on an 'end' listener running before the pipe ends the output.
inputStream
  .pipe(JSONStream.parse('features.*'))
  .pipe(propManipulator)
  .pipe(JSONStream.stringify(geoJsonStart + '[', ',', ']' + geoJsonEnd))
  .pipe(outputStream);
// example helper function
/**
 * Builds the reduced feature that is written to the output file.
 *
 * Keeps only the geometry (type + coordinates) and two derived properties, and
 * tags the result as a GeoJSON Feature so the surrounding FeatureCollection
 * stays valid GeoJSON.
 *
 * @param {Object} dirtyObj - Raw feature as emitted by the parser. Reads
 *   `geometry.type`, `geometry.coordinates`, `properties.gml_id` and
 *   `properties.floors`.
 * @returns {Object} `{ type, geometry, properties: { id, height } }`.
 *   `geometry` is `null` when the input feature has no geometry.
 */
function getCleanObj(dirtyObj){
  var sourceGeometry = dirtyObj.geometry;
  // GeoJSON allows "properties": null; treat it like an empty property bag.
  var sourceProps = dirtyObj.properties || {};

  return {
    type: 'Feature',
    // GeoJSON allows "geometry": null for unlocated features; pass that
    // through instead of throwing on the property access.
    geometry: sourceGeometry ? {
      type: sourceGeometry.type,
      coordinates: sourceGeometry.coordinates
    } : null,
    properties: {
      id: sourceProps.gml_id,
      // NOTE(review): 3.5 looks like an assumed height per floor - confirm
      // the intended unit. Yields NaN when `floors` is missing.
      height: sourceProps.floors * 3.5
    }
  };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment