Skip to content

Instantly share code, notes, and snippets.

@bobylito
Created March 22, 2015 11:18
Show Gist options
  • Save bobylito/e2d13e78ece07a97f0ba to your computer and use it in GitHub Desktop.
Save bobylito/e2d13e78ece07a97f0ba to your computer and use it in GitHub Desktop.
Import data to Algolia with Node.js streams.
// npm install agentkeepalive algolia-search batch-stream csv-parse stream-transform
var HttpsAgent = require('agentkeepalive').HttpsAgent;
var Algolia = require('algolia-search');
var stream = require( 'stream' );
var parse = require('csv-parse');
var fs = require('fs')
var transform = require('stream-transform');
var Batch = require( 'batch-stream' );
// Command-line entry point: expects the Algolia application ID and API key as
// the first two arguments, then streams datasets/datasets.csv through the
// parse -> record -> batch -> save pipeline.
var args = process.argv.slice(2);
if (args.length < 2) {
  // Usage errors go to stderr and must be visible in the exit status; a bare
  // top-level `return 1` would leave the exit status at 0.
  console.error("Usage : node import.js APP_ID API_KEY");
  process.exitCode = 1;
} else {
  // The option is spelled `autoClose`; a lowercase `autoclose` is ignored.
  var fileStream = fs.createReadStream('datasets/datasets.csv', { autoClose: true });
  var parser = parse({ comment: '#', delimiter: ";" });
  var recordStream = transform(toAlgoliaRecord);
  var batchStream = new Batch({ size: 10000 });

  // pipe() does not forward 'error' events to the next stage, so every stage
  // needs its own listener; without one, a missing file or a malformed CSV
  // line is thrown as an uncaught exception.
  [fileStream, parser, recordStream, batchStream].forEach(function (stage) {
    stage.on('error', function (err) {
      console.error("ERROR: %s", err && err.message ? err.message : err);
      process.exitCode = 1;
    });
  });

  fileStream
    .pipe(parser)
    .pipe(recordStream)
    .pipe(batchStream)
    .pipe(algoliaSaveStream(args));
}
/**
 * Converts one parsed CSV row into the record object that is sent to Algolia.
 *
 * Columns are read by position (indexes 0 to 21). The result is delivered
 * through `cb` rather than returned.
 *
 * @param {string[]} data - Column values of a single CSV row.
 * @param {function(?Error, Object=)} cb - Called with `(null, record)` on
 *   success, or with an Error when the row has no tags column (index 14).
 */
function toAlgoliaRecord(data, cb) {
  // A row without the tags column cannot be converted; report it through the
  // callback instead of throwing from inside the stream handler.
  if (!Array.isArray(data) || typeof data[14] !== "string") {
    cb(new Error("Malformed row: missing tags column (index 14)"));
    return;
  }

  // Always parse metrics as base 10; non-numeric input still yields NaN.
  function toInt(value) {
    return parseInt(value, 10);
  }

  // "".split(",") is [""], so drop empty entries to avoid indexing an
  // empty-string tag for datasets that have no tags.
  var tags = data[14].split(",").filter(function (tag) {
    return tag !== "";
  });

  var record = {
    "objectID": data[0],
    "title": data[1],
    "slug": data[2],
    "url": data[3],
    "organization": data[4],
    "organization_id": data[5],
    "supplier": data[6],
    "description": data[7],
    "frequency": data[8],
    "license": data[9],
    // Only the exact string "True" is treated as true.
    "private": data[10] === "True",
    "featured": data[11] === "True",
    // Date.parse returns a millisecond timestamp, or NaN for unparseable input.
    "created_at": Date.parse(data[12]),
    "last_modified": Date.parse(data[13]),
    "tags": tags,
    "metric.nb_uniq_visitors": toInt(data[15]),
    "metric.views": toInt(data[16]),
    "metric.followers": toInt(data[17]),
    "metric.reuses": toInt(data[18]),
    "metric.nb_visits": toInt(data[19]),
    "metric.nb_hits": toInt(data[20]),
    "metric.issues": toInt(data[21])
  };
  cb(null, record);
}
/**
 * Builds a legacy-style writable stream that saves each chunk it receives to
 * the "opendatafrance" Algolia index with `index.saveObjects`.
 *
 * The stream applies backpressure: `write` returns false while a save is in
 * flight and 'drain' is emitted once all outstanding saves have completed, so
 * a piped source pauses instead of queueing every batch at once. After `end`
 * is called and the last save has completed, 'finish' and then 'close' are
 * emitted. Save failures are logged to stderr and do not stop the stream.
 *
 * @param {string[]} parameters - `[appId, apiKey]` for the Algolia client.
 * @returns {stream.Stream} Object exposing `writable`, `write(data)` and
 *   `end([data])`, suitable as a `pipe()` destination.
 */
function algoliaSaveStream(parameters) {
  var appId = parameters[0];
  var apiKey = parameters[1];
  var keepaliveAgent = new HttpsAgent({
    maxSockets: 1,
    maxKeepAliveRequests: 0, // no limit on max requests per keepalive socket
    maxKeepAliveTime: 30000 // keepalive for 30 seconds
  });
  var client = new Algolia(appId, apiKey, keepaliveAgent);
  var index = client.initIndex("opendatafrance");

  var streamToAlgolia = new stream.Stream();
  var pendingSaves = 0;
  var ended = false;

  // Signals completion once end() was called and no save is still running.
  function finishIfDone() {
    if (ended && pendingSaves === 0) {
      streamToAlgolia.emit('finish');
      streamToAlgolia.emit('close');
    }
  }

  streamToAlgolia.writable = true;

  streamToAlgolia.write = function (data) {
    pendingSaves += 1;
    index.saveObjects(data, function (error, content) {
      pendingSaves -= 1;
      if (error) {
        // `content` may be missing when the failure carries no response body;
        // fall back to the error itself rather than throwing here.
        var message = (content && content.message) ||
          (error && error.message) ||
          error;
        console.error("ERROR: %s", message);
      }
      if (!ended && pendingSaves === 0) {
        streamToAlgolia.emit('drain');
      }
      finishIfDone();
    });
    // False while a save is outstanding, so the piped source waits for
    // 'drain'. If the save completed synchronously nothing is pending and the
    // source may keep writing.
    return pendingSaves === 0;
  };

  streamToAlgolia.end = function (data) {
    if (ended) {
      return;
    }
    // end() may carry a final chunk; save it like any other write.
    if (data !== undefined && data !== null) {
      streamToAlgolia.write(data);
    }
    ended = true;
    streamToAlgolia.writable = false;
    finishIfDone();
  };

  return streamToAlgolia;
}
@redox
Copy link

redox commented Mar 22, 2015

Looks good, feel free to use http://www.realtime-search.com/ to share your engine!

@vvo
Copy link

vvo commented Mar 23, 2015

nice one

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment