Skip to content

Instantly share code, notes, and snippets.

@svolpe43
Created October 28, 2016 18:35
Show Gist options
  • Save svolpe43/baf2b6ac00a0978e0a0cd3364358a3ee to your computer and use it in GitHub Desktop.
Save svolpe43/baf2b6ac00a0978e0a0cd3364358a3ee to your computer and use it in GitHub Desktop.
Extract and index HTML pages into a Solr Cloud stack.
var request = require('request');
var fs = require('fs');
// Host name template; the "<>" placeholder is swapped for the stack name
// supplied as the second command-line argument.
var solr_host = 'http://<>-us-east-1.sr-dev.acquia.com';

// Filled in from the command line and read by the functions below.
var collection;
var host;

// Entry point: both <collection> and <stack> are required; otherwise only
// the usage line is printed and nothing is indexed.
if (!process.argv[2] || !process.argv[3]) {
  console.log('Usage: node solr.js <collection> <stack>');
} else {
  collection = process.argv[2];
  host = solr_host.replace(/<>/, process.argv[3]);
  console.log(collection, host);
  index();
}
/**
 * Lists the `posts` directory and hands its entries to extract_rec so they
 * are extracted and indexed one after another, starting at position 0.
 *
 * If the directory cannot be read, the error is logged and nothing is indexed.
 */
function index(){
  fs.readdir('posts', function(err, data){
    if(err){
      // On failure `data` is undefined; passing it on would make
      // extract_rec throw when it reads `files.length`.
      console.log(err);
      return;
    }
    extract_rec(0, data, function(){});
  });
}
/**
 * Processes `files` sequentially starting at index `cur`: each file is sent
 * through extract_and_index, its result or error is logged, and only then is
 * the next file started. A failure on one file does not stop the run.
 *
 * @param {number} cur - Index of the file to process next.
 * @param {string[]} files - File names to index.
 * @param {Function} [callback] - Invoked with no arguments once every file
 *   has been processed.
 */
function extract_rec(cur, files, callback){
  // `>=` also terminates if a caller starts past the end of the list.
  if(cur >= files.length){
    console.log('All done.');
    // Signal completion; previously the callback was passed along but never called.
    if(typeof callback === 'function'){
      callback();
    }
    return;
  }

  console.log(files[cur]);

  extract_and_index(collection, files[cur], function(err, data){
    if(err){
      console.log(err);
    }else{
      console.log(data);
    }
    // Move on regardless of the outcome for this file.
    extract_rec(cur + 1, files, callback);
  });
}
/**
 * Reads `posts/<file>` and POSTs its contents as text/html to the
 * `/update/extract` endpoint of the given collection on `host`, using the
 * file name as the `literal.id` parameter and requesting a commit.
 *
 * @param {string} collection - Name of the collection placed in the URL path.
 * @param {string} file - File name inside the `posts` directory.
 * @param {Function} callback - Called as `callback(err, null)` when reading
 *   the file or the request fails, otherwise `callback(null, body)` with the
 *   response body.
 */
function extract_and_index(collection, file, callback){
  fs.readFile('posts/' + file, function(err, data){
    if(err){
      // Do not post an undefined body when the file could not be read.
      callback(err, null);
      return;
    }

    // Encode the id so names containing spaces, '&', '#', etc. do not
    // corrupt the query string.
    var url = host + '/solr/' + collection + '/update/extract?literal.id=' +
      encodeURIComponent(file) + '&commit=true&wt=json';

    request.post({
      url: url,
      headers: {
        'content-type' : 'text/html'
      },
      body: data
    }, function (err, resp, body) {
      if(err){
        callback(err, null);
      }else{
        callback(null, body);
      }
    });
  });
}
/**
 * Sends a match-all select request (`q=*:*`, zero rows) to the given
 * collection and logs the response body, or the error if the request fails.
 *
 * @param {string} collection - Name of the collection placed in the URL path.
 */
function query(collection){
  request.get({
    // Use the resolved `host`; `solr_host` is only the template and still
    // contains the "<>" placeholder.
    url: host + '/solr/' + collection + '/select?q=*:*&start=0&rows=0&wt=json'
  }, function (err, resp, body) {
    if(err){
      console.log(err);
    }else{
      console.log(body);
    }
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment