Created
October 28, 2016 18:35
-
-
Save svolpe43/baf2b6ac00a0978e0a0cd3364358a3ee to your computer and use it in GitHub Desktop.
Extract and index HTML pages into a Solr Cloud stack.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var request = require('request'); | |
var fs = require('fs'); | |
// URL template for the Solr endpoint; the `<>` marker is swapped for the
// stack name given on the command line.
var solr_host = 'http://<>-us-east-1.sr-dev.acquia.com';

// Filled in from the command line and read by the functions below.
var collection;
var host;

// Both positional arguments are required: <collection> then <stack>.
if(!process.argv[2] || !process.argv[3]){
    console.log('Usage: node solr.js <collection> <stack>');
}else{
    collection = process.argv[2];
    host = solr_host.replace(/<>/, process.argv[3]);
    console.log(collection, host);
    index();
}
/**
 * Lists the `posts` directory and starts indexing its entries one at a time,
 * beginning with the first entry.
 *
 * If the directory cannot be read, the error is logged and nothing is indexed.
 */
function index(){
    fs.readdir('posts', function(err, files){
        // On failure `files` is undefined; handing it to extract_rec would
        // throw when it reads `files.length`, so stop here instead.
        if(err){
            console.log(err);
            return;
        }
        extract_rec(0, files, function(){});
    });
}
/**
 * Indexes `files[cur]` and, once that request has called back, moves on to
 * `files[cur + 1]`, so the files are processed strictly one after another.
 *
 * @param {number} cur - Index of the file to process on this step.
 * @param {string[]} files - File names, as passed to extract_and_index.
 * @param {Function} [callback] - Invoked with no arguments once every file
 *     has been attempted. Optional.
 */
function extract_rec(cur, files, callback){
    // `>=` rather than `==` so a start index past the end terminates
    // instead of recursing over undefined entries.
    if(cur >= files.length){
        console.log('All done.');
        if(typeof callback === 'function'){
            callback();
        }
        return;
    }

    console.log(files[cur]);
    extract_and_index(collection, files[cur], function(err, data){
        // A failure is only logged; the run continues with the next file.
        if(err){
            console.log(err);
        }else{
            console.log(data);
        }
        extract_rec(cur + 1, files, callback);
    });
}
/**
 * Reads `posts/<file>` and POSTs its contents as `text/html` to the
 * collection's `/update/extract` endpoint on `host`, using the file name as
 * `literal.id` and requesting `commit=true` with a JSON response.
 *
 * @param {string} collection - Collection name placed in the URL path.
 * @param {string} file - File name inside the `posts` directory.
 * @param {Function} callback - Called as `callback(err, null)` when the file
 *     read or the HTTP request fails, otherwise `callback(null, body)` with
 *     the response body.
 */
function extract_and_index(collection, file, callback){
    fs.readFile('posts/' + file, function(err, data){
        // Do not POST an undefined body when the file could not be read.
        if(err){
            callback(err, null);
            return;
        }

        // The file name ends up in the query string, so escape it; names
        // containing spaces, `&` or `#` would otherwise corrupt the URL.
        var url = host + '/solr/' + collection + '/update/extract?literal.id=' +
            encodeURIComponent(file) + '&commit=true&wt=json';

        request.post({
            url: url,
            headers: {
                'content-type' : 'text/html'
            },
            body: data
        }, function (err, resp, body) {
            if(err){
                callback(err, null);
            }else{
                callback(null, body);
            }
        });
    });
}
/**
 * Sends a `select` request for `*:*` with `rows=0` to the given collection
 * and logs either the request error or the raw response body.
 *
 * @param {string} collection - Collection name placed in the URL path.
 */
function query(collection){
    request.get({
        // Use the resolved `host`; `solr_host` is only the template and still
        // contains the `<>` placeholder.
        url: host + '/solr/' + collection + '/select?q=*:*&start=0&rows=0&wt=json'
    }, function (err, resp, body) {
        if(err){
            console.log(err);
        }else{
            console.log(body);
        }
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment