Skip to content

Instantly share code, notes, and snippets.

@orangejulius
Last active April 22, 2016 14:10
Show Gist options
  • Save orangejulius/3b16e8c836de06906c7afd14ca37adc3 to your computer and use it in GitHub Desktop.
Save orangejulius/3b16e8c836de06906c7afd14ca37adc3 to your computer and use it in GitHub Desktop.
var elasticsearch = require('elasticsearch');
/*
* Elasticsearch document updater script
*
* Given a query that returns the ids of records meeting some sort of criteria,
* run a bunch of individual update queries to fix the records.
*
* Obviously for this to work, the search query has to return fewer and fewer
* documents as those changes are made.
*/
// Elasticsearch connection used for both the search and the bulk update.
var client = new elasticsearch.Client({
  host: 'localhost:9200',
  //host: '192.168.6.145:9200' //dev
});

// how many documents to update in one run of the script
var query_size = 100;

/**
 * Build the body of a bulk request that applies the same partial update to
 * every search hit.
 *
 * The bulk body is a flat array alternating an action header and its payload:
 * [ { update: {...} }, { doc: {...} }, { update: {...} }, { doc: {...} }, ... ]
 *
 * @param {Array<Object>} hits - search hits; only `_type` and `_id` are read
 * @param {Object} doc - partial document to merge into each hit
 * @returns {Array<Object>} alternating header/payload entries (empty for no hits)
 */
function build_bulk_body(hits, doc) {
  return hits.reduce(function(bulk, hit) {
    var header = {
      update: {
        _index: 'pelias',
        _type: hit._type,
        _id: hit._id
      }
    };
    var payload = {
      doc: doc
    };
    return bulk.concat(header, payload);
  }, []);
}

/**
 * Collect the per-item failures from a bulk response.
 *
 * A bulk request can succeed at the transport level while individual actions
 * fail; those failures are reported on the items of the response.
 *
 * @param {Object} resp - bulk response
 * @returns {Array<Object>} the items whose `update` result carries an `error`
 */
function failed_items(resp) {
  if (!resp || !Array.isArray(resp.items)) {
    return [];
  }
  return resp.items.filter(function(item) {
    return Boolean(item.update && item.update.error);
  });
}

client.search({
  "index": "pelias",
  "body": {
    "query": {
      "filtered": {
        "query": {
          "bool": {
            "must": [
              {
                "match": {
                  "parent.region_id": "85682581"
                }
              },
              {
                "match": {
                  "parent.country_a": "SWE"
                }
              }
            ],
            // NOTE(review): with "must" clauses present, "should" clauses in a
            // bool query are optional by default, so this presumably only
            // influences scoring and does not restrict the results to these
            // sources. Confirm that is the intent.
            "should": [
              {
                "terms": {
                  "source": [
                    "whosonfirst",
                    "geonames"
                  ]
                }
              }
            ]
          }
        }
      }
    }
  },
  "size": query_size,
  // Only _type and _id of each hit are used below, so no fields are requested.
  "fields": []
}).then(function(body) {
  console.log(body.hits.total + " hits!");

  var hits = body.hits.hits;
  // Nothing left to fix, or nothing returned in this page: skip the bulk call
  // rather than sending an empty bulk request.
  if (body.hits.total === 0 || hits.length === 0) {
    return;
  }

  // Partial document merged into every matching record.
  var update_doc = {
    "parent": {
      "country_a": [ "DNK"],
      "country": ["Denmark"],
      "country_id": "85633121"
    }
  };

  var bulk_body = build_bulk_body(hits, update_doc);

  console.log("updating " + hits.length + " documents");

  client.bulk({
    body: bulk_body,
    // Make the changes visible to search right away so the next run of the
    // script no longer matches the documents that were just updated.
    refresh: true
  }, function(err, resp) {
    if (err) {
      console.log("there was an error");
      console.log(err);
      console.log(resp);
      return;
    }

    // The request itself succeeded, but individual updates may still have
    // failed; the response flags that with `errors`.
    if (resp && resp.errors) {
      var failures = failed_items(resp);
      console.log("there was an error");
      console.log(failures.length + " update(s) failed");
      console.log(JSON.stringify(failures, null, 2));
      return;
    }

    console.log("updated!");
  });
}).catch(function(err) {
  // Covers a failed search as well as any exception thrown while preparing
  // the bulk request above.
  console.log("there was an error");
  console.log(err);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment