Skip to content

Instantly share code, notes, and snippets.

@russorat
Last active September 1, 2015 19:59
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save russorat/fdc2264ba54665dfd23a to your computer and use it in GitHub Desktop.
Save russorat/fdc2264ba54665dfd23a to your computer and use it in GitHub Desktop.
This code is meant to be used on script.google.com in conjuction with a google spreadsheet. It will transform a set of rows and columns (with headers) into json documents and index them in an Elasticsearch cluster.
// If you are running this from a spreadsheet, you can leave this blank and
// it will pick up the active spreadsheet. If you are running this from a
// new script.google.com project, you should put the url of the google sheet here.
var SPREADSHEET_URL = '';
var ES_HOST = {
host : '', // for found.io, something.loc.aws.found.io
port : 9243, // the port, usually 9200 or 9243
username : '',
password : '',
use_ssl : true // set to false to use http
}
// You can enter your own index or use the default
// which is the name of the spreadsheet lowercased
// and underscored
var INDEX = '' || getDefaultIndex();
var INDEX_TYPE = 'spreadsheet';
// If this template does not exist, it will create one
// using the DEFULT_TEMPLATE found at the bottom of this doc.
// Leave empty to disable template management.
var TEMPLATE_NAME = 'google_spreadsheet';
// If you have a lot of data, enable this to use the bulk api
// https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
var BULK = true;
function sendToES() {
var data = getActiveSheet().getDataRange().getValues();
var headers = data.shift();
for(var i in headers) {
headers[i] = headers[i].replace(/[^0-9a-zA-Z]/g,'_'); // clean up the column names for index keys
headers[i] = headers[i].toLowerCase();
}
var bulkList = [];
if(TEMPLATE_NAME) { createTemplate(); }
for(var i in data) {
var row = data[i];
var toInsert = {};
for(var c in row) {
toInsert[headers[c]] = row[c];
}
if(BULK) {
bulkList.push(JSON.stringify({ "index" : { "_index" : INDEX, "_type" : INDEX_TYPE } }));
bulkList.push(JSON.stringify(toInsert));
// Don't hit the UrlFetchApp limits of 10MB for POST calls.
if(bulkList.length >= 2000) {
postDataToES(bulkList.join("\n")+"\n");
bulkList = [];
}
} else {
postDataToES(JSON.stringify(toInsert));
}
}
if(BULK && bulkList.length > 0) {
postDataToES(bulkList.join("\n")+"\n");
}
}
function postDataToES(data) {
var url = [(ES_HOST.use_ssl) ? 'https://' : 'http://',
ES_HOST.host,':',ES_HOST.port].join('');
if(BULK) {
url += '/_bulk';
} else {
url += ['/',INDEX,'/',INDEX_TYPE].join('');
}
var options = getDefaultOptions();
options.method = 'POST';
options['payload'] = data;
options.headers["Content-Type"] = "application/json";
var resp = UrlFetchApp.fetch(url, options);
}
function createTemplate() {
var url = [(ES_HOST.use_ssl) ? 'https://' : 'http://',
ES_HOST.host,':',ES_HOST.port,
'/_template/',TEMPLATE_NAME].join('')
var options = getDefaultOptions();
options['muteHttpExceptions'] = true;
var resp = UrlFetchApp.fetch(url, options);
if(resp.getResponseCode() == 404) {
options = getDefaultOptions();
options.method = 'POST';
options['payload'] = JSON.stringify(DEFAULT_TEMPLATE);
options.headers["Content-Type"] = "application/json";
resp = UrlFetchApp.fetch(url, options);
} else {
Logger.log('Template already exists');
}
}
function getDefaultOptions() {
var options = {
method : 'GET',
headers : { },
}
if(ES_HOST.username) {
options.headers["Authorization"] = "Basic " + Utilities.base64Encode(ES_HOST.username + ":" + ES_HOST.password);
}
return options;
}
function getDefaultIndex() {
var index_name = getActiveSheet().getParent().getName();
return index_name.replace(/[^0-9a-zA-Z]/g,'_').toLowerCase()
}
function getActiveSheet() {
if(SPREADSHEET_URL == '') {
return SpreadsheetApp.getActiveSheet();
} else {
return SpreadsheetApp.openByUrl(SPREADSHEET_URL).getActiveSheet();
}
}
var DEFAULT_TEMPLATE = {
"order": 0,
"template": INDEX,
"settings": {
"index.refresh_interval": "5s",
"index.analysis.analyzer.default.type": "standard",
"index.number_of_replicas": "1",
"index.number_of_shards": "1",
"index.analysis.analyzer.default.stopwords": "_none_"
},
"mappings": {
"_default_": {
"dynamic_templates": [
{
"string_fields": {
"mapping": {
"fields": {
"{name}": {
"index": "analyzed",
"omit_norms": true,
"type": "string"
},
"raw": {
"search_analyzer": "keyword",
"ignore_above": 256,
"index": "not_analyzed",
"type": "string"
}
},
"type": "multi_field"
},
"match_mapping_type": "string",
"match": "*"
}
}
],
"_all": {
"enabled": true
}
}
},
"aliases": {
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment