russorat/google_sheet_to_es_cluster.js

## google_sheet_to_es_cluster.js
// If you are running this from a spreadsheet, you can leave this blank and
// it will pick up the active spreadsheet. If you are running this from a
// new script.google.com project, you should put the url of the google sheet here.
var SPREADSHEET_URL = '';
var ES_HOST = {
  host : '', // for found.io, something.loc.aws.found.io
  port : 9243, // the port, usually 9200 or 9243
  username : '',
  password : '',
  use_ssl : true // set to false to use http
}
// You can enter your own index or use the default
// which is the name of the spreadsheet lowercased
// and underscored
var INDEX = '' || getDefaultIndex();
var INDEX_TYPE = 'spreadsheet';
// If this template does not exist, it will create one
// using the DEFULT_TEMPLATE found at the bottom of this doc.
// Leave empty to disable template management.
var TEMPLATE_NAME = 'google_spreadsheet';
// If you have a lot of data, enable this to use the bulk api
// https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
var BULK = true;

function sendToES() {
  var data = getActiveSheet().getDataRange().getValues();
  var headers = data.shift();
  for(var i in headers) {
    headers[i] = headers[i].replace(/[^0-9a-zA-Z]/g,'_'); // clean up the column names for index keys
    headers[i] = headers[i].toLowerCase();
  }
  var bulkList = [];
  if(TEMPLATE_NAME) { createTemplate(); }
  for(var i in data) {
    var row = data[i];
    var toInsert = {};
    for(var c in row) {
      toInsert[headers[c]] = row[c];
    }
    if(BULK) {
      bulkList.push(JSON.stringify({ "index" : { "_index" : INDEX, "_type" : INDEX_TYPE } }));
      bulkList.push(JSON.stringify(toInsert));
      // Don't hit the UrlFetchApp limits of 10MB for POST calls.
      if(bulkList.length >= 2000) {
        postDataToES(bulkList.join("\n")+"\n");
        bulkList = [];
      }
    } else {
      postDataToES(JSON.stringify(toInsert));
    }
  }
  if(BULK && bulkList.length > 0) {
    postDataToES(bulkList.join("\n")+"\n");
  }
}

function postDataToES(data) {
  var url = [(ES_HOST.use_ssl) ? 'https://' : 'http://',
             ES_HOST.host,':',ES_HOST.port].join('');
  if(BULK) {
    url += '/_bulk';
  } else {
    url += ['/',INDEX,'/',INDEX_TYPE].join('');
  }
  var options = getDefaultOptions();
  options.method = 'POST';
  options['payload'] = data;
  options.headers["Content-Type"] = "application/json";
  var resp = UrlFetchApp.fetch(url, options);
}

function createTemplate() {
  var url = [(ES_HOST.use_ssl) ? 'https://' : 'http://',
             ES_HOST.host,':',ES_HOST.port,
            '/_template/',TEMPLATE_NAME].join('')
  var options = getDefaultOptions();
  options['muteHttpExceptions'] = true;
  var resp = UrlFetchApp.fetch(url, options);
  if(resp.getResponseCode() == 404) {
    options = getDefaultOptions();
    options.method = 'POST';
    options['payload'] = JSON.stringify(DEFAULT_TEMPLATE);
    options.headers["Content-Type"] = "application/json";
    resp = UrlFetchApp.fetch(url, options);
  } else {
    Logger.log('Template already exists');
  }
}

function getDefaultOptions() {
  var options = {
    method : 'GET',
    headers : { },
  }
  if(ES_HOST.username) {
    options.headers["Authorization"] = "Basic " + Utilities.base64Encode(ES_HOST.username + ":" + ES_HOST.password);
  }
  return options;
}

function getDefaultIndex() {
  var index_name = getActiveSheet().getParent().getName();
  return index_name.replace(/[^0-9a-zA-Z]/g,'_').toLowerCase()
}

function getActiveSheet() {
  if(SPREADSHEET_URL == '') {
    return SpreadsheetApp.getActiveSheet();
  } else {
    return SpreadsheetApp.openByUrl(SPREADSHEET_URL).getActiveSheet();
  }
}

var DEFAULT_TEMPLATE = {
  "order": 0,
  "template": INDEX,
  "settings": {
    "index.refresh_interval": "5s",
    "index.analysis.analyzer.default.type": "standard",
    "index.number_of_replicas": "1",
    "index.number_of_shards": "1",
    "index.analysis.analyzer.default.stopwords": "_none_"
  },
  "mappings": {
    "_default_": {
      "dynamic_templates": [
        {
          "string_fields": {
            "mapping": {
              "fields": {
                "{name}": {
                  "index": "analyzed",
                  "omit_norms": true,
                  "type": "string"
                },
                "raw": {
                  "search_analyzer": "keyword",
                  "ignore_above": 256,
                  "index": "not_analyzed",
                  "type": "string"
                }
              },
              "type": "multi_field"
            },
            "match_mapping_type": "string",
            "match": "*"
          }
        }
      ],
      "_all": {
        "enabled": true
      }
    }
  },
  "aliases": {
  }
}
	// If you are running this from a spreadsheet, you can leave this blank and
	// it will pick up the active spreadsheet. If you are running this from a
	// new script.google.com project, you should put the url of the google sheet here.
	var SPREADSHEET_URL = '';
	var ES_HOST = {
	host : '', // for found.io, something.loc.aws.found.io
	port : 9243, // the port, usually 9200 or 9243
	username : '',
	password : '',
	use_ssl : true // set to false to use http
	}
	// You can enter your own index or use the default
	// which is the name of the spreadsheet lowercased
	// and underscored
	var INDEX = '' \|\| getDefaultIndex();
	var INDEX_TYPE = 'spreadsheet';
	// If this template does not exist, it will create one
	// using the DEFULT_TEMPLATE found at the bottom of this doc.
	// Leave empty to disable template management.
	var TEMPLATE_NAME = 'google_spreadsheet';
	// If you have a lot of data, enable this to use the bulk api
	// https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
	var BULK = true;

	function sendToES() {
	var data = getActiveSheet().getDataRange().getValues();
	var headers = data.shift();
	for(var i in headers) {
	headers[i] = headers[i].replace(/[^0-9a-zA-Z]/g,'_'); // clean up the column names for index keys
	headers[i] = headers[i].toLowerCase();
	}
	var bulkList = [];
	if(TEMPLATE_NAME) { createTemplate(); }
	for(var i in data) {
	var row = data[i];
	var toInsert = {};
	for(var c in row) {
	toInsert[headers[c]] = row[c];
	}
	if(BULK) {
	bulkList.push(JSON.stringify({ "index" : { "_index" : INDEX, "_type" : INDEX_TYPE } }));
	bulkList.push(JSON.stringify(toInsert));
	// Don't hit the UrlFetchApp limits of 10MB for POST calls.
	if(bulkList.length >= 2000) {
	postDataToES(bulkList.join("\n")+"\n");
	bulkList = [];
	}
	} else {
	postDataToES(JSON.stringify(toInsert));
	}
	}
	if(BULK && bulkList.length > 0) {
	postDataToES(bulkList.join("\n")+"\n");
	}
	}

	function postDataToES(data) {
	var url = [(ES_HOST.use_ssl) ? 'https://' : 'http://',
	ES_HOST.host,':',ES_HOST.port].join('');
	if(BULK) {
	url += '/_bulk';
	} else {
	url += ['/',INDEX,'/',INDEX_TYPE].join('');
	}
	var options = getDefaultOptions();
	options.method = 'POST';
	options['payload'] = data;
	options.headers["Content-Type"] = "application/json";
	var resp = UrlFetchApp.fetch(url, options);
	}

	function createTemplate() {
	var url = [(ES_HOST.use_ssl) ? 'https://' : 'http://',
	ES_HOST.host,':',ES_HOST.port,
	'/_template/',TEMPLATE_NAME].join('')
	var options = getDefaultOptions();
	options['muteHttpExceptions'] = true;
	var resp = UrlFetchApp.fetch(url, options);
	if(resp.getResponseCode() == 404) {
	options = getDefaultOptions();
	options.method = 'POST';
	options['payload'] = JSON.stringify(DEFAULT_TEMPLATE);
	options.headers["Content-Type"] = "application/json";
	resp = UrlFetchApp.fetch(url, options);
	} else {
	Logger.log('Template already exists');
	}
	}

	function getDefaultOptions() {
	var options = {
	method : 'GET',
	headers : { },
	}
	if(ES_HOST.username) {
	options.headers["Authorization"] = "Basic " + Utilities.base64Encode(ES_HOST.username + ":" + ES_HOST.password);
	}
	return options;
	}

	function getDefaultIndex() {
	var index_name = getActiveSheet().getParent().getName();
	return index_name.replace(/[^0-9a-zA-Z]/g,'_').toLowerCase()
	}

	function getActiveSheet() {
	if(SPREADSHEET_URL == '') {
	return SpreadsheetApp.getActiveSheet();
	} else {
	return SpreadsheetApp.openByUrl(SPREADSHEET_URL).getActiveSheet();
	}
	}

	var DEFAULT_TEMPLATE = {
	"order": 0,
	"template": INDEX,
	"settings": {
	"index.refresh_interval": "5s",
	"index.analysis.analyzer.default.type": "standard",
	"index.number_of_replicas": "1",
	"index.number_of_shards": "1",
	"index.analysis.analyzer.default.stopwords": "_none_"
	},
	"mappings": {
	"_default_": {
	"dynamic_templates": [
	{
	"string_fields": {
	"mapping": {
	"fields": {
	"{name}": {
	"index": "analyzed",
	"omit_norms": true,
	"type": "string"
	},
	"raw": {
	"search_analyzer": "keyword",
	"ignore_above": 256,
	"index": "not_analyzed",
	"type": "string"
	}
	},
	"type": "multi_field"
	},
	"match_mapping_type": "string",
	"match": "*"
	}
	}
	],
	"_all": {
	"enabled": true
	}
	}
	},
	"aliases": {
	}
	}