Reindexing Strategy using 3 files.
#Reindexing an Elasticsearch index can be a pain when you have limited resources and still need the cluster running at the same time.
#Hence it is advisable to size up the document count and break the work down into chunks based on time.
#Look to Kibana: the breakdown is already done for you even as you perform your search.
#Just pop open the request and the aggregation query is there.
#Using this, you can tally your document counts per time bucket to verify your activities.
#I need to do this because, due to resource constraints, the Logstash input plugin sometimes hits an error and restarts.
#When it restarts, the query gets executed again. With logstash-input-elasticsearch, it resumes with a brand new search.
#Any previous scroll ID is discarded. This is something you do not want happening.
#You can end up with more documents in the target than in the source.
#Thus breaking the job down into chunks limits the corruption and makes remediation easier.
#This automates executing the Logstash configs one after another; doing it manually would be costly in terms of time.
#So the strategy is like this:
#1) Create a Logstash config template with ${START} and ${END} tags, which we replace with the actual time values using sed.
#2) Create an input.dat file that has 2 values per line: the START and END epoch time in milliseconds (a generator sketch follows the sample content below).
#3) The script loops through the input, creates the actual Logstash config file and executes it.
#In my experience, with approx 1GB of memory you should process approx 30K documents in one iteration.
#Dependencies: Logstash (preferably in the PATH), Cygwin (for Windows), sed. Everything is assumed to happen in the current directory.
#Lastly, use a diff tool to compare the source and target aggregation results to verify the process; a sample count query is sketched below.
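#Start verification query sketch (not part of the original gist).
#A minimal example of the kind of date_histogram aggregation Kibana generates; you can copy the real one
#from Kibana's request panel instead. The host, index pattern, time range and 1h interval are assumptions.
#curl -s -XPOST "http://localhost:9200/logstash-*/_search?size=0" -d '{
#  "query": { "range": { "@timestamp": { "gte": 1455174000000, "lte": 1455487200000, "format": "epoch_millis" } } },
#  "aggs":  { "per_hour": { "date_histogram": { "field": "@timestamp", "interval": "1h" } } }
#}'
#Run it against both the source and the target, then diff the bucket counts.
#End verification query sketch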
#Start input.dat content.
#input.dat content format.
#1455174000000 1455184800000
#1455206400000 1455217200000
#1455271200000 1455282000000
#1455476400000 1455487200000
#
#Make sure it ends with a newline.
#End input.dat content
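#Start input.dat generator sketch (not part of the original gist).
#A minimal way to produce the START/END pairs in fixed windows; the overall range and the 3-hour
#(10800000 ms) chunk size below are assumptions, so size the chunks to roughly 30K documents.
#FROM_MS=1455174000000; TO_MS=1455487200000; STEP_MS=10800000
#T=$FROM_MS
#while [ $T -lt $TO_MS ]; do
#  echo "$T $((T + STEP_MS))" >> input.dat
#  T=$((T + STEP_MS))
#done
#End input.dat generator sketch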
#Start Sample Config template
#You can break up the data into segments any way you see appropriate besides using time.
#-----------------------------------------------
#input {
#  elasticsearch {
#    scan    => false
#    query   => '
#      { "query": {
#          "filtered": {
#            "query":  {"query_string": {"query": "*"}},
#            "filter": {
#              "bool": {
#                "must": [
#                  {"range": {"@timestamp": {"gte": ${START}, "lte": ${END}, "format": "epoch_millis"}}}
#                ],
#                "must_not": []
#              }
#            }
#          }
#      }}'
#    docinfo => true
#  }
#}
#filter {
#  metrics {
#    meter   => "events"
#    add_tag => "metric"
#  }
#}
#output {
#  if "metric" in [tags] {
#    stdout { }
#  }
#}
# End Template
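#Start target output sketch (not part of the original gist).
#The template above only reports throughput to stdout; the actual copy also needs an elasticsearch
#output (and the input may need hosts/index pointing at the source). A possible sketch, assuming the
#target cluster is on localhost:9200 and the docinfo metadata should preserve the original index, type and id:
#output {
#  elasticsearch {
#    hosts         => ["localhost:9200"]
#    index         => "%{[@metadata][_index]}"
#    document_type => "%{[@metadata][_type]}"
#    document_id   => "%{[@metadata][_id]}"
#  }
#  if "metric" in [tags] {
#    stdout { }
#  }
#}
#End target output sketch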
#Start script file that reads the segmentation, generates the config files and runs them.
#!/bin/sh
PREFIX=rindex;
# Substitute ${START} and ${END} in the template, write a per-chunk config, then run Logstash on it.
run(){
    sed -e 's/\${START}/'$1'/' -e 's/\${END}/'$2'/' reindex.preconfig > $PREFIX"_"$1".conf";
    logstash -f $PREFIX"_"$1".conf";
}
# Each line of input.dat supplies the START and END epoch-millis values for one chunk.
while read LINE
do
    run $LINE;
done < input.dat;
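#Start per-chunk count check sketch (not part of the original gist).
#A hedged way to verify each chunk: ask the source and the target for a document count over the same
#time range and compare the two lines. The host names and the index pattern are assumptions.
SRC="http://source-es:9200/logstash-*";
TGT="http://target-es:9200/logstash-*";
while read START END
do
    Q='{"query":{"range":{"@timestamp":{"gte":'$START',"lte":'$END',"format":"epoch_millis"}}}}';
    echo "$START $END source=$(curl -s -XPOST "$SRC/_count" -d "$Q") target=$(curl -s -XPOST "$TGT/_count" -d "$Q")";
done < input.dat;
#End per-chunk count check sketch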