Created
February 8, 2023 18:46
-
-
Save echo-devnull/f63e75d51054fa5bd08d98e4e3b6e66f to your computer and use it in GitHub Desktop.
Rebooting Elasticsearch nodes one by one
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# Rolling reboot of an Elasticsearch cluster, one node at a time.
#
# For every host in the `elasticsearch` group (one host per batch, because
# of `serial: 1`) the play:
#   1. re-enables shard allocation and waits for the cluster to be green,
#   2. disables shard allocation so the remaining nodes do not start
#      redistributing shards while this node is away,
#   3. reboots the node and waits until it is back in the cluster,
#   4. re-enables shard allocation and waits for green again.
#
# Facts must be gathered (the default), since tasks use `ansible_hostname`.
- name: Reboot Elasticsearch nodes one by one
  hosts: elasticsearch
  serial: 1
  become: true
  remote_user: ansible
  vars:
    # Address used to reach the node's HTTP API on port 9200. Falls back from
    # the legacy `ansible_ssh_host` to `ansible_host` and finally to the
    # inventory name, so inventories that only define the newer variable work.
    es_api_host: >-
      {{ ansible_ssh_host | default(ansible_host) | default(inventory_hostname) }}

  # First we make sure the cluster is not in a weird state before we begin
  # rebooting. That would be bad.
  # We turn on sharding, making sure all nodes are "in sync" with each other.
  pre_tasks:
    - name: Turn on the sharding from the {{ ansible_hostname }}
      shell: >-
        curl -H 'Content-Type: application/json'
        -XPUT {{ es_api_host }}:9200/_cluster/settings
        -d '{ "transient" : { "cluster.routing.allocation.enable" : "all" }}'
      register: result
      # Retry until the change is positively acknowledged; a response of
      # "acknowledged":false must not count as success.
      until: '''"acknowledged":true'' in result.stdout'
      retries: 200
      delay: 3
      changed_when: '''"acknowledged":true'' in result.stdout'
      run_once: true

    # Here we wait until the cluster is indeed synced, in case the previous
    # command turned sharding "on" from a previous "off" state.
    - name: Wait for cluster to be green before we begin
      # Column 4 of `_cat/health` is read as the cluster status.
      shell: >-
        curl -s -m 2 {{ es_api_host }}:9200/_cat/health | cut -d ' ' -f 4
      register: result
      until: "'green' in result.stdout"
      retries: 200
      delay: 3
      # Read-only poll: never report a change.
      changed_when: false

  tasks:
    # Right! Before we begin a reboot, first check if the cluster is green.
    - name: Wait for cluster to be green before we begin
      shell: >-
        curl -s -m 2 {{ es_api_host }}:9200/_cat/health | cut -d ' ' -f 4
      register: result
      until: "'green' in result.stdout"
      retries: 200
      delay: 3
      changed_when: false

    # Now we turn off sharding, because we are not replacing a node, merely
    # rebooting it. This way the nodes that are still live don't start
    # redistributing their stuff thinking our node died!
    - name: Turn off sharding from the {{ ansible_hostname }}
      shell: >-
        curl -H 'Content-Type: application/json'
        -XPUT {{ es_api_host }}:9200/_cluster/settings
        -d '{ "transient" : { "cluster.routing.allocation.enable" : "none" }}'
      register: result
      # Do not go on to the reboot unless allocation was really disabled.
      until: '''"acknowledged":true'' in result.stdout'
      retries: 200
      delay: 3
      changed_when: '''"acknowledged":true'' in result.stdout'

    # NOTE(review): the synced-flush API is deprecated in Elasticsearch 7.6
    # and removed in 8.0 -- confirm the cluster version; on newer clusters
    # this task would exhaust its retries and fail.
    - name: Flush sync speeds up recovery after reboot {{ ansible_hostname }}
      shell: "curl -XPOST {{ es_api_host }}:9200/_flush/synced"
      register: result
      until: '''"successful"'' in result.stdout'
      retries: 200
      delay: 3

    - name: Sleep for 5 seconds and continue with play
      wait_for:
        timeout: 5

    # The built-in reboot module issues the reboot and then waits for the
    # host to come back. It replaces a fire-and-forget `sleep 5 && reboot`
    # whose 1-second async limit was shorter than its own sleep, so the job
    # could be terminated before `reboot` was ever executed.
    - name: Reboot the {{ ansible_hostname }} and wait up to 600 seconds for it to return
      reboot:
        connect_timeout: 20
        post_reboot_delay: 30
        reboot_timeout: 600

    - name: Can I connect to the ElasticSearch port from localhost?
      wait_for:
        state: present
        port: 9200
        delay: 0
        sleep: 10
        host: "{{ es_api_host }}"

    # Let's see if the node can see itself in the cluster.
    - name: Wait for node "{{ ansible_hostname }}" to be in the cluster again.
      # -x/-F: match the whole line literally, so the hostname is never
      # interpreted as a regular expression.
      # presumably the Elasticsearch node name equals the short hostname;
      # verify against the cluster's node.name setting.
      shell: >-
        curl -s -m 2 '{{ es_api_host }}:9200/_cat/nodes?h=name'
        | tr -d ' '
        | grep -xF -- '{{ ansible_hostname }}'
      register: result
      until: result.rc == 0
      retries: 200
      delay: 3
      changed_when: false

    # If so, turn the sharding back on, so we can actually, really join!
    - name: Turn on the sharding from the {{ ansible_hostname }}
      shell: >-
        curl -H 'Content-Type: application/json'
        -XPUT {{ es_api_host }}:9200/_cluster/settings
        -d '{ "transient" : { "cluster.routing.allocation.enable" : "all" }}'
      register: result
      until: '''"acknowledged":true'' in result.stdout'
      retries: 200
      delay: 3
      changed_when: '''"acknowledged":true'' in result.stdout'
      run_once: true

    # It's just good manners to make sure we are green again, before
    # continuing with the next node.
    - name: Wait for cluster to be green again
      shell: >-
        curl -s -m 2 {{ es_api_host }}:9200/_cat/health | cut -d ' ' -f 4
      register: result
      until: "'green' in result.stdout"
      retries: 200
      delay: 3
      changed_when: false
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment