# Ansible playbook (role) to patch ES data nodes (WIP)
---
# Ask yum whether any package updates are pending. The result is stored in
# `yum_update`; later tasks compare `yum_update.rc` against 0 to decide whether
# this host needs any work. Only rc == 1 is treated as a failure here, and even
# that failure is ignored so the play keeps going.
# Per the yum documentation, `check-update` exits 100 when updates are pending.
# NOTE(review): the `warn` option of the command module was removed in
# ansible-core 2.14 - drop it when moving to a newer Ansible.
- name: yum_check
  command:
    cmd: /bin/yum check-upgrade
    warn: false
  register: yum_update
  # This is a read-only query, so it must never be reported as a change.
  changed_when: false
  failed_when: yum_update.rc == 1
  ignore_errors: true
# Ask `needs-restarting -r` whether the host requires a reboot and keep the
# result in `needs_reboot`. Exit codes 0 and 1 are both accepted; anything
# above 1 is a failure, which is ignored so the play continues.
# Later tasks treat `needs_reboot.rc == 1` as "reboot required".
- name: reboot_check
  command: /bin/needs-restarting -r
  register: needs_reboot
  # This is a read-only query, so it must never be reported as a change.
  changed_when: false
  failed_when: needs_reboot.rc > 1
  ignore_errors: true
  tags:
    - reboot
# - debug:
# var: yum_update
# - debug:
# var: needs_reboot
# At this point we know whether the VM needs patching or a reboot.
# If it needs neither, stop processing this host.
- name: end_host_check
  meta: end_host
  # A list of conditions is ANDed together.
  when:
    - yum_update.rc == 0
    - needs_reboot.rc == 0
# Read the curator configuration file from the remote host (base64-encoded by
# slurp) into `curator_config`. Skipped when `curator_yaml` is not set.
- name: Get auth from curator
  when: curator_yaml is defined
  slurp:
    src: "{{ curator_yaml }}"
  register: curator_config
  tags:
    - prepare
# Decode the slurped curator config and split its `client.http_auth` value
# ("user:password") into the `es_user` / `es_pass` facts used by the uri tasks.
- name: Extract Elasticsearch credentials from the curator config
  set_fact:
    # Split on the first colon only, so passwords that contain ':' stay intact.
    es_user: "{{ (curator_config['content'] | b64decode | from_yaml)['client']['http_auth'].split(':', 1)[0] }}"
    es_pass: "{{ (curator_config['content'] | b64decode | from_yaml)['client']['http_auth'].split(':', 1)[1] }}"
    cacheable: false
  # The slurp that fills `curator_config` is skipped when `curator_yaml` is
  # undefined; without the same guard this task would fail on missing content.
  when: curator_yaml is defined
  # Keep the password out of verbose task output.
  no_log: true
  tags:
    - prepare
# - debug:
# var: es_service
# Fall back to a per-host service name when the caller did not provide
# `es_service`.
- set_fact:
    es_service: "elasticsearch-{{ inventory_hostname }}"
  tags:
    - prepare
  when: es_service is not defined
# - debug:
# var: es_service
# Poll the node's _cat/health endpoint (status column only) until the cluster
# reports green: up to 90 attempts, 60 seconds apart.
- name: BEFORE PATCHING wait until cluster is healthy
  uri:
    url: "https://{{ inventory_hostname }}:9200/_cat/health?h=st"
    method: GET
    user: "{{ es_user }}"
    password: "{{ es_pass }}"
    # Send credentials on the first request, as the other uri tasks here do.
    force_basic_auth: true
    validate_certs: false
    return_content: true
  register: cluster_status
  # `content` is missing when a request fails outright; default it so the
  # condition evaluates to false and the task retries instead of aborting.
  until: (cluster_status.content | default('') | trim) == 'green'
  retries: 90
  delay: 60
  tags:
    - health_check
# - debug:
# var: cluster_status
# Fetch the node's root endpoint and remember the response in
# `es_cluster_details`; its `json.cluster_uuid` is compared again after the
# restart to confirm the node is back in the same cluster.
- name: Get cluster details
  uri:
    url: "https://{{ inventory_hostname }}:9200/"
    method: GET
    user: "{{ es_user }}"
    password: "{{ es_pass }}"
    force_basic_auth: true
    validate_certs: false
  register: es_cluster_details
# - debug:
# var: es_cluster_details
# Make sure the yum-utils package is installed.
- name: install yum-utils
  package:
    state: present
    name: yum-utils
# Upgrade every installed package to its latest version, except the
# `elasticsearch` package, which is excluded from this run.
- name: yum update
  yum:
    name: '*'
    state: latest
    exclude: elasticsearch
    # disablerepo: rsyslog-stable
  tags:
    - yum_update
# Re-run the reboot check now that packages have been updated, overwriting
# `needs_reboot`. The following tasks only act when `needs_reboot.rc == 1`.
# Exit codes above 1 count as a failure, which is ignored.
- name: needs reboot
  command: /bin/needs-restarting -r
  register: needs_reboot
  # This is a read-only query, so it must never be reported as a change.
  changed_when: false
  failed_when: needs_reboot.rc > 1
  ignore_errors: true
  tags:
    - reboot
# Before a reboot, restrict shard allocation to primaries so the cluster does
# not start rebalancing while this node is down.
- name: delay rebalancing
  uri:
    url: "https://{{ inventory_hostname }}:9200/_cluster/settings"
    method: PUT
    body_format: json
    body:
      persistent:
        cluster.routing.allocation.enable: primaries
    user: "{{ es_user }}"
    password: "{{ es_pass }}"
    # Send credentials on the first request, as the other uri tasks here do.
    force_basic_auth: true
    validate_certs: false
    timeout: 90
  register: delay_rebalancing_result
  # Retry until Elasticsearch acknowledges the settings change, mirroring the
  # "re-enable rebalancing" task. `json` is absent when a request fails, so
  # guard it to keep retrying instead of aborting on an undefined variable.
  until: >-
    delay_rebalancing_result.json is defined and
    delay_rebalancing_result.json.acknowledged == true
  retries: 5
  delay: 30
  when: needs_reboot.rc == 1
  tags:
    - delay_rebalancing

# Give the cluster a moment to apply the allocation setting.
- name: Pause after changing the allocation setting
  pause:
    seconds: 15
# Before a reboot, issue a synced flush and retry (10 attempts, 30 seconds
# apart) until no shard reports a failure. This is best effort: if it never
# succeeds the play continues anyway.
# NOTE(review): the linked reference is for Elasticsearch 6.8; confirm that
# _flush/synced still exists on the cluster version being patched.
- name: flush synced makes recovery faster
  uri:
    url: "https://{{ inventory_hostname }}:9200/_flush/synced"
    method: POST
    user: "{{ es_user }}"
    password: "{{ es_pass }}"
    # Send credentials on the first request, as the other uri tasks here do.
    force_basic_auth: true
    validate_certs: false
    status_code:
      - 200
      - 409  # Conflict - https://www.elastic.co/guide/en/elasticsearch/reference/6.8/indices-synced-flush.html
  register: sync_status
  # `json` is absent when a request fails outright; guard it so the task
  # retries instead of aborting on an undefined variable.
  until: sync_status.json is defined and sync_status.json._shards.failed == 0
  retries: 10
  delay: 30
  ignore_errors: true  # if it fails we can still continue
  when: needs_reboot.rc == 1
  tags:
    - flush_synced
# Stop the Elasticsearch service, but only when a reboot is pending.
- name: stop elasticsearch
  service:
    state: stopped
    name: "{{ es_service }}"
  tags:
    - stop_elasticsearch
  when: needs_reboot.rc == 1

# Unconditional short wait before the reboot step.
- pause:
    seconds: 10
# Reboot the host when required and wait up to 600 seconds for it to return;
# `uptime` is the command used to verify the host is back.
# The outcome is stored in `reboot_return`.
- name: reboot_machine
  reboot:
    test_command: uptime
    reboot_timeout: 600
  become_method: sudo
  register: reboot_return
  when: needs_reboot.rc == 1
  tags:
    - reboot
## This needs to be modified to use Morpheus.
# - name: reset_if_needed
# when: (reboot_return.failed == "false")
# vmware_guest_powerstate:
# hostname: "{{ vcenter_server }}"
# username: "{{ vcenter_username }}"
# password: "{{ vcenter_password }}"
# name: "{{ vm_name }}"
# folder: "{{ vm_folder }}"
# validate_certs: no
# state: restarted
# force: yes
# delegate_to: localhost
# Connectivity check against the host before bringing the service up.
- ping:

# Make sure Elasticsearch is running. This runs for every host that reaches
# this point, whether or not it was rebooted.
- name: start elasticsearch
  service:
    state: started
    name: "{{ es_service }}"
  tags:
    - start_elasticsearch

# Short wait before probing the HTTP port.
- pause:
    seconds: 5
# Wait 30 seconds, then block until port 9200 on this host accepts
# connections.
- name: wait for port 9200 to be available
  wait_for:
    host: "{{ inventory_hostname }}"
    port: 9200
    delay: 30
    state: started

# Extra settling time before querying the node.
- name: Pause for the dust to settle...
  pause:
    seconds: 10
# Query the node's root endpoint until it reports the same cluster_uuid that
# was recorded in `es_cluster_details` before patching (10 attempts, 20
# seconds apart).
- name: Confirm the node joins the cluster
  uri:
    url: "https://{{ inventory_hostname }}:9200/"
    method: GET
    user: "{{ es_user }}"
    password: "{{ es_pass }}"
    force_basic_auth: true
    validate_certs: false
  register: es_post_cluster_state
  # `json` is absent while the node is still starting and requests fail; guard
  # it so the task keeps retrying instead of aborting on an undefined variable.
  until: >-
    es_post_cluster_state.json is defined and
    es_post_cluster_state.json.cluster_uuid == es_cluster_details.json.cluster_uuid
  retries: 10
  delay: 20
# Clear the persistent allocation override (null resets the setting) so the
# cluster may rebalance again. Retries until Elasticsearch acknowledges the
# change (5 attempts, 30 seconds apart).
- name: re-enable rebalancing
  uri:
    url: "https://{{ inventory_hostname }}:9200/_cluster/settings"
    method: PUT
    body_format: json
    body:
      persistent:
        cluster.routing.allocation.enable: null
    user: "{{ es_user }}"
    password: "{{ es_pass }}"
    # Send credentials on the first request, as the other uri tasks here do.
    force_basic_auth: true
    validate_certs: false
    timeout: 90
  register: response
  # `json` is absent when a request fails outright; guard it so the task
  # retries instead of aborting on an undefined variable.
  until: response.json is defined and response.json.acknowledged == true
  retries: 5
  delay: 30
  tags:
    - reenable_rebalancing
# After patching, poll the node's _cat/health endpoint (status column only)
# until the cluster reports green: up to 90 attempts, 60 seconds apart.
- name: wait until cluster is healthy
  uri:
    url: "https://{{ inventory_hostname }}:9200/_cat/health?h=st"
    method: GET
    user: "{{ es_user }}"
    password: "{{ es_pass }}"
    # Send credentials on the first request, as the other uri tasks here do.
    force_basic_auth: true
    validate_certs: false
    return_content: true
  register: response
  # `content` is missing when a request fails outright; default it so the
  # condition evaluates to false and the task retries instead of aborting.
  until: (response.content | default('') | trim) == 'green'
  retries: 90
  delay: 60
---
# Patch the hosts in the `data` group one at a time (serial: 1) by running the
# patch_es_data_nodes task file from the elasticsearch role.
- hosts: data
  serial: 1
  gather_facts: false
  ignore_unreachable: true
  become: true
  become_method: sudo
  # The VM reset does not always work, so the vCenter prompts it needs are
  # commented out for now.
  # vars_prompt:
  #   - name: vcenter_username
  #     prompt: "Please enter in your vCenter username"
  #     private: no
  #     when: vcenter_username is undefined
  #
  #   - name: vcenter_password
  #     prompt: "Please enter in your vCenter password"
  #     private: yes
  #     when: vcenter_password is undefined
  tasks:
    - name: import es patching tasks
      import_role:
        tasks_from: patch_es_data_nodes
        name: elasticsearch
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment