@bittusarkar
Created December 13, 2018 16:00
GET _cluster/health
{
  "cluster_name": "almsearchprodweu2_es5",
  "status": "yellow",
  "timed_out": false,
  "number_of_nodes": 16,
  "number_of_data_nodes": 10,
  "active_primary_shards": 3964,
  "active_shards": 7924,
  "relocating_shards": 0,
  "initializing_shards": 10,
  "unassigned_shards": 118,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 98.41033283656234
}
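
(Not part of the original capture: to see which indices the 118 unassigned copies belong to, a follow-up along these lines usually works; the column list is just a convenient subset of the standard _cat/shards columns.)

GET _cat/shards?v&h=index,shard,prirep,state,unassigned.reason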
===
GET _cat/allocation?v
shards disk.indices disk.used disk.avail disk.total disk.percent host          ip            node
  1035      558.4gb   562.2gb      3.4tb      3.9tb           13 192.168.0.154 192.168.0.154 es-d04-rm
    14        468kb   310.8mb      3.9tb      3.9tb            0 192.168.0.160 192.168.0.160 es-d10-rm
   410      108.1gb   110.2gb      3.8tb      3.9tb            2 192.168.0.155 192.168.0.155 es-d05-rm
    44        9.6kb   310.3mb      3.9tb      3.9tb            0 192.168.0.158 192.168.0.158 es-d08-rm
  1902      841.6gb   847.9gb      3.1tb      3.9tb           20 192.168.0.151 192.168.0.151 es-d01-rm
  1863      917.3gb   925.2gb        3tb      3.9tb           22 192.168.0.152 192.168.0.152 es-d02-rm
  1994          1tb     1.1tb      2.8tb      3.9tb           27 192.168.0.153 192.168.0.153 es-d03-rm
   208       21.7gb    22.2gb      3.9tb      3.9tb            0 192.168.0.157 192.168.0.157 es-d07-rm
   407       98.1gb    99.3gb      3.8tb      3.9tb            2 192.168.0.159 192.168.0.159 es-d09-rm
    57       12.8kb   311.3mb      3.9tb      3.9tb            0 192.168.0.156 192.168.0.156 es-d06-rm
   118                                                                                       UNASSIGNED
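
(Not from the original gist: the lopsided shard counts are consistent with data draining off es-d01-rm under the IP exclusion shown in the next section. Progress of the in-flight copies can be watched with a call like this.)

GET _cat/recovery?v&active_only=true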
===
GET _cluster/settings
{
  "persistent": {},
  "transient": {
    "cluster": {
      "routing": {
        "allocation": {
          "exclude": {
            "_ip": "192.168.0.151"
          }
        }
      }
    }
  }
}
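
(For context, not in the original output: a transient exclusion like this is typically applied, and later cleared, with settings updates of this shape; the IP below simply mirrors the value already present above.)

PUT _cluster/settings
{
  "transient": {
    "cluster.routing.allocation.exclude._ip": "192.168.0.151"
  }
}

PUT _cluster/settings
{
  "transient": {
    "cluster.routing.allocation.exclude._ip": null
  }
}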
===
GET _cluster/allocation/explain
{
  "index": "codesearchshared_8_0",
  "shard": 33,
  "primary": false,
  "current_state": "unassigned",
  "unassigned_info": {
    "reason": "MANUAL_ALLOCATION",
    "at": "2018-12-13T07:28:52.698Z",
    "details": "failed shard on node [EeSbzForSNeuewm02TeSlA]: failed recovery, failure RecoveryFailedException[[codesearchshared_8_0][33]: Recovery failed from {es-d01-rm}{05PeyBBySq-qL0NVHwdVmw}{fGTsu8lHRNyuDlUglbjAlg}{192.168.0.151}{192.168.0.151:9300}{faultDomain=0, updateDomain=0} into {es-d08-rm}{EeSbzForSNeuewm02TeSlA}{7YAg4GuxRBORgeZ2ND0nDg}{192.168.0.158}{192.168.0.158:9300}{faultDomain=1, updateDomain=2} (no activity after [30m])]; nested: ElasticsearchTimeoutException[no activity after [30m]]; ",
    "last_allocation_status": "no_attempt"
  },
  "can_allocate": "throttled",
  "allocate_explanation": "allocation temporarily throttled",
  "node_allocation_decisions": [
    {
      "node_id": "0pxsNqEkQ_Kel-CdOUrDqA",
      "node_name": "es-d10-rm",
      "transport_address": "192.168.0.160:9300",
      "node_attributes": {
        "faultDomain": "1",
        "updateDomain": "4"
      },
      "node_decision": "throttled",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [0pxsNqEkQ_Kel-CdOUrDqA] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        }
      ]
    },
    {
      "node_id": "EeSbzForSNeuewm02TeSlA",
      "node_name": "es-d08-rm",
      "transport_address": "192.168.0.158:9300",
      "node_attributes": {
        "faultDomain": "1",
        "updateDomain": "2"
      },
      "node_decision": "throttled",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [EeSbzForSNeuewm02TeSlA] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        }
      ]
    },
    {
      "node_id": "p20g3NziScCwW9QaczvXZA",
      "node_name": "es-d02-rm",
      "transport_address": "192.168.0.152:9300",
      "node_attributes": {
        "faultDomain": "1",
        "updateDomain": "1"
      },
      "node_decision": "throttled",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [p20g3NziScCwW9QaczvXZA] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        }
      ]
    },
    {
      "node_id": "xSBwpjSuSNm-lQrjyb-H1g",
      "node_name": "es-d04-rm",
      "transport_address": "192.168.0.154:9300",
      "node_attributes": {
        "faultDomain": "1",
        "updateDomain": "3"
      },
      "node_decision": "throttled",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [xSBwpjSuSNm-lQrjyb-H1g] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        }
      ]
    },
    {
      "node_id": "05PeyBBySq-qL0NVHwdVmw",
      "node_name": "es-d01-rm",
      "transport_address": "192.168.0.151:9300",
      "node_attributes": {
        "faultDomain": "0",
        "updateDomain": "0"
      },
      "node_decision": "no",
      "store": {
        "matching_sync_id": true
      },
      "deciders": [
        {
          "decider": "filter",
          "decision": "NO",
          "explanation": """node matches cluster setting [cluster.routing.allocation.exclude] filters [_ip:"192.168.0.151"]"""
        },
        {
          "decider": "same_shard",
          "decision": "NO",
          "explanation": "the shard cannot be allocated to the same node on which a copy of the shard already exists [[codesearchshared_8_0][33], node[05PeyBBySq-qL0NVHwdVmw], [P], s[STARTED], a[id=53jHddp9RUmBs2zL0KGauQ]]"
        },
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [05PeyBBySq-qL0NVHwdVmw] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        },
        {
          "decider": "awareness",
          "decision": "NO",
          "explanation": "there are too many copies of the shard allocated to nodes with attribute [updateDomain], there are [2] total configured shard copies for this shard id and [9] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
        }
      ]
    },
    {
      "node_id": "54R6JMXNT0KHWRTkrKOzpg",
      "node_name": "es-d06-rm",
      "transport_address": "192.168.0.156:9300",
      "node_attributes": {
        "faultDomain": "1",
        "updateDomain": "0"
      },
      "node_decision": "no",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [54R6JMXNT0KHWRTkrKOzpg] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        },
        {
          "decider": "awareness",
          "decision": "NO",
          "explanation": "there are too many copies of the shard allocated to nodes with attribute [updateDomain], there are [2] total configured shard copies for this shard id and [9] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
        }
      ]
    },
    {
      "node_id": "HNSZGsRASyaTCfZONDmtIw",
      "node_name": "es-d09-rm",
      "transport_address": "192.168.0.159:9300",
      "node_attributes": {
        "faultDomain": "0",
        "updateDomain": "3"
      },
      "node_decision": "no",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [HNSZGsRASyaTCfZONDmtIw] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        },
        {
          "decider": "awareness",
          "decision": "NO",
          "explanation": "there are too many copies of the shard allocated to nodes with attribute [faultDomain], there are [2] total configured shard copies for this shard id and [3] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
        }
      ]
    },
    {
      "node_id": "m-dZa9fLT1-EBP4S5KW9yQ",
      "node_name": "es-d07-rm",
      "transport_address": "192.168.0.157:9300",
      "node_attributes": {
        "faultDomain": "0",
        "updateDomain": "1"
      },
      "node_decision": "no",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [m-dZa9fLT1-EBP4S5KW9yQ] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        },
        {
          "decider": "awareness",
          "decision": "NO",
          "explanation": "there are too many copies of the shard allocated to nodes with attribute [faultDomain], there are [2] total configured shard copies for this shard id and [3] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
        }
      ]
    },
    {
      "node_id": "ox2cTQ3BQw6e6q78DihFdg",
      "node_name": "es-d05-rm",
      "transport_address": "192.168.0.155:9300",
      "node_attributes": {
        "faultDomain": "0",
        "updateDomain": "4"
      },
      "node_decision": "no",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [ox2cTQ3BQw6e6q78DihFdg] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        },
        {
          "decider": "awareness",
          "decision": "NO",
          "explanation": "there are too many copies of the shard allocated to nodes with attribute [faultDomain], there are [2] total configured shard copies for this shard id and [3] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
        }
      ]
    },
    {
      "node_id": "xmCfM_q6Saqf-xCyiao3Uw",
      "node_name": "es-d03-rm",
      "transport_address": "192.168.0.153:9300",
      "node_attributes": {
        "faultDomain": "0",
        "updateDomain": "2"
      },
      "node_decision": "no",
      "deciders": [
        {
          "decider": "throttling",
          "decision": "THROTTLE",
          "explanation": "reached the limit of outgoing shard recoveries [10] on the node [xmCfM_q6Saqf-xCyiao3Uw] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=10] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
        },
        {
          "decider": "awareness",
          "decision": "NO",
          "explanation": "there are too many copies of the shard allocated to nodes with attribute [faultDomain], there are [2] total configured shard copies for this shard id and [3] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
        }
      ]
    }
  ]
}
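
(Not part of the original output: given the THROTTLE decisions above, the usual follow-ups are to wait for the ten outgoing recoveries per node to drain, to raise the recovery concurrency, or to ask the cluster to retry previously failed allocations once the underlying recovery timeouts are addressed. The value 15 below is only an illustration, not a recommendation for this cluster.)

PUT _cluster/settings
{
  "transient": {
    "cluster.routing.allocation.node_concurrent_recoveries": 15
  }
}

POST _cluster/reroute?retry_failed=true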