Skip to content

Instantly share code, notes, and snippets.

@mikepea
Created January 12, 2015 11:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikepea/511ebb8b723f69cea3a4 to your computer and use it in GitHub Desktop.
Save mikepea/511ebb8b723f69cea3a4 to your computer and use it in GitHub Desktop.
Elasticsearch output of `_nodes/stats?pretty`, captured to analyse why the heap needs to be over 2GB.
{
"cluster_name" : "elasticsearch",
"nodes" : {
"REDACTED" : {
"timestamp" : 1421060315273,
"name" : "monitoring-02",
"transport_address" : "inet[/REDACTED:9300]",
"host" : "monitoring-02",
"ip" : [ "inet[/REDACTED:9300]", "NONE" ],
"indices" : {
"docs" : {
"count" : 33572665,
"deleted" : 0
},
"store" : {
"size_in_bytes" : 35325488265,
"throttle_time_in_millis" : 1001644
},
"indexing" : {
"index_total" : 2385254,
"index_time_in_millis" : 1056177,
"index_current" : 0,
"delete_total" : 0,
"delete_time_in_millis" : 0,
"delete_current" : 0
},
"get" : {
"total" : 2,
"time_in_millis" : 1,
"exists_total" : 2,
"exists_time_in_millis" : 1,
"missing_total" : 0,
"missing_time_in_millis" : 0,
"current" : 0
},
"search" : {
"open_contexts" : 0,
"query_total" : 5980,
"query_time_in_millis" : 23715,
"query_current" : 0,
"fetch_total" : 686,
"fetch_time_in_millis" : 1142,
"fetch_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 25591,
"total_time_in_millis" : 8145961,
"total_docs" : 89042897,
"total_size_in_bytes" : 105938452318
},
"refresh" : {
"total" : 235568,
"total_time_in_millis" : 2608859
},
"flush" : {
"total" : 841,
"total_time_in_millis" : 519917
},
"warmer" : {
"current" : 0,
"total" : 492264,
"total_time_in_millis" : 115589
},
"filter_cache" : {
"memory_size_in_bytes" : 100688,
"evictions" : 0
},
"id_cache" : {
"memory_size_in_bytes" : 0
},
"fielddata" : {
"memory_size_in_bytes" : 3713608,
"evictions" : 0
},
"percolate" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0,
"memory_size_in_bytes" : -1,
"memory_size" : "-1b",
"queries" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 2883,
"memory_in_bytes" : 1275165840,
"index_writer_memory_in_bytes" : 475612,
"version_map_memory_in_bytes" : 6536
},
"translog" : {
"operations" : 21629,
"size_in_bytes" : 0
},
"suggest" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0
}
},
"os" : {
"timestamp" : 1421060316390,
"uptime_in_millis" : 308426,
"load_average" : [ 2.16, 1.71, 1.55 ],
"cpu" : {
"sys" : 1,
"user" : 9,
"idle" : 84,
"usage" : 10,
"stolen" : 0
},
"mem" : {
"free_in_bytes" : 1263923200,
"used_in_bytes" : 7108706304,
"free_percent" : 45,
"used_percent" : 54,
"actual_free_in_bytes" : 3821703168,
"actual_used_in_bytes" : 4550926336
},
"swap" : {
"used_in_bytes" : 159744,
"free_in_bytes" : 821919744
}
},
"process" : {
"timestamp" : 1421060316391,
"open_file_descriptors" : 9482,
"cpu" : {
"percent" : 32,
"sys_in_millis" : 7198300,
"user_in_millis" : 49129050,
"total_in_millis" : 56327350
},
"mem" : {
"resident_in_bytes" : 2771247104,
"share_in_bytes" : 38199296,
"total_virtual_in_bytes" : 19026513920
}
},
"jvm" : {
"timestamp" : 1421060316396,
"uptime_in_millis" : 303072733,
"mem" : {
"heap_used_in_bytes" : 2264790672,
"heap_used_percent" : 71,
"heap_committed_in_bytes" : 3186360320,
"heap_max_in_bytes" : 3186360320,
"non_heap_used_in_bytes" : 52775528,
"non_heap_committed_in_bytes" : 80363520,
"pools" : {
"young" : {
"used_in_bytes" : 211515000,
"max_in_bytes" : 279183360,
"peak_used_in_bytes" : 279183360,
"peak_max_in_bytes" : 279183360
},
"survivor" : {
"used_in_bytes" : 16317936,
"max_in_bytes" : 34865152,
"peak_used_in_bytes" : 34865152,
"peak_max_in_bytes" : 34865152
},
"old" : {
"used_in_bytes" : 2036957736,
"max_in_bytes" : 2872311808,
"peak_used_in_bytes" : 2160199936,
"peak_max_in_bytes" : 2872311808
}
}
},
"threads" : {
"count" : 72,
"peak_count" : 76
},
"gc" : {
"collectors" : {
"young" : {
"collection_count" : 83026,
"collection_time_in_millis" : 1033012
},
"old" : {
"collection_count" : 274,
"collection_time_in_millis" : 21758
}
}
},
"buffer_pools" : {
"direct" : {
"count" : 64,
"used_in_bytes" : 13061311,
"total_capacity_in_bytes" : 13061311
},
"mapped" : {
"count" : 3978,
"used_in_bytes" : 13254262144,
"total_capacity_in_bytes" : 13254262144
}
}
},
"thread_pool" : {
"generic" : {
"threads" : 2,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 5,
"completed" : 91516
},
"index" : {
"threads" : 1,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 1,
"completed" : 1
},
"snapshot_data" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"bench" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"get" : {
"threads" : 2,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 2,
"completed" : 2
},
"snapshot" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"merge" : {
"threads" : 2,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 2,
"completed" : 496234
},
"suggest" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"bulk" : {
"threads" : 4,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 4,
"completed" : 2385254
},
"optimize" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"warmer" : {
"threads" : 2,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 2,
"completed" : 256252
},
"flush" : {
"threads" : 2,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 2,
"completed" : 229764
},
"search" : {
"threads" : 12,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 12,
"completed" : 6666
},
"percolate" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"management" : {
"threads" : 3,
"queue" : 0,
"active" : 1,
"rejected" : 0,
"largest" : 3,
"completed" : 50530
},
"refresh" : {
"threads" : 2,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 2,
"completed" : 235334
}
},
"network" : {
"tcp" : {
"active_opens" : 440155,
"passive_opens" : 872034,
"curr_estab" : 130,
"in_segs" : 43919765,
"out_segs" : 42829383,
"retrans_segs" : 4198,
"estab_resets" : 4099,
"attempt_fails" : 204,
"in_errs" : 0,
"out_rsts" : 3735
}
},
"fs" : {
"timestamp" : 1421060316396,
"total" : {
"total_in_bytes" : 221333442560,
"free_in_bytes" : 184169394176,
"available_in_bytes" : 172902658048,
"disk_reads" : 66460,
"disk_writes" : 6116675,
"disk_io_op" : 6183135,
"disk_read_size_in_bytes" : 1265398784,
"disk_write_size_in_bytes" : 133966233600,
"disk_io_size_in_bytes" : 135231632384
},
"data" : [ {
"path" : "/data/elasticsearch/elasticsearch/nodes/0",
"mount" : "/data",
"dev" : "/dev/mapper/vg_autodata-lv_data",
"total_in_bytes" : 221333442560,
"free_in_bytes" : 184169394176,
"available_in_bytes" : 172902658048,
"disk_reads" : 66460,
"disk_writes" : 6116675,
"disk_io_op" : 6183135,
"disk_read_size_in_bytes" : 1265398784,
"disk_write_size_in_bytes" : 133966233600,
"disk_io_size_in_bytes" : 135231632384
} ]
},
"transport" : {
"server_open" : 13,
"rx_count" : 0,
"rx_size_in_bytes" : 0,
"tx_count" : 0,
"tx_size_in_bytes" : 0
},
"http" : {
"current_open" : 3,
"total_opened" : 61850
},
"fielddata_breaker" : {
"maximum_size_in_bytes" : 1911816192,
"maximum_size" : "1.7gb",
"estimated_size_in_bytes" : 3713608,
"estimated_size" : "3.5mb",
"overhead" : 1.03,
"tripped" : 0
}
}
}
}
@mikepea
Copy link
Author

mikepea commented Jan 12, 2015

Discussion afterwards in #elasticsearch on Freenode:

11:04 <dakrone> mikepea: you have a very high segment count and it looks like the segments memory_in_bytes is about 1.2gb out of your 3gb heap
11:04 <dakrone> mikepea: also, it looks like you're using swap (which will make things very slow for JVM-based processes!)
11:05 <dakrone> mikepea: how many shards does this cluster have?
11:05 <mikepea> just the one
11:05 <mikepea> I'd say it's a typical 'small' ELK stack box. 
11:06 ⇐ johnraz_ quit (~johnraz@unaffiliated/johnraz) Ping timeout: 245 seconds
11:06 <dakrone> one shard? that is a lot of segments for a single shard
11:06 <dakrone> how big is the index?
11:06 <dakrone> size and document-wise
11:08 <mikepea> dakrone: 33GB for all indices, currently there are 44 indices (logstash daily pattern)
11:09 → costin_ joined (~costin@109.166.132.126)
11:09 <mikepea> is indices->docs->count the doc count? In which case 33 million docs.
11:10  → jluis joined  ⇐ costin, falood and ArnaudM quit  
11:12 <dakrone> mikepea: oh okay, segment count is okay for 44 indices, in that case, since it's just lucene segment memory, that seems pretty reasonable, you have only the single node?
11:13 <mikepea> yeah, at the moment. Considering the potential of creating a larger stack, but for now it's not cost justifiable to have many ES nodes in the stack. 
11:13 ⇐ darkelda quit (~darkelda@unaffiliated/darkelda) Ping timeout: 265 seconds
11:15 <dakrone> okay, well if you can't add nodes, you could delete or close older indices, which would free the lucene memory associated with them
11:15 <mikepea> i guess this is a 'tuning ELK' question really. How can we reduce the segments memory_in_bytes count for an ELK system that's largely concerned with quick query of the current day's index.
11:15 <mikepea> is there any way of saying 'load this index into segment memory dynamically'?
11:16 <dakrone> mikepea: something you could do is optimize older indices that you are not indexing into into a single segment, see: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/indices-optimize.html#indices-optimize
11:16 <dakrone> set max_num_segments to 1 in that case
11:16 ⇐ Xylakant quit (~Xylakant@87.253.171.208) Quit: Leaving...
11:16 <dakrone> you can also use the curator tool to do it
11:18  → nickjj joined  ⇐ gschuring quit  
11:19 <mikepea> that sounds like a fine plan. We already have curator in place, but just for purging old data. I'll start graphing  segments memory_in_bytes and enable optimizing in curator.
11:19 <mikepea> thanks a million, this was gold knowledge. I'll update the gist with this discussion :)

@mikepea
Copy link
Author

mikepea commented Jan 14, 2015

As a follow-up, I configured curator to run daily as `curator optimize --older-than 3 --max_num_segments 1`.

This dropped the segment count (on a test system) from 600ish to 300ish, and heap usage seemed to fall to 40% (from 80%). Interestingly, segment memory (on the same test system) only fell from 250MB to 225MB.

I'll post results for the production system above when it has been applied.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment