Skip to content

Instantly share code, notes, and snippets.

@justizin
Created May 9, 2014 18:26
Show Gist options
  • Save justizin/d491591d46fdba0ac4e4 to your computer and use it in GitHub Desktop.
Save justizin/d491591d46fdba0ac4e4 to your computer and use it in GitHub Desktop.
gmond.conf changes?
* template[/etc/ganglia/gmond.conf] action create
- update content in file /etc/ganglia/gmond.conf from 65a4c8 to d56b00
--- /etc/ganglia/gmond.conf 2014-04-28 05:56:26.363457002 +0000
+++ /tmp/chef-rendered-template20140509-3493-11iakcf 2014-05-09 18:23:08.551457005 +0000
@@ -1,345 +1,345 @@
-/* This configuration is as close to 2.5.x default behavior as possible
- The values closely match ./gmond/metric.h definitions in 2.5.x */
-globals {
- daemonize = yes
- setuid = yes
- user = ganglia
- debug_level = 0
- max_udp_msg_len = 1472
- mute = no
- deaf = no
- host_dmax = 0 /*secs */
- cleanup_threshold = 300 /*secs */
- gexec = no
+/* This configuration is as close to 2.5.x default behavior as possible
+ The values closely match ./gmond/metric.h definitions in 2.5.x */
+globals {
+ daemonize = yes
+ setuid = yes
+ user = ganglia
+ debug_level = 0
+ max_udp_msg_len = 1472
+ mute = no
+ deaf = no
+ host_dmax = 0 /*secs */
+ cleanup_threshold = 300 /*secs */
+ gexec = no
send_metadata_interval = 30
override_hostname = prod-graphs01
-
-}
-/* If a cluster attribute is specified, then all gmond hosts are wrapped inside
- * of a <CLUSTER> tag. If you do not specify a cluster tag, then all <HOSTS> will
- * NOT be wrapped inside of a <CLUSTER> tag. */
-cluster {
+}
+
+/* If a cluster attribute is specified, then all gmond hosts are wrapped inside
+ * of a <CLUSTER> tag. If you do not specify a cluster tag, then all <HOSTS> will
+ * NOT be wrapped inside of a <CLUSTER> tag. */
+cluster {
name = "default"
- owner = "unspecified"
- latlong = "unspecified"
- url = "unspecified"
-}
+ owner = "unspecified"
+ latlong = "unspecified"
+ url = "unspecified"
+}
-/* The host section describes attributes of the host, like the location */
-host {
- location = "unspecified"
-}
+/* The host section describes attributes of the host, like the location */
+host {
+ location = "unspecified"
+}
-/* Feel free to specify as many udp_send_channels as you like. Gmond
- used to only support having a single channel */
-udp_send_channel {
+/* Feel free to specify as many udp_send_channels as you like. Gmond
+ used to only support having a single channel */
+udp_send_channel {
host = 10.176.253.79
port = 18651
- ttl = 1
-}
+ ttl = 1
+}
/* always send to localhost */
-udp_send_channel {
+udp_send_channel {
host = 127.0.0.1
port = 8649
- ttl = 1
+ ttl = 1
}
-/* You can specify as many udp_recv_channels as you like as well. */
-udp_recv_channel {
- port = 8649
-}
+/* You can specify as many udp_recv_channels as you like as well. */
+udp_recv_channel {
+ port = 8649
+}
-/* You can specify as many tcp_accept_channels as you like to share
- an xml description of the state of the cluster */
-tcp_accept_channel {
- port = 8649
-}
+/* You can specify as many tcp_accept_channels as you like to share
+ an xml description of the state of the cluster */
+tcp_accept_channel {
+ port = 8649
+}
-/* Each metrics module that is referenced by gmond must be specified and
- loaded. If the module has been statically linked with gmond, it does not
- require a load path. However all dynamically loadable modules must include
- a load path. */
-modules {
- module {
- name = "core_metrics"
- }
- module {
- name = "cpu_module"
- path = "modcpu.so"
- }
- module {
- name = "disk_module"
- path = "moddisk.so"
- }
- module {
- name = "load_module"
- path = "modload.so"
- }
- module {
- name = "mem_module"
- path = "modmem.so"
- }
- module {
- name = "net_module"
- path = "modnet.so"
- }
- module {
- name = "proc_module"
- path = "modproc.so"
- }
- module {
- name = "sys_module"
- path = "modsys.so"
- }
-}
+/* Each metrics module that is referenced by gmond must be specified and
+ loaded. If the module has been statically linked with gmond, it does not
+ require a load path. However all dynamically loadable modules must include
+ a load path. */
+modules {
+ module {
+ name = "core_metrics"
+ }
+ module {
+ name = "cpu_module"
+ path = "modcpu.so"
+ }
+ module {
+ name = "disk_module"
+ path = "moddisk.so"
+ }
+ module {
+ name = "load_module"
+ path = "modload.so"
+ }
+ module {
+ name = "mem_module"
+ path = "modmem.so"
+ }
+ module {
+ name = "net_module"
+ path = "modnet.so"
+ }
+ module {
+ name = "proc_module"
+ path = "modproc.so"
+ }
+ module {
+ name = "sys_module"
+ path = "modsys.so"
+ }
+}
-include ('/etc/ganglia/conf.d/*.conf')
+include ('/etc/ganglia/conf.d/*.conf')
-/* The old internal 2.5.x metric array has been replaced by the following
- collection_group directives. What follows is the default behavior for
- collecting and sending metrics that is as close to 2.5.x behavior as
+/* The old internal 2.5.x metric array has been replaced by the following
+ collection_group directives. What follows is the default behavior for
+ collecting and sending metrics that is as close to 2.5.x behavior as
possible. */
-/* This collection group will cause a heartbeat (or beacon) to be sent every
- 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses
- the age of the running gmond. */
-collection_group {
- collect_once = yes
- time_threshold = 20
- metric {
- name = "heartbeat"
- }
-}
+/* This collection group will cause a heartbeat (or beacon) to be sent every
+ 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses
+ the age of the running gmond. */
+collection_group {
+ collect_once = yes
+ time_threshold = 20
+ metric {
+ name = "heartbeat"
+ }
+}
-/* This collection group will send general info about this host every 1200 secs.
- This information doesn't change between reboots and is only collected once. */
-collection_group {
- collect_once = yes
- time_threshold = 1200
- metric {
- name = "cpu_num"
- title = "CPU Count"
- }
- metric {
- name = "cpu_speed"
- title = "CPU Speed"
- }
- metric {
- name = "mem_total"
- title = "Memory Total"
- }
- /* Should this be here? Swap can be added/removed between reboots. */
- metric {
- name = "swap_total"
- title = "Swap Space Total"
- }
- metric {
- name = "boottime"
- title = "Last Boot Time"
- }
- metric {
- name = "machine_type"
- title = "Machine Type"
- }
- metric {
- name = "os_name"
- title = "Operating System"
- }
- metric {
- name = "os_release"
- title = "Operating System Release"
- }
- metric {
- name = "location"
- title = "Location"
- }
-}
+/* This collection group will send general info about this host every 1200 secs.
+ This information doesn't change between reboots and is only collected once. */
+collection_group {
+ collect_once = yes
+ time_threshold = 1200
+ metric {
+ name = "cpu_num"
+ title = "CPU Count"
+ }
+ metric {
+ name = "cpu_speed"
+ title = "CPU Speed"
+ }
+ metric {
+ name = "mem_total"
+ title = "Memory Total"
+ }
+ /* Should this be here? Swap can be added/removed between reboots. */
+ metric {
+ name = "swap_total"
+ title = "Swap Space Total"
+ }
+ metric {
+ name = "boottime"
+ title = "Last Boot Time"
+ }
+ metric {
+ name = "machine_type"
+ title = "Machine Type"
+ }
+ metric {
+ name = "os_name"
+ title = "Operating System"
+ }
+ metric {
+ name = "os_release"
+ title = "Operating System Release"
+ }
+ metric {
+ name = "location"
+ title = "Location"
+ }
+}
/* This collection group will send the status of gexecd for this host every 300 secs */
-/* Unlike 2.5.x the default behavior is to report gexecd OFF. */
-collection_group {
- collect_once = yes
- time_threshold = 300
- metric {
- name = "gexec"
- title = "Gexec Status"
- }
-}
+/* Unlike 2.5.x the default behavior is to report gexecd OFF. */
+collection_group {
+ collect_once = yes
+ time_threshold = 300
+ metric {
+ name = "gexec"
+ title = "Gexec Status"
+ }
+}
-/* This collection group will collect the CPU status info every 20 secs.
- The time threshold is set to 90 seconds. In honesty, this time_threshold could be
- set significantly higher to reduce unnecessary network chatter. */
-collection_group {
- collect_every = 20
- time_threshold = 90
- /* CPU status */
- metric {
- name = "cpu_user"
- value_threshold = "1.0"
- title = "CPU User"
- }
- metric {
- name = "cpu_system"
- value_threshold = "1.0"
- title = "CPU System"
- }
- metric {
- name = "cpu_idle"
- value_threshold = "5.0"
- title = "CPU Idle"
- }
- metric {
- name = "cpu_nice"
- value_threshold = "1.0"
- title = "CPU Nice"
- }
- metric {
- name = "cpu_aidle"
- value_threshold = "5.0"
- title = "CPU aidle"
- }
- metric {
- name = "cpu_wio"
- value_threshold = "1.0"
- title = "CPU wio"
- }
- /* The next two metrics are optional if you want more detail...
- ... since they are accounted for in cpu_system.
- metric {
- name = "cpu_intr"
- value_threshold = "1.0"
- title = "CPU intr"
- }
- metric {
- name = "cpu_sintr"
- value_threshold = "1.0"
- title = "CPU sintr"
- }
- */
-}
+/* This collection group will collect the CPU status info every 20 secs.
+ The time threshold is set to 90 seconds. In honesty, this time_threshold could be
+ set significantly higher to reduce unnecessary network chatter. */
+collection_group {
+ collect_every = 20
+ time_threshold = 90
+ /* CPU status */
+ metric {
+ name = "cpu_user"
+ value_threshold = "1.0"
+ title = "CPU User"
+ }
+ metric {
+ name = "cpu_system"
+ value_threshold = "1.0"
+ title = "CPU System"
+ }
+ metric {
+ name = "cpu_idle"
+ value_threshold = "5.0"
+ title = "CPU Idle"
+ }
+ metric {
+ name = "cpu_nice"
+ value_threshold = "1.0"
+ title = "CPU Nice"
+ }
+ metric {
+ name = "cpu_aidle"
+ value_threshold = "5.0"
+ title = "CPU aidle"
+ }
+ metric {
+ name = "cpu_wio"
+ value_threshold = "1.0"
+ title = "CPU wio"
+ }
+ /* The next two metrics are optional if you want more detail...
+ ... since they are accounted for in cpu_system.
+ metric {
+ name = "cpu_intr"
+ value_threshold = "1.0"
+ title = "CPU intr"
+ }
+ metric {
+ name = "cpu_sintr"
+ value_threshold = "1.0"
+ title = "CPU sintr"
+ }
+ */
+}
-collection_group {
- collect_every = 20
- time_threshold = 90
- /* Load Averages */
- metric {
- name = "load_one"
- value_threshold = "1.0"
- title = "One Minute Load Average"
- }
- metric {
- name = "load_five"
- value_threshold = "1.0"
- title = "Five Minute Load Average"
- }
- metric {
- name = "load_fifteen"
- value_threshold = "1.0"
- title = "Fifteen Minute Load Average"
+collection_group {
+ collect_every = 20
+ time_threshold = 90
+ /* Load Averages */
+ metric {
+ name = "load_one"
+ value_threshold = "1.0"
+ title = "One Minute Load Average"
}
-}
+ metric {
+ name = "load_five"
+ value_threshold = "1.0"
+ title = "Five Minute Load Average"
+ }
+ metric {
+ name = "load_fifteen"
+ value_threshold = "1.0"
+ title = "Fifteen Minute Load Average"
+ }
+}
-/* This group collects the number of running and total processes */
-collection_group {
- collect_every = 80
- time_threshold = 950
- metric {
- name = "proc_run"
- value_threshold = "1.0"
- title = "Total Running Processes"
- }
- metric {
- name = "proc_total"
- value_threshold = "1.0"
- title = "Total Processes"
- }
+/* This group collects the number of running and total processes */
+collection_group {
+ collect_every = 80
+ time_threshold = 950
+ metric {
+ name = "proc_run"
+ value_threshold = "1.0"
+ title = "Total Running Processes"
+ }
+ metric {
+ name = "proc_total"
+ value_threshold = "1.0"
+ title = "Total Processes"
+ }
}
-/* This collection group grabs the volatile memory metrics every 40 secs and
- sends them at least every 180 secs. This time_threshold can be increased
- significantly to reduce unneeded network traffic. */
-collection_group {
- collect_every = 40
- time_threshold = 180
- metric {
- name = "mem_free"
- value_threshold = "1024.0"
- title = "Free Memory"
- }
- metric {
- name = "mem_shared"
- value_threshold = "1024.0"
- title = "Shared Memory"
- }
- metric {
- name = "mem_buffers"
- value_threshold = "1024.0"
- title = "Memory Buffers"
- }
- metric {
- name = "mem_cached"
- value_threshold = "1024.0"
- title = "Cached Memory"
- }
- metric {
- name = "swap_free"
- value_threshold = "1024.0"
- title = "Free Swap Space"
- }
-}
+/* This collection group grabs the volatile memory metrics every 40 secs and
+ sends them at least every 180 secs. This time_threshold can be increased
+ significantly to reduce unneeded network traffic. */
+collection_group {
+ collect_every = 40
+ time_threshold = 180
+ metric {
+ name = "mem_free"
+ value_threshold = "1024.0"
+ title = "Free Memory"
+ }
+ metric {
+ name = "mem_shared"
+ value_threshold = "1024.0"
+ title = "Shared Memory"
+ }
+ metric {
+ name = "mem_buffers"
+ value_threshold = "1024.0"
+ title = "Memory Buffers"
+ }
+ metric {
+ name = "mem_cached"
+ value_threshold = "1024.0"
+ title = "Cached Memory"
+ }
+ metric {
+ name = "swap_free"
+ value_threshold = "1024.0"
+ title = "Free Swap Space"
+ }
+}
-collection_group {
- collect_every = 40
- time_threshold = 300
- metric {
- name = "bytes_out"
- value_threshold = 4096
- title = "Bytes Sent"
- }
- metric {
- name = "bytes_in"
- value_threshold = 4096
- title = "Bytes Received"
- }
- metric {
- name = "pkts_in"
- value_threshold = 256
- title = "Packets Received"
- }
- metric {
- name = "pkts_out"
- value_threshold = 256
- title = "Packets Sent"
- }
+collection_group {
+ collect_every = 40
+ time_threshold = 300
+ metric {
+ name = "bytes_out"
+ value_threshold = 4096
+ title = "Bytes Sent"
+ }
+ metric {
+ name = "bytes_in"
+ value_threshold = 4096
+ title = "Bytes Received"
+ }
+ metric {
+ name = "pkts_in"
+ value_threshold = 256
+ title = "Packets Received"
+ }
+ metric {
+ name = "pkts_out"
+ value_threshold = 256
+ title = "Packets Sent"
+ }
}
-/* Different than 2.5.x default since the old config made no sense */
-collection_group {
- collect_every = 1800
- time_threshold = 3600
- metric {
- name = "disk_total"
- value_threshold = 1.0
- title = "Total Disk Space"
- }
+/* Different than 2.5.x default since the old config made no sense */
+collection_group {
+ collect_every = 1800
+ time_threshold = 3600
+ metric {
+ name = "disk_total"
+ value_threshold = 1.0
+ title = "Total Disk Space"
+ }
}
-collection_group {
- collect_every = 40
- time_threshold = 180
- metric {
- name = "disk_free"
- value_threshold = 1.0
- title = "Disk Space Available"
- }
- metric {
- name = "part_max_used"
- value_threshold = 1.0
- title = "Maximum Disk Space Used"
- }
+collection_group {
+ collect_every = 40
+ time_threshold = 180
+ metric {
+ name = "disk_free"
+ value_threshold = 1.0
+ title = "Disk Space Available"
+ }
+ metric {
+ name = "part_max_used"
+ value_threshold = 1.0
+ title = "Maximum Disk Space Used"
+ }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment