Skip to content

Instantly share code, notes, and snippets.

@mstoykov
Last active March 12, 2021 11:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mstoykov/232beeafb65d2fd187d7295bba4ebd71 to your computer and use it in GitHub Desktop.
Save mstoykov/232beeafb65d2fd187d7295bba4ebd71 to your computer and use it in GitHub Desktop.
telegraf influxdb aggregation
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 2,
"links": [],
"panels": [
{
"cards": {
"cardPadding": null,
"cardRound": null
},
"color": {
"cardColor": "#7d64ff",
"colorScale": "sqrt",
"colorScheme": "interpolateOranges",
"exponent": 0.5,
"mode": "opacity"
},
"dataFormat": "tsbuckets",
"datasource": "InfluxDB-1",
"fieldConfig": {
"defaults": {
"custom": {
"align": null,
"filterable": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"heatmap": {},
"hideZeroBuckets": true,
"highlightCards": true,
"id": 2,
"legend": {
"show": true
},
"pluginVersion": "7.3.6",
"reverseYBuckets": false,
"targets": [
{
"alias": "",
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"le"
],
"type": "tag"
},
{
"params": [
"gt"
],
"type": "tag"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "http_req_duration",
"orderByTime": "ASC",
"policy": "default",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value_bucket"
],
"type": "field"
},
{
"params": [],
"type": "first"
}
]
],
"tags": []
}
],
"timeFrom": null,
"timeShift": null,
"title": "Panel Title",
"tooltip": {
"show": true,
"showHistogram": false
},
"transformations": [
{
"id": "seriesToColumns",
"options": {
"byField": "Time"
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"http_req_duration.first {gt: 1000, le: 2000}": false
},
"indexByName": {
"Time": 0,
"http_req_duration.first {gt: -Inf, le: 0}": 1,
"http_req_duration.first {gt: 0, le: 50}": 2,
"http_req_duration.first {gt: 100, le: 120}": 4,
"http_req_duration.first {gt: 1000, le: 2000}": 9,
"http_req_duration.first {gt: 120, le: 150}": 5,
"http_req_duration.first {gt: 150, le: 200}": 6,
"http_req_duration.first {gt: 200, le: 500}": 7,
"http_req_duration.first {gt: 2000, le: +Inf}": 10,
"http_req_duration.first {gt: 50, le: 100}": 3,
"http_req_duration.first {gt: 500, le: 1000}": 8
},
"renameByName": {
"http_req_duration.first {gt: -Inf, le: 0}": "0",
"http_req_duration.first {gt: 0, le: 50}": "0-50",
"http_req_duration.first {gt: 100, le: 120}": "100-120",
"http_req_duration.first {gt: 1000, le: 2000}": "1000-2000",
"http_req_duration.first {gt: 120, le: 150}": "120-150",
"http_req_duration.first {gt: 150, le: 200}": "150-200",
"http_req_duration.first {gt: 200, le: 500}": "200-500",
"http_req_duration.first {gt: 2000, le: +Inf}": "2000+",
"http_req_duration.first {gt: 50, le: 100}": "50-100",
"http_req_duration.first {gt: 500, le: 1000}": "500-1000"
}
}
}
],
"type": "heatmap",
"xAxis": {
"show": true
},
"xBucketNumber": null,
"xBucketSize": null,
"yAxis": {
"decimals": null,
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true,
"splitFactor": null
},
"yBucketBound": "auto",
"yBucketNumber": null,
"yBucketSize": null
}
],
"refresh": false,
"schemaVersion": 26,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "2021-03-12T10:36:12.778Z",
"to": "2021-03-12T10:44:38.572Z"
},
"timepicker": {},
"timezone": "",
"title": "influxdb aggregation Copy",
"uid": "5l0aUYUGz",
"version": 3
}
# Telegraf Configuration
#
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs, and sent to the declared outputs.
#
# Plugins must be declared in here to be active.
# To deactivate a plugin, comment out the name and any variables.
#
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.
#
# Environment variables can be used anywhere in this config file, simply surround
# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"),
# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR})
# Global tags can be specified here in key="value" format.
# No global tag is currently enabled: every entry in this table is a
# commented-out example. Uncomment one (or add your own) to activate it.
[global_tags]
# dc = "us-east-1" # will tag all metrics with dc=us-east-1
# rack = "1a"
## Environment variables can be used as tags, and throughout the config file
# user = "$USER"
# Telegraf agent settings: collection cadence, batching/buffering, flushing,
# logging and host identification.
[agent]

## --- Collection ------------------------------------------------------------
## Default collection interval applied to every input.
interval = "10s"
## Align collections to multiples of 'interval'
## (with interval = "10s", collect at :00, :10, :20, ...).
round_interval = true
## Random sleep (up to this duration) each plugin takes before collecting.
## Useful to stop many plugins from hitting e.g. sysfs at the same instant,
## which can have a measurable effect on the system.
collection_jitter = "0s"

## --- Batching and buffering ------------------------------------------------
## Upper bound on the number of metrics sent to an output plugin per write.
metric_batch_size = 5000
## Maximum number of unwritten metrics held per output. A larger value
## tolerates longer output downtime without dropping metrics, at the cost of
## a higher maximum memory usage.
metric_buffer_limit = 100000

## --- Flushing --------------------------------------------------------------
## Default flush interval for all outputs. The longest gap between flushes is
## flush_interval + flush_jitter.
flush_interval = "1s"
## Random amount added to the flush interval, mainly to avoid large write
## spikes when many telegraf instances run side by side
## (a jitter of 5s with an interval of 10s flushes every 10-15s).
flush_jitter = "0s"

## --- Timestamp precision ---------------------------------------------------
## When left at the default or set to "0s", precision follows the timestamp
## order of the collection interval, capped at 1s:
##   interval = "10s"   -> precision "1s"
##   interval = "250ms" -> precision "1ms"
## Precision is NOT applied to service inputs; each service input is
## responsible for setting timestamps at the appropriate precision.
## Valid time units are "ns", "us" (or "µs"), "ms", "s".
precision = ""

## --- Logging ---------------------------------------------------------------
## Log at debug level.
debug = true
## Log only error level messages.
# quiet = false
## Destination for logs: "file", "stderr" or, on Windows, "eventlog".
## With "file", the output file is taken from the "logfile" setting.
logtarget = "stderr"
## File to log to when logtarget is "file". An empty string sends logs to
## stderr.
# logfile = ""
## Rotate the logfile after this time interval; 0 disables time based
## rotation. Rotation only happens on write, so it may be delayed when there
## is no log activity.
# logfile_rotation_interval = "0d"
## Rotate the logfile once it grows beyond this size; 0 disables size based
## rotation.
# logfile_rotation_max_size = "0MB"
## Number of rotated archives to keep; older logs are deleted.
## -1 keeps every archive.
# logfile_rotation_max_archives = 5

## --- Host identification ---------------------------------------------------
## Override the default hostname; when empty, os.Hostname() is used.
hostname = ""
## If set to true, do not set the "host" tag in the telegraf agent.
omit_hostname = false
###############################################################################
# OUTPUT PLUGINS #
###############################################################################
# Configuration for sending metrics to InfluxDB (v2 API).
# Writes go to a local InfluxDB instance, gzip-compressed, into the
# organization and bucket named below.
[[outputs.influxdb_v2]]
## The URLs of the InfluxDB cluster nodes.
##
## Multiple URLs can be specified for a single cluster, only ONE of the
## urls will be written to each interval.
## ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"]
urls = ["http://127.0.0.1:8086"]
## Token for authentication.
## SECURITY: never commit the token itself. It is read from the INFLUX_TOKEN
## environment variable, which must be exported before telegraf starts
## (string variables have to stay inside the quotes). Any token that was
## previously stored in this file in plain text should be revoked and
## re-issued.
token = "${INFLUX_TOKEN}"
## Organization is the name of the organization you wish to write to; must exist.
organization = "m.org"
## Destination bucket to write into.
bucket = "m.bucket"
## The value of this tag will be used to determine the bucket. If this
## tag is not set the 'bucket' option is used as the default.
# bucket_tag = ""
## If true, the bucket tag will not be added to the metric.
# exclude_bucket_tag = false
## Timeout for HTTP messages.
# timeout = "5s"
## Additional HTTP headers
# http_headers = {"X-Special-Header" = "Special-Value"}
## HTTP Proxy override. If unset, the standard proxy environment
## variables are consulted to determine which proxy, if any, should be used.
# http_proxy = "http://corporate.proxy:3128"
## HTTP User-Agent
# user_agent = "telegraf"
## Content-Encoding for write request body, can be set to "gzip" to
## compress body or "identity" to apply no encoding.
content_encoding = "gzip"
## Enable or disable uint support for writing uints to InfluxDB 2.0.
# influx_uint_support = false
## Optional TLS Config for use on HTTP connections.
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
# Create aggregate histograms.
# Only the "http_req_duration" measurement is aggregated (see namepass); the
# raw points for it are dropped and replaced by per-bucket counts.
[[aggregators.histogram]]
## Restrict this aggregator to the listed measurement names.
namepass = ["http_req_duration"]

## How often the aggregator flushes its results.
period = "5s"
grace = "10s"

## When true, the histogram starts from scratch after each flush rather than
## accumulating results across flushes.
reset = true
## Whether bucket values should be accumulated. When false, a "gt" tag is
## added alongside "le". Defaults to true.
cumulative = false

## When true, the aggregator drops the original metric so that it is not
## sent to the output plugins.
drop_original = true

## Bucket layout for the "value" field of "http_req_duration".
[[aggregators.histogram.config]]
## The name of the metric.
measurement_name = "http_req_duration"
fields = ["value"]
## Right borders of buckets (+Inf is added implicitly).
buckets = [0.0, 50.0, 100.0, 120.0, 150.0, 200.0, 500.0, 1000.0, 2000.0]
# Accept metrics over the InfluxDB 1.x HTTP API.
# Only the "http_req_duration" and "vus" measurements are let through
# (see namepass).
[[inputs.influxdb_listener]]
## Address and port the InfluxDB listener binds to.
service_address = ":8186"

## Longest time allowed for reading a request before timing out.
read_timeout = "10s"
## Longest time allowed for writing a response before timing out.
write_timeout = "10s"

## Maximum allowed HTTP request body size in bytes.
## 0 selects the default of 32MiB.
max_body_size = "32MiB"

## Keep only metrics whose measurement name is listed here.
namepass = ["http_req_duration", "vus"]

## Optional tag name used to store the database.
## When the write carries a database in its query string, it is kept under
## this tag name so that downstream outputs can use it.
## The default (empty) turns this off and the database is not recorded.
# database_tag = ""

## If set, the retention policy specified in the write query is added as
## the value of this tag name.
# retention_policy_tag = ""

## Set one or more allowed client CA certificate file names to
## enable mutually authenticated TLS connections.
# tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"]

## Add service certificate and key.
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"

## Optional username and password to accept for HTTP basic authentication.
## You probably want to make sure you have TLS configured above for this.
# basic_username = "foobar"
# basic_password = "barfoo"
@mstoykov
Copy link
Author

image

problems:

  1. You need to know your bucket boundaries in advance.
  2. Performance is pretty bad: at 1k RPS, even with the namepass filter, it starts dropping metrics from the aggregation and uses 400% CPU for me.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment