Skip to content

Instantly share code, notes, and snippets.

@MarkusH
Created December 5, 2019 10:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MarkusH/606d0958beb62ce5dc72f8686bc97f34 to your computer and use it in GitHub Desktop.
Save MarkusH/606d0958beb62ce5dc72f8686bc97f34 to your computer and use it in GitHub Desktop.
# Root-level directory setting, defined before any table header.
# NOTE(review): presumably the directory where the service persists its
# on-disk state - confirm it exists and is writable by the service user.
data_dir = "/var/lib/vector"
# Sources
[sources.nginx_access_logs]
type = "file"

# Every *.log file in the nginx log directory is picked up, except the
# error log, which does not follow the access-log line format.
include = ["/var/log/nginx/*.log"]
exclude = ["/var/log/nginx/error.log"]

# Names of the event fields that carry the originating file path ("log")
# and the machine name ("server"). The "log" field is parsed again further
# down by the nginx_access_host transform.
host_key = "server"
file_key = "log"

[sources.nginx_access_logs.fingerprinting]
# NOTE(review): presumably identifies a file by device + inode rather than
# by content, so rotated files are not re-read - confirm against the docs.
strategy = "device_and_inode"
# Transforms
[transforms.nginx_access_parser]
type = "regex_parser"
inputs = ["nginx_access_logs"]

# Parse the raw line held in `message`; the original field is kept
# (drop_field = false) because the sink stores `message` as well.
field = "message"
drop_field = false

# Named capture groups become event fields: remote, ident, timestamp,
# method, path, code, size, referer, agent.
# NOTE(review): the second space-separated token is matched by an unnamed
# group and `ident` captures the third token - confirm this is intended.
regex = '^(?P<remote>[^ ]*) ([^ ]*) (?P<ident>[^ ]*) \[(?P<timestamp>[^\]]+)\] "(?P<method>\S+)(?: +(?P<path>[^"]*?)(?: +\S*)?)?" (?P<code>[^ ]*) (?P<size>[^ ]*) "(?P<referer>[^"]*)" "(?P<agent>[^"]*)"$'

[transforms.nginx_access_parser.types]
# Type coercions for selected captures; every other capture is left as is.
# The timestamp pattern is day/month-abbreviation/year:time plus UTC offset.
timestamp = "timestamp|%d/%b/%Y:%H:%M:%S %z"
code = "int"
size = "int"
[transforms.nginx_access_geoip]
type = "geoip"
inputs = ["nginx_access_parser"]

# GeoLite2 City database file used for the lookup.
database = "/usr/share/GeoIP/GeoLite2-City.mmdb"

# The address is read from `remote` (captured by nginx_access_parser) and
# the lookup result is written to `remote_geoip`.
source = "remote"
target = "remote_geoip"
[transforms.nginx_access_host]
type = "regex_parser"
inputs = ["nginx_access_geoip"]

# Derive `host` from the log file path stored in `log`: the capture is the
# last path component with its ".log" suffix removed. The `log` field itself
# is kept (drop_field = false).
field = "log"
drop_field = false
regex = '(?P<host>[^/]+)\.log$'
[transforms.transform_host]
type = "lua"
inputs = ["nginx_access_host"]

# Events whose derived host is "access" (i.e. the file was named access.log)
# get an empty `host` instead; all other events pass through unchanged.
source = 'if event["host"] == "access" then event["host"] = ""; end'
# Sinks
[sinks.cratedb_nginx]
# Definition of the destination table, kept here for reference. Its column
# names are the same set as the `columns` list further down.
#
# CREATE TABLE "logging"."nginx_access" (
#     "agent" TEXT,
#     "code" SMALLINT,
#     "host" TEXT,
#     "ident" TEXT,
#     "log" TEXT,
#     "message" TEXT,
#     "method" TEXT,
#     "path" TEXT,
#     "referer" TEXT,
#     "remote" IP,
#     "remote_geoip" OBJECT(STRICT) AS (
#         "city_name" TEXT,
#         "continent_code" TEXT,
#         "country_code" TEXT,
#         "latitude" REAL,
#         "longitude" REAL,
#         "postal_code" TEXT,
#         "timezone" TEXT
#     ),
#     "server" TEXT,
#     "size" BIGINT,
#     "timestamp" TIMESTAMP WITH TIME ZONE NOT NULL
# )
# CLUSTERED INTO 4 SHARDS
# PARTITIONED BY ("host")
# WITH (
#     column_policy = 'strict',
#     number_of_replicas = '0'
# )
type = "cratedb"
inputs = ["transform_host"]

# Connection settings. The values appear to be redacted in this copy and
# must be replaced with real ones before use.
host = "http://******:4200"
user = "*****"
password = "*****"

# Destination: logging.nginx_access.
schema = "logging"
table = "nginx_access"

# Both lists contain the same 14 names in the same order.
# NOTE(review): presumably `columns` names the table columns and `keys` the
# event fields written into them, position by position - confirm against
# the sink's documentation.
columns = [
    "timestamp",
    "message",
    "remote",
    "remote_geoip",
    "host",
    "ident",
    "method",
    "path",
    "code",
    "size",
    "referer",
    "agent",
    "log",
    "server",
]
keys = [
    "timestamp",
    "message",
    "remote",
    "remote_geoip",
    "host",
    "ident",
    "method",
    "path",
    "code",
    "size",
    "referer",
    "agent",
    "log",
    "server",
]

# Throughput tuning.
# NOTE(review): the unit of batch_size (events vs. bytes) and the time window
# of request_rate_limit_num are not visible here - confirm before changing.
batch_size = 10000
request_rate_limit_num = 50
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment