Skip to content

Instantly share code, notes, and snippets.

@agup006
Created December 23, 2022 18:12
Show Gist options
  • Save agup006/75fc186a99cfdeb57879a897e41067dd to your computer and use it in GitHub Desktop.
Save agup006/75fc186a99cfdeb57879a897e41067dd to your computer and use it in GitHub Desktop.
-- Byte value of the double-quote character, for comparison against
-- string.byte() results.
local dquote = string.byte('"')
-- Locate an unquoted field that begins at `start`.
--
-- record: the line being split (string)
-- start:  1-based index of the field's first character
--
-- Returns three values: the field's first index, its last index, and the
-- index at which the next field begins. When no further comma exists the
-- last two values are nil, meaning "field runs to the end of the record"
-- and "there is no next field".
local function extract_simple(record, start)
  local comma_at = record:find(',', start)
  if comma_at == nil then
    return start, nil, nil
  end
  return start, comma_at - 1, comma_at + 1
end
-- Locate the contents of a double-quoted field whose opening quote is at
-- `start`.
--
-- record: the line being split (string)
-- start:  1-based index of the opening double quote
--
-- Returns three values: the index of the first character inside the quotes,
-- the index of the last character inside the quotes, and the index at which
-- the next field begins.
--   * A doubled quote ("") inside the field is treated as an escaped quote
--     and skipped; the returned range still contains both characters.
--   * When the closing quote is the last character of the record, the third
--     value is nil so the caller does not emit an extra empty field.
--   * When the closing quote is missing, the last two values are nil: the
--     field runs to the end of the record and there is no next field.
local function extract_quoted(record, start)
  local quote = '"'
  -- step over the opening quote
  start = start + 1
  local offset = start
  while true do
    local index = record:find(quote, offset, true)
    if index == nil then
      -- unterminated quoted field: take the remainder of the record
      return start, nil, nil
    end
    local following = record:sub(index + 1, index + 1)
    if following ~= quote then
      -- found the closing quote of the field
      if following == '' then
        -- nothing after the closing quote: this was the last field
        return start, index - 1, nil
      end
      -- skip the closing quote and the separator that follows it
      return start, index - 1, index + 2
    end
    offset = index + 2 -- advance past both quotes of an escaped pair
  end
end
-- Split one CSV line into a list of field strings.
--
-- record: the line to split (string)
--
-- Fields that start with a double quote are delimited by extract_quoted,
-- all others by extract_simple. For quoted fields the surrounding quotes are
-- dropped and escaped quote pairs ("") are decoded to a single quote.
-- Returns a sequence table with one string per field.
local function split_csv(record)
  local rv = {}
  local offset = 1
  while offset ~= nil do
    local start_idx
    local stop_idx
    -- decide before `offset` is overwritten by the extractor below
    local quoted = record:byte(offset) == dquote
    if quoted then
      start_idx, stop_idx, offset = extract_quoted(record, offset)
    else
      start_idx, stop_idx, offset = extract_simple(record, offset)
    end
    -- a nil stop index means "through the end of the record"
    local field = record:sub(start_idx, stop_idx)
    if quoted then
      -- parentheses discard gsub's second result (the replacement count)
      field = (field:gsub('""', '"'))
    end
    rv[#rv + 1] = field
  end
  return rv
end
-- Extract `key=value` pairs from a whitespace-separated message and store
-- them in `target`.
--
-- message: the text to scan, or nil/false (in which case nothing happens)
-- target:  table that receives one entry per pair found
--
-- A leading '{' is removed from each key; a trailing '}' and then a trailing
-- ',' are removed from each value. Values are stored as strings.
local function parse_message(message, target)
  if not message then
    return
  end
  -- The key capture excludes '=' so a pair is split at its FIRST '='.
  -- A greedy (%S+) would split at the last one and push part of a value
  -- such as "/path?a=b" into the key.
  for k, v in message:gmatch('([^%s=]+)=(%S+)') do
    -- gsub returns (string, count); a single-target assignment keeps only
    -- the string
    local key = k:gsub('^{', '')
    local value = v:gsub('}$', ''):gsub(',$', '')
    target[key] = value
  end
end
-- Convert record[key] from text with an optional trailing "ms" into a
-- number, in place.
--
-- record: table holding the value
-- key:    name of the entry to convert
--
-- Does nothing when the entry is nil. When the remaining text is not a
-- base-10 number, tonumber yields nil and the entry is therefore cleared.
local function parse_ms(record, key)
  local raw = record[key]
  if raw == nil then
    return
  end
  -- only the first gsub result (the string) is kept
  local digits = raw:gsub('ms$', '')
  record[key] = tonumber(digits, 10)
end
-- Replace the variable parts of record['BO-API-URL'] with '?' so that
-- requests to the same endpoint produce the same string.
--
-- record: table that may contain a 'BO-API-URL' string; updated in place.
--
-- Does nothing when the key is absent. The substitutions run in the order
-- written below, each on the result of the previous one.
local function normalize_urls(record)
  local key = 'BO-API-URL'
  local url = record[key]
  if url == nil then
    return
  end
  -- Each assignment keeps only gsub's first result (the string).
  -- five hyphen-separated hex groups
  url = url:gsub('[0-9a-fA-F]+-[0-9a-fA-F]+-[0-9a-fA-F]+-[0-9a-fA-F]+-[0-9a-fA-F]+', '?')
  -- three hyphen-separated groups of digits / upper-case letters
  url = url:gsub('[0-9A-Z]+-[0-9A-Z]+-[0-9A-Z]+', '?')
  -- any remaining run of digits
  url = url:gsub('%d+', '?')
  -- Keep the "instruments/" prefix, as the funding and orders rules keep
  -- theirs; replacing the whole match with '?' lost the endpoint name.
  url = url:gsub('instruments/[^/]+', 'instruments/?')
  url = url:gsub('funding/([^/]+)/[^/]+', 'funding/%1/?')
  url = url:gsub('orders/[^/]+', 'orders/?')
  record[key] = url
end
-- Build a new table containing only the entries of `record` whose key is
-- listed in `allowed`.
--
-- record:  table to filter (not modified)
-- allowed: sequence of permitted keys
--
-- Returns the filtered copy.
local function allow_keys(record, allowed)
  -- index the permitted keys once so each record key is a single lookup
  local permitted = {}
  for _, name in ipairs(allowed) do
    permitted[name] = true
  end

  local kept = {}
  for name, item in pairs(record) do
    if permitted[name] then
      kept[name] = item
    end
  end
  return kept
end
-- Header row (list of column names) remembered for each tag. The first
-- record seen for a tag is taken to be that header.
local headers = {}

-- Keys kept on the record that cb_filter emits; everything else is dropped.
local CB_FILTER_ALLOWED_KEYS = {
  'BO-API-URL',
  'BO-API-HTTP-METHOD',
  'RESPONSE-STATUS-CODE',
  'API-PARTNER',
  'PROCESSING-TIME',
}

-- Filter callback: turns a raw CSV line (record._raw) into a structured
-- record.
--
-- tag:       tag of the incoming record; headers are tracked per tag
-- timestamp: timestamp of the incoming record
-- record:    table expected to carry the CSV line in its `_raw` field
--
-- Returns (code, timestamp, record):
--   -1  the record was the header row for this tag and is dropped
--    0  the record has no string `_raw` and is passed through untouched
--    1  the record was parsed; timestamp and record are both replaced
function cb_filter(tag, timestamp, record)
  local raw = record._raw
  if type(raw) ~= 'string' then
    -- nothing to split; keep the record as it is instead of raising
    return 0, timestamp, record
  end

  if headers[tag] == nil then
    headers[tag] = split_csv(raw)
    -- return -1 to drop the header
    return -1, timestamp, record
  end

  local data = {}
  local fields = split_csv(raw)
  for index, header in ipairs(headers[tag]) do
    data[header] = fields[index]
  end

  parse_message(data.message, data)
  normalize_urls(data)
  parse_ms(data, 'PROCESSING-TIME')

  -- messageTime is divided by 1000 (presumably epoch milliseconds to
  -- seconds; confirm against the data). When the column is missing or not
  -- numeric, keep the incoming timestamp rather than raising.
  local message_time = tonumber(data.messageTime)
  if message_time ~= nil then
    timestamp = message_time / 1000
  end

  data = allow_keys(data, CB_FILTER_ALLOWED_KEYS)
  data.timestamp = timestamp
  return 1, timestamp, data
end
[INPUT]
# Input read through go-s3-replay-plugin; records are tagged bo-api.
# "threaded on" was listed twice in this section; it is kept once.
Name go-s3-replay-plugin
Tag bo-api
threaded on
aws_access_key {{ secrets.aws_access_key }}
aws_secret_key {{ secrets.aws_secret_key }}
aws_bucket_name bucket-calyptia-0011
aws_bucket_region us-east-2
logs statsLoggerHistoricalSV_2022-12-01_03_33_18-32d4ddb4-d372-47e7-9915-e993946a9401.csv
[Filter]
Name lua
Match bo-api
script {{ files.csvlua }}
call cb_filter
[FILTER]
# Runs the "drivewealth" parser against the "timestamp" key.
# NOTE(review): this section has no match / match_regex rule, unlike the
# other filters and outputs in this pipeline - confirm the filter is actually
# applied to bo-api records, and whether reserve_data is needed if it is.
name parser
key_name timestamp
parser drivewealth
[FILTER]
name modify
remove timestamp
match *
[OUTPUT]
Name stdout
format json
match *
[OUTPUT]
Name s3
workers 1
Match *
aws_shared_credentials_file {{ files.creds }}
json_date_format java_sql_timestamp
bucket bucket-calyptia-0011
region us-east-2
use_put_object on
s3_key_format /$TAG/ARROWTEST/%Y/%m/%d/%H/%M/%S
store_dir /data/storage/s3
upload_timeout 10s
[PARSER]
Name apache
Format regex
Regex ^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$
Time_Key time
Time_Format %d/%b/%Y:%H:%M:%S %z
[PARSER]
Name apache2
Format regex
Regex ^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>.*)")?$
Time_Key time
Time_Format %d/%b/%Y:%H:%M:%S %z
[PARSER]
Name apache_error
Format regex
Regex ^\[[^ ]* (?<time>[^\]]*)\] \[(?<level>[^\]]*)\](?: \[pid (?<pid>[^\]]*)\])?( \[client (?<client>[^\]]*)\])? (?<message>.*)$
[PARSER]
Name nginx
Format regex
Regex ^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")
Time_Key time
Time_Format %d/%b/%Y:%H:%M:%S %z
[PARSER]
# https://rubular.com/r/IhIbCAIs7ImOkc
Name k8s-nginx-ingress
Format regex
Regex ^(?<host>[^ ]*) - (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "(?<referer>[^\"]*)" "(?<agent>[^\"]*)" (?<request_length>[^ ]*) (?<request_time>[^ ]*) \[(?<proxy_upstream_name>[^ ]*)\] (\[(?<proxy_alternative_upstream_name>[^ ]*)\] )?(?<upstream_addr>[^ ]*) (?<upstream_response_length>[^ ]*) (?<upstream_response_time>[^ ]*) (?<upstream_status>[^ ]*) (?<reg_id>[^ ]*).*$
Time_Key time
Time_Format %d/%b/%Y:%H:%M:%S %z
[PARSER]
Name json
Format json
Time_Key time
Time_Format %d/%b/%Y:%H:%M:%S %z
[PARSER]
Name docker
Format json
Time_Key time
Time_Format %Y-%m-%dT%H:%M:%S.%L
Time_Keep On
# --
# Since Fluent Bit v1.2, if you are parsing Docker logs and using
# the Kubernetes filter, it's no longer required to decode the
# 'log' key.
#
# Command | Decoder | Field | Optional Action
# =============|==================|=================
#Decode_Field_As json log
[PARSER]
Name docker-daemon
Format regex
Regex time="(?<time>[^ ]*)" level=(?<level>[^ ]*) msg="(?<msg>[^ ].*)"
Time_Key time
Time_Format %Y-%m-%dT%H:%M:%S.%L
Time_Keep On
[PARSER]
Name syslog-rfc5424
Format regex
Regex ^\<(?<pri>[0-9]{1,5})\>1 (?<time>[^ ]+) (?<host>[^ ]+) (?<ident>[^ ]+) (?<pid>[-0-9]+) (?<msgid>[^ ]+) (?<extradata>(\[(.*?)\]|-)) (?<message>.+)$
Time_Key time
Time_Format %Y-%m-%dT%H:%M:%S.%L%z
Time_Keep On
[PARSER]
Name syslog-rfc3164-local
Format regex
Regex ^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$
Time_Key time
Time_Format %b %d %H:%M:%S
Time_Keep On
[PARSER]
Name syslog-rfc3164
Format regex
Regex /^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
Time_Key time
Time_Format %b %d %H:%M:%S
Time_Keep On
[PARSER]
Name drivewealth
Format regex
Regex ^(?<timestamp>.*)$
Time_Format %s.%L
Time_Key timestamp
[PARSER]
Name mongodb
Format regex
Regex ^(?<time>[^ ]*)\s+(?<severity>\w)\s+(?<component>[^ ]+)\s+\[(?<context>[^\]]+)]\s+(?<message>.*?) *(?<ms>(\d+))?(:?ms)?$
Time_Format %Y-%m-%dT%H:%M:%S.%L
Time_Keep On
Time_Key time
[PARSER]
# https://rubular.com/r/0VZmcYcLWMGAp1
Name envoy
Format regex
Regex ^\[(?<start_time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)? (?<protocol>\S+)" (?<code>[^ ]*) (?<response_flags>[^ ]*) (?<bytes_received>[^ ]*) (?<bytes_sent>[^ ]*) (?<duration>[^ ]*) (?<x_envoy_upstream_service_time>[^ ]*) "(?<x_forwarded_for>[^ ]*)" "(?<user_agent>[^\"]*)" "(?<request_id>[^\"]*)" "(?<authority>[^ ]*)" "(?<upstream_host>[^ ]*)"
Time_Format %Y-%m-%dT%H:%M:%S.%L%z
Time_Keep On
Time_Key start_time
[PARSER]
# https://rubular.com/r/17KGEdDClwiuDG
# The authority field is wrapped in double quotes like its neighbours (and
# like the "envoy" parser); the opening quote was missing, which left a
# leading '"' inside the captured value.
Name istio-envoy-proxy
Format regex
Regex ^\[(?<start_time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)? (?<protocol>\S+)" (?<response_code>[^ ]*) (?<response_flags>[^ ]*) (?<response_code_details>[^ ]*) (?<connection_termination_details>[^ ]*) (?<upstream_transport_failure_reason>[^ ]*) (?<bytes_received>[^ ]*) (?<bytes_sent>[^ ]*) (?<duration>[^ ]*) (?<x_envoy_upstream_service_time>[^ ]*) "(?<x_forwarded_for>[^ ]*)" "(?<user_agent>[^\"]*)" "(?<x_request_id>[^\"]*)" "(?<authority>[^ ]*)" "(?<upstream_host>[^ ]*)" (?<upstream_cluster>[^ ]*) (?<upstream_local_address>[^ ]*) (?<downstream_local_address>[^ ]*) (?<downstream_remote_address>[^ ]*) (?<requested_server_name>[^ ]*) (?<route_name>[^ ]*)
Time_Format %Y-%m-%dT%H:%M:%S.%L%z
Time_Keep On
Time_Key start_time
[PARSER]
# http://rubular.com/r/tjUt3Awgg4
Name cri
Format regex
Regex ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>[^ ]*) (?<message>.*)$
Time_Key time
Time_Format %Y-%m-%dT%H:%M:%S.%L%z
Time_Keep On
[PARSER]
Name json-decode
Format json
Decode_Field_As escaped_utf8 message
[PARSER]
Name kube-custom
Format regex
Regex (?<tag>[^.]+)?\.?(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$
@ofthomas76
Copy link

[INPUT]
    log_level debug
    Name go-s3-replay-plugin
    Tag bo-api
    aws_access_key {{ secrets.aws_access_key }}
    aws_secret_key {{ secrets.aws_secret_key }}
    aws_bucket_name dev.dwt.apimetrics
    aws_bucket_region us-east-1
    logs source/*

[Filter]
    Name    lua
    Match   bo-api 
    script  {{ files.csvlua }}
    call    cb_filter

[FILTER]
    name    parser
    key_name timestamp 
    parser  drivewealth

[OUTPUT]
    Name stdout
    format json
    match *

[OUTPUT]
    Name s3
    Match bo-api
    workers 1
    aws_shared_credentials_file {{ files.creds }}
    bucket dev.dwt.apimetrics
    region us-east-1
    json_date_format java_sql_timestamp
    s3_key_format /$TAG/%Y/%m/%d/%H/%M/%S
    store_dir /data/storage/s3
    upload_timeout 10s

[OUTPUT]
    # Sends bo-api records as JSON lines over TLS to the host below.
    Name http
    Match bo-api
    host endpoint4.collection.sumologic.com
    port 443
    # NOTE(review): the uri below appears to embed a collector token in plain
    # text - consider a {{ secrets.* }} placeholder, as used for the AWS keys,
    # and rotating the token if this text is public.
    uri /receiver/v1/http/ZaVnC4dhaV3oPE-dgVqA22KhOtj1Y2nOMBkLzw_VFJqlaG9KwG_1s7PoB-8lDFANShQ1CIb8JT3y66IUoINcrFfwJ6zoGnIWyL2lJmeyRIKdbk2ncAVjuQ==
    format json_lines
    tls on
    json_date_key timestamp
    json_date_format iso8601

[OUTPUT]
    Name stdout
    format json
    match bo-api

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment