Dashbase Configurations
@javasoze · Last active July 25, 2017

# base directory for storage
baseDir: myDataDir # e.g. /data/index

# how to handle old time-slices
retention:
  numDays: number_days # number of days before expiring time-slices; can be a fraction, e.g. 0.5 days
  expirationHandler:
    type: purge # purge the expired time-slices

# cluster configuration
cluster:
  name: clusterName # name of the cluster, e.g. myNginxLogs
  url: zookeeper_urls # zookeeper urls, e.g. zk1:2181,zk2:2181

# indexer configuration
# we suggest keeping the default indexing values
indexer:
  # the following parameters configure how often to flush the time-slice;
  # the indexer triggers a flush when either condition is met
  maxDocsPerSegment: number_events # maximum number of events to gather before flushing the time-slice, default 1,000,000
  maxIndexingDelayInSec: number_seconds # how long to wait before flushing the time-slice, default 45 seconds
  numIndexingThreads: number_threads # number of indexing threads, default 2
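
# as a concrete illustration (all values below are hypothetical), a deployment
# that keeps half a day of data and flushes more aggressively than the defaults:
baseDir: /data/index
retention:
  numDays: 0.5 # expire time-slices after 12 hours
  expirationHandler:
    type: purge
cluster:
  name: myNginxLogs
  url: zk1:2181,zk2:2181
indexer:
  maxDocsPerSegment: 500000  # flush after 500,000 events...
  maxIndexingDelayInSec: 30  # ...or after 30 seconds, whichever comes first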
# aws cloudwatch firehose that reads from a logGroup and logStream
type: cloudwatch
params:
  group: myLogGroup # log group
  stream: myLogStream # log stream
# recursively streams in all files matching a given pattern under a specified directory, where each file is line-delimited
type: dir
params:
  baseDir: mydir # directory name, e.g. /var/logs
  pattern: filePattern # regex pattern for file names, e.g. ^(.*)nginx(.*).log
# streams from a line-delimited file
type: file
params:
  file: myFile # file name
# push-based firehose listening on the /v1/firehose/http/insert end-point of a Dashbase server
type: http
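# events can be pushed to the end-point over plain http; a hypothetical smoke
# test with curl (host, port, and the line-delimited request body format are
# assumptions, not documented here):
#   curl -XPOST http://localhost:7888/v1/firehose/http/insert -d 'my log line'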
# Kafka 0.10.x firehose (https://kafka.apache.org/)
type: kafka10
params:
  hosts: brokerHosts # broker hosts
  groupId: myGroupId # consumer group id
  topic: myTopic # kafka topic
  pollIntervalMs: interval # poll interval in ms, default 1000ms
  partitions: partitionList # list of partitions to stream; if empty, partitions are managed automatically
  kafkaProps: props # additional kafka properties, see: https://kafka.apache.org/documentation/#consumerconfigs
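
# a filled-in kafka10 firehose (hypothetical hosts, group id, and topic);
# auto.offset.reset is a standard kafka consumer config, but the nested map
# form of kafkaProps shown here is an assumption:
type: kafka10
params:
  hosts: kafka1:9092,kafka2:9092
  groupId: dashbase-nginx
  topic: nginx-logs
  kafkaProps:
    auto.offset.reset: earliest # start from the beginning of the topic on first run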
# firehose that streams from an NSQ queue
type: nsq
params:
  lookupAddress: host # nsq lookup host, default: localhost
  lookupPort: port # nsq lookup port, default: 4161
  topic: myTopic # topic to read from
  channel: myChannel # channel
  queueSize: 1000000 # queue buffer size, default 1,000,000
# aws s3 firehose that reads an s3 bucket
type: s3
params:
  bucket: myBucket # bucket name
  prefix: myService/logs # optional prefix of sub-buckets
  start-after: 2017-04-09/ # optional key to start after in an ordered listing, useful for files organized by date
  pattern: "((19|20)\\d\\d)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])" # only match file names matching the pattern
  gzipped: true # optional flag to apply gzip decoding to the files
# syslog firehose that listens on a port with the provided protocol
type: syslog
params:
  port: 32376 # port to listen on, default 32376
  protocol: udp # udp/tcp, default udp
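# a running syslog firehose can be smoke-tested from the same host with the
# util-linux logger utility (hypothetical invocation; -d selects udp):
#   logger -d -n localhost -P 32376 "test message"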
# parses a line of csv (comma-separated values), with the schema defined in the pattern section
# e.g.
# 127.0.0.1,blah blah,2017/07/30,this is a message,200,Safari Webkit
type: csv
params:
  pattern:
    timeFormat: yyyy/MM/dd
    schema:
      - name: host
        type: meta
      - name: -
      - name: time
        type: time
      - name: message
        type: text
      - name: responseCode
        type: meta
      - name: agent
        type: text
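
# with the schema above, the example line maps column by column to
# (illustrative):
#   host         = 127.0.0.1          (meta)
#   -            = blah blah          (ignored)
#   time         = 2017/07/30         (parsed with yyyy/MM/dd)
#   message      = this is a message  (text)
#   responseCode = 200                (meta)
#   agent        = Safari Webkit      (text)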
# parses a json string
type: json
params:
  flatten: # if configured, json will be flattened; for details visit: https://github.com/wnameless/json-flattener
    separator: '.' # separator char, default: '.'
    keepArray: false
    unicode: false
  pattern: # by default, string values are configured as text, and numbers as numeric
    schema: # the schema overrides the defaults on a field
      - name: timestamp # look at the "timestamp" field for the time value;
                        # if not specified, auto time is configured, which takes the current system time
        type: time
      - name: myMeta # overrides a field to a different type
        type: meta
    timeFormat: epoch # time format if provided in the data, e.g. epoch, iso, or a specific format string
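
# with flatten enabled and separator '.', a nested document such as
#   {"request": {"path": "/index", "code": 200}}
# is indexed as the flat fields (illustrative):
#   request.path = /index  (text)
#   request.code = 200     (numeric)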
# parses a line of key/value pairs delimited by the specified delimiter, where each key and value are separated by the separator
# e.g. key1:val1,key2:val2, if the separator is ":" and the delimiter is ","
type: keyvalue
params:
  separator: ":"
  delimiter: ","
  pattern:
    timeFormat: epoch
    schema:
      - name: type
        type: meta
      - name: time
        type: time
      - name: message
        type: text
      - name: number
        type: numeric
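
# a hypothetical input line for the schema above, given separator ":" and
# delimiter ",":
#   type:nginx,time:1501399200,message:hello world,number:42
# which yields type=nginx (meta), time parsed as epoch, message as text,
# and number as numeric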
# parses an arbitrary string with regular expressions
# this is an example of parsing nginx access and error logs
type: regex
params:
  patterns: # a list of patterns; the parser evaluates them in order until one matches
    - regex: "^([\\d.]+) (\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (.+?)\" (\\d{3}) (\\d+) (\\d+) \"([^\"]+)\" \"([^\"]+)\""
      schema:
        - name: host
          type: meta
        - type: '-' # '-' tells the parser to ignore this group
        - type: '-'
        - type: '-'
        - type: time
        - name: request
          type: text
        - name: response
          type: meta
        - name: bytesSent
          type: numeric
        - type: '-'
        - type: '-'
        - name: agent
          type: text
      timeFormat: "dd/MMM/yyyy:HH:mm:ss Z" # time format
    - regex: "^(\\d{4}/\\d{2}/\\d{2} \\d{2}:\\d{2}:\\d{2}) \\[(\\S+)\\] (\\d+)\\#(\\d+)\\: \\*(\\d+) (.+) client: (.+), server: (.+)"
      schema:
        - type: time
        - name: level
          type: meta
        - type: '-'
        - type: '-'
        - type: '-'
        - name: message
          type: text
        - name: host
          type: meta
      timeFormat: "yyyy/MM/dd HH:mm:ss"
      additionalColumns: # additional manual columns to create when this pattern matches
        - name: error
          type: meta
          value: true
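
# hypothetical nginx lines the two patterns above are meant to match: an
# access-log line for the first pattern, an error-log line for the second:
#   127.0.0.1 - - upstream [30/Jul/2017:10:00:00 +0000] "GET /index.html" 200 512 7 "http://example.com" "Mozilla/5.0"
#   2017/07/30 10:00:00 [error] 1234#0: *5678 open() failed (2: No such file or directory), client: 127.0.0.1, server: example.com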
# dashbase configuration
dashbase:
  # cluster configuration
  cluster:
    name: my_logs
    url: localhost:2181
  # base directory
  baseDir: /data/index
  # firehose configuration
  firehose:
    type: dir
    params:
      baseDir: /data/input/nginx
      pattern: ^(.*)nginx(.*).log
  # indexing parameters (optional)
  indexer:
    maxDocsPerSegment: 1000000
  # retention parameters
  retention:
    expirationHandler:
      params:
        type: purge
    numDays: 14
  # parser configuration
  parser:
    flatten:
      separator: '.'
    pattern: # by default, string values are configured as text, and numbers as numeric
      schema: # the schema overrides the defaults on a field
        - name: timestamp # look at the "timestamp" field for the time value;
                          # if not specified, auto time is configured, which takes the current system time
          type: time
        - name: myMeta # overrides a field to a different type
          type: meta
      timeFormat: epoch

# dropwizard server configuration
server:
  applicationConnectors:
    - type: http
      port: 7888
  applicationContextPath: /
  adminContextPath: /admin
  adminConnectors:
    - type: http
      port: 7887

# dropwizard logging configuration
logging:
  level: INFO
  appenders:
    - type: file
      currentLogFilename: logs/dashbase.log
      archive: true
      archivedLogFilenamePattern: logs/dashbase-%d.log
      archivedFileCount: 9
      timeZone: UTC
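
# the server and logging sections are standard dropwizard configuration
# (https://www.dropwizard.io/), so the service starts dropwizard-style with
# the config file as the last argument; a hypothetical invocation (the jar
# name is an assumption):
#   java -jar dashbase-server.jar server dashbase.yml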