Skip to content

Instantly share code, notes, and snippets.

@andrewkroh
Last active October 11, 2019 14:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewkroh/917e6cbe9ce57e77f5914a0b0660e626 to your computer and use it in GitHub Desktop.
Save andrewkroh/917e6cbe9ce57e77f5914a0b0660e626 to your computer and use it in GitHub Desktop.
Elasticsearch Ingest Node Enrich Processor Example - Top 1M Domain Ranks
# Ingest pipeline: looks up dns.question.name through the
# dns-domain-top1m-rank enrich policy, stages the match in _temp, then
# copies the matched rank to dns.question.rank and drops _temp.
PUT /_ingest/pipeline/domain_rank_enrichment
{
  "description" : "Enriching domains with rank.",
  "processors" : [
    {
      "enrich" : {
        "policy_name": "dns-domain-top1m-rank",
        "field" : "dns.question.name",
        "target_field": "_temp"
      }
    },
    {
      "script": {
        "if": "ctx._temp != null",
        "source": """
          // The enrich processor may store the match either as a single object
          // or as a list of objects (this depends on the Elasticsearch version
          // and on max_matches), so accept both shapes instead of assuming [0].
          def enrichment = ctx._temp instanceof List ? ctx._temp[0] : ctx._temp;
          // _temp is only a staging field; do not index it.
          ctx.remove('_temp');
          ctx.dns.question.rank = enrichment.rank;
        """
      }
    }
  ]
}
# Enrich policy: match incoming values against the "domain" field of the
# top-1M source index and make the "rank" field available for enrichment.
PUT /_enrich/policy/dns-domain-top1m-rank
{
  "match": {
    "indices": "top1m-domains-2019.10.03",
    "match_field": "domain",
    "enrich_fields": [
      "rank"
    ]
  }
}

# Execute the policy defined above.
POST /_enrich/policy/dns-domain-top1m-rank/_execute
# Filebeat configuration that reads top-1m.csv (lines of "rank,domain") and
# ships one document per line to a daily top1m-domains-* index.
filebeat.inputs:
  - type: log
    paths:
      # Download and unpack the file from:
      # http://s3-us-west-1.amazonaws.com/umbrella-static/index.html
      - top-1m.csv
    processors:
      # Tag every event so the output section can route it by event.module.
      - add_fields:
          target: ""
          fields:
            event.module: top1m
      # Split the raw CSV line in "message" into an array stored in "top1m".
      - decode_csv_fields:
          separator: ','
          fields:
            message: top1m
      # Column 0 is the rank, column 1 is the domain.
      - extract_array:
          field: top1m
          mappings:
            rank: 0
            domain: 1
      # Copy the rank into event metadata.
      # NOTE(review): presumably intended as the Elasticsearch document ID;
      # newer Beats releases may read it from '@metadata._id' instead - confirm
      # against the Filebeat version in use.
      - convert:
          fields:
            - from: rank
              to: '@metadata.id'
      # Convert the rank in place to an integer.
      - convert:
          fields:
            - from: rank
              type: integer
      # Remove the intermediate fields once rank and domain are extracted.
      - drop_fields:
          ignore_missing: true
          fields:
            - top1m
            - message

output.elasticsearch:
  hosts:
    - http://localhost:9200
  indices:
    # Only events tagged above go to the daily top1m-domains index.
    - index: "top1m-domains-%{+yyyy.MM.dd}"
      when:
        equals:
          event.module: top1m
# Simulate the pipeline with two documents: one whose dns.question.name is
# "www.google.com" and one whose name is "not.in.the.top1m.domains.com".
POST /_ingest/pipeline/domain_rank_enrichment/_simulate
{
  "docs": [
    {
      "_index": "index",
      "_id": "1",
      "_source": {
        "dns": {
          "question": {
            "name": "www.google.com"
          }
        }
      }
    },
    {
      "_index": "index",
      "_id": "1",
      "_source": {
        "dns": {
          "question": {
            "name": "not.in.the.top1m.domains.com"
          }
        }
      }
    }
  ]
}
{
"docs" : [
{
"doc" : {
"_index" : "index",
"_type" : "_doc",
"_id" : "1",
"_source" : {
"dns" : {
"question" : {
"name" : "not.in.the.top1m.domains.com"
}
}
},
"_ingest" : {
"timestamp" : "2019-10-11T14:53:47.952376Z"
}
}
},
{
"doc" : {
"_index" : "index",
"_type" : "_doc",
"_id" : "1",
"_source" : {
"dns" : {
"question" : {
"name" : "www.google.com",
"rank" : 15
}
}
},
"_ingest" : {
"timestamp" : "2019-10-11T14:53:47.952366Z"
}
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment