Skip to content

Instantly share code, notes, and snippets.

@ludwigm
Created January 4, 2016 14:14
Show Gist options
  • Save ludwigm/f0b51211f968ee305b4a to your computer and use it in GitHub Desktop.
Save ludwigm/f0b51211f968ee305b4a to your computer and use it in GitHub Desktop.
ElasticSearch for Analytics presentation - Paste buffer
bin/elasticsearch
bin/plugin install mobz/elasticsearch-head // Broken in ES 2.0
bin/kibana -dev
http://localhost:9200/
http://localhost:9200/_plugin/head/
http://localhost:5601/
http://localhost:5601/app/sense
chrome-extension://lhjgkmllcaadmopgmanpapmpjgmfcfig/index.html
OR Postman
Data creation and intro
DELETE /demo/
GET _cat/indices?v
POST /demo/transactions
{
"product": "iPod",
"price": 5,
"date": "2015-01-01T00:00.000Z"
}
POST /demo/customer
{
"dateOfBirth": "1987-09-17",
"gender": "male",
"zipCode": "85586"
}
POST /demo/transaction_enriched
{
"product": "iPod",
"price": 5,
"date": "2015-01-01T00:00.000Z",
"customer": {
"dateOfBirth": "1987-09-17",
"gender": "male",
"zipCode": "85586"
}
}
GET _cat/indices?v
GET /demo/_mapping/
DELETE /demo/
PUT /demo/transactions/_bulk
{"index": {}}
{"product": "iPod", "price": 5, "date": "2015-01-01T00:00.000Z"}
{"index": {}}
{"product": "TV", "price": 1, "date": "2015-01-02T00:00.000Z"}
{"index": {}}
{"product": "Hifi", "price": 2, "date": "2015-01-03T00:00.000Z"}
{"index": {}}
{"product": "Car", "price": 5, "date": "2015-01-04T00:00.000Z"}
{"index": {}}
{"product": "iPod", "price": 50, "date": "2015-02-01T00:00.000Z"}
{"index": {}}
{"product": "TV", "price": 5, "date": "2015-02-02T00:00.000Z"}
GET _stats/indices
GET /demo/_search
{
"query": {
"match_all": {}
}
}
PUT /demo/customer/_bulk
{"index": {}}
{"dateOfBirth": "1987-09-17", "gender": "male", "zipCode" : "85586"}
{"index": {}}
{"dateOfBirth": "1995-01-01", "gender": "female", "zipCode" : "80804"}
{"index": {}}
{"dateOfBirth": "1937-01-01", "gender": "female", "zipCode" : "12345"}
Do Kibana round-trip
Show filter with auto-completion
GET /demo/_search
{
"query": {
"term": {
"gender": {
"value": "male"
}
}
}
}
OR matching
GET /demo/_search
{
"query": {
"bool": {
"should": [
{
"term": {
"gender": {
"value": "male"
}
}
},
{
"term": {
"zipCode": {
"value": "12345"
}
}
}
]
}
}
}
Short alternative
GET /demo/_search
{
"query": {
"terms": {
"zipCode": [
"12345",
"85586"
]
}
}
}
Scripted fields:
GET demo/customer/_search
{
"query": {
"match_all": {}
},
"script_fields": {
"age": {
"script": "DateTime.now().year - doc['dateOfBirth'].date.year"
}
}
}
Show aggregation format
Range aggregation with scripted metrics
GET demo/customer/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"by_age_range": {
"range": {
"script": "DateTime.now().year - doc['dateOfBirth'].date.year",
"ranges": [
{
"from": 0,
"to": 20
},
{
"from": 20,
"to": 40
},
{
"from": 40,
"to": 100
}
]
}
}
}
}
Index complex objects
PUT /demo/transaction_enriched/_bulk
{"index": {}}
{"product": "iPod", "price": 5, "date": "2015-01-01T00:00.000Z", "customer" : {"dateOfBirth": "1987-09-17", "gender": "male", "zipCode" : "85586"}}
{"index": {}}
{"product": "TV", "price": 1, "date": "2015-01-02T00:00.000Z", "customer" : {"dateOfBirth": "1937-01-01", "gender": "female", "zipCode" : "12345"}}
{"index": {}}
{"product": "Hifi", "price": 2, "date": "2015-01-03T00:00.000Z", "customer" : {"dateOfBirth": "1987-09-17", "gender": "male", "zipCode" : "85586"}}
{"index": {}}
{"product": "Car", "price": 5, "date": "2015-01-04T00:00.000Z", "customer" : {"dateOfBirth": "1987-09-17", "gender": "male", "zipCode" : "85586"}}
{"index": {}}
{"product": "iPod", "price": 50, "date": "2015-02-01T00:00.000Z", "customer" : {"dateOfBirth": "1987-09-17", "gender": "male", "zipCode" : "85586"}}
{"index": {}}
{"product": "TV", "price": 5, "date": "2015-02-02T00:00.000Z", "customer" : {"dateOfBirth": "1937-01-01", "gender": "female", "zipCode" : "12345"}}
GET demo/transaction_enriched/_search
Aggregate on complex objects
GET demo/transaction_enriched/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"by_age_range": {
"range": {
"script": "DateTime.now().year - doc['customer.dateOfBirth'].date.year",
"ranges": [
{
"from": 0,
"to": 20
},
{
"from": 20,
"to": 40
},
{
"from": 40,
"to": 100
}
]
},
"aggs": {
"sum_by_age_group": {
"sum": {
"field": "price"
}
}
}
}
}
}
Date histogram
GET demo/transaction_enriched/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"by_month": {
"date_histogram": {
"field": "date",
"interval": "month"
}
}
}
}
Date histogram + aggregated metrics
GET demo/transaction_enriched/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"by_month": {
"date_histogram": {
"field": "date",
"interval": "month"
},
"aggs": {
"sum_by_month": {
"sum": {
"field": "price"
}
},
"avg_by_month": {
"avg": {
"field": "price"
}
},
"percentiles_by_month": {
"percentiles": {
"field": "price"
}
}
}
}
}
}
Add car data
DELETE /cars
PUT /cars/transactions/_bulk
{ "index": {}}
{ "price" : 10000, "color" : "red", "make" : "honda", "sold" : "2014-10-28" }
{ "index": {}}
{ "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2014-11-05" }
{ "index": {}}
{ "price" : 30000, "color" : "green", "make" : "ford", "sold" : "2014-05-18" }
{ "index": {}}
{ "price" : 15000, "color" : "blue", "make" : "toyota", "sold" : "2014-07-02" }
{ "index": {}}
{ "price" : 12000, "color" : "green", "make" : "toyota", "sold" : "2014-08-19" }
{ "index": {}}
{ "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2014-11-05" }
{ "index": {}}
{ "price" : 80000, "color" : "red", "make" : "bmw", "sold" : "2014-01-01" }
{ "index": {}}
{ "price" : 25000, "color" : "blue", "make" : "ford", "sold" : "2014-02-12" }
{ "index": {}}
{ "price" : 20000, "color" : "black", "make" : "Audi", "sold" : "2014-09-28" }
{ "index": {}}
{ "price" : 20000, "color" : "red", "make" : "Audi", "sold" : "2014-12-05" }
{ "index": {}}
{ "price" : 30000, "color" : "green", "make" : "ford", "sold" : "2014-03-18" }
{ "index": {}}
{ "price" : 5000, "color" : "blue", "make" : "Tata", "sold" : "2014-07-02" }
{ "index": {}}
{ "price" : 7000, "color" : "green", "make" : "Tata", "sold" : "2014-09-19" }
{ "index": {}}
{ "price" : 20000, "color" : "red", "make" : "Audi", "sold" : "2014-05-05" }
{ "index": {}}
{ "price" : 80000, "color" : "red", "make" : "bmw", "sold" : "2014-01-01" }
{ "index": {}}
{ "price" : 25000, "color" : "blue", "make" : "honda", "sold" : "2014-01-12" }
{ "index": {}}
{ "price" : 22000, "color" : "blue", "make" : "honda", "sold" : "2014-02-12" }
{ "index": {}}
{ "price" : 23000, "color" : "blue", "make" : "honda", "sold" : "2014-03-12" }
{ "index": {}}
{ "price" : 18000, "color" : "blue", "make" : "honda", "sold" : "2014-03-12" }
{ "index": {}}
{ "price" : 45000, "color" : "yellow", "make" : "Audi", "sold" : "2014-04-05" }
Normal aggregations on cars
GET /cars/transactions/_search?search_type=count
{
"aggs":{
"sales_per_month":{
"date_histogram":{
"field":"sold",
"interval":"month",
"format":"yyyy-MM-dd"
},
"aggs":{
"monthly_sum":{
"sum":{
"field":"price"
}
}
}
}
}
}
Pipeline aggs: Average bucket
GET /cars/transactions/_search?search_type=count
{
"aggs":{
"sales_per_month":{
"date_histogram":{
"field":"sold",
"interval":"month",
"format":"yyyy-MM-dd"
},
"aggs":{
"monthly_sum":{
"sum":{
"field":"price"
}
}
}
},
"avg_monthly_sales":{
"avg_bucket":{
"buckets_path":"sales_per_month>monthly_sum"
}
}
}
}
Pipeline aggs: Complex with multiple aggs
GET /cars/transactions/_search?search_type=count
{
"aggs":{
"sales_per_month":{
"date_histogram":{
"field":"sold",
"interval":"month",
"format":"yyyy-MM-dd"
},
"aggs":{
"monthly_sum":{
"sum":{
"field":"price"
}
}
}
},
"sales_per_quarter":{
"date_histogram":{
"field":"sold",
"interval":"quarter",
"format":"yyyy-MM"
},
"aggs":{
"quarterly_sum":{
"sum":{
"field":"price"
}
}
}
},
"avg_monthly_sales":{
"avg_bucket":{
"buckets_path":"sales_per_month>monthly_sum"
}
},
"best_quarter_sale":{
"max_bucket":{
"buckets_path":"sales_per_quarter>quarterly_sum"
}
}
}
}
Pipeline aggs: Cumulative sums
GET /cars/transactions/_search?search_type=count
{
"aggs": {
"sales_per_month": {
"date_histogram": {
"field": "sold",
"interval": "month",
"format": "yyyy-MM-dd"
},
"aggs": {
"monthly_sum": {
"sum": {
"field": "price"
}
},
"cumulative_sales": {
"cumulative_sum": {
"buckets_path": "monthly_sum"
}
}
}
}
}
}
Pipeline aggs: Filter aggregation + Scripted pipeline agg
GET /cars/transactions/_search?search_type=count
{
"aggs": {
"sales_per_month": {
"date_histogram": {
"field": "sold",
"interval": "month",
"format": "yyyy-MM-dd"
},
"aggs": {
"monthly_sum": {
"sum": {
"field": "price"
}
},
"bmw_car": {
"filter": {
"term": {
"make": "bmw"
}
},
"aggs": {
"sales": {
"sum": {
"field": "price"
}
}
}
},
"bmw_percentage": {
"bucket_script": {
"buckets_path": {
"bmwSales": "bmw_car>sales",
"totalSales": "monthly_sum"
},
"script": "bmwSales / totalSales * 100"
}
}
}
}
}
}
Statistics: Extended stats
GET /cars/transactions/_search?search_type=count
{
"aggs":{
"sales_per_month":{
"date_histogram":{
"field":"sold",
"interval":"month",
"format":"yyyy-MM-dd"
},
"aggs":{
"stats":{
"extended_stats":{
"field":"price"
}
}
}
}
}
}
Pipeline aggs: Moving average (Timeline smoothing)
GET /cars/transactions/_search?search_type=count
{
"aggs": {
"sales_per_month": {
"date_histogram": {
"field": "sold",
"interval": "month",
"format": "yyyy-MM-dd"
},
"aggs": {
"stats": {
"extended_stats": {
"field": "price"
}
},
"movavg_mean": {
"moving_avg": {
"buckets_path": "stats.avg",
"window": 3,
"model": "ewma",
"settings": {
"alpha": 0.1
}
}
}
}
}
}
}
Edge Charts
http://localhost:8080/index2.html
// On DOM ready: POST a monthly date-histogram aggregation on "sold" (with a
// per-month sum of "price") to the local Elasticsearch node, then pass the
// parsed JSON response to drawChart.
$(document).ready(function() {
  var esQuery = {
    "aggs": {
      "sales_per_month": {
        "date_histogram": {
          "field": "sold",
          "interval": "month",
          "format": "yyyy-MM-dd"
        },
        "aggs": {
          "monthly_sum": {
            "sum": {
              "field": "price"
            }
          }
        }
      }
    }
  };
  d3.xhr("http://localhost:9200/cars/transactions/_search?search_type=count")
    .header("Content-Type", "application/json")
    .post(
      JSON.stringify(esQuery),
      function(err, rawData) {
        // The first callback argument carries the failure; without this guard
        // a failed request would throw on rawData.response below.
        if (err || !rawData) {
          console.error("Elasticsearch request failed", err);
          return;
        }
        var data = JSON.parse(rawData.response);
        console.log("got response", data);
        drawChart(data);
      }
    );
});
// Builds {x, y} points from the sales_per_month buckets of an Elasticsearch
// response (x = bucket.key_as_string, y = bucket.doc_count) and hands them to
// a d4 column chart with outer width 800, bound to the #graph element.
// NOTE(review): the request in this file also computes monthly_sum per bucket,
// which is not plotted here — confirm that doc_count is the intended y value.
function drawChart(data) {
  var buckets = data.aggregations.sales_per_month.buckets;
  console.log("buckets: ", buckets);

  var points = _.map(buckets, function(entry) {
    return {
      x: entry.key_as_string,
      y: entry.doc_count
    };
  });

  var columnChart = d4.charts.column().outerWidth(800);
  d3.select('#graph').datum(points).call(columnChart);
}
Edge Charts Kibana plugin
gulp dev
http://localhost:5601/app/edge-charts-plugin
Timelion
http://localhost:5601/app/timelion#/New-TimeLion-Sheet?_g=(refreshInterval:(display:Off,pause:!f,value:0),time:(from:now-5y,interval:'1M',mode:quick,to:now))&_a=(columns:3,interval:'1M',otherInterval:'1w',rows:3,selected:0,sheet:!('.es(*,%20metric%3D!'max:price!').bars(4).points(radius%3D3,%20weight%3D1)','(.es(*),%20.es(make:bmw)).lines(fill%3D3)','.es(make:bmw).divide(.es(*)).multiply(100).bars()','.es(*).derivative()'))
Maximum per month
.es(*, metric='max:price').bars(4).points(radius=3, weight=1)
Multiple charts
(.es(*), .es(make:bmw)).lines(fill=3)
Division with timelines
.es(make:bmw).divide(.es(*)).multiply(100).bars()
Derivative
.es(*).derivative()
Edge JVM
Scala
// Monthly average of "price" over demo/transaction_enriched via the elastic4s DSL.
// Explicit Java->Scala conversion for the bucket list (instead of relying on an
// implicit conversion being in scope).
import scala.collection.JavaConverters._

val client = ElasticClient.transport(ElasticsearchClientUri("elasticsearch://localhost:9300"))
// Date histogram on "date" (monthly interval) with an avg("price") sub-aggregation.
val resp = client.execute{
search in "demo" / "transaction_enriched" aggregations(
aggregation datehistogram ("by_month") field("date") interval(DateHistogramInterval.MONTH)
aggregations(
aggregation avg("avg_by_month") field("price")
)
)
}.await
val hist: Histogram = resp.aggregations.get("by_month")
// Only the println side effect is wanted, so iterate with foreach rather than
// building (and discarding) a collection of Units with map.
hist.getBuckets.asScala.foreach { bucket =>
  val avg: Avg = bucket.getAggregations.get("avg_by_month")
  println(s"${bucket.getKeyAsString}: ${avg.getValue}")
}
Java
// Monthly average of "price" over demo/transaction_enriched using the Java
// transport client on localhost:9300.
// try-with-resources closes the client once the results have been printed;
// the original never closed it.
try (Client client = TransportClient.builder().build()
        .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName("localhost"), 9300))) {
    // Date histogram on "date" (monthly interval) with an avg("price") sub-aggregation.
    SearchResponse response = client.prepareSearch("demo")
            .setTypes("transaction_enriched")
            .addAggregation(
                    AggregationBuilders.dateHistogram("by_month")
                            .interval(DateHistogramInterval.MONTH)
                            .field("date")
                            .subAggregation(
                                    AggregationBuilders.avg("avg_by_month")
                                            .field("price")
                            )
            )
            .execute()
            .actionGet();
    Histogram agg = response.getAggregations().get("by_month");
    // Iterable.forEach is sufficient; no intermediate stream is needed.
    agg.getBuckets().forEach(bucket -> {
        Avg subAgg = bucket.getAggregations().get("avg_by_month");
        System.out.println("bucket = " + bucket.getKeyAsString() + ": " + subAgg.getValue());
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment