Skip to content

Instantly share code, notes, and snippets.

@davidkyle
Last active December 12, 2020 09:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidkyle/0bc9c84d60251c468a9e927c7228ba2d to your computer and use it in GitHub Desktop.
Save davidkyle/0bc9c84d60251c468a9e927c7228ba2d to your computer and use it in GitHub Desktop.
Aggregations and Visualisations supporting the Inference Pipeline Agg blog

Getting started with the Inference Aggregation

First ingest the customer churn data and trained model

Ingest the linked CSV files above into Elasticsearch using the Kibana Data Visualizer, which you will find in the Machine Learning section. Ingest them into two indices named calls and customers.

Grab the trained inference model and upload it to your cluster with this curl command, changing user:password and localhost:9200 as appropriate.

curl --user user:password --request PUT --header "Content-Type: application/json" --data @telco_churn_model.json "http://localhost:9200/_ml/inference/telco_churn"

Now try the queries and dashboard

GET calls,customers/_search
{
  "size": 0,
  "aggs": {
    "phone_number": {
      "composite": {
        "size": 100,
        "sources": [
          { "phone_number": { "terms": { "field": "phone_number" } } }
        ]
      },
      "aggs": {
        "call_charges": { "sum": { "field": "call_charges" } },
        "call_duration": { "sum": { "field": "call_duration" } },
        "call_count": { "value_count": { "field": "dialled_number" } },
        "customer_service_calls": { "sum": { "field": "customer_service_calls" } },
        "number_vmail_messages": { "sum": { "field": "number_vmail_messages" } },
        "account_length": {
          "scripted_metric": {
            "init_script": "state.account_length = 0",
            "map_script": "state.account_length = params._source.account_length",
            "combine_script": "return state.account_length",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "international_plan": {
          "scripted_metric": {
            "init_script": "state.international_plan = ''",
            "map_script": "state.international_plan = params._source.international_plan",
            "combine_script": "return state.international_plan",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "voice_mail_plan": {
          "scripted_metric": {
            "init_script": "state.voice_mail_plan = ''",
            "map_script": "state.voice_mail_plan = params._source.voice_mail_plan",
            "combine_script": "return state.voice_mail_plan",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "state": {
          "scripted_metric": {
            "init_script": "state.state = ''",
            "map_script": "state.state = params._source.state",
            "combine_script": "return state.state",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "churn_classification": {
          "inference": {
            "model_id": "telco_churn",
            "inference_config": {
              "classification": { "prediction_field_type": "number" }
            },
            "buckets_path": {
              "account_length": "account_length.value",
              "call_charges": "call_charges.value",
              "call_count": "call_count.value",
              "call_duration": "call_duration.value",
              "customer_service_calls": "customer_service_calls.value",
              "international_plan": "international_plan.value",
              "number_vmail_messages": "number_vmail_messages.value",
              "state": "state.value"
            }
          }
        }
      }
    }
  }
}
GET calls,customers/_search
{
  "size": 0,
  "aggs": {
    "phone_number": {
      "composite": {
        "size": 100,
        "sources": [
          { "phone_number": { "terms": { "field": "phone_number" } } }
        ]
      },
      "aggs": {
        "call_charges": { "sum": { "field": "call_charges" } },
        "call_duration": { "sum": { "field": "call_duration" } },
        "call_count": { "value_count": { "field": "dialled_number" } },
        "customer_service_calls": { "sum": { "field": "customer_service_calls" } },
        "number_vmail_messages": { "sum": { "field": "number_vmail_messages" } },
        "account_length": {
          "scripted_metric": {
            "init_script": "state.account_length = 0",
            "map_script": "state.account_length = params._source.account_length",
            "combine_script": "return state.account_length",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "international_plan": {
          "scripted_metric": {
            "init_script": "state.international_plan = ''",
            "map_script": "state.international_plan = params._source.international_plan",
            "combine_script": "return state.international_plan",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "voice_mail_plan": {
          "scripted_metric": {
            "init_script": "state.voice_mail_plan = ''",
            "map_script": "state.voice_mail_plan = params._source.voice_mail_plan",
            "combine_script": "return state.voice_mail_plan",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "state": {
          "scripted_metric": {
            "init_script": "state.state = ''",
            "map_script": "state.state = params._source.state",
            "combine_script": "return state.state",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "churn_classification": {
          "inference": {
            "model_id": "telco_churn",
            "inference_config": {
              "classification": { "prediction_field_type": "number" }
            },
            "buckets_path": {
              "account_length": "account_length.value",
              "call_charges": "call_charges.value",
              "call_count": "call_count.value",
              "call_duration": "call_duration.value",
              "customer_service_calls": "customer_service_calls.value",
              "international_plan": "international_plan.value",
              "number_vmail_messages": "number_vmail_messages.value",
              "state": "state.value"
            }
          }
        },
        "will_churn_filter": {
          "bucket_selector": {
            "buckets_path": { "will_churn": "churn_classification>value" },
            "script": "params.will_churn > 0"
          }
        }
      }
    }
  }
}
{
  "$schema": "https://vega.github.io/schema/vega-lite/v4.json"
  "title": "Telco Churn Predictions"
  // Where the chart's data comes from
  "data": {
    "url": {
      // Ignore the dashboard's context filters for this request
      "%context%": false
      // Indices to search
      "index": "calls,customers"
      // Search body containing the inference pipeline aggregation
      "body": {
        "size": 0,
        "aggs": {
          "phone_number": {
            "composite": {
              "size": 100,
              "sources": [
                { "phone_number": { "terms": { "field": "phone_number" } } }
              ]
            },
            "aggs": {
              "call_charges": { "sum": { "field": "call_charges" } },
              "call_duration": { "sum": { "field": "call_duration" } },
              "call_count": { "value_count": { "field": "dialled_number" } },
              "customer_service_calls": { "sum": { "field": "customer_service_calls" } },
              "number_vmail_messages": { "sum": { "field": "number_vmail_messages" } },
              "account_length": {
                "scripted_metric": {
                  "init_script": "state.account_length = 0",
                  "map_script": "state.account_length = params._source.account_length",
                  "combine_script": "return state.account_length",
                  "reduce_script": "for (d in states) if (d != null) return d"
                }
              },
              "international_plan": {
                "scripted_metric": {
                  "init_script": "state.international_plan = ''",
                  "map_script": "state.international_plan = params._source.international_plan",
                  "combine_script": "return state.international_plan",
                  "reduce_script": "for (d in states) if (d != null) return d"
                }
              },
              "voice_mail_plan": {
                "scripted_metric": {
                  "init_script": "state.voice_mail_plan = ''",
                  "map_script": "state.voice_mail_plan = params._source.voice_mail_plan",
                  "combine_script": "return state.voice_mail_plan",
                  "reduce_script": "for (d in states) if (d != null) return d"
                }
              },
              "state": {
                "scripted_metric": {
                  "init_script": "state.state = ''",
                  "map_script": "state.state = params._source.state",
                  "combine_script": "return state.state",
                  "reduce_script": "for (d in states) if (d != null) return d"
                }
              },
              "churn_classification": {
                "inference": {
                  "model_id": "telco_churn",
                  "inference_config": {
                    "classification": { "prediction_field_type": "number" }
                  },
                  "buckets_path": {
                    "account_length": "account_length.value",
                    "call_charges": "call_charges.value",
                    "call_count": "call_count.value",
                    "call_duration": "call_duration.value",
                    "customer_service_calls": "customer_service_calls.value",
                    "international_plan": "international_plan.value",
                    "number_vmail_messages": "number_vmail_messages.value",
                    "state": "state.value"
                  }
                }
              }
            }
          }
        }
      }
    }
    /*
      The chart only needs the list of bucket values, so format.property
      is used to discard everything else in the response.
    */
    "format": {"property": "aggregations.phone_number.buckets"}
  },
  /*
    The aggregation result tree has to be reshaped into something plottable:
    group by the predicted class and count the buckets in each class.
    Step one maps the numeric classification value to an expressive label.
  */
  "transform": [
    {
      "lookup": "churn_classification.value",
      "from": {
        "data": {
          "values": [
            {"category": 0, "classification_class": "Won't Churn"},
            {"category": 1, "classification_class": "Will Churn"}
          ]
        },
        "key": "category",
        "fields": ["classification_class"]
      }
    },
    {
      "aggregate": [
        { "op": "count", "as": "class_count" }
      ],
      "groupby": ["classification_class"]
    }
  ],
  "mark": "arc",
  "encoding": {
    "theta": {"field": "class_count", "type": "quantitative"},
    "color": {"field": "classification_class", "type": "nominal", "legend": {"title": null }}
  },
  "view": {"stroke": null}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment