davidkyle/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Getting started with the Inference Aggregation

First ingest the customer churn data and trained model


calls.csv
customers.csv

Ingest the CSV files linked above into Elasticsearch using the Kibana Data Visualizer, which you will find in the Machine Learning section. Ingest them into indices named calls and customers, respectively.

telco_churn_model.json

Grab the trained inference model and upload it to your cluster with this curl command, changing user:password and localhost:9200 as appropriate.
curl -u user:password -XPUT -H "Content-Type: application/json" "http://localhost:9200/_ml/inference/telco_churn" -d @telco_churn_model.json

Now try the queries and the dashboard


churn_classification_query.json
vega_telco_churn_viz.js


## churn_classification_query.json
# Predicts churn per phone number by feeding per-customer aggregates into the
# `telco_churn` trained model through the inference pipeline aggregation.
#
# - Searches the `calls` and `customers` indices together; "size": 0 drops the
#   hits so that only the aggregation results are returned.
# - The composite aggregation creates one bucket per `phone_number` and returns
#   up to 100 of them per request.
# - The sum / value_count sub-aggregations build the numeric model inputs.
# - Each scripted_metric copies one field out of _source: map_script overwrites
#   the state with the current document's value, combine_script returns that
#   state, and reduce_script returns the first non-null entry of `states`.
#   (Presumably only the `customers` documents carry these fields - verify
#   against the index mappings.)
# - The inference aggregation passes each listed sub-aggregation value to the
#   model under the same name and returns the prediction as a number.
#
# NOTE(review): `voice_mail_plan` is aggregated below but is not listed in the
# inference "buckets_path", so it is never passed to the model - confirm that
# this is intended.
GET calls,customers/_search
{
  "size": 0,
  "aggs": {
    "phone_number": {
      "composite": {
        "size": 100,
        "sources": [
          {
            "phone_number": {
              "terms": {
                "field": "phone_number"
              }
            }
          }
        ]
      },
      "aggs": {
        "call_charges": {
          "sum": {
            "field": "call_charges"
          }
        },
        "call_duration": {
          "sum": {
            "field": "call_duration"
          }
        },
        "call_count": {
          "value_count": {
            "field": "dialled_number"
          }
        },
        "customer_service_calls": {
          "sum": {
            "field": "customer_service_calls"
          }
        },
        "number_vmail_messages": {
          "sum": {
            "field": "number_vmail_messages"
          }
        },
        "account_length": {
          "scripted_metric": {
            "init_script": "state.account_length = 0",
            "map_script": "state.account_length = params._source.account_length",
            "combine_script": "return state.account_length",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "international_plan": {
          "scripted_metric": {
            "init_script": "state.international_plan = ''",
            "map_script": "state.international_plan = params._source.international_plan",
            "combine_script": "return state.international_plan",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "voice_mail_plan": {
          "scripted_metric": {
            "init_script": "state.voice_mail_plan = ''",
            "map_script": "state.voice_mail_plan = params._source.voice_mail_plan",
            "combine_script": "return state.voice_mail_plan",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "state": {
          "scripted_metric": {
            "init_script": "state.state = ''",
            "map_script": "state.state = params._source.state",
            "combine_script": "return state.state",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "churn_classification": {
          "inference": {
            "model_id": "telco_churn",
            "inference_config": {
              "classification": {
                "prediction_field_type": "number"
              }
            },
            "buckets_path": {
              "account_length": "account_length.value",
              "call_charges": "call_charges.value",
              "call_count": "call_count.value",
              "call_duration": "call_duration.value",
              "customer_service_calls": "customer_service_calls.value",
              "international_plan": "international_plan.value",
              "number_vmail_messages": "number_vmail_messages.value",
              "state": "state.value"
            }
          }
        }
      }
    }
  }
}

## churn_classification_query_with_churn_filter.json
# Same churn prediction as the plain classification query, but keeps only the
# phone numbers that the `telco_churn` model predicts will churn.
#
# - Searches the `calls` and `customers` indices together; "size": 0 drops the
#   hits so that only the aggregation results are returned.
# - The composite aggregation creates one bucket per `phone_number` and returns
#   up to 100 of them per request (before the filter below is applied).
# - The sum / value_count sub-aggregations build the numeric model inputs.
# - Each scripted_metric copies one field out of _source: map_script overwrites
#   the state with the current document's value, combine_script returns that
#   state, and reduce_script returns the first non-null entry of `states`.
#   (Presumably only the `customers` documents carry these fields - verify
#   against the index mappings.)
# - The inference aggregation passes each listed sub-aggregation value to the
#   model under the same name and returns the prediction as a number.
# - `will_churn_filter` is a bucket_selector that drops every bucket whose
#   predicted value is not greater than 0. Its buckets_path uses the metric
#   separator `.` (`churn_classification.value`), like every other path in
#   this request; `>` is reserved for descending into a sub-aggregation.
#
# NOTE(review): `voice_mail_plan` is aggregated below but is not listed in the
# inference "buckets_path", so it is never passed to the model - confirm that
# this is intended.
GET calls,customers/_search
{
  "size": 0,
  "aggs": {
    "phone_number": {
      "composite": {
        "size": 100,
        "sources": [
          {
            "phone_number": {
              "terms": {
                "field": "phone_number"
              }
            }
          }
        ]
      },
      "aggs": {
        "call_charges": {
          "sum": {
            "field": "call_charges"
          }
        },
        "call_duration": {
          "sum": {
            "field": "call_duration"
          }
        },
        "call_count": {
          "value_count": {
            "field": "dialled_number"
          }
        },
        "customer_service_calls": {
          "sum": {
            "field": "customer_service_calls"
          }
        },
        "number_vmail_messages": {
          "sum": {
            "field": "number_vmail_messages"
          }
        },
        "account_length": {
          "scripted_metric": {
            "init_script": "state.account_length = 0",
            "map_script": "state.account_length = params._source.account_length",
            "combine_script": "return state.account_length",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "international_plan": {
          "scripted_metric": {
            "init_script": "state.international_plan = ''",
            "map_script": "state.international_plan = params._source.international_plan",
            "combine_script": "return state.international_plan",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "voice_mail_plan": {
          "scripted_metric": {
            "init_script": "state.voice_mail_plan = ''",
            "map_script": "state.voice_mail_plan = params._source.voice_mail_plan",
            "combine_script": "return state.voice_mail_plan",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "state": {
          "scripted_metric": {
            "init_script": "state.state = ''",
            "map_script": "state.state = params._source.state",
            "combine_script": "return state.state",
            "reduce_script": "for (d in states) if (d != null) return d"
          }
        },
        "churn_classification": {
          "inference": {
            "model_id": "telco_churn",
            "inference_config": {
              "classification": {
                "prediction_field_type": "number"
              }
            },
            "buckets_path": {
              "account_length": "account_length.value",
              "call_charges": "call_charges.value",
              "call_count": "call_count.value",
              "call_duration": "call_duration.value",
              "customer_service_calls": "customer_service_calls.value",
              "international_plan": "international_plan.value",
              "number_vmail_messages": "number_vmail_messages.value",
              "state": "state.value"
            }
          }
        },
        "will_churn_filter": {
          "bucket_selector": {
            "buckets_path": {
              "will_churn": "churn_classification.value"
            },
            "script": "params.will_churn > 0"
          }
        }
      }
    }
  }
}

## vega_telco_churn_viz.js
{
  // Kibana Vega-Lite spec that runs the churn inference aggregation and draws
  // the share of "Will Churn" vs "Won't Churn" predictions as a pie chart.
  // Written in relaxed HJSON syntax (comments, no commas between these members).
  "$schema": "https://vega.github.io/schema/vega-lite/v4.json"
  "title": "Telco Churn Predictions"

  // Define the data source: an Elasticsearch search described by the "url" object
  "data": {
    "url": {

      // Do not apply dashboard context filters
      "%context%": false

      // Which indices to search
      "index": "calls,customers"

      // The search body: one composite bucket per phone number, the per-bucket
      // feature aggregations, and the inference pipeline agg that consumes them
      "body": {
        "size": 0,
        "aggs": {
          "phone_number": {
            "composite": {
              "size": 100,
              "sources": [
                {
                  "phone_number": {
                    "terms": {
                      "field": "phone_number"
                    }
                  }
                }
              ]
            },
            "aggs": {
              "call_charges": {
                "sum": {
                  "field": "call_charges"
                }
              },
              "call_duration": {
                "sum": {
                  "field": "call_duration"
                }
              },
              "call_count": {
                "value_count": {
                  "field": "dialled_number"
                }
              },
              "customer_service_calls": {
                "sum": {
                  "field": "customer_service_calls"
                }
              },
              "number_vmail_messages": {
                "sum": {
                  "field": "number_vmail_messages"
                }
              },
              // The scripted metrics below each copy one field out of _source:
              // map_script overwrites the state with the current document's value
              // and reduce_script returns the first non-null entry of `states`.
              // (Presumably only the `customers` documents carry these fields -
              // verify against the index mappings.)
              "account_length": {
                "scripted_metric": {
                  "init_script": "state.account_length = 0",
                  "map_script": "state.account_length = params._source.account_length",
                  "combine_script": "return state.account_length",
                  "reduce_script": "for (d in states) if (d != null) return d"
                }
              },
              "international_plan": {
                "scripted_metric": {
                  "init_script": "state.international_plan = ''",
                  "map_script": "state.international_plan = params._source.international_plan",
                  "combine_script": "return state.international_plan",
                  "reduce_script": "for (d in states) if (d != null) return d"
                }
              },
              "voice_mail_plan": {
                "scripted_metric": {
                  "init_script": "state.voice_mail_plan = ''",
                  "map_script": "state.voice_mail_plan = params._source.voice_mail_plan",
                  "combine_script": "return state.voice_mail_plan",
                  "reduce_script": "for (d in states) if (d != null) return d"
                }
              },
              "state": {
                "scripted_metric": {
                  "init_script": "state.state = ''",
                  "map_script": "state.state = params._source.state",
                  "combine_script": "return state.state",
                  "reduce_script": "for (d in states) if (d != null) return d"
                }
              },
              // Run the telco_churn model on each bucket; the prediction is
              // returned as a number so it can be matched against the numeric
              // categories in the lookup transform further down.
              // NOTE(review): voice_mail_plan is aggregated above but is not
              // listed in this buckets_path - confirm that this is intended.
              "churn_classification": {
                "inference": {
                  "model_id": "telco_churn",
                  "inference_config": {
                    "classification": {
                      "prediction_field_type": "number"
                    }
                  },
                  "buckets_path": {
                    "account_length": "account_length.value",
                    "call_charges": "call_charges.value",
                    "call_count": "call_count.value",
                    "call_duration": "call_duration.value",
                    "customer_service_calls": "customer_service_calls.value",
                    "international_plan": "international_plan.value",
                    "number_vmail_messages": "number_vmail_messages.value",
                    "state": "state.value"
                  }
                }
              }
            }
          }
        }
      }
    }

    /*
    For our graph, we only need the list of bucket values.
    Use format.property to discard everything else in the search response.
    */
    "format": {"property": "aggregations.phone_number.buckets"}
  },

  /*
  The aggregation result tree needs to be transformed into a format we can plot.
  First, the lookup gives the numeric classification value an expressive name
  (0 -> "Won't Churn", 1 -> "Will Churn").
  Then the aggregate groups the buckets by that name and counts them per class.
  */
  "transform": [
    {
      "lookup": "churn_classification.value",
      "from": {
        "data": {
          "values": [
            {"category": 0, "classification_class": "Won't Churn"},
            {"category": 1, "classification_class": "Will Churn"}
          ]
        },
        "key": "category",
        "fields": ["classification_class"]
      }
    },
    {
      "aggregate": [
        {
          "op": "count",
          "as": "class_count"
        }
      ],
      "groupby": ["classification_class"]
    }
  ],

  // Pie chart: one arc per class, sized by its count and coloured by class name
  "mark": "arc",
  "encoding": {
    "theta": {"field": "class_count", "type": "quantitative"},
    "color": {"field": "classification_class", "type": "nominal", "legend": {"title": null }}
  },

  // No border around the chart view
 "view": {"stroke": null}
}
	# Predicts churn per phone number by feeding per-customer aggregates into the
	# `telco_churn` trained model through the inference pipeline aggregation.
	#
	# - Searches the `calls` and `customers` indices together; "size": 0 drops the
	#   hits so that only the aggregation results are returned.
	# - The composite aggregation creates one bucket per `phone_number` and returns
	#   up to 100 of them per request.
	# - The sum / value_count sub-aggregations build the numeric model inputs.
	# - Each scripted_metric copies one field out of _source: map_script overwrites
	#   the state with the current document's value, combine_script returns that
	#   state, and reduce_script returns the first non-null entry of `states`.
	#   (Presumably only the `customers` documents carry these fields - verify
	#   against the index mappings.)
	# - The inference aggregation passes each listed sub-aggregation value to the
	#   model under the same name and returns the prediction as a number.
	#
	# NOTE(review): `voice_mail_plan` is aggregated below but is not listed in the
	# inference "buckets_path", so it is never passed to the model - confirm that
	# this is intended.
	GET calls,customers/_search
	{
	"size": 0,
	"aggs": {
	"phone_number": {
	"composite": {
	"size": 100,
	"sources": [
	{
	"phone_number": {
	"terms": {
	"field": "phone_number"
	}
	}
	}
	]
	},
	"aggs": {
	"call_charges": {
	"sum": {
	"field": "call_charges"
	}
	},
	"call_duration": {
	"sum": {
	"field": "call_duration"
	}
	},
	"call_count": {
	"value_count": {
	"field": "dialled_number"
	}
	},
	"customer_service_calls": {
	"sum": {
	"field": "customer_service_calls"
	}
	},
	"number_vmail_messages": {
	"sum": {
	"field": "number_vmail_messages"
	}
	},
	"account_length": {
	"scripted_metric": {
	"init_script": "state.account_length = 0",
	"map_script": "state.account_length = params._source.account_length",
	"combine_script": "return state.account_length",
	"reduce_script": "for (d in states) if (d != null) return d"
	}
	},
	"international_plan": {
	"scripted_metric": {
	"init_script": "state.international_plan = ''",
	"map_script": "state.international_plan = params._source.international_plan",
	"combine_script": "return state.international_plan",
	"reduce_script": "for (d in states) if (d != null) return d"
	}
	},
	"voice_mail_plan": {
	"scripted_metric": {
	"init_script": "state.voice_mail_plan = ''",
	"map_script": "state.voice_mail_plan = params._source.voice_mail_plan",
	"combine_script": "return state.voice_mail_plan",
	"reduce_script": "for (d in states) if (d != null) return d"
	}
	},
	"state": {
	"scripted_metric": {
	"init_script": "state.state = ''",
	"map_script": "state.state = params._source.state",
	"combine_script": "return state.state",
	"reduce_script": "for (d in states) if (d != null) return d"
	}
	},
	"churn_classification": {
	"inference": {
	"model_id": "telco_churn",
	"inference_config": {
	"classification": {
	"prediction_field_type": "number"
	}
	},
	"buckets_path": {
	"account_length": "account_length.value",
	"call_charges": "call_charges.value",
	"call_count": "call_count.value",
	"call_duration": "call_duration.value",
	"customer_service_calls": "customer_service_calls.value",
	"international_plan": "international_plan.value",
	"number_vmail_messages": "number_vmail_messages.value",
	"state": "state.value"
	}
	}
	}
	}
	}
	}
	}
	{
	// Kibana Vega-Lite spec that runs the churn inference aggregation and draws
	// the share of "Will Churn" vs "Won't Churn" predictions as a pie chart.
	// Written in relaxed HJSON syntax (comments, no commas between these members).
	"$schema": "https://vega.github.io/schema/vega-lite/v4.json"
	"title": "Telco Churn Predictions"

	// Define the data source: an Elasticsearch search described by the "url" object
	"data": {
	"url": {

	// Do not apply dashboard context filters
	"%context%": false

	// Which indices to search
	"index": "calls,customers"

	// The search body: one composite bucket per phone number, the per-bucket
	// feature aggregations, and the inference pipeline agg that consumes them
	"body": {
	"size": 0,
	"aggs": {
	"phone_number": {
	"composite": {
	"size": 100,
	"sources": [
	{
	"phone_number": {
	"terms": {
	"field": "phone_number"
	}
	}
	}
	]
	},
	"aggs": {
	"call_charges": {
	"sum": {
	"field": "call_charges"
	}
	},
	"call_duration": {
	"sum": {
	"field": "call_duration"
	}
	},
	"call_count": {
	"value_count": {
	"field": "dialled_number"
	}
	},
	"customer_service_calls": {
	"sum": {
	"field": "customer_service_calls"
	}
	},
	"number_vmail_messages": {
	"sum": {
	"field": "number_vmail_messages"
	}
	},
	// The scripted metrics below each copy one field out of _source:
	// map_script overwrites the state with the current document's value
	// and reduce_script returns the first non-null entry of `states`.
	// (Presumably only the `customers` documents carry these fields -
	// verify against the index mappings.)
	"account_length": {
	"scripted_metric": {
	"init_script": "state.account_length = 0",
	"map_script": "state.account_length = params._source.account_length",
	"combine_script": "return state.account_length",
	"reduce_script": "for (d in states) if (d != null) return d"
	}
	},
	"international_plan": {
	"scripted_metric": {
	"init_script": "state.international_plan = ''",
	"map_script": "state.international_plan = params._source.international_plan",
	"combine_script": "return state.international_plan",
	"reduce_script": "for (d in states) if (d != null) return d"
	}
	},
	"voice_mail_plan": {
	"scripted_metric": {
	"init_script": "state.voice_mail_plan = ''",
	"map_script": "state.voice_mail_plan = params._source.voice_mail_plan",
	"combine_script": "return state.voice_mail_plan",
	"reduce_script": "for (d in states) if (d != null) return d"
	}
	},
	"state": {
	"scripted_metric": {
	"init_script": "state.state = ''",
	"map_script": "state.state = params._source.state",
	"combine_script": "return state.state",
	"reduce_script": "for (d in states) if (d != null) return d"
	}
	},
	// Run the telco_churn model on each bucket; the prediction is
	// returned as a number so it can be matched against the numeric
	// categories in the lookup transform further down.
	// NOTE(review): voice_mail_plan is aggregated above but is not
	// listed in this buckets_path - confirm that this is intended.
	"churn_classification": {
	"inference": {
	"model_id": "telco_churn",
	"inference_config": {
	"classification": {
	"prediction_field_type": "number"
	}
	},
	"buckets_path": {
	"account_length": "account_length.value",
	"call_charges": "call_charges.value",
	"call_count": "call_count.value",
	"call_duration": "call_duration.value",
	"customer_service_calls": "customer_service_calls.value",
	"international_plan": "international_plan.value",
	"number_vmail_messages": "number_vmail_messages.value",
	"state": "state.value"
	}
	}
	}
	}
	}
	}
	}
	}

	/*
	For our graph, we only need the list of bucket values.
	Use format.property to discard everything else in the search response.
	*/
	"format": {"property": "aggregations.phone_number.buckets"}
	},

	/*
	The aggregation result tree needs to be transformed into a format we can plot.
	First, the lookup gives the numeric classification value an expressive name
	(0 -> "Won't Churn", 1 -> "Will Churn").
	Then the aggregate groups the buckets by that name and counts them per class.
	*/
	"transform": [
	{
	"lookup": "churn_classification.value",
	"from": {
	"data": {
	"values": [
	{"category": 0, "classification_class": "Won't Churn"},
	{"category": 1, "classification_class": "Will Churn"}
	]
	},
	"key": "category",
	"fields": ["classification_class"]
	}
	},
	{
	"aggregate": [
	{
	"op": "count",
	"as": "class_count"
	}
	],
	"groupby": ["classification_class"]
	}
	],

	// Pie chart: one arc per class, sized by its count and coloured by class name
	"mark": "arc",
	"encoding": {
	"theta": {"field": "class_count", "type": "quantitative"},
	"color": {"field": "classification_class", "type": "nominal", "legend": {"title": null }}
	},

	// No border around the chart view
	"view": {"stroke": null}
	}