Skip to content

Instantly share code, notes, and snippets.

@tallesairan
Last active August 4, 2022 14:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tallesairan/c5ada88d6cbcb08d28a3aeb9cf7a3904 to your computer and use it in GitHub Desktop.
Save tallesairan/c5ada88d6cbcb08d28a3aeb9cf7a3904 to your computer and use it in GitHub Desktop.
Elasticsearch text classification
from operator import itemgetter
def get_best_category(response):
    """Return the category with the highest summed hit score, or None.

    Every hit in ``response['hits']['hits']`` adds its ``_score`` to each
    category listed under ``hit['_source']['category']``. The category
    with the largest accumulated score is returned; on a tie, the category
    that was seen first wins.

    Args:
        response: A search response dict with the shape
            ``{'hits': {'hits': [{'_score': ..., '_source': {'category': ...}}]}}``.
            ``category`` may be an iterable of category names or a single
            string.

    Returns:
        The best-scoring category, or ``None`` when the response has no
        hits or none of the hits carries a category.

    Raises:
        KeyError: If a hit lacks ``_score`` or ``_source.category``, or the
            response lacks ``hits.hits``.
    """
    categories = {}
    for hit in response['hits']['hits']:
        score = hit['_score']
        hit_categories = hit['_source']['category']
        # A lone string would otherwise be iterated character by character,
        # crediting the score to single letters instead of the category.
        if isinstance(hit_categories, str):
            hit_categories = [hit_categories]
        for category in hit_categories:
            categories[category] = categories.get(category, 0) + score
    if not categories:
        # Nothing to rank: return None instead of touching an unbound name.
        return None
    # max() returns the first maximal key in insertion order, which matches
    # a stable descending sort followed by taking element 0.
    return max(categories, key=categories.get)
# Send a PUT to the _index_template endpoint of the Elasticsearch node at
# localhost:9200 for a template named "xagregator".
# The template targets indices matching "xagg*" / "xaggvideos" and maps:
#   - title_key, description, category: text fields using the "english" analyzer
#   - created_at: date; isActive: boolean
#   - title, slug: text with a "keyword" sub-field (ignore_above 256)
# NOTE(review): "xaggvideos" looks already covered by the "xagg*" wildcard,
# so the second pattern is presumably redundant - confirm before removing.
curl -X PUT "localhost:9200/_index_template/xagregator" -H 'Content-Type: application/json' -d'
{
"index_patterns" : [
"xagg*",
"xaggvideos"
],
"template" : {
"mappings" : {
"dynamic_templates" : [ ],
"properties" : {
"title_key" : {
"analyzer" : "english",
"type" : "text"
},
"created_at" : {
"type" : "date"
},
"description" : {
"analyzer" : "english",
"type" : "text"
},
"category" : {
"analyzer" : "english",
"type" : "text"
},
"isActive" : {
"type" : "boolean"
},
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"ignore_above" : 256,
"type" : "keyword"
}
}
},
"slug" : {
"type" : "text",
"fields" : {
"keyword" : {
"ignore_above" : 256,
"type" : "keyword"
}
}
}
}
}
},
"composed_of" : [ ]
}
'
# Run a more_like_this search against the "xaggvideos" index, comparing the
# "like" text with the title_key and category fields and asking for a
# pretty-printed response.
# min_term_freq=1 and max_query_terms=20 are passed through to the query.
# "DEMO QUERY" is presumably a placeholder for the text to classify, and the
# response is presumably what get_best_category() consumes - TODO confirm.
curl -X GET "localhost:9200/xaggvideos/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query":{
"more_like_this":{
"fields":[
"title_key",
"category"
],
"like":"DEMO QUERY",
"min_term_freq":1,
"max_query_terms":20
}
}
}
'
# Run a more_like_this search against the "export_small_demo" index,
# comparing the "like" text with the title and categories fields and asking
# for a pretty-printed response.
# min_term_freq=1 and max_query_terms=20 are passed through to the query.
# NOTE(review): this query uses "title"/"categories", while the xagregator
# template and get_best_category() use "title_key"/"category" - presumably
# this index has a different schema; confirm before reusing the response.
curl -X GET "localhost:9200/export_small_demo/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query":{
"more_like_this":{
"fields":[
"title",
"categories"
],
"like":"DEMO QUERY",
"min_term_freq":1,
"max_query_terms":20
}
}
}
'
{
"index_patterns" : [
"xagg*",
"xaggvideos"
],
"template" : {
"mappings" : {
"dynamic_templates" : [ ],
"properties" : {
"title_key" : {
"analyzer" : "english",
"type" : "text"
},
"created_at" : {
"type" : "date"
},
"description" : {
"analyzer" : "english",
"type" : "text"
},
"category" : {
"analyzer" : "english",
"type" : "text"
},
"isActive" : {
"type" : "boolean"
},
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"ignore_above" : 256,
"type" : "keyword"
}
}
},
"slug" : {
"type" : "text",
"fields" : {
"keyword" : {
"ignore_above" : 256,
"type" : "keyword"
}
}
}
}
}
},
"composed_of" : [ ]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment