Skip to content

Instantly share code, notes, and snippets.

@softwaredoug
Last active January 18, 2024 15:56
Show Gist options
  • Save softwaredoug/44c5d09c6aac9b926bec73a400550b3e to your computer and use it in GitHub Desktop.
Save softwaredoug/44c5d09c6aac9b926bec73a400550b3e to your computer and use it in GitHub Desktop.
Synonyms in Elasticsearch
# Scenario 1: naive multi-word synonyms.
# Drop any previous `syntest` index so the mapping/settings below start clean.
DELETE syntest
# Create the index with one `text` field on the `article` type, analyzed by
# `syn_text`: standard tokenizer -> lowercase -> synonym expansion.
# `lowercase` runs before `synonym` because every synonym rule is written in
# lowercase; without it, capitalized input such as "Heart attack" would never
# be expanded.
PUT syntest
{
  "mappings": {
    "article": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "syn_text"
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "syn_text": {
          "tokenizer": "standard",
          "filter": ["lowercase", "synonym"]
        }
      },
      "filter": {
        "synonym": {
          "type": "synonym",
          "synonyms": ["heart attack, myocardial infarction, mi, cardiac arrest, heartattack"]
        }
      }
    }
  }
}
# Phrase search for "cardiac attack". That exact phrase is not one of the
# configured synonyms; it mixes words from two of them ("cardiac arrest" and
# "heart attack").
# NOTE(review): in script order this runs before any document is indexed;
# re-run it after the sample documents have been added.
POST syntest/_search
{
  "query": {
    "match_phrase": { "text": "cardiac attack" }
  }
}
# Sample documents for the `article` type.
# 1: contains the synonym entry "heart attack".
PUT syntest/article/1
{ "text": "heart attack" }
# 2: contains the word "arrest" but none of the configured synonym entries.
PUT syntest/article/2
{ "text": "I'm under arrest!" }
# 3: contains the synonym entry "myocardial infarction".
PUT syntest/article/3
{ "text": "myocardial infarction" }
# Phrase search for "heart attack", which is itself one of the configured
# synonym entries.
POST syntest/_search
{
  "query": {
    "match_phrase": { "text": "heart attack" }
  }
}
# Scenario 2: entity extraction into a sub-field using shingles.
# Drop the previous index; this also discards every document indexed so far.
DELETE syntest
# `text` is analyzed with the built-in `english` analyzer, while the
# `text.entities` sub-field uses `syn_text`:
#   lowercase            - the synonym rules and keep_words are all lowercase,
#                          so normalize case first or capitalized input
#                          ("Heart attack") would be dropped by the keep filter
#   4_shingle            - emits word n-grams of 2..4 tokens plus the unigrams
#   health_synonym       - rules parsed with the keyword tokenizer, so each
#                          comma-separated entry is one whole token
#   keep_health_entities - keeps only tokens that appear in keep_words
PUT syntest
{
  "mappings": {
    "article": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "english",
          "fields": {
            "entities": {
              "type": "string",
              "analyzer": "syn_text"
            }
          }
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "syn_text": {
          "tokenizer": "standard",
          "filter": ["lowercase", "4_shingle", "health_synonym", "keep_health_entities"]
        }
      },
      "filter": {
        "4_shingle": {
          "type": "shingle",
          "max_shingle_size": 4,
          "min_shingle_size": 2,
          "output_unigrams": true
        },
        "health_synonym": {
          "type": "synonym",
          "tokenizer": "keyword",
          "synonyms": ["heart attack, myocardial infarction, mi, cardiac arrest, heartattack, acute heart attack"]
        },
        "keep_health_entities": {
          "type": "keep",
          "keep_words": ["heart attack", "myocardial infarction", "mi", "cardiac arrest", "heartattack", "acute heart attack"]
        }
      }
    }
  }
}
# Search "heart attack" with two optional (`should`) clauses: one against the
# `text.entities` sub-field and one against the main `text` field.
# NOTE(review): the script indexes no documents between recreating the index
# and this search; add documents first to see hits.
POST syntest/_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "text.entities": "heart attack" } },
        { "match": { "text": "heart attack" } }
      ]
    }
  }
}
# Scenario 3: "autophrasing" - collapse each multi-word term into a single
# underscore-joined token, then apply synonyms to those single tokens.
# Drop the previous index before redefining its analysis chain.
DELETE syntest
# `syn_text` filter chain:
#   lowercase      - the rules below are all lowercase; normalize case first so
#                    capitalized input ("Heart attack") is still autophrased
#   autophrase_syn - rewrites each multi-word phrase to one token
#                    (e.g. "heart attack" => heart_attack)
#   health_synonym - treats the single-token forms as equivalent
PUT syntest
{
  "mappings": {
    "article": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "syn_text"
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "syn_text": {
          "tokenizer": "standard",
          "filter": ["lowercase", "autophrase_syn", "health_synonym"]
        }
      },
      "filter": {
        "autophrase_syn": {
          "type": "synonym",
          "synonyms": [
            "heart attack => heart_attack",
            "myocardial infarction => myocardial_infarction",
            "cardiac arrest => cardiac_arrest",
            "acute heart attack => acute_heart_attack"
          ]
        },
        "health_synonym": {
          "type": "synonym",
          "tokenizer": "keyword",
          "synonyms": ["heart_attack, myocardial_infarction, mi, cardiac_arrest, heartattack, acute_heart_attack"]
        }
      }
    }
  }
}
# Sample document 4: the phrase "heart attack" surrounded by other words.
PUT syntest/article/4
{ "text": "bad heart attack in paris" }
# Phrase search for "bad cardiac arrest". It uses a different synonym
# ("cardiac arrest") than the one stored in document 4 ("heart attack").
# NOTE(review): presumably this is expected to match document 4 through the
# autophrase + synonym chain - confirm against a running cluster.
POST syntest/_search
{
  "query": {
    "match_phrase": { "text": "bad cardiac arrest" }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment