Last active
January 18, 2024 15:56
-
-
Save softwaredoug/44c5d09c6aac9b926bec73a400550b3e to your computer and use it in GitHub Desktop.
Synonyms in Elasticsearch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Recreate the `syntest` index. The `text` field of type `article` uses the
# custom `syn_text` analyzer: standard tokenizer followed by one `synonym`
# token filter holding a single comma-separated equivalence rule.
DELETE syntest
PUT syntest
{
  "mappings": {
    "article": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "syn_text"
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "syn_text": {
          "tokenizer": "standard",
          "filter": ["synonym"]
        }
      },
      "filter": {
        "synonym": {
          "type": "synonym",
          "synonyms": ["heart attack, myocardial infarction, mi, cardiac arrest, heartattack"]
        }
      }
    }
  }
}
# Phrase search for "cardiac attack". That exact phrase is not an entry in the
# synonym rule; it combines one word each from "cardiac arrest" and
# "heart attack".
POST syntest/_search
{
  "query": {
    "match_phrase": {
      "text": "cardiac attack"
    }
  }
}
# Sample documents: two contain full phrases from the synonym rule
# ("heart attack", "myocardial infarction"); document 2 only shares the single
# word "arrest" with the rule entry "cardiac arrest".
PUT syntest/article/1
{
  "text": "heart attack"
}
PUT syntest/article/2
{
  "text": "I'm under arrest!"
}
PUT syntest/article/3
{
  "text": "myocardial infarction"
}
# Phrase search for "heart attack", which is itself one of the entries in the
# synonym rule.
POST syntest/_search
{
  "query": {
    "match_phrase": {
      "text": "heart attack"
    }
  }
}
# Recreate `syntest` with two views of the same text:
#   - `text`          analyzed with the built-in `english` analyzer
#   - `text.entities` analyzed with `syn_text`, whose filter chain is
#       1. `4_shingle`            word shingles of 2 to 4 tokens, unigrams kept
#       2. `health_synonym`       synonym rule whose entries are tokenized with
#                                 the `keyword` tokenizer
#       3. `keep_health_entities` keeps only tokens listed in `keep_words`
DELETE syntest
PUT syntest
{
  "mappings": {
    "article": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "english",
          "fields": {
            "entities": {
              "type": "string",
              "analyzer": "syn_text"
            }
          }
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "syn_text": {
          "tokenizer": "standard",
          "filter": ["4_shingle", "health_synonym", "keep_health_entities"]
        }
      },
      "filter": {
        "4_shingle": {
          "type": "shingle",
          "max_shingle_size": 4,
          "min_shingle_size": 2,
          "output_unigrams": true
        },
        "health_synonym": {
          "type": "synonym",
          "tokenizer": "keyword",
          "synonyms": ["heart attack, myocardial infarction, mi, cardiac arrest, heartattack, acute heart attack"]
        },
        "keep_health_entities": {
          "type": "keep",
          "keep_words": ["heart attack", "myocardial infarction", "mi", "cardiac arrest", "heartattack", "acute heart attack"]
        }
      }
    }
  }
}
# Search "heart attack" against both the entity sub-field (`text.entities`)
# and the plain `text` field; the two `match` clauses are combined as
# optional `should` clauses of a `bool` query.
POST syntest/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "text.entities": "heart attack"
          }
        },
        {
          "match": {
            "text": "heart attack"
          }
        }
      ]
    }
  }
}
# Recreate `syntest` with a two-stage synonym chain on `text`:
#   1. `autophrase_syn` rewrites each multi-word phrase to a single
#      underscore-joined token (e.g. "heart attack" => "heart_attack").
#   2. `health_synonym` declares those underscore-joined tokens, plus "mi" and
#      "heartattack", as equivalents; its rule is tokenized with `keyword`.
DELETE syntest
PUT syntest
{
  "mappings": {
    "article": {
      "properties": {
        "text": {
          "type": "string",
          "analyzer": "syn_text"
        }
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "syn_text": {
          "tokenizer": "standard",
          "filter": ["autophrase_syn", "health_synonym"]
        }
      },
      "filter": {
        "autophrase_syn": {
          "type": "synonym",
          "synonyms": [
            "heart attack => heart_attack",
            "myocardial infarction => myocardial_infarction",
            "cardiac arrest => cardiac_arrest",
            "acute heart attack => acute_heart_attack"
          ]
        },
        "health_synonym": {
          "type": "synonym",
          "tokenizer": "keyword",
          "synonyms": ["heart_attack, myocardial_infarction, mi, cardiac_arrest, heartattack, acute_heart_attack"]
        }
      }
    }
  }
}
# Sample document with the phrase "heart attack" surrounded by other words.
PUT syntest/article/4
{
  "text": "bad heart attack in paris"
}
# Phrase search that uses a different entry of the synonym rule
# ("cardiac arrest") preceded by the ordinary word "bad".
POST syntest/_search
{
  "query": {
    "match_phrase": {
      "text": "bad cardiac arrest"
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment