Skip to content

Instantly share code, notes, and snippets.

@spinscale
Created December 14, 2017 09:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save spinscale/0a77f04cab9ed6150c56057f1fd3fa2f to your computer and use it in GitHub Desktop.
Save spinscale/0a77f04cab9ed6150c56057f1fd3fa2f to your computer and use it in GitHub Desktop.
Training Examples (Zurich)
##### decompounder example
# Drop any previous copy of the index so the example can be re-run.
DELETE decompound

# Create the index. One dictionary_decompounder filter (word list: "topf", "schiff")
# is shared by two custom analyzers; the "_unique" variant additionally applies the
# "unique" token filter. "name" is analyzed with the plain variant and its sub-field
# "name.unique" with the "_unique" variant.
PUT decompound
{
  "settings": {
    "analysis": {
      "filter": {
        "my_decompound_filter": {
          "type": "dictionary_decompounder",
          "word_list": ["topf", "schiff"]
        }
      },
      "analyzer": {
        "my_decompound_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "my_decompound_filter"]
        },
        "my_decompound_analyzer_unique": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "my_decompound_filter", "unique"]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "my_decompound_analyzer",
          "fields": {
            "unique": {
              "type": "text",
              "analyzer": "my_decompound_analyzer_unique"
            }
          }
        }
      }
    }
  }
}
# Inspect the settings that were just stored.
GET decompound/_settings

# No analyzer is named here, so the request does not go through the custom decompounder chain.
POST decompound/_analyze
{ "text": "blumentopf" }

# Ad-hoc chain built in the request itself; it references the index-level filter by name.
# The mixed-case input shows the effect of running "lowercase" before the decompounder.
POST decompound/_analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase", "my_decompound_filter"],
  "text": "blumentOPF"
}

# Same text through the named analyzer.
POST decompound/_analyze
{
  "analyzer": "my_decompound_analyzer",
  "text": "blumentopf"
}

# The second dictionary word ("schiff").
POST decompound/_analyze
{
  "analyzer": "my_decompound_analyzer",
  "text": "dampfschiff"
}

# Repeated input: compare the output of the plain analyzer ...
POST decompound/_analyze
{
  "analyzer": "my_decompound_analyzer",
  "text": "blumentopf blumentopf blumentopf"
}

# ... with the variant that ends in the "unique" token filter.
POST decompound/_analyze
{
  "analyzer": "my_decompound_analyzer_unique",
  "text": "blumentopf blumentopf blumentopf"
}
# Index one document whose last word is a compound containing "topf".
PUT decompound/doc/1
{ "name": "ein wunderschöner blumentopf" }

# Search for the compound part on the main field ...
GET decompound/doc/_search
{
  "query": {
    "match": { "name": "topf" }
  }
}

# ... and on the sub-field analyzed with the "unique" variant.
GET decompound/doc/_search
{
  "query": {
    "match": { "name.unique": "topf" }
  }
}
# See also the decompound analysis plugin: https://github.com/jprante/elasticsearch-analysis-decompound
# reverse token filter example: suffix-based searching via a prefix query on reversed tokens
# Drop any previous copy of the index so the example can be re-run.
DELETE reverse

# "name" keeps the default analysis; the sub-field "name.reverse" uses a custom
# analyzer (also called "reverse") that lowercases and then reverses every token.
PUT reverse
{
  "settings": {
    "analysis": {
      "analyzer": {
        "reverse": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "reverse"]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "fields": {
            "reverse": { "type": "text", "analyzer": "reverse" }
          }
        }
      }
    }
  }
}
PUT reverse/doc/1
{ "name": "blumentopf" }

# Inefficient variant: a leading wildcard cannot narrow down the terms by prefix,
# so the lookup in the inverted index is expensive.
GET reverse/_search
{
  "query": {
    "wildcard": {
      "name": { "value": "*topf" }
    }
  }
}

# Same suffix search expressed as a prefix query: "fpot" is "topf" reversed and is
# matched against the reversed tokens stored in "name.reverse".
GET reverse/_search
{
  "query": {
    "prefix": {
      "name.reverse": { "value": "fpot" }
    }
  }
}
# highlight with synonyms
# Synonyms are applied at search time only: "name" is indexed with the standard
# analyzer and queried through an analyzer that adds a synonym_graph filter.
DELETE synonyms

# The multi-word synonym is spelled "bayerische motorenwerke" so that it agrees with
# the text of document 2 below; otherwise the expanded phrase can never match the
# indexed tokens and there is nothing to highlight for it.
PUT synonyms
{
  "settings": {
    "analysis": {
      "analyzer": {
        "query_analyzer_with_synonyms": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "my_synonyms"
          ]
        }
      },
      "filter": {
        "my_synonyms": {
          "type": "synonym_graph",
          "synonyms": [
            "bmw, bayerische motorenwerke",
            "vw, volkswagen"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "standard",
          "search_analyzer": "query_analyzer_with_synonyms"
        }
      }
    }
  }
}

PUT synonyms/doc/1
{
  "name" : "Volkswagen Golf"
}

# Single-word synonym: the query says "vw", the document says "Volkswagen".
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "vw"
    }
  },
  "highlight": {
    "fields": {
      "name": {}
    }
  }
}

# This document contains both sides of the "bmw" synonym rule.
PUT synonyms/doc/2
{
  "name" : "This is a BMW 320i, which is a nice car. BMW stands for Bayerische Motorenwerke"
}

GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bmw"
    }
  },
  "highlight": {
    "fields": {
      "name": {}
    }
  }
}

# Multi-word side of the synonym rule, highlighted with the "plain" highlighter ...
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bayerische motorenwerke"
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "type" : "plain"
      }
    }
  }
}

# ... and with the "unified" highlighter, to compare the two outputs.
GET synonyms/_search
{
  "query": {
    "match": {
      "name": "bayerische motorenwerke"
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "type" : "unified"
      }
    }
  }
}
# italian job: parent/child documents with the join field
# Drop any previous copy of the index so the example can be re-run.
DELETE join

# A single join field with one relation: "company" is the parent,
# "preferred_company" is its child.
PUT join
{
  "mappings": {
    "doc": {
      "properties": {
        "my_join_field": {
          "type": "join",
          "relations": { "company": "preferred_company" }
        }
      }
    }
  }
}
# Two parent documents (relation name "company").
PUT join/doc/1
{
  "text": "Amazon Logistics GmbH",
  "my_join_field": { "name": "company" }
}

PUT join/doc/2
{
  "text": "Amazon fulfilment A/S",
  "my_join_field": { "name": "company" }
}

# Plain full-text search over the company names.
GET join/_search
{
  "query": {
    "match": { "text": "amazon" }
  }
}
# alex prefers the logistics gmbh
# Child documents carry a "parent" id in the join field and are indexed with
# routing set to that same parent id. The company name is stored under "company"
# in both children, so a match on "text" only ever hits the parent company documents.
PUT join/doc/3?routing=1
{
  "type" : "preferred_company",
  "company": "Amazon Logistics GmbH",
  "user" : "alex",
  "my_join_field": {
    "name": "preferred_company",
    "parent" : "1"
  }
}

# paul prefers the fulfilment a/s
PUT join/doc/4?routing=2
{
  "type" : "preferred_company",
  "company": "Amazon fulfilment A/S",
  "user" : "paul",
  "my_join_field": {
    "name": "preferred_company",
    "parent" : "2"
  }
}
# Search companies for "amazon" and rank the one alex prefers higher.
# - must:   the full-text match on the company name
# - filter: restrict hits to parent documents; the relation name is indexed under
#           the join field's own name ("my_join_field"), not under "my_join_field.name"
# - should: optional has_child clause that adds score when a child of alex exists
GET join/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "text": "amazon"
          }
        }
      ],
      "filter": {
        "term": {
          "my_join_field": "company"
        }
      },
      "should": [
        {
          "has_child": {
            "type": "preferred_company",
            "query": {
              "match": {
                "user": "alex"
              }
            }
          }
        }
      ]
    }
  }
}
# Same idea for paul, this time without the relation filter and with inner_hits
# requested on the has_child clause so the matching child is returned with its parent.
GET join/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "text": "amazon" } }
      ],
      "should": [
        {
          "has_child": {
            "type": "preferred_company",
            "query": {
              "match": { "user": "paul" }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  }
}
# Requires the phonetic analysis plugin:
#   bin/elasticsearch-plugin install analysis-phonetic
DELETE phonetic_sample

# "name" is analyzed with a metaphone encoder, the sub-field "name.koelner" with the
# Koelner Phonetik encoder. Both phonetic filters use "replace": false.
# The "standard" token filter was dropped from both chains: it does not change the
# token stream and is no longer accepted by newer Elasticsearch versions.
PUT phonetic_sample
{
  "mappings": {
    "person" : {
      "properties": {
        "name" : {
          "type": "text",
          "analyzer": "my_analyzer",
          "fields": {
            "koelner" : {
              "type": "text",
              "analyzer": "koelner_analyzer"
            }
          }
        }
      }
    }
  },
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "my_analyzer": {
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "my_metaphone"
            ]
          },
          "koelner_analyzer": {
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "koelner_metaphone"
            ]
          }
        },
        "filter": {
          "my_metaphone": {
            "type": "phonetic",
            "encoder": "metaphone",
            "replace": false
          },
          "koelner_metaphone": {
            "type": "phonetic",
            "encoder": "koelnerphonetik",
            "replace": false
          }
        }
      }
    }
  }
}
# Token output of the metaphone-based analyzer.
POST phonetic_sample/_analyze
{
  "text": "Joe Blocks",
  "analyzer": "my_analyzer"
}

# Token output of the Koelner Phonetik based analyzer.
POST phonetic_sample/_analyze
{
  "text": "Aleksander",
  "analyzer": "koelner_analyzer"
}
# Four spellings of the same-sounding surname.
PUT phonetic_sample/person/1
{ "name": "Peter Meyer" }

PUT phonetic_sample/person/2
{ "name": "Peter Meier" }

PUT phonetic_sample/person/3
{ "name": "Peter Maier" }

PUT phonetic_sample/person/4
{ "name": "Peter Mayer" }
# Search one spelling against the metaphone-analyzed field ...
GET phonetic_sample/_search
{
  "query": {
    "match": { "name": "Maier" }
  }
}

# ... and against the Koelner Phonetik sub-field, to compare the hits.
GET phonetic_sample/_search
{
  "query": {
    "match": { "name.koelner": "Maier" }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment