-
-
Save ThaDafinser/d27b4fa9d144b0083ee7dad37484fdd8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# List of all filters, tokenizers, etc. registered by the plugin bundle: | |
# https://github.com/jprante/elasticsearch-plugin-bundle/blob/93ed7cb33b9c8095c279405467d4301422324655/src/main/java/org/xbib/elasticsearch/plugin/bundle/BundlePlugin.java#L91 | |
# elasticsearch-analysis-autophrase | |
# @TODO | |
# https://github.com/jprante/elasticsearch-plugin-bundle/blob/68dc19c34c40364e04400f92500b973a6cbae170/src/main/java/org/xbib/elasticsearch/index/analysis/autophrase/AutoPhrasingTokenFilterFactory.java | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "auto_phrase", | |
"phrases": [ | |
], | |
"ignoreCase": false, | |
"includeTokens": false, | |
"replaceWhitespaceWith": "-" | |
} | |
], | |
"text": "Mein Text ist gut" | |
} | |
# baseform | |
# https://github.com/jprante/elasticsearch-analysis-baseform | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "baseform", | |
"language": "de" | |
} | |
], | |
"text": "Ich gehe gerne mit meinen neuen Schuhen" | |
} | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "baseform", | |
"language": "en" | |
} | |
], | |
"text": "Thing's are going to be awful" | |
} | |
# lemmatize | |
# @TODO source: https://github.com/jprante/elasticsearch-plugin-bundle/blob/86236c385f7937ad147c49002485940547377a67/src/main/java/org/xbib/elasticsearch/index/analysis/lemmatize/LemmatizeTokenFilterFactory.java#L28 | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "lemmatize", | |
"language": "de" | |
} | |
], | |
"text": "Ich gehe gerne mit meinen neuen Schuhen" | |
} | |
# pair | |
# @TODO | |
# https://github.com/jprante/elasticsearch-plugin-bundle/blob/68dc19c34c40364e04400f92500b973a6cbae170/src/main/java/org/xbib/elasticsearch/index/analysis/concat/PairTokenFilterFactory.java | |
# elasticsearch-analysis-concat | |
# @TODO | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "concat" | |
} | |
], | |
"text": "eins 1, zwei 2" | |
} | |
# elasticsearch-analysis-decompound | |
# https://github.com/jprante/elasticsearch-analysis-decompound | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "decompound" | |
} | |
], | |
"text": "PowerShot Donaudampfschiff ich-gehe-gerne das_ist_nicht" | |
} | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "decompound", | |
"subwords_only": true | |
} | |
], | |
"text": "Donaudampfschiff" | |
} | |
# elasticsearch-analysis-german | |
# https://github.com/jprante/elasticsearch-analysis-german | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "german_normalize" | |
} | |
], | |
"text": "Jörg Prante läuft zum Strauß" | |
} | |
# elasticsearch-analysis-hyphen | |
# https://jprante.github.io/elasticsearch-analysis-hyphen/ | |
GET _analyze | |
{ | |
"tokenizer": "hyphen", | |
"filter": [ | |
"hyphen" | |
], | |
"text": "Do-not-break" | |
} | |
# elasticsearch-analysis-icu | |
# https://github.com/jprante/elasticsearch-icu | |
# https://jprante.github.io/elasticsearch-analysis-icu/ | |
# https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-collation.html | |
# https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-folding.html | |
# https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-transform.html | |
## icu_collation | |
# Could not get an icu_collation example working with the _analyze API; see: | |
# https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-analyze.html | |
## icu_tokenizer | |
GET _analyze | |
{ | |
"tokenizer": { | |
"type": "icu_tokenizer" | |
}, | |
"filter": [ | |
"icu_folding" | |
], | |
"text": "Jörg Prante" | |
} | |
## icu_tokenizer (with custom rule files) | |
GET _analyze | |
{ | |
"tokenizer": { | |
"type": "icu_tokenizer", | |
"rulefiles": "Latn:icu/Latin-dont-break-on-hyphens.rbbi" | |
}, | |
"text": "we do-not-break on hyphens" | |
} | |
## icu_numberformat | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "icu_numberformat", | |
"locale": "de", | |
"format": "spellout" | |
} | |
], | |
"text": "Das sind 1000 Bücher" | |
} | |
# elasticsearch-analysis-naturalsort | |
# https://jprante.github.io/elasticsearch-analysis-naturalsort/ | |
DELETE test | |
PUT test | |
{ | |
"settings": { | |
"index": { | |
"analysis": { | |
"analyzer": { | |
"natural": { | |
"type": "naturalsort", | |
"locale": "en", | |
"digit": 5, | |
"maxTokens": 5 | |
} | |
} | |
} | |
} | |
}, | |
"mappings": { | |
"doc": { | |
"properties": { | |
"points": { | |
"type": "text", | |
"store": true, | |
"fields": { | |
"encoded": { | |
"type": "text", | |
"fielddata": true, | |
"analyzer": "natural" | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
PUT test/doc/1 | |
{ | |
"points": "Bob: 2 points" | |
} | |
PUT test/doc/2 | |
{ | |
"points": "Bob: 3 points" | |
} | |
PUT test/doc/3 | |
{ | |
"points": "Bob: 10 points" | |
} | |
# Expected result: hits sorted as 2, 3 and 10 points, in this order | |
POST test/_search | |
{ | |
"query": { | |
"match_all": {} | |
}, | |
"stored_fields": "points", | |
"sort": { | |
"points.encoded": { | |
"order": "asc" | |
} | |
} | |
} | |
# elasticsearch-analysis-reference | |
# https://github.com/jprante/elasticsearch-analysis-reference | |
# elasticsearch-analysis-sortform | |
# @TODO | |
# standardnumber | |
# https://github.com/jprante/elasticsearch-analysis-standardnumber | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "standardnumber" | |
} | |
], | |
"text": "Die ISBN von Elasticsearch in Action lautet 9781617291623" | |
} | |
# elasticsearch-analysis-symbolname | |
# @TODO | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "symbolname" | |
} | |
], | |
"text": "eins 1, zwei 2" | |
} | |
# elasticsearch-analysis-worddelimiter | |
# https://github.com/jprante/elasticsearch-analysis-worddelimiter2 | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "worddelimiter2" | |
} | |
], | |
"text": "PowerShot Wi-Fi SD500. It is as-is!" | |
} | |
## old worddelimiter filter | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "worddelimiter" | |
} | |
], | |
"text": "PowerShot Wi-Fi SD500. It is as-is!" | |
} | |
# elasticsearch-analysis-year | |
# @TODO | |
# https://github.com/jprante/elasticsearch-plugin-bundle/blob/68dc19c34c40364e04400f92500b973a6cbae170/src/main/java/org/xbib/elasticsearch/index/analysis/year/GregorianYearTokenFilterFactory.java#L16 | |
GET _analyze | |
{ | |
"tokenizer": "standard", | |
"filter": [ | |
{ | |
"type": "year", | |
"default_year": "2019" | |
} | |
], | |
"text": "2017 wird das Jahr!" | |
} | |
# elasticsearch-mapper-crypt | |
# https://github.com/jprante/elasticsearch-mapper-crypt | |
DELETE test | |
PUT test | |
{ | |
"mappings": { | |
"docs": { | |
"properties": { | |
"sha_hash": { | |
"type": "crypt", | |
"algo": "SHA-512" | |
} | |
} | |
} | |
} | |
} | |
PUT test/docs/1 | |
{ | |
"sha_hash": "test" | |
} | |
POST test/_search | |
{ | |
"query": { | |
"match": { | |
"sha_hash": "EE26B0DD4AF7E749AA1A8EE3C10AE9923F618980772E473F8819A5D4940E0DB27AC185F8A0E1D5F84F88BC887FD67B143732C304CC5FA9AD8E6F57F50028A8FF" | |
} | |
} | |
} | |
# elasticsearch-mapper-langdetect | |
# https://github.com/jprante/elasticsearch-langdetect | |
DELETE test | |
PUT test | |
{ | |
"mappings": { | |
"docs": { | |
"properties": { | |
"text": { | |
"type": "langdetect", | |
"languages": [ | |
"en", | |
"de", | |
"fr" | |
] | |
} | |
} | |
} | |
} | |
} | |
PUT test/docs/1 | |
{ | |
"text": "Oh, say can you see by the dawn`s early light, What so proudly we hailed at the twilight`s last gleaming?" | |
} | |
POST test/_search | |
{ | |
"query": { | |
"term": { | |
"text": "en" | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment