Skip to content

Instantly share code, notes, and snippets.

@ThaDafinser
Last active April 5, 2017 09:59
Show Gist options
  • Save ThaDafinser/d27b4fa9d144b0083ee7dad37484fdd8 to your computer and use it in GitHub Desktop.
# list of all filters / ...
# https://github.com/jprante/elasticsearch-plugin-bundle/blob/93ed7cb33b9c8095c279405467d4301422324655/src/main/java/org/xbib/elasticsearch/plugin/bundle/BundlePlugin.java#L91
# elasticsearch-analysis-autophrase
# @TODO
# https://github.com/jprante/elasticsearch-plugin-bundle/blob/68dc19c34c40364e04400f92500b973a6cbae170/src/main/java/org/xbib/elasticsearch/index/analysis/autophrase/AutoPhrasingTokenFilterFactory.java
# Run the auto_phrase token filter over the standard tokenizer.
# NOTE(review): "phrases" is empty here, so presumably no phrasing occurs
# and tokens pass through unchanged — confirm against the plugin source above.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "auto_phrase",
"phrases": [
],
"ignoreCase": false,
"includeTokens": false,
"replaceWhitespaceWith": "-"
}
],
"text": "Mein Text ist gut"
}
# baseform
# https://github.com/jprante/elasticsearch-analysis-baseform
# baseform filter with German dictionary: reduces inflected German word
# forms (e.g. in the sample sentence) to their base forms.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "baseform",
"language": "de"
}
],
"text": "Ich gehe gerne mit meinen neuen Schuhen"
}
# Same baseform filter, English dictionary.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "baseform",
"language": "en"
}
],
"text": "Thing's are going to be awful"
}
# lemmatize
# @todo source https://github.com/jprante/elasticsearch-plugin-bundle/blob/86236c385f7937ad147c49002485940547377a67/src/main/java/org/xbib/elasticsearch/index/analysis/lemmatize/LemmatizeTokenFilterFactory.java#L28
# lemmatize filter (German): dictionary-based lemmatization — see the
# factory source linked above for the supported options.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "lemmatize",
"language": "de"
}
],
"text": "Ich gehe gerne mit meinen neuen Schuhen"
}
# pair
# @todo
# https://github.com/jprante/elasticsearch-plugin-bundle/blob/68dc19c34c40364e04400f92500b973a6cbae170/src/main/java/org/xbib/elasticsearch/index/analysis/concat/PairTokenFilterFactory.java
# elasticsearch-analysis-concat
# @TODO
# concat filter with no options; input mixes words and digits so the
# output shows how adjacent tokens are concatenated.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "concat"
}
],
"text": "eins 1, zwei 2"
}
# elasticsearch-analysis-decompound
# https://github.com/jprante/elasticsearch-analysis-decompound
# decompound filter: splits German compounds (e.g. "Donaudampfschiff");
# input also probes hyphen- and underscore-joined words.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "decompound"
}
],
"text": "PowerShot Donaudampfschiff ich-gehe-gerne das_ist_nicht"
}
# Same filter with "subwords_only": true — presumably emits only the
# subword tokens, dropping the original compound; verify in the output.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "decompound",
"subwords_only": true
}
],
"text": "Donaudampfschiff"
}
# elasticsearch-analysis-german
# https://github.com/jprante/elasticsearch-analysis-german
# german_normalize filter; the sample text contains umlauts and "ß" to
# show the normalization.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "german_normalize"
}
],
"text": "Jörg Prante läuft zum Strauß"
}
# elasticsearch-analysis-hyphen
# https://jprante.github.io/elasticsearch-analysis-hyphen/
# hyphen tokenizer + hyphen filter on a hyphenated input.
GET _analyze
{
"tokenizer": "hyphen",
"filter": [
"hyphen"
],
"text": "Do-not-break"
}
# elasticsearch-analysis-icu
# https://github.com/jprante/elasticsearch-icu
# https://jprante.github.io/elasticsearch-analysis-icu/
# https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-collation.html
# https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-folding.html
# https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu-transform.html
## icu_collation
# Could not get this working via the _analyze API...
# https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-analyze.html
## icu_folding
# icu_tokenizer combined with the icu_folding filter (Unicode folding of
# accents/diacritics in the sample name).
GET _analyze
{
"tokenizer": {
"type": "icu_tokenizer"
},
"filter": [
"icu_folding"
],
"text": "Jörg Prante"
}
## icu_tokenizer
# icu_tokenizer with a custom RBBI rule file so Latin-script text is not
# broken on hyphens.
GET _analyze
{
"tokenizer": {
"type": "icu_tokenizer",
"rulefiles": "Latn:icu/Latin-dont-break-on-hyphens.rbbi"
},
"text": "we do-not-break on hyphens"
}
## icu_numberformat
# icu_numberformat with German locale and "spellout" format — spells the
# number 1000 out as words.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "icu_numberformat",
"locale": "de",
"format": "spellout"
}
],
"text": "Das sind 1000 Bücher"
}
# elasticsearch-analysis-naturalsort
# https://jprante.github.io/elasticsearch-analysis-naturalsort/
# Recreate the "test" index with a "naturalsort" analyzer; "points.encoded"
# uses that analyzer with fielddata enabled so it can be sorted on.
# NOTE(review): meaning of "digit"/"maxTokens" taken on trust from the
# plugin — see its docs linked above.
DELETE test
PUT test
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"natural": {
"type": "naturalsort",
"locale": "en",
"digit": 5,
"maxTokens": 5
}
}
}
}
},
"mappings": {
"doc": {
"properties": {
"points": {
"type": "text",
"store": true,
"fields": {
"encoded": {
"type": "text",
"fielddata": true,
"analyzer": "natural"
}
}
}
}
}
}
}
# Index three docs whose numbers would sort wrong lexicographically
# ("10" < "2") but right under natural sort.
PUT test/doc/1
{
"points": "Bob: 2 points"
}
PUT test/doc/2
{
"points": "Bob: 3 points"
}
PUT test/doc/3
{
"points": "Bob: 10 points"
}
# Expected result order: 2, 3, then 10 points (natural sort)
# Sort all docs ascending on the natural-sort-encoded subfield.
POST test/_search
{
"query": {
"match_all": {}
},
"stored_fields": "points",
"sort": {
"points.encoded": {
"order": "asc"
}
}
}
# elasticsearch-analysis-reference
# https://github.com/jprante/elasticsearch-analysis-reference
# elasticsearch-analysis-sortform
# @TODO
# standardnumber
# https://github.com/jprante/elasticsearch-analysis-standardnumber
# standardnumber filter; sample text contains an ISBN so the filter can
# detect/normalize it.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "standardnumber"
}
],
"text": "Die ISBN von Elasticsearch in Action lautet 9781617291623"
}
# elasticsearch-analysis-symbolname
# @TODO
# symbolname filter with default options on mixed word/digit input.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "symbolname"
}
],
"text": "eins 1, zwei 2"
}
# elasticsearch-analysis-worddelimiter
# https://github.com/jprante/elasticsearch-analysis-worddelimiter2
# worddelimiter2 filter: splits on case changes, hyphens, and
# letter/digit boundaries in the classic "PowerShot Wi-Fi SD500" sample.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "worddelimiter2"
}
],
"text": "PowerShot Wi-Fi SD500. It is as-is!"
}
## old
# Legacy worddelimiter filter on the same input, for comparison with
# worddelimiter2 above.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "worddelimiter"
}
],
"text": "PowerShot Wi-Fi SD500. It is as-is!"
}
# elasticsearch-analysis-year
# @TODO
# https://github.com/jprante/elasticsearch-plugin-bundle/blob/68dc19c34c40364e04400f92500b973a6cbae170/src/main/java/org/xbib/elasticsearch/index/analysis/year/GregorianYearTokenFilterFactory.java#L16
# year filter; per the factory source linked above, "default_year" is
# presumably the fallback when no year token is found — input contains
# "2017" so the default should not apply here; verify in the output.
GET _analyze
{
"tokenizer": "standard",
"filter": [
{
"type": "year",
"default_year": "2019"
}
],
"text": "2017 wird das Jahr!"
}
# elasticsearch-mapper-crypt
# https://github.com/jprante/elasticsearch-mapper-crypt
# crypt mapper demo: field values are stored as SHA-512 hashes, so the
# search must use the hex digest rather than the plaintext.
DELETE test
PUT test
{
"mappings": {
"docs": {
"properties": {
"sha_hash": {
"type": "crypt",
"algo": "SHA-512"
}
}
}
}
}
PUT test/docs/1
{
"sha_hash": "test"
}
# Query value is the hex SHA-512 digest of "test".
POST test/_search
{
"query": {
"match": {
"sha_hash": "EE26B0DD4AF7E749AA1A8EE3C10AE9923F618980772E473F8819A5D4940E0DB27AC185F8A0E1D5F84F88BC887FD67B143732C304CC5FA9AD8E6F57F50028A8FF"
}
}
}
# elasticsearch-mapper-langdetect
# https://github.com/jprante/elasticsearch-langdetect
# langdetect mapper demo: the field indexes the detected language code
# (restricted to en/de/fr), so searching for "en" matches the English doc.
DELETE test
PUT test
{
"mappings": {
"docs": {
"properties": {
"text": {
"type": "langdetect",
"languages": [
"en",
"de",
"fr"
]
}
}
}
}
}
PUT test/docs/1
{
"text": "Oh, say can you see by the dawn`s early light, What so proudly we hailed at the twilight`s last gleaming?"
}
# Term query on the detected language code, not the text itself.
POST test/_search
{
"query": {
"term": {
"text": "en"
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment