Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alejandrodumas/1520364 to your computer and use it in GitHub Desktop.
Save alejandrodumas/1520364 to your computer and use it in GitHub Desktop.
NGram Analyzer in ElasticSearch
# ========================================
# Testing n-gram analysis in ElasticSearch
# ========================================
# Start from a clean slate: drop any index left over from a previous run.
curl -X DELETE localhost:9200/ngram_test

# Create the index with a custom analyzer (url_analyzer) for the "url" field:
#   - "lowercase" tokenizer, followed by the built-in "stop" filter,
#   - "url_stop" removes the scheme tokens "http" and "https",
#   - "url_ngram" emits n-grams of 3 to 5 characters.
# The JSON content type is declared explicitly; without it curl labels a -d
# body as form data, which newer Elasticsearch releases refuse.
# NOTE(review): the "string" field type, the "nGram" filter name and the typed
# "url" mapping are legacy syntax -- confirm against the target server version.
curl -X PUT localhost:9200/ngram_test -H 'Content-Type: application/json' -d '
{
  "settings" : {
    "index" : {
      "analysis" : {
        "analyzer" : {
          "url_analyzer" : {
            "type" : "custom",
            "tokenizer" : "lowercase",
            "filter" : ["stop", "url_stop", "url_ngram"]
          }
        },
        "filter" : {
          "url_stop" : {
            "type" : "stop",
            "stopwords" : ["http", "https"]
          },
          "url_ngram" : {
            "type" : "nGram",
            "min_gram" : 3,
            "max_gram" : 5
          }
        }
      }
    }
  },
  "mappings": {
    "url": {
      "properties": {
        "url": {
          "type": "string",
          "analyzer": "url_analyzer",
          "boost": 10
        }
      }
    }
  }
}
'
# Index sample documents whose host names share substrings (heise/eisen,
# urlaub/kroatien, ...). Each document is posted to the "url" type of the
# ngram_test index and carries a single "url" field.
# The JSON content type is declared explicitly; without it curl labels a -d
# body as form data, which newer Elasticsearch releases refuse.
for host in \
  heise.de \
  heisewetter.de \
  eisenwerken.de \
  eisenwerkenberlin.de \
  urlaubinkroatien.de \
  besteurlaubinkroatien.de \
  kroatien.de
do
  curl -X POST "http://localhost:9200/ngram_test/url" \
    -H 'Content-Type: application/json' \
    -d "{ \"url\" : \"http://${host}\" }"
done

# Ask Elasticsearch to refresh the index before it is queried.
curl -X POST "http://localhost:9200/ngram_test/_refresh"
# Debugging aid: uncomment to run a sample URL through url_analyzer via the
# _analyze endpoint.
# curl "http://localhost:9200/ngram_test/_analyze?text=http://heise.de&analyzer=url_analyzer"

# Search requests to run against the "url" field, one per line.
URLS='
http://localhost:9200/ngram_test/_search?q=url:heise
http://localhost:9200/ngram_test/_search?q=url:eis
http://localhost:9200/ngram_test/_search?q=url:berlin
http://localhost:9200/ngram_test/_search?q=url:wetter
http://localhost:9200/ngram_test/_search?q=url:kroatien
http://localhost:9200/ngram_test/_search?q=url:(urlaub%20kroatien)
'

# Run every query and print its pretty-printed response.
# The list is read line by line instead of being word-split by an unquoted
# expansion: each URL contains "?", a glob character, so an unquoted
# ${URLS} could be rewritten (or dropped) by pathname expansion.
# The list is fed on file descriptor 3 so the commands in the loop body keep
# the script's own stdin.
while IFS= read -r url <&3; do
  # The list starts and ends with a newline, which yields empty lines.
  [ -n "${url}" ] || continue

  printf '\n\n>>> %s\n' "${url}"

  # If an "open" command is available (presumably the desktop URL launcher),
  # also hand it the query; `command -v` is the portable availability check.
  if command -v open >/dev/null 2>&1; then
    open "${url}&pretty=true"
  fi

  curl "${url}&pretty=true"
done 3<<EOF
${URLS}
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment