Last active
December 2, 2016 09:33
-
-
Save petrknap/8ff88a2827af894a20c196faed1992a3 to your computer and use it in GitHub Desktop.
Elasticsearch sandbox
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/.idea/ | |
/vendor/ | |
/composer.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Options +Indexes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use Elasticsearch\Client; | |
use Elasticsearch\Common\Exceptions\Missing404Exception; | |
require_once __DIR__ . "/require_me.php"; | |
// Index name sent as the "index" parameter of every request in this script.
define('INDEX_AKA_DATABASE', 'diakritika');
// Mapping type sent as the "type" parameter when indexing and searching.
define('TYPE_AKA_TABLE_NAME', 'diakritika');
// Analyzed field that receives copies of both "title" and "content".
define('FULLTEXT', '_fulltext_');
// Analyzed field that receives a copy of "title" only.
define('FULLTEXT_TITLE', '_fulltext_title_');
// Analyzed field that receives a copy of "content" only.
define('FULLTEXT_CONTENT', '_fulltext_content_');
// Step: delete the index so that the following steps start from an empty state.
call("Delete index", function (Client $client) {
    $request = ["index" => INDEX_AKA_DATABASE];

    try {
        $response = $client->indices()->delete($request);
    } catch (Missing404Exception $notFound) {
        // The index does not exist: instead of aborting, report the exception
        // message (decoded as JSON) as this step's result.
        $response = json_decode($notFound->getMessage());
    }

    return $response;
});
// Step: create the index with a Czech analyzer and the field mapping used by the searches.
call("Create index", function (Client $client) {
    // Custom token filters referenced by name from the analyzer below.
    $tokenFilters = [
        "cs_stop" => ["type" => "stop", "stopwords" => "_czech_"],
        "cs_stemmer" => ["type" => "stemmer", "language" => "czech"],
        "remove_duplicities" => ["type" => "unique"],
    ];

    // "cs_stop" is listed twice on purpose of the original author: once before
    // and once after stemming. NOTE(review): presumably to drop tokens that only
    // become stop words after stemming - confirm.
    $analyzers = [
        "cs_analyzer" => [
            "tokenizer" => "standard",
            "filter" => [
                "lowercase",
                "cs_stop",
                "cs_stemmer",
                "cs_stop",
                "asciifolding",
                "remove_duplicities",
            ],
        ],
    ];

    // Shared definition of the three analyzed copy_to target fields.
    $analyzedText = ["type" => "string", "analyzer" => "cs_analyzer"];

    $properties = [
        FULLTEXT => $analyzedText,
        FULLTEXT_TITLE => $analyzedText,
        FULLTEXT_CONTENT => $analyzedText,
        // The source fields copy their value into the analyzed fields above.
        "title" => ["type" => "string", "copy_to" => [FULLTEXT, FULLTEXT_TITLE]],
        "content" => ["type" => "string", "copy_to" => [FULLTEXT, FULLTEXT_CONTENT]],
    ];

    return $client->indices()->create([
        "index" => INDEX_AKA_DATABASE,
        "body" => [
            "settings" => [
                "analysis" => [
                    "filter" => $tokenFilters,
                    "analyzer" => $analyzers,
                ],
            ],
            "mappings" => [
                TYPE_AKA_TABLE_NAME => ["properties" => $properties],
            ],
        ],
    ]);
});
// Step: index the fixture documents; returns the list of per-document responses.
call("Index documents", function (Client $client) {
    // Fixtures keyed by document id. Ids 1 and 2 are short stories; ids 101-107
    // hold inflected forms of the words "pán", "hrad", "město" and "žena".
    // Ids are written in plain decimal: a leading zero would make PHP read the
    // literal as octal (same value for 1 and 2, but a trap for e.g. 008).
    $documents = [
        1 => [
            "title" => "Zpěv žlutého koně",
            "content" => "Příliš žluťoučký kůň pěl ďábelské ódy. Byl to dlouhý den.",
        ],
        2 => [
            "title" => "Příběh mokrého koně (zdarma)",
            "content" => "Kůň s dlouhou hřívou vylezl z řeky.",
        ],
        101 => [
            "title" => "pán hrad město žena",
            "content" => "pánové hrady města ženy",
        ],
        102 => [
            "title" => "pána hradu města ženy",
            "content" => "pánů hradů měst žen",
        ],
        103 => [
            "title" => "pánovi hradu městu ženě",
            "content" => "pánům hradům městům ženám",
        ],
        104 => [
            "title" => "pána hrad město ženu",
            "content" => "pány hrady města ženy",
        ],
        105 => [
            "title" => "pane hrade město ženo",
            "content" => "páni hrady města ženy",
        ],
        106 => [
            "title" => "pánu hradě městě ženě",
            "content" => "pánech hradech městech ženách",
        ],
        107 => [
            "title" => "pánem hradem městem ženou",
            "content" => "pány hrady městy ženami",
        ],
    ];

    $responses = [];
    foreach ($documents as $documentId => $fields) {
        $responses[] = $client->index([
            "index" => INDEX_AKA_DATABASE,
            "type" => TYPE_AKA_TABLE_NAME,
            "id" => $documentId,
            "body" => $fields,
        ]);
    }

    return $responses;
});
// Step: explicitly refresh the index before the search steps run.
call("Refresh index", function (Client $client) {
    return $client->indices()->refresh(["index" => INDEX_AKA_DATABASE]);
});
// Step: run "cs_analyzer" over a sample sentence and return the analyze response,
// so the produced tokens can be inspected in the step output.
call("Test analyzer", function (Client $client) {
    // Sample mixes accented and unaccented spellings of the same words.
    $sampleText = "dlouhá dlouhý dlouhé zpěv zpev příběh pribeh koně kone kůň kun";

    return $client->indices()->analyze([
        "index" => INDEX_AKA_DATABASE,
        "analyzer" => "cs_analyzer",
        "body" => ["text" => $sampleText],
    ]);
});
// Step: run every lookup and print a line for each mismatch between the search
// result and the expected document ids. Prints nothing when all lookups pass.
//
// Mismatches are detected with plain conditionals rather than
// `@assert(...) or printf(...)`: assert() can be compiled out
// (zend.assertions) or can throw AssertionError (assert.exception), and in
// both cases the original report line would never be printed.
call("Search for a documents", function (Client $client) {
    // Ids of the documents that contain inflected forms of pán/hrad/město/žena.
    $inflectionIds = [101, 102, 103, 104, 105, 106, 107];

    $lookups = [
        // Accented queries.
        ["query" => ["match" => [FULLTEXT => "zpěv"]], "expected" => ["ids" => [1]]],
        ["query" => ["match" => [FULLTEXT => "příběh"]], "expected" => ["ids" => [2]]],
        ["query" => ["match" => [FULLTEXT => "koně"]], "expected" => ["ids" => [1, 2]]],
        ["query" => ["match" => [FULLTEXT => "kůň"]], "expected" => ["ids" => [1, 2]]],
        // The same queries typed without diacritics.
        ["query" => ["match" => [FULLTEXT => "zpev"]], "expected" => ["ids" => [1]]],
        ["query" => ["match" => [FULLTEXT => "pribeh"]], "expected" => ["ids" => [2]]],
        ["query" => ["match" => [FULLTEXT => "kone"]], "expected" => ["ids" => [1, 2]]],
        ["query" => ["match" => [FULLTEXT => "kun"]], "expected" => ["ids" => [1, 2]]],
        ["query" => ["match" => [FULLTEXT => "dlouhá"]], "expected" => ["ids" => [1, 2]]],
        // Base forms are expected to match every inflected form, in each field.
        ["query" => ["match" => [FULLTEXT => "hrad"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "hrad"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_CONTENT => "hrad"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT => "pán"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "pán"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_CONTENT => "pán"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT => "město"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "město"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_CONTENT => "město"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT => "žena"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "žena"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_CONTENT => "žena"]], "expected" => ["ids" => $inflectionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "zdarma"]], "expected" => ["ids" => [2]]],
        // NOTE(review): "zdrama" looks like a deliberate misspelling of "zdarma"
        // (the next lookup searches for it with fuzziness) - confirm.
        ["query" => ["match" => [FULLTEXT_TITLE => "příběh zdrama"]], "expected" => ["ids" => [2]]],
        [
            "query" => [
                "match" => [
                    FULLTEXT_TITLE => [
                        "query" => "zdrama",
                        "fuzziness" => 2, // AUTO doesn't work (length 5 -> fuzziness: 1)
                    ],
                ],
            ],
            "expected" => ["ids" => [2]],
        ],
    ];

    foreach ($lookups as $lookup) {
        $expectedIds = $lookup["expected"]["ids"];

        $result = $client->search([
            "index" => INDEX_AKA_DATABASE,
            "type" => TYPE_AKA_TABLE_NAME,
            "body" => ["query" => $lookup["query"]],
        ]);

        // Check 1: the number of hits must equal the number of expected ids.
        $actualTotal = (int) $result["hits"]["total"];
        if ($actualTotal !== count($expectedIds)) {
            printf(
                "%d is not equal to %d [lookup=%s]\n",
                $actualTotal,
                count($expectedIds),
                json_encode($lookup)
            );
        }

        // Check 2: every returned hit must be one of the expected documents.
        foreach ($result["hits"]["hits"] as $hit) {
            // The expected ids are integers, so the hit id is cast before the
            // strict membership test.
            $hitId = (int) $hit["_id"];
            if (!in_array($hitId, $expectedIds, true)) {
                printf(
                    "%d is not in array [%s] [lookup=%s]\n",
                    $hitId,
                    implode(", ", $expectedIds),
                    json_encode($lookup)
                );
            }
        }
    }
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "dash/elasticsearch", | |
"authors": [ | |
{ | |
"name": "Petr Knap", | |
"email": "dev@petrknap.cz" | |
} | |
], | |
"require": { | |
"elasticsearch/elasticsearch": "^5.0" | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search backend used by the PHP scripts.
elasticsearch:
  image: "elasticsearch:2.3.5"

# Apache + PHP container that serves this directory.
web:
  image: "php:5.6-apache"
  links:
    # "elastic.search" is the alias under which the web container reaches the
    # elasticsearch service.
    - "elasticsearch:elastic.search"
  ports:
    # Published on the loopback interface only.
    - "127.0.0.1:80:80"
  volumes:
    # Mount the project directory as the web root.
    - ".:/var/www/html"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use Elasticsearch\Client; | |
use Elasticsearch\Common\Exceptions\Missing404Exception; | |
require_once __DIR__ . "/require_me.php"; | |
// Index name sent as the "index" parameter of every request in this script.
define('DATABASE', 'test');
// Mapping type sent as the "type" parameter when indexing and searching.
define('TABLE', 'contacts');
// Field holding the raw contact line; analyzed with "cs_analyzer".
define('FIELD', 'data');
// Field receiving a copy of FIELD; analyzed with "numeric_analyzer".
define('FIELD_NUMBERS_ONLY', 'data_numbers');
// Step: delete the index so that the following steps start from an empty state.
call("Delete index", function (Client $client) {
    $request = ["index" => DATABASE];

    try {
        $response = $client->indices()->delete($request);
    } catch (Missing404Exception $notFound) {
        // The index does not exist: instead of aborting, report the exception
        // message (decoded as JSON) as this step's result.
        $response = json_decode($notFound->getMessage());
    }

    return $response;
});
// Step: create the index with a Czech text analyzer, a digits-only n-gram
// analyzer, and the mapping that feeds FIELD into FIELD_NUMBERS_ONLY.
call("Create index", function (Client $client) {
    // Custom token filters referenced by name from the analyzers below.
    $tokenFilters = [
        "cs_stop" => ["type" => "stop", "stopwords" => "_czech_"],
        "cs_stemmer" => ["type" => "stemmer", "language" => "czech"],
        // Replaces every run of non-digit characters with the empty string.
        "numeric_filter" => [
            "type" => "pattern_replace",
            "pattern" => '([^\d]*)',
            "replacement" => "",
        ],
        // Emits n-grams of length 1 to 3.
        "123grams" => [
            "type" => "ngram",
            "min_gram" => 1,
            "max_gram" => 3,
            "preserve_original" => true,
        ],
        "remove_duplicities" => ["type" => "unique"],
    ];

    $analyzers = [
        "cs_analyzer" => [
            "tokenizer" => "standard",
            "filter" => [
                "lowercase",
                "cs_stop",
                "cs_stemmer",
                "cs_stop",
                "asciifolding",
                "remove_duplicities",
            ],
        ],
        "numeric_analyzer" => [
            "tokenizer" => "keyword", // returns the entire input string as a single token
            "filter" => [
                "numeric_filter",
                "123grams",
                "remove_duplicities",
            ],
        ],
    ];

    $properties = [
        FIELD_NUMBERS_ONLY => [
            "type" => "string",
            "analyzer" => "numeric_analyzer",
        ],
        // The raw contact line is analyzed as Czech text and also copied into
        // the digits-only field above.
        FIELD => [
            "type" => "string",
            "analyzer" => "cs_analyzer",
            "copy_to" => [FIELD_NUMBERS_ONLY],
        ],
    ];

    return $client->indices()->create([
        "index" => DATABASE,
        "body" => [
            "settings" => [
                "analysis" => [
                    "filter" => $tokenFilters,
                    "analyzer" => $analyzers,
                ],
            ],
            "mappings" => [
                TABLE => ["properties" => $properties],
            ],
        ],
    ]);
});
// Step: index the fixture contacts; returns the list of per-document responses.
call("Add documents", function (Client $client) {
    // One free-text contact line per document id.
    $contacts = [
        1 => "Josef Novák, +420 123 456 789, josef@novak.test, Ulice 1, Město 123 45",
        2 => "Anna Nováková, +420 223 456 789, anna.n@posta.test, Ulice 2, Město 123 45",
        3 => "Pavel Nepovim, +420 323 456 789, pavel@nepovim.test, Za rohem 2, Praha 110 00",
    ];

    $responses = [];
    foreach ($contacts as $contactId => $line) {
        $responses[] = $client->index([
            'index' => DATABASE,
            'type' => TABLE,
            'id' => $contactId,
            'body' => [FIELD => $line],
        ]);
    }

    return $responses;
});
// Step: explicitly refresh the index before the search steps run.
call("Refresh index", function (Client $client) {
    return $client->indices()->refresh(["index" => DATABASE]);
});
// Step: run "numeric_analyzer" over a sample text and return only the token
// strings from the analyze response.
call("Test numeric_analyzer", function (Client $client) {
    $response = $client->indices()->analyze([
        "index" => DATABASE,
        "analyzer" => "numeric_analyzer",
        "body" => ["text" => "Novák Nováková Pavel 456789"],
    ]);

    // Keep each token's "token" value under its original array key.
    $tokens = [];
    foreach ($response["tokens"] as $position => $tokenData) {
        $tokens[$position] = $tokenData["token"];
    }

    return $tokens;
});
// Step: run every lookup and print a line for each mismatch between the search
// result and the expected document ids. Prints nothing when all lookups pass.
//
// Mismatches are detected with plain conditionals rather than
// `@assert(...) or printf(...)`: assert() can be compiled out
// (zend.assertions) or can throw AssertionError (assert.exception), and in
// both cases the original report line would never be printed.
call("Search for a documents", function (Client $client) {
    $lookups = [
        // Text lookups against the Czech-analyzed field.
        ["body" => ["query" => ["match" => [FIELD => "Novák"]]], "expected" => ["ids" => [1, 2]]],
        ["body" => ["query" => ["match" => [FIELD => "Nováková"]]], "expected" => ["ids" => [1, 2]]],
        ["body" => ["query" => ["match" => [FIELD => "Pavel"]]], "expected" => ["ids" => [3]]],
        ["body" => ["query" => ["match" => [FIELD => "Ulice"]]], "expected" => ["ids" => [1, 2]]],
        ["body" => ["query" => ["match" => [FIELD => "Praha"]]], "expected" => ["ids" => [3]]],
        // Number lookups against the digits-only n-gram field; "min_score"
        // discards weakly matching documents.
        [
            "body" => [
                "query" => ["match" => [FIELD_NUMBERS_ONLY => "456789"]],
                "min_score" => 1.00,
            ],
            "expected" => ["ids" => [1, 2, 3]],
        ],
        [
            "body" => [
                "query" => ["match" => [FIELD_NUMBERS_ONLY => "+420323456789"]],
                "min_score" => 1.00,
            ],
            "expected" => ["ids" => [3]],
        ],
        [
            "body" => [
                "query" => ["match" => [FIELD_NUMBERS_ONLY => "12345"]],
                "min_score" => 0.75,
            ],
            "expected" => ["ids" => [1, 2]],
        ],
        [
            "body" => [
                "query" => ["match" => [FIELD_NUMBERS_ONLY => "123"]],
                "min_score" => 0.75,
            ],
            "expected" => ["ids" => [1, 2]],
        ],
    ];

    foreach ($lookups as $lookup) {
        $expectedIds = $lookup["expected"]["ids"];

        $result = $client->search([
            "index" => DATABASE,
            "type" => TABLE,
            "body" => $lookup["body"],
        ]);

        // Check 1: the number of hits must equal the number of expected ids.
        $actualTotal = (int) $result["hits"]["total"];
        if ($actualTotal !== count($expectedIds)) {
            printf(
                "%d is not equal to %d [lookup=%s]\n",
                $actualTotal,
                count($expectedIds),
                json_encode($lookup)
            );
        }

        // Check 2: every returned hit must be one of the expected documents.
        foreach ($result["hits"]["hits"] as $hit) {
            // The expected ids are integers, so the hit id is cast before the
            // strict membership test.
            $hitId = (int) $hit["_id"];
            if (!in_array($hitId, $expectedIds, true)) {
                printf(
                    "%d is not in array [%s] [lookup=%s]\n",
                    $hitId,
                    implode(", ", $expectedIds),
                    json_encode($lookup)
                );
            }
        }
    }
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Both targets are commands, not files, so they must run even if a file with
# the same name exists.
.PHONY: docker-run docker-stop

# Stop whatever is still running, then start the stack in the foreground.
docker-run:
	$(MAKE) docker-stop
	sudo docker-compose up

# Stop the compose services, then every other container known to the local
# Docker daemon. The id list is checked first because `docker stop` fails when
# called without arguments, which would abort `docker-run` on a host that has
# no containers.
docker-stop:
	sudo docker-compose stop
	ids=$$(sudo docker ps -a -q); [ -z "$$ids" ] || sudo docker stop $$ids
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
header('Content-Type: text/plain; charset=utf-8'); | |
use Elasticsearch\Client; | |
use Elasticsearch\ClientBuilder; | |
require_once __DIR__ . "/vendor/autoload.php"; | |
/**
 * Runs one labelled step against Elasticsearch and dumps its result.
 *
 * A new client pointed at the "elastic.search" host is built for every step.
 * The label is printed as a "# ..." heading, followed by a var_dump() of
 * whatever the callback returns.
 *
 * @param string   $label    Heading printed above the step's output.
 * @param callable $callable Invoked with the Elasticsearch client as its only argument.
 *
 * @return void
 */
function call($label, callable $callable)
{
    $client = ClientBuilder::create()
        ->setHosts(["elastic.search"])
        ->build();

    echo "\n# ", $label, "\n\n";
    $stepResult = call_user_func($callable, $client);
    var_dump($stepResult);
    echo "\n";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use Elasticsearch\Client; | |
require_once __DIR__ . "/require_me.php"; | |
// Step: index a single sample document and return the index response.
call("Index a document", function (Client $client) {
    return $client->index([
        'index' => 'my_index',
        'type' => 'my_type',
        'id' => 'my_id',
        // Refresh as part of the write: this script searches for the document
        // right afterwards, and a search only sees documents once the index
        // has been refreshed.
        'refresh' => true,
        'body' => ['testField' => 'abc'],
    ]);
});
// Step: fetch the sample document by its id and return the response.
call("Get a document", function (Client $client) {
    return $client->get([
        'index' => 'my_index',
        'type' => 'my_type',
        'id' => 'my_id',
    ]);
});
// Step: search the sample type with a match query on "testField" and return the response.
call("Search for a document", function (Client $client) {
    $matchQuery = ['match' => ['testField' => 'abc']];

    return $client->search([
        'index' => 'my_index',
        'type' => 'my_type',
        'body' => ['query' => $matchQuery],
    ]);
});
// Step: delete the sample document by its id and return the response.
call("Delete a document", function (Client $client) {
    return $client->delete([
        'index' => 'my_index',
        'type' => 'my_type',
        'id' => 'my_id',
    ]);
});
// Step: delete the whole sample index and return the response.
call("Delete an index", function (Client $client) {
    return $client->indices()->delete(['index' => 'my_index']);
});
// Step: create the sample index with explicit shard and replica settings.
call("Create an index", function (Client $client) {
    $indexSettings = [
        'number_of_shards' => 2,
        'number_of_replicas' => 0,
    ];

    return $client->indices()->create([
        'index' => 'my_index',
        'body' => ['settings' => $indexSettings],
    ]);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment