Skip to content

Instantly share code, notes, and snippets.

@petrknap
Last active December 2, 2016 09:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save petrknap/8ff88a2827af894a20c196faed1992a3 to your computer and use it in GitHub Desktop.
Save petrknap/8ff88a2827af894a20c196faed1992a3 to your computer and use it in GitHub Desktop.
Elasticsearch sandbox
/.idea/
/vendor/
/composer.lock
Options +Indexes
<?php
use Elasticsearch\Client;
use Elasticsearch\Common\Exceptions\Missing404Exception;
require_once __DIR__ . "/require_me.php";
// Elasticsearch index ("database") and mapping type ("table") used by this sandbox.
const INDEX_AKA_DATABASE = "diakritika";
const TYPE_AKA_TABLE_NAME = "diakritika";
// Names of the analyzed fields that "title" / "content" are copied into.
const FULLTEXT = "_fulltext_";
const FULLTEXT_TITLE = "_fulltext_title_";
const FULLTEXT_CONTENT = "_fulltext_content_";
// Step: drop the sandbox index. When the client reports a missing index
// (Missing404Exception) the exception message is JSON-decoded and returned
// instead of letting the exception propagate.
call("Delete index", function (Client $client) {
    try {
        return $client->indices()->delete(["index" => INDEX_AKA_DATABASE]);
    } catch (Missing404Exception $missingIndex) {
        $message = $missingIndex->getMessage();
        return json_decode($message);
    }
});
// Step: create the index with a custom Czech analyzer and a mapping whose
// "title" / "content" fields are copied into analyzed full-text fields.
call("Create index", function (Client $client) {
    // Custom token filters referenced by name from the analyzer below.
    $tokenFilters = [
        "cs_stop" => ["type" => "stop", "stopwords" => "_czech_"],
        "cs_stemmer" => ["type" => "stemmer", "language" => "czech"],
        "remove_duplicities" => ["type" => "unique"],
    ];

    // Filters run in the listed order; "cs_stop" is applied both before and
    // after stemming, exactly as in the original chain.
    $analyzers = [
        "cs_analyzer" => [
            "tokenizer" => "standard",
            "filter" => [
                "lowercase",
                "cs_stop",
                "cs_stemmer",
                "cs_stop",
                "asciifolding",
                "remove_duplicities",
            ],
        ],
    ];

    // Every full-text target field shares the same definition.
    $analyzedField = ["type" => "string", "analyzer" => "cs_analyzer"];

    $properties = [
        FULLTEXT => $analyzedField,
        FULLTEXT_TITLE => $analyzedField,
        FULLTEXT_CONTENT => $analyzedField,
        "title" => [
            "type" => "string",
            "copy_to" => [FULLTEXT, FULLTEXT_TITLE],
        ],
        "content" => [
            "type" => "string",
            "copy_to" => [FULLTEXT, FULLTEXT_CONTENT],
        ],
    ];

    $body = [
        "settings" => [
            "analysis" => [
                "filter" => $tokenFilters,
                "analyzer" => $analyzers,
            ],
        ],
        "mappings" => [
            TYPE_AKA_TABLE_NAME => ["properties" => $properties],
        ],
    ];

    return $client->indices()->create(["index" => INDEX_AKA_DATABASE, "body" => $body]);
});
// Step: index the fixture documents and return the client's response for each.
//
// Ids are written as plain decimal integers. A leading zero makes a PHP integer
// literal octal: 001/002 happen to equal 1/2, but the same notation would be a
// parse error for e.g. 008, so decimal is used to keep the list safely extendable.
call("Index documents", function (Client $client) {
    $documents = [
        // Sentences used by the diacritics / stemming lookups.
        1 => [
            "title" => "Zpěv žlutého koně",
            "content" => "Příliš žluťoučký kůň pěl ďábelské ódy. Byl to dlouhý den."
        ],
        2 => [
            "title" => "Příběh mokrého koně (zdarma)",
            "content" => "Kůň s dlouhou hřívou vylezl z řeky."
        ],
        // Inflected forms of "pán", "hrad", "město", "žena" (title and content
        // hold different forms of the same four words).
        101 => [
            "title" => "pán hrad město žena",
            "content" => "pánové hrady města ženy"
        ],
        102 => [
            "title" => "pána hradu města ženy",
            "content" => "pánů hradů měst žen"
        ],
        103 => [
            "title" => "pánovi hradu městu ženě",
            "content" => "pánům hradům městům ženám"
        ],
        104 => [
            "title" => "pána hrad město ženu",
            "content" => "pány hrady města ženy"
        ],
        105 => [
            "title" => "pane hrade město ženo",
            "content" => "páni hrady města ženy"
        ],
        106 => [
            "title" => "pánu hradě městě ženě",
            "content" => "pánech hradech městech ženách"
        ],
        107 => [
            "title" => "pánem hradem městem ženou",
            "content" => "pány hrady městy ženami"
        ]
    ];

    $returns = [];
    foreach ($documents as $id => $document) {
        $params = [
            "index" => INDEX_AKA_DATABASE,
            "type" => TYPE_AKA_TABLE_NAME,
            "id" => $id,
            "body" => $document
        ];
        $returns[] = $client->index($params);
    }
    return $returns;
});
// Step: issue an index refresh (called right after the documents are indexed).
call("Refresh index", function (Client $client) {
    $indices = $client->indices();
    return $indices->refresh(["index" => INDEX_AKA_DATABASE]);
});
// Step: run "cs_analyzer" over a sample sentence containing accented and
// unaccented spellings of the same words and return the analyze response.
call("Test analyzer", function (Client $client) {
    $sampleText = "dlouhá dlouhý dlouhé zpěv zpev příběh pribeh koně kone kůň kun";

    return $client->indices()->analyze([
        "index" => INDEX_AKA_DATABASE,
        "analyzer" => "cs_analyzer",
        "body" => ["text" => $sampleText],
    ]);
});
// Step: run every lookup against the index and print a line for each mismatch
// between the hits and the expected document ids. Nothing is printed (and the
// closure returns null) when all lookups behave as expected.
//
// The checks are plain `if` statements instead of `@assert(...) or printf(...)`:
// whether assert() evaluates its argument, returns false, or throws depends on
// the zend.assertions / assert.* ini settings and the PHP version, so the
// original checks could be silently skipped or abort the whole run.
call("Search for a documents", function (Client $client) {
    // Ids of the two "horse" sentences and of the declension fixtures.
    $horseIds = [1, 2];
    $declensionIds = [101, 102, 103, 104, 105, 106, 107];

    $lookups = [
        // Accented queries.
        ["query" => ["match" => [FULLTEXT => "zpěv"]], "expected" => ["ids" => [1]]],
        ["query" => ["match" => [FULLTEXT => "příběh"]], "expected" => ["ids" => [2]]],
        ["query" => ["match" => [FULLTEXT => "koně"]], "expected" => ["ids" => $horseIds]],
        ["query" => ["match" => [FULLTEXT => "kůň"]], "expected" => ["ids" => $horseIds]],
        // The same queries typed without diacritics.
        ["query" => ["match" => [FULLTEXT => "zpev"]], "expected" => ["ids" => [1]]],
        ["query" => ["match" => [FULLTEXT => "pribeh"]], "expected" => ["ids" => [2]]],
        ["query" => ["match" => [FULLTEXT => "kone"]], "expected" => ["ids" => $horseIds]],
        ["query" => ["match" => [FULLTEXT => "kun"]], "expected" => ["ids" => $horseIds]],
        ["query" => ["match" => [FULLTEXT => "dlouhá"]], "expected" => ["ids" => $horseIds]],
        // Base forms are expected to match every inflected fixture, in each field.
        ["query" => ["match" => [FULLTEXT => "hrad"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "hrad"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT_CONTENT => "hrad"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT => "pán"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "pán"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT_CONTENT => "pán"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT => "město"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "město"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT_CONTENT => "město"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT => "žena"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT_TITLE => "žena"]], "expected" => ["ids" => $declensionIds]],
        ["query" => ["match" => [FULLTEXT_CONTENT => "žena"]], "expected" => ["ids" => $declensionIds]],
        // Title-only lookups; "zdrama" is a deliberate misspelling of "zdarma".
        ["query" => ["match" => [FULLTEXT_TITLE => "zdarma"]], "expected" => ["ids" => [2]]],
        ["query" => ["match" => [FULLTEXT_TITLE => "příběh zdrama"]], "expected" => ["ids" => [2]]],
        [
            "query" => [
                "match" => [
                    FULLTEXT_TITLE => [
                        "query" => "zdrama",
                        "fuzziness" => 2 // AUTO doesn't work (length 5 -> fuzziness: 1)
                    ]
                ]
            ],
            "expected" => ["ids" => [2]]
        ]
    ];

    foreach ($lookups as $lookup) {
        $expectedIds = $lookup["expected"]["ids"];
        $params = [
            "index" => INDEX_AKA_DATABASE,
            "type" => TYPE_AKA_TABLE_NAME,
            "body" => [
                "query" => $lookup["query"]
            ]
        ];
        $result = $client->search($params);

        // The number of hits must equal the number of expected documents.
        $total = $result["hits"]["total"];
        if ((int) $total !== count($expectedIds)) {
            printf(
                "%d is not equal to %d [lookup=%s]\n",
                $total,
                count($expectedIds),
                json_encode($lookup)
            );
        }

        // Every returned hit must be one of the expected documents. The hit id
        // is cast to int so the membership test can be strict.
        foreach ($result["hits"]["hits"] as $hit) {
            if (!in_array((int) $hit["_id"], $expectedIds, true)) {
                printf(
                    "%d is not in array [%s] [lookup=%s]\n",
                    $hit["_id"],
                    implode(", ", $expectedIds),
                    json_encode($lookup)
                );
            }
        }
    }
});
{
"name": "dash/elasticsearch",
"authors": [
{
"name": "Petr Knap",
"email": "dev@petrknap.cz"
}
],
"require": {
"elasticsearch/elasticsearch": "^5.0"
}
}
# Elasticsearch node the PHP sandbox scripts talk to.
# NOTE(review): composer.json in this gist requires elasticsearch/elasticsearch ^5.0
# while this image is 2.3.5 — confirm the client/server versions are compatible.
elasticsearch:
  image: elasticsearch:2.3.5

# Apache + PHP container that serves this directory.
web:
  image: php:5.6-apache
  links:
    # Makes the Elasticsearch container reachable under the host name
    # "elastic.search", which is the host require_me.php connects to.
    - elasticsearch:elastic.search
  ports:
    # Published on the loopback interface only.
    - 127.0.0.1:80:80
  volumes:
    # The project directory is mounted at /var/www/html inside the container.
    - .:/var/www/html
<?php
use Elasticsearch\Client;
use Elasticsearch\Common\Exceptions\Missing404Exception;
require_once __DIR__ . "/require_me.php";
// Elasticsearch index ("database") and mapping type ("table") of the contacts sandbox.
const DATABASE = "test";
const TABLE = "contacts";
// Source field holding the raw contact line, and the field it is copied into
// for digit-only matching.
const FIELD = "data";
const FIELD_NUMBERS_ONLY = "data_numbers";
// Step: drop the contacts index. When the client reports a missing index
// (Missing404Exception) the exception message is JSON-decoded and returned
// instead of letting the exception propagate.
call("Delete index", function (Client $client) {
    try {
        return $client->indices()->delete(["index" => DATABASE]);
    } catch (Missing404Exception $missingIndex) {
        $message = $missingIndex->getMessage();
        return json_decode($message);
    }
});
// Step: create the contacts index with two analyzers: "cs_analyzer" for the
// raw text field and "numeric_analyzer" for the field the text is copied into.
call("Create index", function (Client $client) {
    // Custom token filters referenced by name from the analyzers below.
    $tokenFilters = [
        "cs_stop" => ["type" => "stop", "stopwords" => "_czech_"],
        "cs_stemmer" => ["type" => "stemmer", "language" => "czech"],
        // Replaces every run of non-digit characters with an empty string.
        "numeric_filter" => [
            "type" => "pattern_replace",
            "pattern" => '([^\d]*)',
            "replacement" => "",
        ],
        "123grams" => [
            "type" => "ngram",
            "min_gram" => 1,
            "max_gram" => 3,
            "preserve_original" => true,
        ],
        "remove_duplicities" => ["type" => "unique"],
    ];

    $czechAnalyzer = [
        "tokenizer" => "standard",
        "filter" => [
            "lowercase",
            "cs_stop",
            "cs_stemmer",
            "cs_stop",
            "asciifolding",
            "remove_duplicities",
        ],
    ];

    $numericAnalyzer = [
        "tokenizer" => "keyword", // returns the entire input string as a single token
        "filter" => [
            "numeric_filter",
            "123grams",
            "remove_duplicities",
        ],
    ];

    $properties = [
        FIELD_NUMBERS_ONLY => [
            "type" => "string",
            "analyzer" => "numeric_analyzer",
        ],
        FIELD => [
            "type" => "string",
            "analyzer" => "cs_analyzer",
            "copy_to" => [FIELD_NUMBERS_ONLY],
        ],
    ];

    $body = [
        "settings" => [
            "analysis" => [
                "filter" => $tokenFilters,
                "analyzer" => [
                    "cs_analyzer" => $czechAnalyzer,
                    "numeric_analyzer" => $numericAnalyzer,
                ],
            ],
        ],
        "mappings" => [
            TABLE => ["properties" => $properties],
        ],
    ];

    return $client->indices()->create(["index" => DATABASE, "body" => $body]);
});
// Step: index three contact lines (one document per id, the whole line stored
// in FIELD) and return the client's response for each of them.
call("Add documents", function (Client $client) {
    $contacts = [
        1 => "Josef Novák, +420 123 456 789, josef@novak.test, Ulice 1, Město 123 45",
        2 => "Anna Nováková, +420 223 456 789, anna.n@posta.test, Ulice 2, Město 123 45",
        3 => "Pavel Nepovim, +420 323 456 789, pavel@nepovim.test, Za rohem 2, Praha 110 00",
    ];

    $responses = [];
    foreach ($contacts as $contactId => $contactLine) {
        $responses[] = $client->index([
            "index" => DATABASE,
            "type" => TABLE,
            "id" => $contactId,
            "body" => [FIELD => $contactLine],
        ]);
    }

    return $responses;
});
// Step: issue an index refresh (called right after the documents are added).
call("Refresh index", function (Client $client) {
    $indices = $client->indices();
    return $indices->refresh(["index" => DATABASE]);
});
// Step: run "numeric_analyzer" over a sample string and return only the token
// texts from the analyze response (keys of the "tokens" list are preserved).
call("Test numeric_analyzer", function (Client $client) {
    $response = $client->indices()->analyze([
        "index" => DATABASE,
        "analyzer" => "numeric_analyzer",
        "body" => ["text" => "Novák Nováková Pavel 456789"],
    ]);

    $tokens = [];
    foreach ($response["tokens"] as $position => $tokenData) {
        $tokens[$position] = $tokenData["token"];
    }

    return $tokens;
});
// Step: run every lookup against the contacts index and print a line for each
// mismatch between the hits and the expected document ids. Nothing is printed
// (and the closure returns null) when all lookups behave as expected.
//
// The checks are plain `if` statements instead of `@assert(...) or printf(...)`:
// whether assert() evaluates its argument, returns false, or throws depends on
// the zend.assertions / assert.* ini settings and the PHP version, so the
// original checks could be silently skipped or abort the whole run.
call("Search for a documents", function (Client $client) {
    $lookups = [
        // Text lookups against the Czech-analyzed field.
        [
            "body" => ["query" => ["match" => [FIELD => "Novák"]]],
            "expected" => ["ids" => [1, 2]]
        ],
        [
            "body" => ["query" => ["match" => [FIELD => "Nováková"]]],
            "expected" => ["ids" => [1, 2]]
        ],
        [
            "body" => ["query" => ["match" => [FIELD => "Pavel"]]],
            "expected" => ["ids" => [3]]
        ],
        [
            "body" => ["query" => ["match" => [FIELD => "Ulice"]]],
            "expected" => ["ids" => [1, 2]]
        ],
        [
            "body" => ["query" => ["match" => [FIELD => "Praha"]]],
            "expected" => ["ids" => [3]]
        ],
        // Digit lookups against the n-gram field; "min_score" drops weak matches.
        [
            "body" => [
                "query" => [
                    "match" => [FIELD_NUMBERS_ONLY => "456789"]
                ],
                "min_score" => 1.00
            ],
            "expected" => ["ids" => [1, 2, 3]]
        ],
        [
            "body" => [
                "query" => [
                    "match" => [FIELD_NUMBERS_ONLY => "+420323456789"]
                ],
                "min_score" => 1.00
            ],
            "expected" => ["ids" => [3]]
        ],
        [
            "body" => [
                "query" => [
                    "match" => [FIELD_NUMBERS_ONLY => "12345"]
                ],
                "min_score" => 0.75
            ],
            "expected" => ["ids" => [1, 2]]
        ],
        [
            "body" => [
                "query" => [
                    "match" => [FIELD_NUMBERS_ONLY => "123"]
                ],
                "min_score" => 0.75
            ],
            "expected" => ["ids" => [1, 2]]
        ]
    ];

    foreach ($lookups as $lookup) {
        $expectedIds = $lookup["expected"]["ids"];
        $params = [
            "index" => DATABASE,
            "type" => TABLE,
            "body" => $lookup["body"]
        ];
        $result = $client->search($params);

        // The number of hits must equal the number of expected documents.
        $total = $result["hits"]["total"];
        if ((int) $total !== count($expectedIds)) {
            printf(
                "%d is not equal to %d [lookup=%s]\n",
                $total,
                count($expectedIds),
                json_encode($lookup)
            );
        }

        // Every returned hit must be one of the expected documents. The hit id
        // is cast to int so the membership test can be strict.
        foreach ($result["hits"]["hits"] as $hit) {
            if (!in_array((int) $hit["_id"], $expectedIds, true)) {
                printf(
                    "%d is not in array [%s] [lookup=%s]\n",
                    $hit["_id"],
                    implode(", ", $expectedIds),
                    json_encode($lookup)
                );
            }
        }
    }
});
# Neither target produces a file, so both are declared phony.
.PHONY: docker-run docker-stop

# Start the compose stack in the foreground after stopping anything left over.
# $(MAKE) is used for the recursive call so make's flags are passed along.
docker-run:
	$(MAKE) docker-stop
	sudo docker-compose up

# Stop the compose services, then every container `docker ps -a -q` lists.
# NOTE(review): that list is ALL containers on the host, not just this project's.
# The leading "-" lets make continue when there is nothing to stop (docker stop
# with no arguments exits non-zero, which would otherwise abort docker-run).
docker-stop:
	sudo docker-compose stop
	-sudo docker stop $$(sudo docker ps -a -q)
<?php
// Declare the response as UTF-8 plain text; this is sent before any step prints output.
header('Content-Type: text/plain; charset=utf-8');
use Elasticsearch\Client;
use Elasticsearch\ClientBuilder;
require_once __DIR__ . "/vendor/autoload.php";
/**
 * Runs one labelled sandbox step and dumps its result.
 *
 * A fresh client pointed at the "elastic.search" host is built for every
 * step, the label is printed as a "# ..." heading, and whatever the callable
 * returns is written with var_dump().
 *
 * @param string   $label    Heading printed before the step's output.
 * @param callable $callable Step body; invoked with the client as its only argument.
 */
function call($label, callable $callable)
{
    $builder = ClientBuilder::create();
    $builder = $builder->setHosts(["elastic.search"]);
    $client = $builder->build();

    echo sprintf("\n# %s\n\n", $label);

    $result = call_user_func($callable, $client);
    var_dump($result);

    echo "\n";
}
<?php
use Elasticsearch\Client;
require_once __DIR__ . "/require_me.php";
// Step: store a single document under a fixed index / type / id.
call("Index a document", function (Client $client) {
    $document = ["testField" => "abc"];

    return $client->index([
        "index" => "my_index",
        "type" => "my_type",
        "id" => "my_id",
        "body" => $document,
    ]);
});
// Step: fetch the document back by its index / type / id.
call("Get a document", function (Client $client) {
    $address = [
        "index" => "my_index",
        "type" => "my_type",
        "id" => "my_id",
    ];

    return $client->get($address);
});
// Step: find the document with a match query on "testField".
call("Search for a document", function (Client $client) {
    $matchQuery = ["match" => ["testField" => "abc"]];

    return $client->search([
        "index" => "my_index",
        "type" => "my_type",
        "body" => ["query" => $matchQuery],
    ]);
});
// Step: delete the document by its index / type / id.
call("Delete a document", function (Client $client) {
    $address = [
        "index" => "my_index",
        "type" => "my_type",
        "id" => "my_id",
    ];

    return $client->delete($address);
});
// Step: delete the whole "my_index" index.
call("Delete an index", function (Client $client) {
    $indices = $client->indices();

    return $indices->delete(["index" => "my_index"]);
});
// Step: create "my_index" with explicit shard / replica settings.
call("Create an index", function (Client $client) {
    $settings = [
        "number_of_shards" => 2,
        "number_of_replicas" => 0,
    ];

    return $client->indices()->create([
        "index" => "my_index",
        "body" => ["settings" => $settings],
    ]);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment