Skip to content

Instantly share code, notes, and snippets.

@ikegami-yukino
Last active March 26, 2020 04:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikegami-yukino/bf83daa4dc4f0e3d4787 to your computer and use it in GitHub Desktop.
Save ikegami-yukino/bf83daa4dc4f0e3d4787 to your computer and use it in GitHub Desktop.
Elasticsearch同義語辞書確認用コード
import os
import shutil
import tempfile
import tcptest
from elasticsearch import Elasticsearch
# File handed to the "synonym" token filter through its "synonyms_path" option.
SYNONYMS_PATH = "/tmp/wikipedia_synonym.txt"

# Custom analyzer "ma": the "kuromoji" tokenizer followed by the "synonym" filter.
_analyzers = {
    'ma': {'type': 'custom', 'tokenizer': 'kuromoji', 'filter': ['synonym']},
}

# NOTE(review): 'kuromoji_neodocd_tokenizer' may be a typo for the neologd
# plugin's 'kuromoji_neologd_tokenizer' -- confirm against the installed plugin.
_tokenizers = {
    'kuromoji': {'type': 'kuromoji_neodocd_tokenizer', 'mode': 'search'},
}

_token_filters = {
    'synonym': {'type': 'synonym', 'synonyms_path': SYNONYMS_PATH},
}

# Request body used when creating the test index.
settings = {
    'settings': {
        'analysis': {
            'analyzer': _analyzers,
            'tokenizer': _tokenizers,
            'filter': _token_filters,
        },
    },
}
# Mapping body for the 'doc' type: one 'text' field that is indexed as
# analyzed text using the analyzer named 'ma'.
_text_field = {'type': 'string', 'index': 'analyzed', 'analyzer': 'ma'}

mapping = {
    'doc': {
        'properties': {
            'text': _text_field,
        },
    },
}
class ESTestServer(tcptest.TestServer):
    """Elasticsearch process wrapper built on ``tcptest.TestServer``.

    The data and log directories are temporary directories created in
    ``_before_start`` and deleted again in ``_after_stop``.
    """

    def build_command(self):
        """Return the ``elasticsearch`` command line as a tuple of strings.

        The node is bound to 127.0.0.1, listens for HTTP on ``self.port`` and
        writes its data and logs to ``self.data_dir`` / ``self.logs_dir``.
        """
        command = ['elasticsearch']
        command.append('-Des.network.bind_host=127.0.0.1')
        command.append('-Des.network.host=127.0.0.1')
        command.append('-Des.http.port={0}'.format(self.port))
        command.append("-Des.node.master=true")
        command.append("-Des.node.local=true")
        command.append('-Des.path.data={0}'.format(self.data_dir))
        command.append('-Des.path.logs={0}'.format(self.logs_dir))
        return tuple(command)

    def _before_start(self):
        """Create fresh temporary directories for the node's data and logs.

        NOTE(review): presumably invoked by ``tcptest.TestServer`` before the
        process is launched -- confirm against the tcptest documentation.
        """
        self.data_dir = tempfile.mkdtemp(prefix='esdata')
        self.logs_dir = tempfile.mkdtemp(prefix='eslogs')

    def _after_stop(self):
        """Delete the temporary data and log directories that still exist."""
        for directory in (self.data_dir, self.logs_dir):
            if os.path.exists(directory):
                shutil.rmtree(directory)
# Start a temporary Elasticsearch node, index one document and search for it
# using a differently spelled query string, then print the hits.
with ESTestServer(timeout=30) as server:
    es = Elasticsearch(['localhost:%s' % server.port])

    # Create the index with the analysis settings, then register the mapping.
    es.indices.create(index='test_index', body=settings, request_timeout=30)
    es.indices.put_mapping(index='test_index', doc_type='doc', body=mapping)
    es.indices.refresh(index='test_index')

    # Index a single document and refresh so it is visible to the search below.
    document = {'text': 'モッツァレッラチーズを食べました'}
    res = es.index(index="test_index", doc_type='doc', id=1, body=document)
    es.indices.refresh(index="test_index")

    # The query spelling differs from the indexed text; presumably a hit here
    # shows that the synonym dictionary is being applied -- verify with the
    # actual dictionary contents.
    search_body = {
        "query": {
            "simple_query_string": {
                "query": "モッツァレラチーズを食べました",
                "fields": ["text"],
            },
        },
    }
    res = es.search(index="test_index", body=search_body)

    print("%d Hits:" % res['hits']['total'])
    for hit in res['hits']['hits']:
        print("%(text)s" % hit["_source"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment