Skip to content

Instantly share code, notes, and snippets.

@maliubiao
Created November 4, 2018 10:06
Show Gist options
  • Save maliubiao/17a95ddd9a7c6ff446941fcc7d620bf6 to your computer and use it in GitHub Desktop.
Save maliubiao/17a95ddd9a7c6ff446941fcc7d620bf6 to your computer and use it in GitHub Desktop.
为了更好地理解某些单词的意思，需要找到最恰当地使用它的上下文；所以把几千部文学作品放进 Elasticsearch，搜索这个单词，就可以找到这样的上下文。
import requests
import os
import json
import sys
import pdb
import subprocess
# HTTP headers sent with every Elasticsearch request; all bodies are JSON.
headers = {
    "Content-Type": "application/json",
}

# URL of the "article" index.  The ES_BASE_URL environment variable overrides
# the built-in default, so the script can target another host without edits.
BASE_URL = os.environ.get("ES_BASE_URL", "http://192.168.2.113:9200/article")
def post_to_es(title, article):
    """Index one document under BASE_URL and print the raw response text.

    title   -- value stored in the document's "filename" field.
    article -- byte string; decoded as GBK with undecodable bytes ignored
               and stored in the "content" field.
    """
    document = {
        "filename": title,
        "content": article.decode("gbk", "ignore"),
    }
    body = json.dumps(document)
    response = requests.post(BASE_URL + "/_doc", headers=headers, data=body)
    print(response.text)
def query(word):
    """Search the index for *word* and open the first matching file.

    The word is wrapped in double quotes and sent as the ``q`` parameter of
    a ``_search`` request.  The score and filename of every returned hit are
    printed, then the first hit's file is opened in Notepad++ and the call
    blocks until the editor process exits.

    When the response contains no hits, a message is printed and the
    function returns without launching the editor.
    """
    print(word)
    ret = requests.get(
        BASE_URL + "/_search",
        params={"q": '"%s"' % word},
        headers=headers,
    ).json()

    # Tolerate a response without a "hits" section (e.g. an error payload)
    # instead of failing with KeyError.
    hits = ret.get("hits", {}).get("hits", [])

    filenames = []
    for hit in hits:
        filename = hit["_source"]["filename"]
        print("%s %s" % (hit["_score"], filename))
        filenames.append(filename)

    # Nothing matched: there is no file to open, so stop here rather than
    # raising IndexError on filenames[0].
    if not filenames:
        print("no results")
        return

    print(filenames[0])
    p = subprocess.Popen([
        'C:\\Program Files (x86)\\Notepad++\\notepad++.exe', filenames[0]])
    p.wait()
def create_index():
    """Create the index at BASE_URL and print the server's reply.

    The request body declares a "_doc" mapping whose "filename" and
    "content" properties are both of type "text".
    """
    properties = {
        "filename": {"type": "text"},
        "content": {"type": "text"},
    }
    mapping = {"mappings": {"_doc": {"properties": properties}}}
    body = json.dumps(mapping)
    response = requests.put(BASE_URL, data=body, headers=headers)
    print(response.text)
def send_docs():
    """Index every ``.txt`` file found under the current directory tree.

    Walks "." recursively; for each file whose name ends in ".txt"
    (case-insensitive) the path is printed and the file's raw bytes are
    handed to post_to_es together with that path.
    """
    for base, _dirs, files in os.walk("."):
        for name in files:
            if not name.lower().endswith(".txt"):
                continue
            fullpath = os.path.join(base, name)
            print(fullpath)
            # Binary mode: post_to_es decodes the bytes itself, so no
            # newline translation or implicit text decoding should happen
            # here.  The with-block closes the handle promptly instead of
            # leaving one open per file.
            with open(fullpath, "rb") as handle:
                data = handle.read()
            post_to_es(fullpath, data)
if __name__ == "__main__":
    # Command-line dispatch: the first argument selects the action.
    #   index   create the index
    #   docs    upload every .txt file under the current directory
    #   del     delete the index
    #   q WORD  search for WORD and open the first hit
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: %s index|docs|del|q WORD" % sys.argv[0])
    arg1 = sys.argv[1]
    if arg1 == "index":
        create_index()
    elif arg1 == "docs":
        send_docs()
    elif arg1 == "del":
        requests.delete(BASE_URL)
    elif arg1 == "q":
        if len(sys.argv) < 3:
            # "q" needs the word to search for.
            sys.exit("usage: %s q WORD" % sys.argv[0])
        query(sys.argv[2])
    else:
        print("unknown")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment