Skip to content

Instantly share code, notes, and snippets.

@kaeton
Created May 17, 2019 08:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaeton/c2c07350f0f43202817cd02a8efa6972 to your computer and use it in GitHub Desktop.
Save kaeton/c2c07350f0f43202817cd02a8efa6972 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding: utf-8
import json
from urllib.parse import quote

# import joblib
import requests
# result1 = open('result1.json')
# result2 = open('result2.json')
# result3 = open('result3.json')
# result4 = open('result4.json')
# result5 = open('result5.json')
# result1js = json.load(result1)
# result2js = json.load(result2)
# result3js = json.load(result3)
# result4js = json.load(result4)
# result5js = json.load(result5)
class SearchSimularity:
    """Find the best-matching entry for a word across several Azure Search indexes.

    Azure Search is used on the basic tier here. Because of storage
    constraints the data was spread over several indexers, so every search
    word is sent to each index and only the top hit of each index is
    compared.

    Attributes:
        blob_index: Names of the indexes that are queried.
        searchURLs: Base URL of the search service's index collection.
        apiversion: Path and query-string prefix, ending in ``search=``.
        optiontext: Query-string suffix appended after the search word.
        api_key: Optional key sent as the ``api-key`` request header.
        request_timeout: Seconds to wait for each HTTP request.
        estimate_searchresults: Fallback list of search responses used by
            ``compair_each_blob_searchscore`` when it is called without an
            argument.
    """

    def __init__(self, api_key=None, request_timeout=10):
        """Set up the index names and URL fragments used to build queries.

        Args:
            api_key: Optional Azure Search key. When given it is sent as the
                ``api-key`` header with every request.
            request_timeout: Timeout in seconds applied to every request.
        """
        self.blob_index = [
            "azureblob-index",
            "azureblob-index2",
            "azureblob-index3",
            "azureblob-index4",
            "azureblob-index5",
        ]
        self.searchURLs = "https://wiki-redirect.search.windows.net/indexes/"
        self.apiversion = "/docs?api-version=2019-05-06&search="
        self.optiontext = "&$top=1"
        self.api_key = api_key
        self.request_timeout = request_timeout
        # Always defined so the no-argument form of
        # compair_each_blob_searchscore() cannot fail on a missing attribute.
        self.estimate_searchresults = []

    def compair_each_blob_searchscore(self, resultjson=None):
        """Return the ``context`` of the highest-scoring top hit.

        Args:
            resultjson: List of parsed search responses, one per index, each
                holding its hits under ``"value"``. Defaults to
                ``self.estimate_searchresults`` when omitted.

        Returns:
            The ``"context"`` value of the hit with the largest
            ``"@search.score"`` (the first one on ties), or ``None`` when no
            response contains a hit.
        """
        if resultjson is None:
            resultjson = self.estimate_searchresults
        # An index that matched nothing returns an empty "value" list; skip
        # it instead of failing on value[0].
        top_hits = [
            result["value"][0] for result in resultjson if result.get("value")
        ]
        if not top_hits:
            return None
        best_hit = max(top_hits, key=lambda hit: hit["@search.score"])
        return best_hit["context"]

    def execute_azure_search_query(self, url: str):
        """Send a GET request to ``url`` and return the decoded JSON body.

        Args:
            url: Fully built search URL.

        Returns:
            The parsed JSON response.

        Raises:
            requests.RequestException: On connection problems, timeouts, or
                a non-success HTTP status.
        """
        headers = {"api-key": self.api_key} if self.api_key else None
        response = requests.get(
            url, headers=headers, timeout=self.request_timeout
        )
        # Fail loudly on an error status rather than on a confusing KeyError
        # later, when the error payload turns out to have no "value" list.
        response.raise_for_status()
        return response.json()

    def _build_query_url(self, index: str, word: str) -> str:
        """Build the search URL for one index, percent-encoding ``word``."""
        # safe="" also escapes "/" and "&", so the word can never break out
        # of the search= parameter.
        return (
            self.searchURLs
            + index
            + self.apiversion
            + quote(word, safe="")
            + self.optiontext
        )

    def search_each_url(self, word: str):
        """Query every index for ``word`` and print the best match.

        Args:
            word: The search word.

        Returns:
            The printed value: the best-matching ``context``, or ``None``
            when no index returned a hit.
        """
        topjsoncontent = [
            self.execute_azure_search_query(self._build_query_url(index, word))
            for index in self.blob_index
        ]
        top_synonym = self.compair_each_blob_searchscore(topjsoncontent)
        print(top_synonym)
        return top_synonym

    def search_simularity(self, searchwords: list):
        """Run ``search_each_url`` for every word in ``searchwords``.

        Args:
            searchwords: Words to look up.

        Returns:
            A list with the best match for each word, in input order.
        """
        return [self.search_each_url(word) for word in searchwords]
if __name__ == "__main__":
    # Demo run: look up the best match for a few sample words; each result
    # is printed by the searcher itself.
    sample_words = ['hoge', 'hogehogehoge', "マミさん"]
    searcher = SearchSimularity()
    searcher.search_simularity(searchwords=sample_words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment