# kaeton/search_synonym.py

## search_synonym.py
#!/usr/bin/env python
# coding: utf-8

import json
# import joblib
import requests

# result1 = open('result1.json')
# result2 = open('result2.json')
# result3 = open('result3.json')
# result4 = open('result4.json')
# result5 = open('result5.json')
# result1js = json.load(result1)
# result2js = json.load(result2)
# result3js = json.load(result3)
# result4js = json.load(result4)
# result5js = json.load(result5)

class SearchSimularity():
    '''
    Find the best synonym for a word across several Azure Search indexes.

    The search service is meant to run on the Basic tier. Because of its
    storage constraints the data was split over several indexers, so one
    query is sent to every index, only the top hit of each index is
    fetched, and those top hits are then compared with each other.
    '''

    # Seconds to wait for a single search request before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.blob_index = [
            "azureblob-index",
            "azureblob-index2",
            "azureblob-index3",
            "azureblob-index4",
            "azureblob-index5"
        ]
        self.searchURLs = "https://wiki-redirect.search.windows.net/indexes/"
        self.apiversion = "/docs?api-version=2019-05-06&search="
        # "$top=1" limits every index to its single best hit.
        self.optiontext = "&$top=1"
        # Results used by compair_each_blob_searchscore() when it is called
        # without an argument (e.g. pre-loaded responses for offline checks).
        self.estimate_searchresults = []

    def compair_each_blob_searchscore(self, resultjson=None):
        '''
        Return the "context" (synonym) of the best-scoring top hit.

        resultjson: list of parsed search responses, one per index, each
            carrying its hits under the "value" key. When omitted,
            self.estimate_searchresults is used instead.

        Returns the "context" field of the hit with the highest
        "@search.score" (the earliest one on a tie), or None when no
        response contains a hit.
        '''
        if resultjson is None:
            resultjson = self.estimate_searchresults
        # An index without any match answers with an empty "value" list;
        # such responses are skipped instead of raising IndexError.
        top_hits = [result["value"][0] for result in resultjson if result.get("value")]
        if not top_hits:
            return None
        best_hit = max(top_hits, key=lambda hit: hit["@search.score"])
        return best_hit["context"]

    def execute_azure_search_query(self, url: str):
        '''
        Send a GET request to url and return the decoded JSON body.

        Raises requests.HTTPError on a 4xx/5xx answer, so an error payload
        is never mistaken for an empty search result.
        '''
        # NOTE(review): no "api-key" header is sent here; confirm how the
        # service is expected to authorize these queries.
        item = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        item.raise_for_status()
        # json.loads parses a string; json.load would need a file object.
        return json.loads(item.text)

    def _build_query_url(self, index: str, word: str):
        '''Build the search URL for one index, percent-encoding the word.'''
        from urllib.parse import quote

        # safe="" also escapes "/", "&" and "#", which would otherwise be
        # read as URL syntax instead of as part of the search text.
        return self.searchURLs + index + self.apiversion + quote(word, safe="") + self.optiontext

    def search_each_url(self, word: str):
        '''
        Query every index for word, print the best synonym and return it.

        Returns None (and prints None) when no index produced a hit.
        '''
        topjsoncontent = [
            self.execute_azure_search_query(self._build_query_url(index, word))
            for index in self.blob_index
        ]
        top_synonym = self.compair_each_blob_searchscore(topjsoncontent)
        print(top_synonym)
        return top_synonym

    def search_simularity(self, searchwords: list):
        '''
        Look up the best synonym for every word in searchwords.

        Each result is printed as it is found; the results are also
        returned as a list in the same order as searchwords.
        '''
        return [self.search_each_url(word) for word in searchwords]

if __name__ == "__main__":
    # Manual smoke run: print the best synonym for a few sample words.
    sample_words = ['hoge', 'hogehogehoge', "マミさん"]
    searcher = SearchSimularity()
    searcher.search_simularity(searchwords=sample_words)
	#!/usr/bin/env python
	# coding: utf-8

	import json
	# import joblib
	import requests

	# result1 = open('result1.json')
	# result2 = open('result2.json')
	# result3 = open('result3.json')
	# result4 = open('result4.json')
	# result5 = open('result5.json')
	# result1js = json.load(result1)
	# result2js = json.load(result2)
	# result3js = json.load(result3)
	# result4js = json.load(result4)
	# result5js = json.load(result5)

	class SearchSimularity():
	    '''
	    Find the best synonym for a word across several Azure Search indexes.

	    The search service is meant to run on the Basic tier. Because of its
	    storage constraints the data was split over several indexers, so one
	    query is sent to every index, only the top hit of each index is
	    fetched, and those top hits are then compared with each other.
	    '''

	    # Seconds to wait for a single search request before giving up.
	    REQUEST_TIMEOUT = 10

	    def __init__(self):
	        self.blob_index = [
	            "azureblob-index",
	            "azureblob-index2",
	            "azureblob-index3",
	            "azureblob-index4",
	            "azureblob-index5"
	        ]
	        self.searchURLs = "https://wiki-redirect.search.windows.net/indexes/"
	        self.apiversion = "/docs?api-version=2019-05-06&search="
	        # "$top=1" limits every index to its single best hit.
	        self.optiontext = "&$top=1"
	        # Results used by compair_each_blob_searchscore() when it is called
	        # without an argument (e.g. pre-loaded responses for offline checks).
	        self.estimate_searchresults = []

	    def compair_each_blob_searchscore(self, resultjson=None):
	        '''
	        Return the "context" (synonym) of the best-scoring top hit.

	        resultjson: list of parsed search responses, one per index, each
	            carrying its hits under the "value" key. When omitted,
	            self.estimate_searchresults is used instead.

	        Returns the "context" field of the hit with the highest
	        "@search.score" (the earliest one on a tie), or None when no
	        response contains a hit.
	        '''
	        if resultjson is None:
	            resultjson = self.estimate_searchresults
	        # An index without any match answers with an empty "value" list;
	        # such responses are skipped instead of raising IndexError.
	        top_hits = [result["value"][0] for result in resultjson if result.get("value")]
	        if not top_hits:
	            return None
	        best_hit = max(top_hits, key=lambda hit: hit["@search.score"])
	        return best_hit["context"]

	    def execute_azure_search_query(self, url: str):
	        '''
	        Send a GET request to url and return the decoded JSON body.

	        Raises requests.HTTPError on a 4xx/5xx answer, so an error payload
	        is never mistaken for an empty search result.
	        '''
	        # NOTE(review): no "api-key" header is sent here; confirm how the
	        # service is expected to authorize these queries.
	        item = requests.get(url, timeout=self.REQUEST_TIMEOUT)
	        item.raise_for_status()
	        # json.loads parses a string; json.load would need a file object.
	        return json.loads(item.text)

	    def _build_query_url(self, index: str, word: str):
	        '''Build the search URL for one index, percent-encoding the word.'''
	        from urllib.parse import quote

	        # safe="" also escapes "/", "&" and "#", which would otherwise be
	        # read as URL syntax instead of as part of the search text.
	        return self.searchURLs + index + self.apiversion + quote(word, safe="") + self.optiontext

	    def search_each_url(self, word: str):
	        '''
	        Query every index for word, print the best synonym and return it.

	        Returns None (and prints None) when no index produced a hit.
	        '''
	        topjsoncontent = [
	            self.execute_azure_search_query(self._build_query_url(index, word))
	            for index in self.blob_index
	        ]
	        top_synonym = self.compair_each_blob_searchscore(topjsoncontent)
	        print(top_synonym)
	        return top_synonym

	    def search_simularity(self, searchwords: list):
	        '''
	        Look up the best synonym for every word in searchwords.

	        Each result is printed as it is found; the results are also
	        returned as a list in the same order as searchwords.
	        '''
	        return [self.search_each_url(word) for word in searchwords]

	if __name__ == "__main__":
	    # Manual smoke run: print the best synonym for a few sample words.
	    simularity_searcher = SearchSimularity()
	    simularity_searcher.search_simularity(searchwords=['hoge', 'hogehogehoge', "マミさん"])