Last active
November 2, 2023 03:09
-
-
Save h2rashee/961a4f8b5b7a658c8b9c9f8354aa1ed2 to your computer and use it in GitHub Desktop.
Hebbia.AI: Given a query, passage and scores of the words in the passage, return a single contiguous selection of the passage.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample input for the selection heuristic: a query, a candidate passage, and
# one relevance score per space-separated word of the passage (24 of each).
query = "Why are covid tests useful in the workplace?"
passage = "if it does. Ultimately, tests will help ensure that people can get back to work and will help reestablish public confidence in various sectors."
scores = [0.09, 0.01, 0.0, 0.9, 1.0, 0.4, 0.2, 0.6, 0.7, 0.8, 0.9, 0.9, 0.8, 0.7, 0.6, 0.02, 0.1, 0.01, 0.5, 0.2, 0.3, 0.0, 0.01, 0.00]

# Example of possible picks
#   0.9, 1.0, 0.4
#   0.6, 0.7, 0.8, 0.9, 0.9, 0.8, 0.7, 0.6
#
# Difference array approach? (scores[i + 1] - scores[i])
#   [-0.08, -0.01, 0.9, 0.1, -0.6, -0.2, 0.4, 0.1, 0.1, 0.1, 0, -0.1, -0.1 ...]
# Number of top-scoring words used to anchor the selection.
MAX_HEURISTIC = 4


def find_best_selection(passage, scores):
    """Return one contiguous span of *passage* covering its top-scoring words.

    The passage is split on single spaces, the positions of the
    ``MAX_HEURISTIC`` highest strictly positive scores are located, and every
    word from the earliest to the latest of those positions (inclusive) is
    returned.

    Args:
        passage: Text to select from; words are separated by single spaces.
        scores: Sequence of per-word scores, where ``scores[i]`` belongs to
            the i-th word of the split passage.

    Returns:
        The selected words joined by single spaces, or ``''`` when no score
        is strictly positive (which includes an empty ``scores``).
    """
    words = passage.split(' ')

    # Only strictly positive scores may anchor the selection; zero and
    # negative scores are never picked.
    candidates = [i for i, score in enumerate(scores) if score > 0.0]
    if not candidates:
        # Nothing relevant to anchor on: return an empty selection instead of
        # failing on an empty index list.
        return ''

    # sorted() is stable even with reverse=True, so equal scores keep their
    # ascending index order and the earliest tied word wins.
    ranked = sorted(candidates, key=lambda i: scores[i], reverse=True)
    top = ranked[:MAX_HEURISTIC]

    return ' '.join(words[min(top):max(top) + 1])
# Demo: run the selector on the sample passage and its word scores.
print(find_best_selection(passage=passage, scores=scores))
# Returns: Ultimately, tests will help ensure that people can get
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment