Created
December 14, 2023 00:55
-
-
Save danielsgriffin/07bbce17cc213d1e56e63a58c08febeb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standard-library imports.
import os
import json
from datetime import datetime

# Third-party imports.
import requests
import llm  # from Simon Willison's https://llm.datasette.io/en/stable/python-api.html
import tiktoken  # for constraining prompt context length

# You.com search API key, read from the environment (None if unset).
YOU_API_KEY = os.environ.get("YOU_API_KEY")
# JSON file used as a simple on-disk cache of past search results.
RESULTS_LOG_FILE = "tiyse_results.json"
def num_tokens_from_string(string: str) -> int:
    """Return the number of tokens in *string* under the cl100k_base encoding.

    Used to keep the retrieved-context prompt within a token budget.
    Adapted from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    """
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(string))
def add_dicts_to_context(prompt: str, dicts, max_tokens: int = 2000) -> str:
    """Append "title: description" snippets from *dicts* onto *prompt*.

    Stops before the running token count (prompt plus appended snippets)
    would exceed *max_tokens*.

    Args:
        prompt: Base prompt string to extend.
        dicts: Iterable of search hits; each must have 'title' and
            'description' keys (the shape of the You.com "hits" entries
            used by the caller).
        max_tokens: Token budget for the combined prompt. Defaults to 2000,
            preserving the original hard-coded limit.

    Returns:
        The prompt with as many snippets appended as fit the budget.
    """
    token_count = num_tokens_from_string(prompt)
    for hit in dicts:
        title_tokens = num_tokens_from_string(hit['title'])
        snippet_tokens = num_tokens_from_string(hit['description'])
        # Stop entirely at the first hit that would blow the budget
        # (matches the original behavior of breaking, not skipping).
        if token_count + title_tokens + snippet_tokens > max_tokens:
            break
        prompt += '\n' + hit['title'] + ': ' + hit['description'] + ' '
        token_count += title_tokens + snippet_tokens
    return prompt
def get_results_from_json(query, tool_name):
    """Return the most recent cached results for (query, tool_name), or None.

    The cache file maps query -> tool_name -> timestamp -> results. The
    timestamps are str(datetime.now()) values, which sort chronologically
    as strings, so the greatest key is the most recent run.

    Returns:
        The most recent results dict, or None when the query/tool pair has
        no cached entry (or the cache file does not exist yet).
    """
    if os.path.exists(RESULTS_LOG_FILE):
        with open(RESULTS_LOG_FILE, "r") as f:
            results_log = json.load(f)
    else:
        results_log = {}
    runs = results_log.get(query, {}).get(tool_name)
    # Guard against an empty timestamp dict as well as a missing key; the
    # original sorted(keys)[-1] would raise IndexError on an empty dict.
    if not runs:
        return None
    return runs[max(runs)]
def save_results_to_json(query, results, tool_name):
    """Record *results* under (query, tool_name, current timestamp) in the cache.

    Loads RESULTS_LOG_FILE if present (otherwise starts from an empty dict),
    nests the new results under query -> tool_name -> timestamp, and rewrites
    the whole file with indent=4.
    """
    if os.path.exists(RESULTS_LOG_FILE):
        with open(RESULTS_LOG_FILE, "r") as f:
            results_log = json.load(f)
    else:
        results_log = {}
    # Take the timestamp once; the original called datetime.now() in each of
    # three branches, all of which store the same nested structure anyway.
    timestamp = str(datetime.now())
    results_log.setdefault(query, {}).setdefault(tool_name, {})[timestamp] = results
    with open(RESULTS_LOG_FILE, "w") as f:
        json.dump(results_log, f, indent=4)
def get_ai_snippets_for_query(query):
    """Fetch web-search results for *query* from the You.com search API.

    Caches the raw JSON response via save_results_to_json under the tool
    name "You.com Web Search API" and returns it.

    Raises:
        requests.HTTPError: on a non-2xx API response.
    """
    headers = {"X-API-Key": YOU_API_KEY}
    # BUG FIX: the query was previously interpolated un-escaped into the URL
    # *and* passed via params (sent twice, broken for queries containing
    # '&', '#', etc.). Let requests URL-encode it exactly once.
    response = requests.get(
        "https://api.ydc-index.io/search",
        params={"query": query},
        headers=headers,
        timeout=30,  # avoid hanging forever on a stalled connection
    )
    response.raise_for_status()
    results = response.json()
    save_results_to_json(query, results, "You.com Web Search API")
    return results
# --- Script entry point: TIYSE ("try it yourself") retrieval demo ----------
# query = input("Enter a query: ")
query = "What is answer.ai (from Howard & Ries)?"

# Serve from the on-disk cache when possible to avoid repeat API calls.
past_results = get_results_from_json(query, "You.com Web Search API")
if past_results:
    print(f"Results found in {RESULTS_LOG_FILE}")
    results = past_results
else:
    print(f"Results not found in {RESULTS_LOG_FILE}")
    _ = input("Press Enter to continue...")
    results = get_ai_snippets_for_query(query)
_ = input("Press Enter to continue...")

# Build the prompt, then pack in as many result snippets as the token
# budget in add_dicts_to_context allows.
prompt_string = f"Use the results to help write a brief answer to this question: {query}\n Results: "
prompt_string_with_retrieved_context = add_dicts_to_context(prompt_string, results["hits"])

# Ask the model to answer the question grounded in the retrieved snippets.
model = llm.get_model("gpt-3.5-turbo")
response = model.prompt(prompt_string_with_retrieved_context)
print(response.text())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment