Skip to content

Instantly share code, notes, and snippets.

@danielsgriffin
Created December 14, 2023 00:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielsgriffin/07bbce17cc213d1e56e63a58c08febeb to your computer and use it in GitHub Desktop.
Save danielsgriffin/07bbce17cc213d1e56e63a58c08febeb to your computer and use it in GitHub Desktop.
import os
import requests
from datetime import datetime
import json
import llm # from Simon Willison's https://llm.datasette.io/en/stable/python-api.html
import tiktoken # for constraining prompt context length
# You.com Search API key, read from the environment (never hard-coded in source).
YOU_API_KEY = os.environ.get("YOU_API_KEY")
# On-disk JSON cache of past search results:
# {query: {tool_name: {timestamp: results}}}
RESULTS_LOG_FILE = "tiyse_results.json"
def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
    """Return the number of tokens in a text string.

    Adapted from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

    Args:
        string: The text to tokenize.
        encoding_name: tiktoken encoding to count with. Defaults to
            "cl100k_base" (the encoding used by gpt-3.5-turbo), preserving
            the original hard-coded behavior.

    Returns:
        The number of tokens in ``string`` under the chosen encoding.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))
def add_dicts_to_context(prompt, dicts, max_tokens=2000):
    """Append search-result titles and descriptions to *prompt* within a token budget.

    Args:
        prompt: The base prompt string (its tokens count toward the budget).
        dicts: Iterable of result dicts, each with 'title' and 'description' keys.
        max_tokens: Stop appending once the running token count would exceed
            this budget. Defaults to 2000, the previously hard-coded limit.

    Returns:
        The prompt with as many "title: description" entries appended as fit.

    NOTE: only title/description tokens are counted against the budget; the
    '\\n', ': ', and trailing-space separators are not (matches the original
    accounting).
    """
    token_count = num_tokens_from_string(prompt)
    # Build with a list + join instead of repeated string concatenation.
    pieces = [prompt]
    for d in dicts:
        title_tokens = num_tokens_from_string(d['title'])
        snippet_tokens = num_tokens_from_string(d['description'])
        if token_count + title_tokens + snippet_tokens > max_tokens:
            break
        pieces.append('\n' + d['title'] + ': ' + d['description'] + ' ')
        token_count += title_tokens + snippet_tokens
    return ''.join(pieces)
def get_results_from_json(query, tool_name):
    """Return the most recent cached results for (query, tool_name), or None.

    Reads RESULTS_LOG_FILE, whose structure is
    {query: {tool_name: {timestamp: results}}}. Timestamp keys are
    str(datetime.now()) strings, so their lexicographic order matches
    chronological order and the largest key is the newest run.
    """
    # Load the cache file if present; otherwise start from an empty log.
    if os.path.exists(RESULTS_LOG_FILE):
        with open(RESULTS_LOG_FILE, "r") as f:
            results_log = json.load(f)
    else:
        results_log = {}
    # Guard-clause lookup instead of nested if/else.
    runs_by_timestamp = results_log.get(query, {}).get(tool_name)
    if runs_by_timestamp is None:
        return None
    newest_timestamp = sorted(runs_by_timestamp)[-1]
    return runs_by_timestamp[newest_timestamp]
def save_results_to_json(query, results, tool_name):
    """Record *results* in RESULTS_LOG_FILE under query -> tool_name -> timestamp.

    The log file holds {query: {tool_name: {timestamp: results}}}; the current
    str(datetime.now()) is used as the timestamp key, and the whole log is
    rewritten to disk on every call.
    """
    # Load the existing log if present; otherwise start fresh.
    if os.path.exists(RESULTS_LOG_FILE):
        with open(RESULTS_LOG_FILE, "r") as f:
            results_log = json.load(f)
    else:
        results_log = {}
    # setdefault chains replace the original three-way if/elif/else nesting.
    timestamp = str(datetime.now())
    results_log.setdefault(query, {}).setdefault(tool_name, {})[timestamp] = results
    with open(RESULTS_LOG_FILE, "w") as f:
        json.dump(results_log, f, indent=4)
def get_ai_snippets_for_query(query):
    """Fetch web-search results for *query* from the You.com Search API.

    The raw JSON response (expected to contain a "hits" list) is cached via
    save_results_to_json before being returned.

    Args:
        query: The search query string.

    Returns:
        The parsed JSON response from the API.
    """
    headers = {"X-API-Key": YOU_API_KEY}
    params = {"query": query}
    # BUG FIX: the original interpolated the raw query into the URL *and*
    # passed it via params, sending the parameter twice — and the URL copy was
    # unencoded, so queries containing '&', '#', or spaces would be mangled.
    # Use the bare endpoint and let requests build/encode the query string.
    response = requests.get(
        "https://api.ydc-index.io/search",
        params=params,
        headers=headers,
        timeout=30,  # don't hang forever on a network stall
    )
    results = response.json()
    save_results_to_json(query, results, "You.com Web Search API")
    return results
def main():
    """Demo: answer a fixed query with You.com search context plus an LLM.

    Checks the local results cache first; on a miss, fetches fresh results
    from the You.com API (pausing for user confirmation), then builds a
    context-augmented prompt and prints gpt-3.5-turbo's answer.
    """
    # query = input("Enter a query: ")
    query = "What is answer.ai (from Howard & Ries)?"
    past_results = get_results_from_json(query, "You.com Web Search API")
    if past_results:
        print(f"Results found in {RESULTS_LOG_FILE}")
        results = past_results
    else:
        print(f"Results not found in {RESULTS_LOG_FILE}")
        _ = input("Press Enter to continue...")
        results = get_ai_snippets_for_query(query)
    _ = input("Press Enter to continue...")
    prompt_string = f"Use the results to help write a brief answer to this question: {query}\n Results: "
    prompt_string_with_retrieved_context = add_dicts_to_context(prompt_string, results["hits"])
    model = llm.get_model("gpt-3.5-turbo")
    response = model.prompt(prompt_string_with_retrieved_context)
    print(response.text())


# FIX: the original ran all of this at module top level, so importing the file
# triggered network calls and blocking input() prompts. Guard the entry point.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment