Skip to content

Instantly share code, notes, and snippets.

@danielsgriffin
Created December 14, 2023 00:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielsgriffin/07bbce17cc213d1e56e63a58c08febeb to your computer and use it in GitHub Desktop.
Save danielsgriffin/07bbce17cc213d1e56e63a58c08febeb to your computer and use it in GitHub Desktop.
import os
import requests
from datetime import datetime
import json
import llm # from Simon Willison's https://llm.datasette.io/en/stable/python-api.html
import tiktoken # for constraining prompt context length
# You.com Search API key, read from the environment (never hard-coded in source).
YOU_API_KEY = os.environ.get("YOU_API_KEY")
# On-disk JSON cache of past search results:
# {query: {tool_name: {timestamp: results}}}
RESULTS_LOG_FILE = "tiyse_results.json"
def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
    """Return the number of tokens in a text string.

    Adapted from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

    Args:
        string: The text to tokenize.
        encoding_name: tiktoken encoding to count with. Defaults to
            "cl100k_base" (the encoding used by gpt-3.5-turbo), preserving
            the original hard-coded behavior.

    Returns:
        The number of tokens in ``string`` under the chosen encoding.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))
def add_dicts_to_context(prompt, dicts, max_tokens=2000):
    """Append search-result titles and descriptions to *prompt* within a token budget.

    Args:
        prompt: The base prompt string (its tokens count toward the budget).
        dicts: Iterable of result dicts, each with 'title' and 'description' keys.
        max_tokens: Stop appending once the running token count would exceed
            this budget. Defaults to 2000, the previously hard-coded limit.

    Returns:
        The prompt with as many "title: description" entries appended as fit.

    NOTE: only title/description tokens are counted against the budget; the
    '\\n', ': ', and trailing-space separators are not (matches the original
    accounting).
    """
    token_count = num_tokens_from_string(prompt)
    # Build with a list + join instead of repeated string concatenation.
    pieces = [prompt]
    for d in dicts:
        title_tokens = num_tokens_from_string(d['title'])
        snippet_tokens = num_tokens_from_string(d['description'])
        if token_count + title_tokens + snippet_tokens > max_tokens:
            break
        pieces.append('\n' + d['title'] + ': ' + d['description'] + ' ')
        token_count += title_tokens + snippet_tokens
    return ''.join(pieces)
def get_results_from_json(query, tool_name):
    """Return the most recent cached results for (query, tool_name), or None.

    Reads RESULTS_LOG_FILE, whose structure is
    {query: {tool_name: {timestamp: results}}}. Timestamp keys are
    str(datetime.now()) strings, so their lexicographic order matches
    chronological order and the largest key is the newest run.
    """
    # Load the cache file if present; otherwise start from an empty log.
    if os.path.exists(RESULTS_LOG_FILE):
        with open(RESULTS_LOG_FILE, "r") as f:
            results_log = json.load(f)
    else:
        results_log = {}
    # Guard-clause lookup instead of nested if/else.
    runs_by_timestamp = results_log.get(query, {}).get(tool_name)
    if runs_by_timestamp is None:
        return None
    newest_timestamp = sorted(runs_by_timestamp)[-1]
    return runs_by_timestamp[newest_timestamp]
def save_results_to_json(query, results, tool_name):
    """Record *results* in RESULTS_LOG_FILE under query -> tool_name -> timestamp.

    The log file holds {query: {tool_name: {timestamp: results}}}; the current
    str(datetime.now()) is used as the timestamp key, and the whole log is
    rewritten to disk on every call.
    """
    # Load the existing log if present; otherwise start fresh.
    if os.path.exists(RESULTS_LOG_FILE):
        with open(RESULTS_LOG_FILE, "r") as f:
            results_log = json.load(f)
    else:
        results_log = {}
    # setdefault chains replace the original three-way if/elif/else nesting.
    timestamp = str(datetime.now())
    results_log.setdefault(query, {}).setdefault(tool_name, {})[timestamp] = results
    with open(RESULTS_LOG_FILE, "w") as f:
        json.dump(results_log, f, indent=4)
def get_ai_snippets_for_query(query):
    """Fetch web-search results for *query* from the You.com Search API.

    The raw JSON response (expected to contain a "hits" list) is cached via
    save_results_to_json before being returned.

    Args:
        query: The search query string.

    Returns:
        The parsed JSON response from the API.
    """
    headers = {"X-API-Key": YOU_API_KEY}
    params = {"query": query}
    # BUG FIX: the original interpolated the raw query into the URL *and*
    # passed it via params, sending the parameter twice — and the URL copy was
    # unencoded, so queries containing '&', '#', or spaces would be mangled.
    # Use the bare endpoint and let requests build/encode the query string.
    response = requests.get(
        "https://api.ydc-index.io/search",
        params=params,
        headers=headers,
        timeout=30,  # don't hang forever on a network stall
    )
    results = response.json()
    save_results_to_json(query, results, "You.com Web Search API")
    return results
def main():
    """Demo: answer a fixed query with You.com search context plus an LLM.

    Checks the local results cache first; on a miss, fetches fresh results
    from the You.com API (pausing for user confirmation), then builds a
    context-augmented prompt and prints gpt-3.5-turbo's answer.
    """
    # query = input("Enter a query: ")
    query = "What is answer.ai (from Howard & Ries)?"
    past_results = get_results_from_json(query, "You.com Web Search API")
    if past_results:
        print(f"Results found in {RESULTS_LOG_FILE}")
        results = past_results
    else:
        print(f"Results not found in {RESULTS_LOG_FILE}")
        _ = input("Press Enter to continue...")
        results = get_ai_snippets_for_query(query)
    _ = input("Press Enter to continue...")
    prompt_string = f"Use the results to help write a brief answer to this question: {query}\n Results: "
    prompt_string_with_retrieved_context = add_dicts_to_context(prompt_string, results["hits"])
    model = llm.get_model("gpt-3.5-turbo")
    response = model.prompt(prompt_string_with_retrieved_context)
    print(response.text())


# FIX: the original ran all of this at module top level, so importing the file
# triggered network calls and blocking input() prompts. Guard the entry point.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment