nheingit/main.py

## main.py
import os
import logging
from telegram import Update
from telegram.ext import ApplicationBuilder, ContextTypes, CommandHandler
from openai import OpenAI
import pandas as pd
import numpy as np

from questions import answer_question

# Module-level configuration. Both values are read straight from the
# environment, so a missing variable raises KeyError at import time.
openai = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
tg_bot_token = os.environ['TG_BOT_TOKEN']

# Load the precomputed embeddings table (first CSV column is the index).
df = pd.read_csv('processed/embeddings.csv', index_col=0)
# Each 'embeddings' cell is evaluated back into a Python object and then
# wrapped in a NumPy array.
# SECURITY(review): eval() executes whatever expression the CSV contains; this
# is only acceptable while the file is fully trusted. Consider
# ast.literal_eval if the cells are plain list literals.
df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)

# System prompt that seeds the conversation history used by the chat handler.
messages = [{
    "role": "system",
    "content": "You are a helpful assistant that answers questions."
}]

# Emit INFO-and-above records, prefixed with timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)


async def chat(update: Update, context: ContextTypes.DEFAULT_TYPE):
  """Handle /chat: send the user's message to the chat model and reply.

  Conversation history is kept per Telegram chat in ``context.chat_data``
  (seeded with a copy of the module-level ``messages`` system prompt), so
  separate chats neither share nor leak each other's conversations.

  Args:
    update: Incoming Telegram update carrying the command message.
    context: Handler context; provides ``bot`` and the per-chat ``chat_data``.
  """
  import asyncio
  import functools

  # CommandHandler also fires for edited messages, for which
  # ``update.message`` is None; ``effective_message`` covers both cases.
  incoming = update.effective_message
  if incoming is None:
    return

  history = context.chat_data.setdefault("messages", list(messages))
  history.append({"role": "user", "content": incoming.text})

  # The OpenAI client used here is synchronous. Run it in a worker thread so
  # the event loop stays responsive while the request is in flight.
  request = functools.partial(openai.chat.completions.create,
                              model="gpt-3.5-turbo",
                              messages=history)
  completion = await asyncio.get_running_loop().run_in_executor(None, request)
  completion_answer = completion.choices[0].message
  history.append(completion_answer)

  await context.bot.send_message(chat_id=update.effective_chat.id,
                                 text=completion_answer.content)

async def mozilla(update: Update, context: ContextTypes.DEFAULT_TYPE):
  """Handle /mozilla: answer the message from the embeddings dataframe.

  Args:
    update: Incoming Telegram update carrying the command message.
    context: Handler context; provides ``bot`` for sending the reply.
  """
  import asyncio
  import functools

  # CommandHandler also fires for edited messages, for which
  # ``update.message`` is None; ``effective_message`` covers both cases.
  incoming = update.effective_message
  if incoming is None:
    return

  # answer_question performs blocking network calls; run it in a worker
  # thread so the event loop is not stalled while it waits.
  lookup = functools.partial(answer_question,
                             df,
                             question=incoming.text,
                             debug=True)
  answer = await asyncio.get_running_loop().run_in_executor(None, lookup)

  # answer_question returns "" when the completion request fails, and
  # Telegram rejects messages with empty text, so send a fallback instead.
  if not answer:
    answer = "Sorry, I couldn't come up with an answer."
  await context.bot.send_message(chat_id=update.effective_chat.id, text=answer)


async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
  """Handle /start by sending a fixed greeting to the originating chat."""
  greeting = "I'm a bot, please talk to me!"
  target_chat = update.effective_chat.id
  await context.bot.send_message(chat_id=target_chat, text=greeting)


if __name__ == '__main__':
  # Build the bot application from the token read at import time.
  application = ApplicationBuilder().token(tg_bot_token).build()

  # One CommandHandler per slash command, registered as mozilla, start, chat.
  application.add_handler(CommandHandler('mozilla', mozilla))
  application.add_handler(CommandHandler('start', start))
  application.add_handler(CommandHandler('chat', chat))

  # Start long-polling Telegram for updates.
  application.run_polling()

## questions.py
import numpy as np
import pandas as pd
from openai import OpenAI
import os
from typing import List
from scipy import spatial


def distances_from_embeddings(
    query_embedding: List[float],
    embeddings: List[List[float]],
    distance_metric="cosine",
) -> List[float]:
  """Return the distances between a query embedding and a list of embeddings.

  Args:
    query_embedding: Vector to compare against every entry of ``embeddings``.
    embeddings: Iterable of vectors with the same length as the query.
    distance_metric: One of "cosine", "L1", "L2" or "Linf".

  Returns:
    One distance per entry of ``embeddings``, in input order.

  Raises:
    KeyError: If ``distance_metric`` is not a supported name. This is raised
      even when ``embeddings`` is empty, so a misspelled metric never passes
      silently.
  """
  distance_metrics = {
      "cosine": spatial.distance.cosine,
      "L1": spatial.distance.cityblock,
      "L2": spatial.distance.euclidean,
      "Linf": spatial.distance.chebyshev,
  }
  # Resolve the metric once, up front, instead of once per embedding.
  try:
    metric_fn = distance_metrics[distance_metric]
  except KeyError:
    raise KeyError(
        f"Unknown distance_metric {distance_metric!r}; "
        f"expected one of {sorted(distance_metrics)}") from None
  return [metric_fn(query_embedding, embedding) for embedding in embeddings]


# Shared OpenAI client for this module. The key is read from the environment,
# so a missing variable raises KeyError at import time.
openai = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# Load the precomputed embeddings table when the module is imported.
# NOTE(review): the functions visible in this module receive their DataFrame
# as a parameter, so this module-level copy may be unused -- confirm before
# removing it.
df = pd.read_csv('processed/embeddings.csv', index_col=0)
# SECURITY(review): eval() executes whatever expression the CSV contains; this
# is only acceptable while the file is fully trusted. Consider
# ast.literal_eval if the cells are plain list literals.
df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)


def create_context(question, df, max_len=1800):
  """
    Create a context for a question from the most similar rows of the dataframe.

    The question is embedded with 'text-embedding-ada-002', every row of
    ``df`` is ranked by cosine distance to that embedding, and row texts are
    collected closest-first until the next row would push the running token
    count past ``max_len``.

    Args:
      question: Text to embed and match against the dataframe.
      df: DataFrame with 'embeddings', 'n_tokens' and 'text' columns. It is
        left unmodified; ranking happens on a copy.
      max_len: Token budget. Each selected row costs its 'n_tokens' plus a
        fixed overhead of 4.

    Returns:
      The selected texts joined by a "###" separator line, or "" when not
      even the closest row fits in the budget.
    """
  # Get the embeddings for the question
  q_embeddings = openai.embeddings.create(
      input=question, model='text-embedding-ada-002').data[0].embedding

  # Rank on a copy: assigning the column on ``df`` itself would mutate the
  # caller's (typically shared, module-level) frame on every call.
  ranked = df.assign(distances=distances_from_embeddings(
      q_embeddings, df['embeddings'].values,
      distance_metric='cosine')).sort_values('distances', ascending=True)

  returns = []
  cur_len = 0

  # Walk rows closest-first; only the two needed columns are iterated, which
  # avoids building a Series per row.
  for n_tokens, text in zip(ranked['n_tokens'], ranked['text']):
    cur_len += n_tokens + 4

    # Stop at the first row that does not fit in the budget
    if cur_len > max_len:
      break

    returns.append(text)

  return "\n\n###\n\n".join(returns)


def answer_question(df,
                    model="gpt-3.5-turbo-1106",
                    question="What is the meaning of life?",
                    max_len=1800,
                    debug=False,
                    max_tokens=150,
                    stop_sequence=None):
  """
    Answer a question based on the most similar context from the dataframe texts.

    Args:
      df: DataFrame passed through to create_context().
      model: Chat model used for the completion request.
      question: The question to answer.
      max_len: Token budget for the retrieved context.
      debug: When true, print the retrieved context to stdout.
      max_tokens: Maximum number of tokens in the generated answer.
      stop_sequence: Optional stop sequence(s) forwarded to the API.

    Returns:
      The model's answer text, or "" if the completion request fails.
      Errors raised while building the context are not caught here.
    """
  import logging

  context = create_context(
      question,
      df,
      max_len=max_len,
  )
  # If debug, print the retrieved context that will be sent to the model
  if debug:
    print("Context:\n" + context)
    print("\n\n")

  prompt = (
      "Answer the question based on the context below, and if the question "
      "can't be answered based on the context, say \"I don't know.\" Try to "
      "cite sources to the links in the context when possible.\n\n"
      f"Context: {context}\n\n---\n\nQuestion: {question}\nAnswer:")

  try:
    # Create a completion using the question and context
    response = openai.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        temperature=0,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=stop_sequence,
    )
    return response.choices[0].message.content
  except Exception:
    # Deliberate best-effort: callers get "" instead of an exception, but the
    # failure is logged with its type and traceback rather than only str(e).
    logging.getLogger(__name__).exception("Chat completion request failed")
    return ""
	import os
	import logging
	from telegram import Update
	from telegram.ext import ApplicationBuilder, ContextTypes, CommandHandler
	from openai import OpenAI
	import pandas as pd
	import numpy as np

	from questions import answer_question

	# Module-level configuration. Both values are read straight from the
	# environment, so a missing variable raises KeyError at import time.
	openai = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
	tg_bot_token = os.environ['TG_BOT_TOKEN']

	# Load the precomputed embeddings table (first CSV column is the index).
	df = pd.read_csv('processed/embeddings.csv', index_col=0)
	# SECURITY(review): eval() executes whatever expression the CSV contains;
	# only acceptable while the file is fully trusted.
	df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)

	# System prompt that seeds the conversation history used by the chat handler.
	messages = [{
	"role": "system",
	"content": "You are a helpful assistant that answers questions."
	}]

	# Emit INFO-and-above records, prefixed with timestamp, logger name and level.
	logging.basicConfig(
	format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	level=logging.INFO)


	async def chat(update: Update, context: ContextTypes.DEFAULT_TYPE):
	  """Handle /chat: send the user's message to the chat model and reply.

	  Conversation history is kept per Telegram chat in ``context.chat_data``
	  (seeded with a copy of the ``messages`` system prompt), so separate chats
	  neither share nor leak each other's conversations.
	  """
	  import asyncio
	  import functools

	  # CommandHandler also fires for edited messages, for which
	  # ``update.message`` is None; ``effective_message`` covers both cases.
	  incoming = update.effective_message
	  if incoming is None:
	    return

	  history = context.chat_data.setdefault("messages", list(messages))
	  history.append({"role": "user", "content": incoming.text})

	  # The OpenAI client used here is synchronous. Run it in a worker thread
	  # so the event loop stays responsive while the request is in flight.
	  request = functools.partial(openai.chat.completions.create,
	                              model="gpt-3.5-turbo",
	                              messages=history)
	  completion = await asyncio.get_running_loop().run_in_executor(None, request)
	  completion_answer = completion.choices[0].message
	  history.append(completion_answer)

	  await context.bot.send_message(chat_id=update.effective_chat.id,
	                                 text=completion_answer.content)

	async def mozilla(update: Update, context: ContextTypes.DEFAULT_TYPE):
	  """Handle /mozilla: answer the message from the embeddings dataframe."""
	  import asyncio
	  import functools

	  # CommandHandler also fires for edited messages, for which
	  # ``update.message`` is None; ``effective_message`` covers both cases.
	  incoming = update.effective_message
	  if incoming is None:
	    return

	  # answer_question performs blocking network calls; run it in a worker
	  # thread so the event loop is not stalled while it waits.
	  lookup = functools.partial(answer_question,
	                             df,
	                             question=incoming.text,
	                             debug=True)
	  answer = await asyncio.get_running_loop().run_in_executor(None, lookup)

	  # Telegram rejects messages with empty text, so send a fallback instead.
	  if not answer:
	    answer = "Sorry, I couldn't come up with an answer."
	  await context.bot.send_message(chat_id=update.effective_chat.id, text=answer)


	async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
	  """Handle /start by sending a fixed greeting to the originating chat."""
	  await context.bot.send_message(chat_id=update.effective_chat.id,
	                                 text="I'm a bot, please talk to me!")


	if __name__ == '__main__':
	  # Build the bot application from the token read at import time.
	  application = ApplicationBuilder().token(tg_bot_token).build()

	  start_handler = CommandHandler('start', start)
	  chat_handler = CommandHandler('chat', chat)

	  mozilla_handler = CommandHandler('mozilla', mozilla)
	  application.add_handler(mozilla_handler)

	  application.add_handler(start_handler)
	  application.add_handler(chat_handler)

	  # Start long-polling Telegram for updates.
	  application.run_polling()