Skip to content

Instantly share code, notes, and snippets.

@lemire
Created December 6, 2023 03:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lemire/859fdfa34cde48dd359f4618e607e0f0 to your computer and use it in GitHub Desktop.
Save lemire/859fdfa34cde48dd359f4618e607e0f0 to your computer and use it in GitHub Desktop.
#################
# This starts a web server listening on port 8001, with debugging turned on.
# This should not be used to run the chatbot on a public website: it is meant
# for testing purposes only.
#################
from flask import Flask, request, jsonify
from flask import Flask, render_template, request, url_for
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from openai.embeddings_utils import get_embedding, cosine_similarity
import os
import pandas
import openai
import markdown
import re
# Load the OpenAI API key from a local file and expose it both via the
# environment (for libraries that read OPENAI_API_KEY) and the openai client.
with open("secret.txt", "r") as file:
    secret = file.read().strip()
os.environ["OPENAI_API_KEY"] = secret
openai.api_key = secret

# NOTE(review): these lists are never referenced elsewhere in this file —
# presumably intended for per-IP rate limiting; confirm whether they are needed.
ips = []
ips_times = []
ips_ref = []
ips_times_ref = []

from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

# Single shared chat model; temperature 0 keeps answers as deterministic
# as the API allows.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for *text* using the given OpenAI model.

    Newlines are flattened to spaces before embedding (recommended for the
    ada-002 family). Shadows the helper imported from openai.embeddings_utils.
    """
    flattened = text.replace("\n", " ")
    response = openai.Embedding.create(input=[flattened], model=model)
    return response["data"][0]["embedding"]
def logic(question):
    """Answer *question* from the most relevant locally stored text chunks.

    Loads every ``embs*.csv`` file in the working directory, parses the stored
    embedding strings back into float lists, ranks rows by cosine similarity
    against the question's embedding, and asks the LLM to answer using the two
    most relevant chunks as context.

    NOTE(review): the CSVs are re-read and re-parsed on every call — acceptable
    for this test-only server, but worth caching before heavier use.
    """
    # pandas.read_csv decodes as UTF-8 by default.
    all_files = [f for f in os.listdir('.') if re.match(r'embs.*\.csv', f)]
    df = pandas.concat((pandas.read_csv(f) for f in all_files), ignore_index=True)

    # Embeddings were serialized as "[0.1, 0.2, ...]" strings; turn each back
    # into a list of floats (strip brackets once, then split on commas).
    def _parse_embedding(raw):
        return [float(tok) for tok in raw.strip().strip("[]").split(",")]

    df["embedding"] = [_parse_embedding(raw) for raw in df.embedding]

    # Embed the question so it can be compared against every chunk.
    product_embedding = get_embedding(question)

    # Score each chunk by cosine similarity to the question and rank.
    df["similarity"] = df.embedding.apply(
        lambda emb: cosine_similarity(emb, product_embedding)
    )
    ranked = df.sort_values("similarity", ascending=False)

    # Concatenate the two most relevant chunks into one context document.
    # The [2:-2] slice strips the stored quoting around each chunk.
    context = ranked["combined"].iloc[0][2:-2] + "\n" + ranked["combined"].iloc[1][2:-2]
    docs = [Document(page_content=context)]

    # "stuff" chain: the question plus the raw context go out in one prompt.
    prompt_template = question + """
{text}
"""
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
    chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT)
    return chain.run(docs)
# WSGI application object; the chatbot's routes are registered on it.
app = Flask(__name__)
@app.route("/")
def home():
    """Serve the chatbot's HTML page."""
    page = render_template("bot.html")
    return page
# Phrases that signal the model could not answer from the retrieved context.
_FALLBACK_MARKERS = (
    "sorry", "provide more", "not found", "does not mention",
    "does not reference", "no information", "not enough information",
    "unable to provide", "the guidelines do not",
)

@app.route('/chat', methods=['POST'])  # Listens to incoming requests
def chat():
    """Answer one chat request from the UI.

    Expects JSON ``{"message": ..., "pm": ...}`` where ``pm`` is the previous
    question asked by the user (used to retry apparent follow-up questions).
    Returns JSON ``{"message": <html answer>}``.
    """
    user_message = request.json['message']  # The current question
    pm = request.json['pm']                 # The user's previous question
    try:
        response = logic(user_message)
    except Exception:  # was a bare except: don't trap SystemExit/KeyboardInterrupt
        return jsonify({'message': 'Désolé, je n\'ai pas compris votre question. Veuillez reformuler votre question.'})
    # If the model punted, retry with the previous question prepended in case
    # this was a follow-up question. Lowercase once instead of per-phrase.
    lowered = response.lower()
    if any(marker in lowered for marker in _FALLBACK_MARKERS):
        response = logic(str(pm + ' ' + user_message))
    # Strip raw angle brackets so the answer cannot inject HTML tags, then
    # render the markdown answer to HTML for the page.
    response = response.replace("<", "").replace(">", "")
    response = markdown.markdown(response, extensions=['fenced_code'])
    return jsonify({'message': response})
if __name__ == "__main__":
    # Development server only: debug=True enables the Werkzeug debugger,
    # which allows arbitrary code execution — never expose this publicly.
    app.run(host="localhost", port=8001, debug=True)  # Runs the ChatBot on port:8001 (you can use a different one)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment