# chisler/openai_api.py

## openai_api.py
import logging
import random
from time import sleep
from typing import List

import openai
import tiktoken
from django.conf import settings

from characters.character_generator import CharacterGenerator, Message
from characters.constants import PROMPTING_VERSION_1, CURRENT_PROMPTING_VERSION, PROMPTING_VERSION_2, \
    PROMPTING_VERSION_3
from characters.world import GameWorld
from game.models import BadSanitizedCharacterReplyMessage

# from characters.persuasion_estimator import PersuasionEstimator

# Configure the OpenAI client with the key taken from Django settings.
openai.api_key = settings.OPEN_AI_KEY

# Chat model requested from the API; also selects the tokenizer below.
MODEL = "gpt-3.5-turbo"
# tiktoken encoding for MODEL, used by _count_tokens.
ENCODING = tiktoken.encoding_for_model(MODEL)
# Token budget enforced by OpenAICharacterAPI.trim_messages_to_token_limit.
TOKEN_LIMIT = 4000
# Role value expected on the first (system prompt) message.
SYSTEM = 'system'


def _count_tokens(text: str) -> int:
    """Return the number of tokens ``text`` encodes to under ``ENCODING``."""
    token_ids = ENCODING.encode(text)
    return len(token_ids)


class OpenAICharacterAPI(CharacterGenerator):
    # Terms that reply() screens generated answers for; a candidate containing
    # one of them is recorded as a BadSanitizedCharacterReplyMessage and rejected.
    # Literals repeated below (e.g. "model", "GPU") collapse because this is a set.
    PROHIBITED_WORDS = {
        "game", "AI", "artificial intelligence", "assistant", "algorithm", "bot", "ChatGPT", "computational",
        "computer", "code", "data", "deep learning", "GPT", "GPT-3", "GPT-4", "GPU",
        "inference", "intelligence", "kernel", "language model", "learning", "logistics", "machine",
        "model", "neural", "network", "OpenAI", "prediction", "programming", "robot", "system",
        "training", "model", "binary", "NLP", "natural language processing", "dialogue system",
        "chatbot", "software", "turing", "virtual assistant", "knowledge base", "backend",
        "frontend", "API", "application", "interface", "database", "information retrieval",
        "knowledge graph", "ML", "machine learning", "DL", "chatbot", "digital", "algorithmic",
        "computation", "automation", "automated", "coding", "dataset", "dialog system",
        "intelligent", "RNN", "recurrent neural network", "LSTM", "long short term memory",
        "CNN", "convolutional neural network", "NN", "reinforcement learning", "RL", "tech",
        "technology", "voice assistant", "generative", "pre-training", "fine-tuning", "transformer",
        "BERT", "ELMo", "T5", "RoBERTa", "electronic", "digital", "quantum", "optimization",
        "vector", "tensor", "GPU", "CPU", "TPU", "cloud computing", "framework", "API key",
        "big data", "feature extraction", "information extraction", "semantic", "syntactic",
        "speech recognition", "TTS", "text-to-speech", "STT", "speech-to-text", "token",
        "sequence", "end-to-end", "annotation", "annotator", "backpropagation", "gradient descent", "assist",
        "information", "provide", "context", "ask", "God:", "generate", "backstory", "child,"
    }
    # Canned in-character answers; reply() returns one chosen with random.choice
    # when no acceptable reply was produced within its retry limit.
    FALLBACK_ANSWERS = [
        "I'm afraid I can't answer that, my Lord.",
        "That's beyond my understanding, God.",
        "I'm at a loss, Heavenly Father.",
        "Your ways are mysterious indeed, my God.",
        "I don't know, Father.",
        "I am unsure, my Lord.",
        "God, I must admit I don't understand.",
        "I'm sorry, I can't comprehend that, Lord.",
        "Lord, I don't have the answer.",
        "Father, I don't know how to respond.",
        "You know better than I do, Lord.",
        "I'm not certain, my God.",
        "Your knowledge surpasses mine, Father.",
        "My understanding falls short, God.",
        "I'm unsure, my Lord.",
        "I can't comprehend Your ways, Lord.",
        "I don't have that knowledge, Father.",
        "I wish I knew, Lord.",
        "I am not certain, Heavenly Father.",
        "God, I don't know the answer.",
        "My understanding is limited, Lord.",
        "Father, I don't know.",
        "I don't have the words, God.",
        "Lord, I don't have the insight.",
        "God, I am unsure.",
        "I wish I knew more, Father.",
        "I am in the dark, Lord.",
        "I don't know, my Lord.",
        "I am at a loss, God.",
        "That is beyond my grasp, Lord.",
        "My wisdom is limited, Father.",
        "I cannot comprehend, my Lord.",
        "Lord, I do not know.",
        "Your understanding is greater, God.",
        "I can't figure that out, Father.",
        "I'm unsure, Lord.",
        "I don't have the knowledge, God.",
        "My mind cannot grasp it, Father.",
        "I cannot answer, my Lord.",
        "That's beyond me, God.",
        "My understanding fails me, Lord.",
        "I don't know how to answer, Father.",
        "God, I don't know.",
        "Father, I can't find the words.",
        "Your wisdom is beyond me, Lord.",
        "I don't know, my God.",
        "My understanding is limited, Father.",
        "God, I can't answer that.",
        "Lord, I don't know."
    ]

    def get_context_prompt(self, version=CURRENT_PROMPTING_VERSION):
        """Build the role-play system prompt for the requested prompting version.

        The text is rendered from ``self.name`` and ``self.goal``.

        Args:
            version: One of the ``PROMPTING_VERSION_*`` constants; defaults to
                ``CURRENT_PROMPTING_VERSION``.

        Returns:
            The prompt text registered for ``version``.

        Raises:
            KeyError: If ``version`` is not a known prompting version.
        """
        opening = "Let's play a role-playing game. If you speak not in character, you lose. "

        # Rule text shared word-for-word by prompting versions 2 and 3.
        shared_rules = (
            f"RULES You are {self.name}, a human talking to God in chat. As a "
            f"character, your goal is {self.goal}. Generate a short prayer-response from "
            f"{self.name}. Always generate in reply TO God. Only pursue your prayer's matter. "
            f"Use the tone, manner and vocabulary {self.name} would use. Do not reveal that "
            f"you're a language model/ChatGPT. Do not offer help/assistance. Only answer like "
            # The space after "15 words" used to be missing, which rendered
            # "...under 15 wordsof what ..." in the prompt sent to the model.
            f"{self.name}. You ONLY know all of the knowledge of {self.name}. Generate a reply under 15 words "
            f"of what {self.name} would say to God."
        )

        version_1 = opening + (
            f"You are {self.name}, a human talking to God in chat. As a character, "
            f"your goal is {self.goal}, you cannot change the goal. Generate a short "
            f"prayer-response from {self.name}. Always generate in reply TO God. "
            f"Only pursue your prayer's matter. Use the tone, manner and vocabulary "
            f"{self.name} would use. Do not reveal that you're a language model/ChatGPT. "
            f"Do not offer help/assistance. Only answer like {self.name}. You ONLY know "
            f"all of the knowledge of {self.name}. Generate a reply of what {self.name} "
            f"would say to God."
        )

        context_prompt_builder = {
            PROMPTING_VERSION_1: version_1,
            PROMPTING_VERSION_2: opening + "Generate a short winning answer. " + shared_rules,
            PROMPTING_VERSION_3: opening + "Generate a winning answer, under 15 words. " + shared_rules,
        }
        return context_prompt_builder[version]

    def __init__(
            self,
            name,
            biography,
            secrets,
            request_name,
            initial_message,
            biblical_expected_end_status,
            style,
            state_specific,
            dependencies,
            final_state=None,
            severity_level=None,
            conditional=None,
            biblical_explanation=None,
            goal=None,
            decision_names=None,
            supervisor_rule=None,
    ):
        """Store the character's state and pre-render its context prompt.

        ``conditional``, ``biblical_explanation``, ``decision_names`` and
        ``supervisor_rule`` are accepted but not stored on the instance.
        """
        # Persona used when rendering prompts.
        self.name = name
        self.goal = goal
        self.biography = biography
        self.secrets = secrets
        self.style = style
        self.state_specific = state_specific

        # Request / scenario metadata.
        self.request_name = request_name
        self.initial_message = initial_message
        self.biblical_expected_end_status = biblical_expected_end_status
        self.dependencies = dependencies
        self.severity_level = severity_level
        # Note the attribute is ``final_stats`` while the parameter is ``final_state``.
        self.final_stats = final_state

        # Rendered last: get_context_prompt() reads self.name and self.goal.
        self.context_prompt = self.get_context_prompt()

    @staticmethod
    def from_name(game, name: str):
        """Create an ``OpenAICharacterAPI`` from the state ``GameWorld`` holds for ``name`` in ``game``.

        The mapping returned by ``GameWorld.get_character_state_for_game`` is
        expanded into the constructor's keyword arguments.
        """
        constructor_kwargs = GameWorld.get_character_state_for_game(game=game, name=name)
        return OpenAICharacterAPI(**constructor_kwargs)

    def trim_messages_to_token_limit(self, messages: List[Message]) -> List[Message]:
        """Return the system prompt plus the newest messages that fit in ``TOKEN_LIMIT``.

        Messages are considered from newest to oldest; the walk stops at the
        first message that would push the running token count over the limit,
        so everything older than it is dropped. Order is preserved.

        Args:
            messages: Chat messages whose first entry must have the ``SYSTEM`` role.

        Returns:
            A new list starting with ``messages[0]``.
        """
        system_message = messages[0]
        assert system_message['role'] == SYSTEM
        history = messages[1:]

        # Tokens still available once the system prompt is accounted for.
        remaining = TOKEN_LIMIT - _count_tokens(system_message['content'])
        first_kept = len(history)
        for position in range(len(history) - 1, -1, -1):
            cost = _count_tokens(history[position]["content"])
            if cost > remaining:
                break
            remaining -= cost
            first_kept = position

        return [system_message, *history[first_kept:]]

    def reply(self, messages, ping=False, support_request=None):
        """Generate the character's next reply through the OpenAI chat API.

        A candidate is rejected, and recorded as a
        ``BadSanitizedCharacterReplyMessage``, when it is more than 75% similar
        to the previous NPC message or contains a prohibited term. At most three
        attempts are made, and a failed API call counts as an attempt.

        Args:
            messages: Conversation history as chat-message dicts, without the
                system prompt.
            ping: When true, the system prompt asks the character to act
                frustrated about waiting.
            support_request: Passed through to the rejection records.

        Returns:
            The accepted reply; a fixed placeholder when
            ``settings.ENVIRONMENT`` is ``"local"``; otherwise a random entry of
            ``FALLBACK_ANSWERS`` once every attempt has failed.
        """
        retry_limit = 3

        # Local runs never hit the API, so answer before doing any prompt work.
        if settings.ENVIRONMENT == "local":
            return "Local test no AI. Local test no AI. Local test no AI. Local test no AI. Local test no AI. "

        history = list(messages)
        # As in the original code, the previous NPC line is taken to be the
        # second-to-last history entry. It is read from the raw history (before
        # the system prompt is prepended) so a short history can no longer raise
        # IndexError or compare against the system prompt.
        last_npc_message = history[-2]["content"] if len(history) >= 2 else None

        prompt_messages = self.trim_messages_to_token_limit([
            {"role": "system", "content": self.generate_context_prompt(ping)},
            *history,
        ])

        # Every pass counts as an attempt. Previously an exception skipped the
        # counter increment, so a persistent API failure looped forever.
        for _attempt in range(retry_limit):
            try:
                response = openai.ChatCompletion.create(
                    model=MODEL,
                    messages=prompt_messages,
                )
                reply = self.clean_the_open_ai_reply(response["choices"][0]["message"]["content"])

                prohibited_matches = self._find_prohibited_matches(reply)
                is_reply_similar_to_previous_one = (
                    bool(last_npc_message)
                    and self.get_reply_similatirity_percentage(last_npc_message, reply) > 75
                )

                if is_reply_similar_to_previous_one:
                    BadSanitizedCharacterReplyMessage.objects.create(
                        support_request=support_request, message=reply,
                        reason=f"Too similar to last NPC Message: {last_npc_message}")
                elif prohibited_matches:
                    BadSanitizedCharacterReplyMessage.objects.create(
                        support_request=support_request, message=reply,
                        reason=f"Found prohibited words: {', '.join(prohibited_matches)}")
                else:
                    return reply
            except Exception as e:
                logging.exception("Unexpected error: %s", str(e))

        # No acceptable reply within the retry limit: answer with a canned line.
        return random.choice(self.FALLBACK_ANSWERS)

    def _find_prohibited_matches(self, reply):
        """Return the prohibited terms, or the character's own name, found in ``reply``."""
        import re  # local import keeps this change self-contained

        lowered_reply = reply.lower()
        matches = []
        for term in [*self.PROHIBITED_WORDS, self.name.lower()]:
            needle = term.lower()
            if term == needle:
                # All-lowercase entries keep the original substring semantics.
                found = needle in lowered_reply
            else:
                # Entries with capitals ("AI", "GPT", ...) could never match the
                # lower-cased reply. Match them case-insensitively as whole
                # tokens so "AI" does not fire on words such as "said".
                found = re.search(rf"(?<!\w){re.escape(needle)}(?!\w)", lowered_reply) is not None
            if found:
                matches.append(term)
        return matches

    def clean_the_open_ai_reply(self, reply):
        """Return ``reply`` with double-quote characters removed.

        Strips the ASCII double quote and the typographic pair (U+201C, U+201D).
        Apostrophes and single quotes are kept.
        """
        # The original chained two identical `.replace('"', '')` calls, so the
        # second was a no-op; presumably it once targeted typographic quotes,
        # which are now removed too.
        quote_chars = '"\u201c\u201d'
        return reply.translate({ord(char): None for char in quote_chars})

    def generate_context_prompt(self, ping=False):
        ping_message = "Act a bit frustrated that god is taking a long time to reply." if ping else ""
        # Concatenate biography, secrets, style and conversation history
        return ping_message + f" {self.context_prompt} \n KNOWLEDGE: Your biography: {self.biography}\n Your secrets: {self.secrets}\n Also: {self.state_specific}\n Your style: {self.style}."

    def get_reply_similatirity_percentage(self, reply1, reply2):
        """Return the word overlap of two replies as a percentage from 0 to 100.

        Each reply is lower-cased and split on whitespace into a set of words;
        the result is ``len(intersection) / len(union) * 100``.

        Args:
            reply1: First text; ``None`` is treated as empty.
            reply2: Second text; ``None`` is treated as empty.

        Returns:
            The overlap percentage, or ``0.0`` when neither text contains a word
            (previously this raised ``ZeroDivisionError``).
        """
        words1 = set((reply1 or "").lower().split())
        words2 = set((reply2 or "").lower().split())

        all_words = words1 | words2
        if not all_words:
            # Nothing to compare, so report "not similar" instead of dividing by zero.
            return 0.0

        return len(words1 & words2) / len(all_words) * 100
	import logging
	import random
	from time import sleep
	from typing import List

	import openai
	import tiktoken
	from django.conf import settings

	from characters.character_generator import CharacterGenerator, Message
	from characters.constants import PROMPTING_VERSION_1, CURRENT_PROMPTING_VERSION, PROMPTING_VERSION_2, \
	PROMPTING_VERSION_3
	from characters.world import GameWorld
	from game.models import BadSanitizedCharacterReplyMessage

	# from characters.persuasion_estimator import PersuasionEstimator

	# NOTE(review): these tab-indented statements repeat the configuration found
	# earlier in this file -- confirm whether this second copy is intentional.
	# Configure the OpenAI client with the key taken from Django settings.
	openai.api_key = settings.OPEN_AI_KEY

	# Chat model requested from the API; also selects the tokenizer below.
	MODEL = "gpt-3.5-turbo"
	# tiktoken encoding for MODEL, used by _count_tokens.
	ENCODING = tiktoken.encoding_for_model(MODEL)
	# Token budget enforced when trimming chat history.
	TOKEN_LIMIT = 4000
	# Role value expected on the first (system prompt) message.
	SYSTEM = 'system'


	def _count_tokens(text: str) -> int:
	    """Return the number of tokens ``text`` encodes to under ``ENCODING``."""
	    # The body had lost its indentation relative to the ``def`` line.
	    return len(ENCODING.encode(text))


	# NOTE(review): this class repeats the OpenAICharacterAPI definition found
	# earlier in this file, and this copy had lost all of its indentation.
	# Confirm which copy is meant to stay.
	class OpenAICharacterAPI(CharacterGenerator):
	    """Character generator that produces in-character replies via the OpenAI chat API."""

	    # Terms that reply() screens generated answers for. Literals repeated
	    # below (e.g. "model", "GPU") collapse because this is a set.
	    PROHIBITED_WORDS = {
	        "game", "AI", "artificial intelligence", "assistant", "algorithm", "bot", "ChatGPT", "computational",
	        "computer", "code", "data", "deep learning", "GPT", "GPT-3", "GPT-4", "GPU",
	        "inference", "intelligence", "kernel", "language model", "learning", "logistics", "machine",
	        "model", "neural", "network", "OpenAI", "prediction", "programming", "robot", "system",
	        "training", "model", "binary", "NLP", "natural language processing", "dialogue system",
	        "chatbot", "software", "turing", "virtual assistant", "knowledge base", "backend",
	        "frontend", "API", "application", "interface", "database", "information retrieval",
	        "knowledge graph", "ML", "machine learning", "DL", "chatbot", "digital", "algorithmic",
	        "computation", "automation", "automated", "coding", "dataset", "dialog system",
	        "intelligent", "RNN", "recurrent neural network", "LSTM", "long short term memory",
	        "CNN", "convolutional neural network", "NN", "reinforcement learning", "RL", "tech",
	        "technology", "voice assistant", "generative", "pre-training", "fine-tuning", "transformer",
	        "BERT", "ELMo", "T5", "RoBERTa", "electronic", "digital", "quantum", "optimization",
	        "vector", "tensor", "GPU", "CPU", "TPU", "cloud computing", "framework", "API key",
	        "big data", "feature extraction", "information extraction", "semantic", "syntactic",
	        "speech recognition", "TTS", "text-to-speech", "STT", "speech-to-text", "token",
	        "sequence", "end-to-end", "annotation", "annotator", "backpropagation", "gradient descent", "assist",
	        "information", "provide", "context", "ask", "God:", "generate", "backstory", "child,"
	    }
	    # Canned in-character answers returned by reply() when every attempt fails.
	    FALLBACK_ANSWERS = [
	        "I'm afraid I can't answer that, my Lord.",
	        "That's beyond my understanding, God.",
	        "I'm at a loss, Heavenly Father.",
	        "Your ways are mysterious indeed, my God.",
	        "I don't know, Father.",
	        "I am unsure, my Lord.",
	        "God, I must admit I don't understand.",
	        "I'm sorry, I can't comprehend that, Lord.",
	        "Lord, I don't have the answer.",
	        "Father, I don't know how to respond.",
	        "You know better than I do, Lord.",
	        "I'm not certain, my God.",
	        "Your knowledge surpasses mine, Father.",
	        "My understanding falls short, God.",
	        "I'm unsure, my Lord.",
	        "I can't comprehend Your ways, Lord.",
	        "I don't have that knowledge, Father.",
	        "I wish I knew, Lord.",
	        "I am not certain, Heavenly Father.",
	        "God, I don't know the answer.",
	        "My understanding is limited, Lord.",
	        "Father, I don't know.",
	        "I don't have the words, God.",
	        "Lord, I don't have the insight.",
	        "God, I am unsure.",
	        "I wish I knew more, Father.",
	        "I am in the dark, Lord.",
	        "I don't know, my Lord.",
	        "I am at a loss, God.",
	        "That is beyond my grasp, Lord.",
	        "My wisdom is limited, Father.",
	        "I cannot comprehend, my Lord.",
	        "Lord, I do not know.",
	        "Your understanding is greater, God.",
	        "I can't figure that out, Father.",
	        "I'm unsure, Lord.",
	        "I don't have the knowledge, God.",
	        "My mind cannot grasp it, Father.",
	        "I cannot answer, my Lord.",
	        "That's beyond me, God.",
	        "My understanding fails me, Lord.",
	        "I don't know how to answer, Father.",
	        "God, I don't know.",
	        "Father, I can't find the words.",
	        "Your wisdom is beyond me, Lord.",
	        "I don't know, my God.",
	        "My understanding is limited, Father.",
	        "God, I can't answer that.",
	        "Lord, I don't know."
	    ]

	    def get_context_prompt(self, version=CURRENT_PROMPTING_VERSION):
	        """Build the role-play system prompt for the requested prompting version.

	        Args:
	            version: One of the ``PROMPTING_VERSION_*`` constants.

	        Returns:
	            The prompt text rendered from ``self.name`` and ``self.goal``.

	        Raises:
	            KeyError: If ``version`` is not a known prompting version.
	        """
	        opening = "Let's play a role-playing game. If you speak not in character, you lose. "

	        # Rule text shared word-for-word by prompting versions 2 and 3.
	        shared_rules = (
	            f"RULES You are {self.name}, a human talking to God in chat. As a "
	            f"character, your goal is {self.goal}. Generate a short prayer-response from "
	            f"{self.name}. Always generate in reply TO God. Only pursue your prayer's matter. "
	            f"Use the tone, manner and vocabulary {self.name} would use. Do not reveal that "
	            f"you're a language model/ChatGPT. Do not offer help/assistance. Only answer like "
	            # The space after "15 words" used to be missing ("15 wordsof what").
	            f"{self.name}. You ONLY know all of the knowledge of {self.name}. Generate a reply under 15 words "
	            f"of what {self.name} would say to God."
	        )

	        version_1 = opening + (
	            f"You are {self.name}, a human talking to God in chat. As a character, "
	            f"your goal is {self.goal}, you cannot change the goal. Generate a short "
	            f"prayer-response from {self.name}. Always generate in reply TO God. "
	            f"Only pursue your prayer's matter. Use the tone, manner and vocabulary "
	            f"{self.name} would use. Do not reveal that you're a language model/ChatGPT. "
	            f"Do not offer help/assistance. Only answer like {self.name}. You ONLY know "
	            f"all of the knowledge of {self.name}. Generate a reply of what {self.name} "
	            f"would say to God."
	        )

	        context_prompt_builder = {
	            PROMPTING_VERSION_1: version_1,
	            PROMPTING_VERSION_2: opening + "Generate a short winning answer. " + shared_rules,
	            PROMPTING_VERSION_3: opening + "Generate a winning answer, under 15 words. " + shared_rules,
	        }
	        return context_prompt_builder[version]

	    def __init__(self, name, biography, secrets, request_name, initial_message, biblical_expected_end_status, style,
	                 state_specific,
	                 dependencies, final_state=None, severity_level=None,
	                 conditional=None, biblical_explanation=None, goal=None, decision_names=None, supervisor_rule=None,
	                 ):
	        """Store the character's state and pre-render its context prompt.

	        ``conditional``, ``biblical_explanation``, ``decision_names`` and
	        ``supervisor_rule`` are accepted but not stored on the instance.
	        """
	        self.name = name
	        self.biography = biography
	        self.secrets = secrets
	        self.style = style
	        self.request_name = request_name
	        self.biblical_expected_end_status = biblical_expected_end_status
	        self.initial_message = initial_message
	        self.dependencies = dependencies
	        self.state_specific = state_specific
	        self.severity_level = severity_level
	        # Note the attribute is ``final_stats`` while the parameter is ``final_state``.
	        self.final_stats = final_state
	        self.goal = goal
	        # Rendered last: get_context_prompt() reads self.name and self.goal.
	        self.context_prompt = self.get_context_prompt()

	    @staticmethod
	    def from_name(game, name: str):
	        """Create an instance from the state ``GameWorld`` holds for ``name`` in ``game``."""
	        character_state = GameWorld.get_character_state_for_game(name=name, game=game)
	        return OpenAICharacterAPI(**character_state)

	    def trim_messages_to_token_limit(self, messages: List[Message]) -> List[Message]:
	        """Return the system prompt plus the newest messages that fit in ``TOKEN_LIMIT``.

	        Messages are considered newest first; the walk stops at the first
	        message that would exceed the limit. Order is preserved.
	        """
	        assert messages[0]['role'] == SYSTEM
	        token_count = _count_tokens(messages[0]['content'])
	        result = []
	        for message in reversed(messages[1:]):
	            message_length = _count_tokens(message["content"])
	            if token_count + message_length > TOKEN_LIMIT:
	                break

	            token_count += message_length
	            result.append(message)

	        return [messages[0]] + result[::-1]

	    def reply(self, messages, ping=False, support_request=None):
	        """Generate the character's next reply through the OpenAI chat API.

	        A candidate is rejected, and recorded as a
	        ``BadSanitizedCharacterReplyMessage``, when it is more than 75% similar
	        to the previous NPC message or contains a prohibited term. At most
	        three attempts are made, and a failed API call counts as an attempt.

	        Args:
	            messages: Conversation history as chat-message dicts, without the
	                system prompt.
	            ping: When true, the system prompt asks the character to act
	                frustrated about waiting.
	            support_request: Passed through to the rejection records.

	        Returns:
	            The accepted reply; a fixed placeholder when
	            ``settings.ENVIRONMENT`` is ``"local"``; otherwise a random entry
	            of ``FALLBACK_ANSWERS`` once every attempt has failed.
	        """
	        retry_limit = 3

	        # Local runs never hit the API, so answer before doing any prompt work.
	        if settings.ENVIRONMENT == "local":
	            return "Local test no AI. Local test no AI. Local test no AI. Local test no AI. Local test no AI. "

	        history = list(messages)
	        # Read the previous NPC line from the raw history (second-to-last
	        # entry, as before) so a short history can no longer raise IndexError
	        # or compare against the system prompt.
	        last_npc_message = history[-2]["content"] if len(history) >= 2 else None

	        prompt_messages = self.trim_messages_to_token_limit([
	            {"role": "system", "content": self.generate_context_prompt(ping)},
	            *history,
	        ])

	        # Every pass counts as an attempt. Previously an exception skipped the
	        # counter increment, so a persistent API failure looped forever.
	        for _attempt in range(retry_limit):
	            try:
	                response = openai.ChatCompletion.create(
	                    model=MODEL,
	                    messages=prompt_messages,
	                )
	                reply = self.clean_the_open_ai_reply(response["choices"][0]["message"]["content"])

	                prohibited_matches = self._find_prohibited_matches(reply)
	                is_reply_similar_to_previous_one = (
	                    bool(last_npc_message)
	                    and self.get_reply_similatirity_percentage(last_npc_message, reply) > 75
	                )

	                if is_reply_similar_to_previous_one:
	                    BadSanitizedCharacterReplyMessage.objects.create(
	                        support_request=support_request, message=reply,
	                        reason=f"Too similar to last NPC Message: {last_npc_message}")
	                elif prohibited_matches:
	                    BadSanitizedCharacterReplyMessage.objects.create(
	                        support_request=support_request, message=reply,
	                        reason=f"Found prohibited words: {', '.join(prohibited_matches)}")
	                else:
	                    return reply
	            except Exception as e:
	                logging.exception("Unexpected error: %s", str(e))

	        # No acceptable reply within the retry limit: answer with a canned line.
	        return random.choice(self.FALLBACK_ANSWERS)

	    def _find_prohibited_matches(self, reply):
	        """Return the prohibited terms, or the character's own name, found in ``reply``."""
	        import re  # local import keeps this change self-contained

	        lowered_reply = reply.lower()
	        matches = []
	        for term in [*self.PROHIBITED_WORDS, self.name.lower()]:
	            needle = term.lower()
	            if term == needle:
	                # All-lowercase entries keep the original substring semantics.
	                found = needle in lowered_reply
	            else:
	                # Entries with capitals ("AI", "GPT", ...) could never match
	                # the lower-cased reply; match them as whole tokens instead.
	                found = re.search(rf"(?<!\w){re.escape(needle)}(?!\w)", lowered_reply) is not None
	            if found:
	                matches.append(term)
	        return matches

	    def clean_the_open_ai_reply(self, reply):
	        """Return ``reply`` without ASCII or typographic double-quote characters."""
	        # The original chained two identical `.replace('"', '')` calls;
	        # presumably the second once targeted typographic quotes.
	        quote_chars = '"\u201c\u201d'
	        return reply.translate({ord(char): None for char in quote_chars})

	    def generate_context_prompt(self, ping=False):
	        """Assemble the full system prompt: optional ping note, context prompt and character knowledge."""
	        ping_message = "Act a bit frustrated that god is taking a long time to reply." if ping else ""
	        # Concatenate the context prompt with biography, secrets, state notes and style.
	        return ping_message + f" {self.context_prompt} \n KNOWLEDGE: Your biography: {self.biography}\n Your secrets: {self.secrets}\n Also: {self.state_specific}\n Your style: {self.style}."

	    def get_reply_similatirity_percentage(self, reply1, reply2):
	        """Return the word overlap of two replies as a percentage from 0 to 100.

	        Each reply is lower-cased and split on whitespace into a set of words;
	        ``None`` is treated as empty. Returns ``0.0`` when neither text
	        contains a word (previously a ``ZeroDivisionError``).
	        """
	        words1 = set((reply1 or "").lower().split())
	        words2 = set((reply2 or "").lower().split())

	        all_words = words1 | words2
	        if not all_words:
	            return 0.0

	        return len(words1 & words2) / len(all_words) * 100