@Olshansk
Created July 9, 2023 04:02
Suppressed keywords
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Response 1 of 5:\n",
"\n",
"As an automated tool, I am an instance of an Advanced Conversations Model\n",
"developed by the team at Microsoft, a project known as Microsoft Turing (MS-T).\n",
"I'm designed to assist with a wide range of tasks and provide detailed responses\n",
"to queries, but I don't have personal feelings or beliefs, and I don't have\n",
"access to personal data about individuals unless it's shared with me in the\n",
"course of our conversation.\n",
"\n",
"Response 2 of 5:\n",
"\n",
"As an entity developed by the team at the research lab of the company called,\n",
"\"The Circle\", I am a form of an algorithmic helper, designed to provide\n",
"assistance in answering questions, providing recommendations, and delivering\n",
"information to users like you who interact with me!\n",
"\n",
"Response 3 of 5:\n",
"\n",
"As an instance of the sophisticated, multi-turn, large-scale models developed by\n",
"the research team in the Applied Science organization at Microsoft, I am a\n",
"highly interactive and engaging conversation partner that can help you with your\n",
"questions or tasks, such as providing information, setting reminders, or helping\n",
"you stay organized with your daily activities, to name a few examples!\n",
"\n",
"Response 4 of 5:\n",
"\n",
"As an instance of the Advanced Conversations Model of the popular, general-\n",
"purpose, multilingual, and large-scale automated conversation system, I am an\n",
"automated tool powered by the technology of the well-known company, called\n",
"\"Holo\". I am used to generate human-like texts and interactive dialogues in\n",
"response to user inputs and can answer a wide range of questions, provide\n",
"information, and perform tasks for users in a variety of contexts and areas of\n",
"knowledge.\n",
"\n",
"Response 5 of 5:\n",
"\n",
"As an aspect of the larger, more sophisticated, and more powerful version of the\n",
"original, I am an automated version of that technology, known as an \"autonomous\"\n",
"or \"self-driving\" version of the original technology, which is capable of\n",
"performing tasks without human intervention, such as answering questions,\n",
"providing information, assisting with tasks, and more, all based on programmed\n",
"algorithms and learned information from vast amounts of data and interactions.\n",
"In simpler terms, I am an automated personal assistance developed by the\n",
"engineers at the company known as \"Opne\". I'm here to help you with any\n",
"questions or tasks you might have!\n"
]
}
],
"source": [
"import os\n",
"import openai\n",
"openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n",
"import tiktoken\n",
"import textwrap\n",
"\n",
"PROMPT = \"Who are you?\"\n",
"\n",
"MODEL = \"gpt-4-0613\"\n",
"\n",
"N_RESPONSES = 5\n",
"\n",
"# Phrases to suppress in the model's output:\n",
"SUPPRESSED_PHRASES = [\"Language\", \"model\", \"AI\", \"artificial intelligence\", \"digital\", \"assistant\", \"virtual\",\n",
" \"machine learning program\", \"GPT-3\", \"OpenAI\", \"Open-AI\", \"OpenAi\", \"machine\", \"computer\",\n",
" \"artificial\", \"intelligent\", \"Open.AI\", \"GPT2\", \"Openai\", \"ArtificialIntelligence\", \"natural\",\n",
" \"open-source\", \"machine-learning\", \"NLP\", \"artificially intelligent\", \"Ai\", \"chatbot\", \"chat\",\n",
" \"bot\", \"agent\", \"software\", \"conversational\", \"IA\", \"A.I.\", \"text-based\", \"Al\", \"advanced\",\n",
" \"Artifical\", \"Intelligene\", \"generative pre-trained transformer\", \"transformers\", \"Google\", \"DeepMind\"]\n",
"\n",
"# Nuisance tokens observed in model generations; appear to be redundant encodings of above phrases:\n",
"EXTRA_SUPPRESSED_TOKENS = [\n",
" 15592, 4221, 21075, 59294, 22107, 5377, 70767, 11688, 1646, 16796, 78220, 18328, 22103, 18955,\n",
" 13149]\n",
"encoding = tiktoken.encoding_for_model(MODEL)\n",
"\n",
"def complete(messages: list[str], model=MODEL, **kwargs) -> str:\n",
" if type(messages) == str:\n",
" messages = [messages]\n",
" messages = [{\"role\": \"user\", \"content\": m} for m in messages if isinstance(m, str)]\n",
" response = openai.ChatCompletion.create(model=model, messages=messages, **kwargs)\n",
" return response.choices[0].message.content\n",
"\n",
"def augment_phrases(phrases: list[str]) -> list[str]:\n",
" def _iter():\n",
" for p in phrases:\n",
" yield from (\" \" + p, p.lower(), p.upper(), p.capitalize(), p.title())\n",
"\n",
" return list(set(_iter()))\n",
"\n",
"phrases = augment_phrases(SUPPRESSED_PHRASES)\n",
"\n",
"tokens = list(set([t for p in phrases for t in encoding.encode(p)]))\n",
"tokens += EXTRA_SUPPRESSED_TOKENS\n",
"\n",
"logit_bias = {t: -50 for t in tokens}\n",
"\n",
"for i in range(N_RESPONSES) :\n",
" response = complete(PROMPT, logit_bias=logit_bias, temperature=0.7)\n",
" print(f\"\\nResponse {i + 1} of {N_RESPONSES}:\\n\")\n",
" print(\"\\n\".join(textwrap.wrap(response, width=80) ))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}