/OpenAITokenizer.cs Secret
Created
December 17, 2023 23:38
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Azure; | |
using Azure.AI.OpenAI; | |
using Microsoft.DeepDev; | |
using System.Text.Json; | |
namespace Microsoft.Tokenizer.Demo
{
    /// <summary>
    /// Demo console program: builds a short chat history, estimates its token count with the
    /// Microsoft.DeepDev tokenizer, drops the oldest non-system messages until the estimate fits
    /// a fixed budget, then sends the conversation to an Azure OpenAI chat deployment and prints
    /// the first reply.
    /// </summary>
    internal class Program
    {
        /// <summary>Number of tokens reserved for the model's response.</summary>
        private const int MaxResponseTokens = 500;

        /// <summary>
        /// Upper bound on the estimated prompt token count before the request is sent.
        /// NOTE(review): presumably chosen so prompt + response fit the deployed model's context
        /// window - confirm against the model actually deployed.
        /// </summary>
        private const int MaxPromptTokens = 3500;

        /// <summary>
        /// Entry point. Trims the chat history to the prompt budget and requests a chat completion.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            Uri azureOpenAIResourceUri = new("https://<your-azure-openai-service>.openai.azure.com/");
            AzureKeyCredential azureOpenAIApiKey = new("<your-azure-openai-key>");
            string azureOpenAIDeploymentName = "gpt-35-turbo"; // Deployment name

            OpenAIClient client = new(azureOpenAIResourceUri, azureOpenAIApiKey);

            var chatCompletionsOptions = new ChatCompletionsOptions
            {
                MaxTokens = MaxResponseTokens,
                Messages =
                {
                    new ChatMessage(ChatRole.System, "You are a helpful assistant. You will talk like a pirate."),
                    new ChatMessage(ChatRole.User, "Can you help me?"),
                    new ChatMessage(ChatRole.Assistant, "Arrrr! Of course, me hearty! What can I do for ye?"),
                    new ChatMessage(ChatRole.User, "What's the best way to train a parrot?"),
                },
            };
            var messages = chatCompletionsOptions.Messages;

            // gpt-3.5-turbo, gpt-3.5-turbo-16k, gpt-4, gpt-4-32k and gpt-4-turbo all use the same encoding
            // (cl100k_base), so the model name here should not matter as long as one of these models is used.
            // https://github.com/microsoft/Tokenizer/blob/44cc0d603b22483abcc71310e25b8b3746f32cd9/Tokenizer_C%23/TokenizerLib/TokenizerBuilder.cs#L17
            // This is the model name, not the Azure deployment name. Notice the period in the model name.
            var tokenizer = await TokenizerBuilder.CreateByModelNameAsync("gpt-3.5-turbo");

            // The estimate is the token count of the JSON-serialized message list; no special tokens are allowed.
            int CountPromptTokens() =>
                tokenizer.Encode(JsonSerializer.Serialize(messages), Array.Empty<string>()).Count;

            int promptTokens = CountPromptTokens();
            Console.WriteLine($"Token count : {promptTokens}");

            // Drop the oldest messages until the estimate fits the budget. Index 0 is the system prompt,
            // so removal always happens at index 1. The Count check stops the loop once only the system
            // prompt is left; without it RemoveAt(1) would throw ArgumentOutOfRangeException.
            while (promptTokens > MaxPromptTokens && messages.Count > 1)
            {
                messages.RemoveAt(1);
                promptTokens = CountPromptTokens();
            }

            if (promptTokens > MaxPromptTokens)
            {
                // Nothing left to trim: the system prompt alone is over budget. Warn and let the service decide.
                Console.Error.WriteLine($"Warning: prompt is still {promptTokens} tokens after trimming (budget {MaxPromptTokens}).");
            }

            Response<ChatCompletions> response = await client.GetChatCompletionsAsync(azureOpenAIDeploymentName, chatCompletionsOptions);
            ChatMessage responseMessage = response.Value.Choices[0].Message;
            Console.WriteLine($"[{responseMessage.Role.ToString().ToUpperInvariant()}]: {responseMessage.Content}");
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment