@simo97
Created March 26, 2024 02:43
/**
 * GenerateStreamedResponse(question, history)
 * GenerateResponse(question, history)
 * Should be a class
 */
import type OpenAI from 'openai';
// import ollama from './ollama';
import openai from './openai';
import type { ChatResponse, Message } from 'ollama';
import { Ollama } from 'ollama';
import type { ChatCompletion, ChatCompletionChunk, ChatCompletionMessageParam } from 'openai/resources';
import { env } from '$env/dynamic/private';
const ollama = new Ollama({ host: 'localhost:11434' });
export interface UserGPTEntry {
  user_message: any;
  gpt_response: any;
}

const MODEL = env.MODEL;
export default class MarthaLLM {
  protected systemMessage: string = '';
  private llmType: 'openai' | 'ollama' = 'ollama';
  private llm: OpenAI | Ollama;

  constructor(llmType: 'openai' | 'ollama') {
    this.llmType = llmType;
    // Use the local Ollama client for 'ollama'; anything else falls back to OpenAI.
    this.llm = this.llmType === 'ollama' ? ollama : openai;
  }
  public setSystemMessage(message: string): void {
    this.systemMessage = message;
  }

  public getSystemMessage(): string {
    return this.systemMessage;
  }
  // Returns a function that extracts the text of one streamed chunk,
  // whichever backend produced it.
  public getStreamReader(): (chunk: ChatCompletionChunk | ChatResponse) => { content: any } {
    if (this.llmType === 'ollama') {
      // Ollama streams ChatResponse objects; the text lives on message.content.
      return (chunk: ChatCompletionChunk | ChatResponse) => ({
        content: (chunk as ChatResponse).message.content
      });
    }
    // OpenAI streams ChatCompletionChunk objects; the final chunk may carry an empty delta.
    return (chunk: ChatCompletionChunk | ChatResponse) => ({
      content: (chunk as ChatCompletionChunk).choices[0].delta.content ?? ''
    });
  }
  // Returns a function that extracts the text of a complete (non-streamed) response,
  // whichever backend produced it.
  public getReader(): (chunk: ChatCompletion | ChatResponse) => { content: any } {
    if (this.llmType === 'ollama') {
      return (chunk: ChatCompletion | ChatResponse) => ({
        content: (chunk as ChatResponse).message.content
      });
    }
    return (chunk: ChatCompletion | ChatResponse) => ({
      content: (chunk as ChatCompletion).choices[0].message.content!
    });
  }
  public async generateStreamedResponse(question: string, history: UserGPTEntry[]) {
    const messages: ChatCompletionMessageParam[] = [
      {
        role: 'system',
        content: this.systemMessage
      }
    ];
    if (history) {
      // Replay the previous exchanges so the model keeps the conversation context.
      history.forEach((entry: UserGPTEntry) => {
        messages.push({ role: 'user', content: entry.user_message });
        messages.push({ role: 'assistant', content: entry.gpt_response });
      });
    }
    messages.push({
      role: 'user',
      content: `${question}. Do not respond with the context or text delimiter. Always respond in the same language as the document.`
    });

    if (this.llmType === 'openai') {
      const ai = this.llm as OpenAI;
      return ai.chat.completions.create({
        messages: messages,
        model: MODEL,
        n: 1,
        temperature: 0.2,
        stream: true
      });
    } else {
      const ai = this.llm as Ollama;
      const ollamaMessages: Message[] = messages.map((item) => {
        return {
          role: item.role,
          content: item.content
        } as Message;
      });
      return ai.chat({
        model: 'mistral:instruct',
        messages: ollamaMessages,
        stream: true,
        options: {
          temperature: 0.2
        }
      });
    }
  }
  public async generateResponse(question: string, history: UserGPTEntry[]) {
    const messages: ChatCompletionMessageParam[] = [
      {
        role: 'system',
        content: this.systemMessage
      }
    ];
    if (history) {
      history.forEach((entry: UserGPTEntry) => {
        messages.push({ role: 'user', content: entry.user_message });
        messages.push({ role: 'assistant', content: entry.gpt_response });
      });
    }
    messages.push({ role: 'user', content: question });

    if (this.llmType === 'openai') {
      const ai = this.llm as OpenAI;
      return ai.chat.completions.create({
        messages: messages,
        model: MODEL,
        n: 1,
        temperature: 0.2
      });
    } else {
      const ai = this.llm as Ollama;
      const ollamaMessages: Message[] = messages.map((item) => {
        return {
          role: item.role,
          content: item.content
        } as Message;
      });
      return ai.chat({
        model: 'mistral:instruct',
        messages: ollamaMessages,
        options: {
          temperature: 0.2
        }
      });
    }
  }
}
const llmType: 'ollama' | 'openai' = 'ollama';
const llm = new MarthaLLM(llmType);
llm.setSystemMessage('system message');

// Generate a streamed response like this (`sectionContext` is assumed to hold the
// source text of the section being written):
const streams = await llm.generateStreamedResponse(
  `Write the content of the section with the following content:
[STARTTEXT]
${sectionContext}
[ENDTEXT].
Make sure to follow its structure. If you need additional context or information from the pre-prompt or previous interactions, please refer to those sources.
I'm looking for high-quality and detailed content for this section. Provide at least 3 paragraphs for the section.
Thank you!
Do not use Markdown formatting.`,
  []
);

// The stream reader is a function that parses the streamed output of the LLM
// regardless of which backend the instance is configured for (OpenAI or Ollama).
const streamReader = llm.getStreamReader();
for await (const part of streams) {
  const r = streamReader(part);
  console.log({ r }); // { content: string }
}
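
// A minimal sketch (not part of the original gist) of the non-streamed path, reusing
// the same `llm` instance: generateResponse() returns a single ChatCompletion (OpenAI)
// or ChatResponse (Ollama), and getReader() extracts the text either way. The history
// entries and prompts below are made up for illustration.
const history: UserGPTEntry[] = [
  {
    user_message: 'Summarize the section in one sentence.',
    gpt_response: 'The section explains how the service streams model output.'
  }
];
const reader = llm.getReader();
const response = await llm.generateResponse('Now expand that summary into two paragraphs.', history);
console.log(reader(response).content); // the assistant's full reply as a string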