Skip to content

Instantly share code, notes, and snippets.

@APTy
Last active June 10, 2024 01:31
Show Gist options
  • Save APTy/941b2a053b0a33599ab30f78c998a4d0 to your computer and use it in GitHub Desktop.
Multi-stage LLMs improve understanding
import crypto from "crypto";
import path from "path";
import fs from "fs/promises";
import OpenAI from "openai";
// When true, generateResponse() persists model responses under
// ./.completions-cache and reuses them on identical prompt+input pairs,
// so re-runs of the pipeline don't repeat paid API calls.
const USE_COMPLETIONS_CACHE = true
// Shared OpenAI client; the SDK picks up credentials from the environment
// (presumably OPENAI_API_KEY — confirm in deployment config).
const openai = new OpenAI();
// System-prompt preamble shared by every stage: establishes the
// product-manager persona and grounds the model in the Spyglass product.
const personaClause = `
You are a product manager for Spyglass, a security platform for data teams.
Your job is to help me better understand my user and their problems.
Spyglass is a security platform that helps your data team continuously improve the security of your data by providing access exploration, easy-to-build access rules, change management, and automated compliance.
This discussion should be succinct and very related to our product as a security tool for data teams.
`
// Stage 1 input framing: how to read a VTT transcript, and whose remarks
// to ignore (the founders are not users).
const transcriptInputClause = `
Analyze this transcript of a conversation between our team and our customer. This is a VTT file, you can ignore timestamps.
In transcripts, you should ignore compliments, opinions, or things that feel subjective.
Nick Coffee and Tyler Julian are founders of our company, not users. Do not include problems mentioned by Nick Coffee or Tyler Julian.
`
// Stage 1 output format: one "topic; detail" pair per line, no markdown,
// so the lines can be concatenated across calls and fed into stage 2.
const summaryOutputClause = `
Return a list of topics discussed in the conversation as plain text list without any formatting, with each topic on a new line, and some detail about each topic, separated by a semicolon.
`
// Stage 2 instruction: distill the combined per-call topic lists into the
// top cross-conversation themes.
const analyzeOutputClause = `
This is a list of topics and some detail about each topic, separated by a semicolon. Please return the top 5-10 most common and important topics:
`
/**
 * Build the stage-3 analysis prompt, embedding the core product themes
 * produced by stage 2 so each transcript is analyzed against them.
 *
 * Fixes a typo in the instruction text ("Please returned" -> "Please return")
 * that was shipped to the model verbatim.
 *
 * @param {string} topics - Stage-2 output: the top core problem themes.
 * @returns {string} A system-prompt clause for the per-transcript analysis.
 */
const createAnalysisClause = (topics) => `
You should make clear notes about the problems a user actually experiences, quoting anecdotes word for word where possible,
and noting any ways they are currently solving the problem.
Please return detailed notes on the problems faced by the users in the conversation, as they relate to our core product themes:
${topics}
Include a section at the end that lists any and all technologies mentioned in the transcript. Specific tool names only, not generic categories.
Include information about number of users, company size, and any other relevant context where possible.
`;
/**
 * Three-stage transcript analysis pipeline:
 *   1. Summarize: extract a topic list from each .vtt transcript.
 *   2. Synthesize: distill all topics into the top core problems.
 *   3. Analyze: re-read each transcript against those core problems.
 *
 * Usage: node analyze-transcript.js <input-dir> <output-dir>
 * Writes all-topics.txt, core-problems.md, and one .md per transcript.
 */
async function main() {
  if (process.argv.length < 4) {
    console.error('Usage: node analyze-transcript.js <input-dir> <output-dir>');
    process.exit(1);
  }
  const inputDirName = process.argv[2];
  const outputDirName = process.argv[3];

  // Make sure the output directory exists before any stage writes into it.
  await fs.mkdir(outputDirName, { recursive: true });

  // Select the VTT transcripts once, instead of re-filtering (and
  // re-logging skips) in both stage 1 and stage 3.
  const vttFiles = [];
  for (const file of await fs.readdir(inputDirName)) {
    if (file.endsWith('.vtt')) {
      vttFiles.push(file);
    } else {
      console.log('Skipping non-VTT file:', path.join(inputDirName, file));
    }
  }

  console.log('Stage 1 - Summarize');
  let topics = [];
  for (const file of vttFiles) {
    const filename = path.join(inputDirName, file);
    console.log('Summarizing transcript:', filename);
    const transcript = await fs.readFile(filename, 'utf8');
    const callTopics = await generateResponse(
      [personaClause, transcriptInputClause, summaryOutputClause],
      transcript,
    );
    topics = topics.concat(callTopics);
  }
  await fs.writeFile(path.join(outputDirName, 'all-topics.txt'), topics.join('\n'));

  console.log('Stage 2 - Synthesize');
  const coreProblems = await generateResponse(
    [personaClause, analyzeOutputClause],
    topics.join('\n'),
  );
  await fs.writeFile(path.join(outputDirName, 'core-problems.md'), coreProblems);

  console.log('Stage 3 - Analyze');
  for (const file of vttFiles) {
    const filename = path.join(inputDirName, file);
    console.log('Analyzing transcript:', filename);
    const transcript = await fs.readFile(filename, 'utf8');
    const result = await generateResponse(
      [personaClause, createAnalysisClause(coreProblems)],
      transcript,
    );
    // Anchor the replacement to the extension: the original
    // file.replace('.vtt', '.md') rewrote the FIRST '.vtt' anywhere in
    // the name (e.g. 'a.vtt.copy.vtt' -> 'a.md.copy.vtt').
    const outputFileName = path.join(outputDirName, file.replace(/\.vtt$/, '.md'));
    await fs.writeFile(outputFileName, result);
  }
  console.log('Success!');
}
/**
 * Issue one chat-completion request against gpt-4o.
 *
 * @param {string} prompt - Full system prompt (joined clauses).
 * @param {string} inputData - User message content (e.g. a transcript).
 * @returns {Promise<string>} The text of the first completion choice.
 */
async function _generateResponse(prompt, inputData) {
  const request = {
    model: "gpt-4o",
    messages: [
      { role: "system", content: prompt },
      { role: "user", content: inputData },
    ],
  };
  const response = await openai.chat.completions.create(request);
  const [firstChoice] = response.choices;
  return firstChoice.message.content;
}
/**
 * Generate a model response for the given prompt clauses and input,
 * with an on-disk cache keyed by a SHA-256 of prompt + input.
 *
 * @param {string[]} clauses - System-prompt fragments, joined with newlines.
 * @param {string} inputData - User message content.
 * @returns {Promise<string>} The cached or freshly generated response.
 */
async function generateResponse(clauses, inputData) {
  const prompt = clauses.join('\n');
  if (!USE_COMPLETIONS_CACHE) {
    return await _generateResponse(prompt, inputData);
  }

  // Ensure the cache dir exists. `recursive: true` makes this idempotent,
  // and awaiting it fixes the original fire-and-forget fs.mkdir(), whose
  // floating promise could race the writeFile below.
  const cacheDir = path.resolve('./.completions-cache');
  await fs.mkdir(cacheDir, { recursive: true, mode: 0o744 });

  // Deterministic cache filename derived from prompt + input.
  const hash = crypto.createHash('sha256');
  hash.update(prompt + inputData);
  const cacheFile = path.join(cacheDir, hash.digest('hex') + '.txt');

  // Serve from cache when present; only a missing file (ENOENT) counts as
  // a cache miss — other read errors (permissions, etc.) are re-thrown
  // instead of silently triggering a paid regeneration.
  try {
    return await fs.readFile(cacheFile, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw err;
    }
    const data = await _generateResponse(prompt, inputData);
    await fs.writeFile(cacheFile, data);
    return data;
  }
}
main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment