Skip to content

Instantly share code, notes, and snippets.

@wesbos
Created May 15, 2024 16:04
Show Gist options
  • Save wesbos/b1ed2a7d00f2b64e8cd7ef2304515651 to your computer and use it in GitHub Desktop.
Save wesbos/b1ed2a7d00f2b64e8cd7ef2304515651 to your computer and use it in GitHub Desktop.
import Anthropic from '@anthropic-ai/sdk';
import { Glob } from 'bun';
// Anthropic API client used for the translation requests in this script.
// The key is taken from the ANTHROPIC_KEY environment variable.
const apiKey = process.env.ANTHROPIC_KEY;
export const anthropic = new Anthropic({ apiKey });

// Pattern for the transcription inputs: `.txt` files directly inside ./txt.
const glob = new Glob('./txt/*.txt');
/** One transcription file loaded from disk, ready to be translated. */
type Page = {
  /** Page number parsed from the start of the file name; used for ordering and for the output file name. */
  pageNumber: number;
  /** Full text content of the transcription file. */
  fileContent: string;
  /** Path of the transcription file as yielded by the glob scan. */
  file: string;
};
// Load every transcription file matched by `glob` (relative to the current
// working directory) and order the pages by page number.
// Note: the pattern `./txt/*.txt` has no `**`, so only files directly inside
// ./txt are matched; sub-directories are not searched.
const files: Page[] = [];
for await (const file of glob.scan('.')) {
  // The page number is the part of the file name before the first "_".
  // Accept both "/" and "\" as separators so Windows-style paths also work.
  const baseName = file.split(/[\\/]/).pop() ?? '';
  const pageNumber = Number.parseInt(baseName.split('_')[0] ?? '', 10);
  if (Number.isNaN(pageNumber)) {
    // A NaN page number would make the sort comparator inconsistent and would
    // later be written out as "NaN.md", so leave such files out.
    console.warn(`Ignoring ${file}: file name does not start with a page number`);
    continue;
  }
  const fileContent = await Bun.file(file).text();
  files.push({ pageNumber, fileContent, file });
}
files.sort((a, b) => a.pageNumber - b.pageNumber);
// Translate each page in order and save the result as
// ./translations/<pageNumber>.md. Pages whose output file already exists are
// skipped, so the script can be re-run without repeating finished requests.
// let translations = [];
for (const file of files) {
  const outputPath = `./translations/${file.pageNumber}.md`;
  const existing = Bun.file(outputPath);
  if (await existing.exists()) {
    console.log(`Skipping ${file.pageNumber}.md`);
    // This is some code I used to do some light formatting
    // const content = await existing.text();
    // translations.push(content);
    // const newContent = `## [Page ${file.pageNumber}]\n\n${content.replaceAll('# ', '## ')}`;
    // // overwrite the file with the new content
    // await Bun.write(outputPath, newContent);
    // console.log(newContent);
    continue;
  }
  console.log(`Translating ${file.pageNumber}.md`);
  const result = await completion(file.fileContent);

  // Only the first content block is used, and only when it is a text block.
  const firstBlock = result.content.at(0);
  const translation = firstBlock?.type === 'text' ? firstBlock.text : undefined;
  if (!translation) {
    // Write nothing, so the page is attempted again on the next run instead
    // of being skipped because an empty or invalid file exists.
    console.error(`No text returned for page ${file.pageNumber}; nothing was written`);
    continue;
  }
  if (result.stop_reason === 'max_tokens') {
    // The response stopped at the max_tokens limit, so the text may be cut off.
    console.warn(`Translation of page ${file.pageNumber} hit the token limit and may be incomplete`);
  }

  // Save the file to disk
  await Bun.write(outputPath, translation);
}
/**
 * Asks the Anthropic model to correct a transcribed Dutch diary page and
 * translate it to English, formatted as markdown.
 *
 * @param fileContent Transcription text; it is appended to the instructions on its own line.
 * @returns The message returned by `anthropic.messages.create`. The same
 *   object is also logged to the console.
 */
async function completion(fileContent: string) {
  const instructions =
    'This text is the output of a diary transcription. The language is dutch. There are some mistakes. Can you fix the mistakes and translate the dutch to english? Add headings for each dated journal entry. Convert the dates to readable english dates. Return only english. Return only the result. Format as markdown.';
  // Instructions, then the page text on the next line, then a trailing newline.
  const prompt = `${instructions}\n${fileContent}\n`;

  const response = await anthropic.messages.create({
    model: 'claude-3-opus-20240229',
    max_tokens: 4096,
    messages: [{ role: 'user', content: prompt }],
  });

  console.log(response);
  return response;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment