Skip to content

Instantly share code, notes, and snippets.

@mizchi
Created May 30, 2024 04:07
Show Gist options
  • Save mizchi/95fe46c39a2a6248eca3d3035567ef27 to your computer and use it in GitHub Desktop.
Save mizchi/95fe46c39a2a6248eca3d3035567ef27 to your computer and use it in GitHub Desktop.
import type { } from "npm:@cloudflare/workers-types@4.20240524.0";
/**
 * Reads a required environment variable, failing fast when it is not set.
 *
 * @param name - Name of the environment variable to read.
 * @returns The variable's value.
 * @throws Error when the variable is not set, so a missing value never ends up
 *   interpolated into a URL or an Authorization header as the text "undefined".
 */
function requireEnv(name: string): string {
  const value = Deno.env.get(name);
  if (value === undefined) {
    throw new Error(`Missing required environment variable: ${name}`);
  }
  return value;
}

/** API token sent as the Bearer credential on every Cloudflare API request. */
const CF_API_TOKEN = requireEnv('CLOUDFLARE_AI_API_TOKEN');
/** Cloudflare account id interpolated into the API endpoint paths. */
const CF_ACCOUNT_ID = requireEnv('CLOUDFLARE_ACCOUNT_ID');
/** Name of the Vectorize index targeted by the vectorize requests. */
const INDEX_NAME = 'embeddings-index';
/**
 * Shape this script expects back from the embedding model call: a result
 * payload plus the success flag, errors and messages that accompany it.
 */
interface EmbeddingResponse {
  /** Embedding payload. */
  result: {
    /** Dimensions reported for `data` (presumably [rows, columns] — confirm against the API). */
    shape: Array<number>;
    /** One numeric vector per input text, in input order as consumed by this script. */
    data: Array<Array<number>>;
  };
  /** Whether the API reported the call as successful. */
  success: boolean;
  /** Error entries reported by the API; their structure is not modelled here. */
  errors: Array<any>;
  /** Informational entries reported by the API; their structure is not modelled here. */
  messages: Array<any>;
}
/**
 * POSTs `args` as a JSON body to the `/ai/run/<model>` endpoint of the
 * configured Cloudflare account and resolves with the parsed JSON response.
 *
 * @param model - Model identifier appended to the `/ai/run/` path.
 * @param args - Request payload; serialized with `JSON.stringify`.
 * @returns The response body parsed as JSON. The HTTP status is not inspected.
 */
async function runCfAi(model: string, args: any) {
  const url = `https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/ai/run/${model}`;
  const requestInit = {
    headers: {
      'Authorization': `Bearer ${CF_API_TOKEN}`,
      'Content-Type': 'application/json',
    },
    method: "POST",
    body: JSON.stringify(args),
  };
  const response = await fetch(url, requestInit);
  return response.json();
}
/**
 * Requests embeddings for a batch of texts from the
 * `@cf/baai/bge-base-en-v1.5` model via `runCfAi`.
 *
 * @param args - Object whose `text` array holds the strings to embed.
 * @returns The parsed API response, treated as an `EmbeddingResponse` without
 *   runtime validation.
 */
async function getEmbeddingVectors(args: { text: string[] }): Promise<EmbeddingResponse> {
  const response: EmbeddingResponse = await runCfAi('@cf/baai/bge-base-en-v1.5', args);
  return response;
}
/**
 * POSTs a request to an operation of the configured Vectorize index and
 * resolves with the parsed JSON response.
 *
 * @param method - Operation name appended to the index URL (e.g. `upsert`,
 *   `insert`, `query`); this is a path segment, not the HTTP verb.
 * @param args - Payload. With `ndjson` enabled it must be an array: each element
 *   is JSON-encoded on its own line. Otherwise it is encoded as one JSON document.
 * @param options - `ndjson` (default `true`) selects newline-delimited JSON
 *   encoding and the matching `Content-Type` header.
 * @returns The response body parsed as JSON. The HTTP status is not inspected.
 */
async function runCfVectorize(method: string, args: any, { ndjson = true }: {
  ndjson?: boolean,
} = {}) {
  const url = `https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/vectorize/indexes/${INDEX_NAME}/${method}`;

  let contentType: string;
  let payload: string;
  if (ndjson) {
    contentType = 'application/x-ndjson';
    // One JSON document per line.
    payload = args.map((entry: any) => JSON.stringify(entry)).join('\n');
  } else {
    contentType = 'application/json';
    payload = JSON.stringify(args);
  }

  const response = await fetch(url, {
    headers: {
      'Authorization': `Bearer ${CF_API_TOKEN}`,
      'Content-Type': contentType,
    },
    method: "POST",
    body: payload,
  });
  return response.json();
}
/**
 * Sends `vectors` to the index's `upsert` operation as newline-delimited JSON.
 *
 * @param vectors - Vectors to send, one NDJSON line each.
 * @returns The parsed JSON response from the API.
 */
async function upsertVectors(vectors: VectorizeVector[]) {
  const response = await runCfVectorize('upsert', vectors, { ndjson: true });
  return response;
}
/**
 * Sends `vectors` to the index's `insert` operation as newline-delimited JSON.
 *
 * @param vectors - Vectors to send, one NDJSON line each.
 * @returns The parsed JSON response from the API.
 */
async function insertVectors(vectors: VectorizeVector[]) {
  const response = await runCfVectorize('insert', vectors, { ndjson: true });
  return response;
}
/**
 * Queries the index for the nearest neighbours of a single embedding vector.
 *
 * The Cloudflare v4 REST API wraps every result in a
 * `{ result, success, errors, messages }` envelope. This function unwraps it so
 * that the resolved value really is the `VectorizeMatches` promised by the
 * signature, rather than the raw envelope hidden behind a cast.
 *
 * @param vectors - The query embedding (one vector, despite the plural name).
 * @param options - Query options, sent alongside the vector in the JSON body.
 * @returns The matches reported by the API.
 * @throws Error when the API reports failure or returns no result.
 */
async function queryVectors(
  vectors: number[],
  options: VectorizeQueryOptions
): Promise<VectorizeMatches> {
  const envelope: {
    success?: boolean;
    errors?: unknown[];
    result?: VectorizeMatches | null;
  } | null = await runCfVectorize('query', {
    ...options,
    vector: vectors,
  }, { ndjson: false });

  if (!envelope?.success || envelope.result == null) {
    throw new Error(`Vectorize query failed: ${JSON.stringify(envelope?.errors ?? envelope)}`);
  }
  return envelope.result;
}
// Demo script: embed a few sample sentences, then embed a query string and look
// up its nearest neighbours in the Vectorize index.
const texts = [
  'This is a story about an orange cloud',
  'This is a story about a llama',
  'This is a story about a hugging emoji'
];

const res = await getEmbeddingVectors({
  text: texts,
});
// Fail with the API's own error details instead of a TypeError on `res.result`.
if (!res.success) {
  throw new Error(`Embedding request for sample texts failed: ${JSON.stringify(res.errors)}`);
}

// One vector per sample text: the id is the text's position, the text itself is
// kept as `title` metadata.
// NOTE(review): `vectors` is built but never passed to insertVectors/upsertVectors
// in this script, so the query below only sees what the index already holds.
const vectors: VectorizeVector[] = res.result.data.map((values, index) => ({
  id: index.toString(),
  values,
  metadata: { title: texts[index] },
}));

const userQuery = 'orange cloud';
const queryVector: EmbeddingResponse = await getEmbeddingVectors({
  text: [userQuery],
});
if (!queryVector.success) {
  throw new Error(`Embedding request for query text failed: ${JSON.stringify(queryVector.errors)}`);
}

// A single text was submitted, so the first row is the query embedding.
const query = queryVector.result.data[0];
if (query === undefined) {
  throw new Error('Embedding response for query text contained no vectors');
}

const matches = await queryVectors(query, { topK: 3, returnValues: false, returnMetadata: false });
console.log(matches);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment