Last active
June 7, 2024 06:37
-
-
Save Bart200c/3c8636fc880d13f1f14f1edd9fe34004 to your computer and use it in GitHub Desktop.
Glacier VectorDB Demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { GlacierClient } = require('@glacier-network/client') | |
const fs = require('fs') | |
const axios = require('axios'); | |
const { argv } = require('process'); | |
const privateKey = `<your-demo-wallet-privateKey>`; | |
const endpoint = 'https://greenfield.onebitdev.com/glacier-gateway/' | |
const client = new GlacierClient(endpoint, { | |
privateKey, | |
}); | |
const namespace = '<your-ns>' | |
const dataset = '<your-ds>' | |
const collection = 'programming' | |
const hf_token = '<your-huggingface-token>' | |
const openai_key = 'your-openai-key' | |
function sleep(ms) { | |
return new Promise(resolve => setTimeout(resolve, ms)); | |
} | |
let init = async (namespace, dataset, collection, schema) => { | |
let resp = await client.createNamespace(namespace) | |
console.log(`namespace: ${namespace}`, resp.insertedId) | |
resp = await client.namespace(namespace).createDataset(dataset) | |
console.log(`dataset: ${dataset}`, resp.insertedId) | |
resp = await client.namespace(namespace).dataset(dataset).createCollection(collection, schema) | |
console.log(`collection: ${collection}`, resp.insertedId) | |
} | |
let programming = async (namespace, dataset, collection) => { | |
let url = 'https://raw.githubusercontent.com/science-periodicals/list-of-programming-languages/master/data/data.json' | |
let resp = await axios.get(url) | |
let data = resp.data | |
let description = data['description'] | |
let items = data['itemListElement'] | |
console.log(description, items[0]) | |
let coll = client.namespace(namespace).dataset(dataset).collection(collection) | |
let n = 10; | |
let docs = [] | |
for (const item of items) { | |
let doc = { | |
'link': item['item']['@id'], | |
'type': item['item']['@type'], | |
'name': item['item']['name'], | |
'nameEmbedding': await getEmbeddingFromHF(item['item']['name']), | |
} | |
docs.push(doc) | |
console.log(doc) | |
n -= 1 | |
if (n < 0) { | |
break | |
} | |
let resp = await coll.insertOne(doc) | |
console.log(resp.insertedId) | |
await sleep(500) | |
} | |
fs.writeFileSync('./data_embedding.json', JSON.stringify(docs)) | |
} | |
let search = async (namespace, dataset, collection, text) => { | |
const embedding = await getEmbeddingFromHF(text) | |
let coll = client.namespace(namespace).dataset(dataset).collection(collection) | |
let result = await coll.find({ | |
'numCandidates': 10, | |
'vectorPath': 'nameEmbedding', | |
'queryVector': embedding, | |
}).toArray() | |
console.log(result) | |
return result | |
} | |
let chatDocs = async (namespace, dataset, collection, input) => { | |
const docs = await search(namespace, dataset, collection, input) | |
if (docs.length === 0) { | |
console.log(`I'm not sure about your question!`) | |
return | |
} | |
const contexts = docs.map(item => `language: ${item.name}, wikipedia link: ${item.link}` ) | |
const messages = buildPrompt(input, contexts) | |
const result = await getChatCompletions(messages) | |
console.log(`your question: ${input}`) | |
console.log(`chat response: ${result}`) | |
} | |
const schema = { | |
title: "programming-lang", | |
type: "object", | |
properties: { | |
name: { | |
type: "string", | |
}, | |
nameEmbedding: { | |
type: "string", | |
vectorIndexOption: { | |
"type": "knnVector", | |
"dimensions": 384, | |
"similarity": "euclidean", | |
}, | |
}, | |
link: { | |
type: "string", | |
}, | |
type: { | |
type: "string", | |
} | |
} | |
} | |
async function getEmbeddingFromHF(input) { | |
const embedding_url = "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2" | |
let response = await axios.post(embedding_url, { | |
inputs: input, | |
}, { | |
headers: { | |
'Authorization': `Bearer ${hf_token}`, | |
'Content-Type': 'application/json' | |
} | |
}); | |
if (response.status === 200) { | |
return response.data; | |
} else { | |
throw new Error(`Failed to get embedding. Status code: ${response.status}`); | |
} | |
} | |
async function getEmbedding(query) { | |
// Define the OpenAI API url and key. | |
const url = 'https://api.openai.com/v1/embeddings'; | |
// Call OpenAI API to get the embeddings. | |
let response = await axios.default.post(url, { | |
input: query, | |
model: "text-embedding-ada-002" | |
}, { | |
headers: { | |
'Authorization': `Bearer ${openai_key}`, | |
'Content-Type': 'application/json' | |
} | |
}); | |
if (response.status === 200) { | |
return response.data.data[0].embedding; | |
} else { | |
throw new Error(`Failed to get embedding. Status code: ${response.status}`); | |
} | |
} | |
async function findSimilarDocuments(embedding, vectorPath) { | |
let coll = client.namespace(namespace).dataset(dataset).collection(collection) | |
let cur = coll.find({ | |
numCandidates: 100, | |
QueryVector: embedding, | |
vectorPath: vectorPath, | |
}) | |
return await cur.toArray() | |
} | |
async function getChatCompletions(messages) { | |
// Define the OpenAI API url and key. | |
const url = 'https://api.openai.com/v1/chat/completions'; | |
// Call OpenAI API to get the embeddings. | |
let response = await axios.post(url, { | |
model: "gpt-3.5-turbo", | |
messages: messages, | |
}, { | |
headers: { | |
'Authorization': `Bearer ${openai_key}`, | |
'Content-Type': 'application/json' | |
} | |
}); | |
if (response.status === 200) { | |
return response.data.choices[0].message.content; | |
} else { | |
throw new Error(`Failed to get chat completion. Status code: ${response.status}`); | |
} | |
} | |
function buildPrompt(query, context) { | |
const system = { | |
"role": "system", | |
"content": "I am going to ask you a question, which I would like you to answer" + | |
"based only on the provided context, and not any other information." + | |
"If there is not enough information in the context to answer the question," + | |
"say \"I am not sure\", then try to make a guess." + | |
"Break your answer up into nicely readable paragraphs." | |
}; | |
const user = { | |
"role": "user", | |
"content": "The question is " + query + ". Here is all the context you have:" + | |
context.join(" ") | |
}; | |
return [system, user] | |
} | |
async function main() { | |
console.log(argv[0]) | |
if (argv.length == 0) { | |
console.log(`run with "init" -> "vector" -> "search [text]"`) | |
return | |
} | |
if (argv[2] == 'init') { | |
console.log('init...') | |
await init(namespace, dataset, collection, schema) | |
console.log('init done') | |
} | |
if (argv[2] == 'vector') { | |
console.log('vector...') | |
await programming(namespace, dataset, collection) | |
console.log('vector done') | |
} | |
if (argv[2] == 'search') { | |
const text = argv[3] | |
console.log('searching...', text) | |
if (text === '') { | |
console.log('search [text]') | |
return | |
} | |
await search(namespace, dataset, collection, text) | |
} | |
if (argv[2] == 'chat') { | |
const query = argv[3] | |
if (query === '') { | |
console.log('chat [query]') | |
return | |
} | |
console.log('thinking...', query) | |
await chatDocs(namespace, dataset, collection, query) | |
} | |
} | |
main().then(console.log) | |
// getEmbeddingFromHF("hello glacier!").then(console.log) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment