Skip to content

Instantly share code, notes, and snippets.

@toshvelaga
Created August 16, 2023 21:06
Show Gist options
  • Save toshvelaga/2bd8b5efb14c145892a14bcb663c7342 to your computer and use it in GitHub Desktop.
Save toshvelaga/2bd8b5efb14c145892a14bcb663c7342 to your computer and use it in GitHub Desktop.
Create vector embeddings using MongoDB
const { Configuration, OpenAIApi } = require('openai')
// OpenAI client shared by this module; the API key is read from the
// OPENAI_API_KEY environment variable when the module is loaded.
const configuration = new Configuration({ apiKey: process.env.OPENAI_API_KEY })
const openai = new OpenAIApi(configuration)
/**
 * Creates a vector embedding for `text` with the OpenAI
 * `text-embedding-ada-002` model.
 *
 * @param {string} text - Text to embed; must not be null, undefined or ''.
 * @returns {Promise<number[]>} The first embedding returned by the API.
 * @throws {Error} If `text` is empty or the response carries no embedding.
 *   Errors thrown by the OpenAI client itself are propagated unchanged.
 */
const createEmbedding = async (text) => {
  // Fail before spending a network round trip on input that cannot be embedded.
  if (text == null || text === '') {
    throw new Error('createEmbedding: text must be a non-empty string')
  }

  const embeddingResponse = await openai.createEmbedding({
    model: 'text-embedding-ada-002',
    input: text,
  })

  // Walk the payload defensively so a malformed response produces a
  // descriptive error instead of a destructuring TypeError.
  const embedding = embeddingResponse?.data?.data?.[0]?.embedding
  if (!Array.isArray(embedding)) {
    throw new Error(
      'createEmbedding: OpenAI response did not contain an embedding'
    )
  }

  return embedding
}
module.exports = { createEmbedding }
const mongoose = require('mongoose')
const { Schema } = mongoose

// Schema describing an uploaded document as it is stored in MongoDB.
const DocumentUploadSchema = new Schema({
  title: String,
  description: String,
  fileName: String,
  uploadDate: { type: Date, default: Date.now },
  // Vector embedding of the document. With OpenAI ada embeddings this
  // array holds 1536 numbers.
  embedding: [Number],
  // Other fields can be added here as needed.
})

// Model built from the schema above.
const UploadedDocument = mongoose.model('UploadedDocument', DocumentUploadSchema)
module.exports = UploadedDocument
const express = require('express')
const UploadedDocument = require('../models/DocumentUpload.js')
const { createEmbedding } = require('../utils/createEmbedding.js')
const db = require('../MongoDB.js')
const { runWebScraper } = require('../utils/runWebScraper.js')
const { hitOpenAiApi } = require('../utils/hitOpenAiApi.js')
// Router on which this module's endpoints are registered.
const router = express.Router()
// Collection handle used by the similarity search in the /query-embedding
// endpoint (queried directly rather than through the Mongoose model).
const collection = db.collection('uploadeddocuments') // Replace with your collection's name
// POST /document
// Scrapes the page at `req.body.url`, creates an embedding for the scraped
// text using OpenAI, then stores the text and its embedding (an array of
// floating point numbers) in MongoDB.
//
// Responses:
//   201 - { message, document } containing the saved document
//   400 - `url` is missing or is not an absolute http(s) URL
//   422 - the page yielded no text to embed
//   500 - scraping, embedding or saving failed
//
// NOTE(security): the server fetches a caller-supplied URL, so this endpoint
// can be pointed at internal hosts (SSRF). Restrict the allowed hosts before
// exposing it to untrusted clients.
router.post('/document', async (req, res) => {
  // req.body is undefined when no body parser ran; treat that as "no url".
  const { url } = req.body ?? {}

  let parsedUrl = null
  if (typeof url === 'string') {
    try {
      parsedUrl = new URL(url)
    } catch {
      parsedUrl = null
    }
  }
  if (!parsedUrl || !['http:', 'https:'].includes(parsedUrl.protocol)) {
    return res.status(400).json({
      error: 'Bad request',
      message: '`url` must be an absolute http(s) URL',
    })
  }

  try {
    const { text } = await runWebScraper(url)
    if (typeof text !== 'string' || text.trim() === '') {
      return res.status(422).json({
        error: 'Unprocessable entity',
        message: 'No text could be extracted from the given url',
      })
    }

    // there is a limit to text length, need to split text
    const embedding = await createEmbedding(text)

    const newDoc = new UploadedDocument({
      description: text,
      embedding,
    })
    const savedDoc = await newDoc.save()

    res.status(201).json({
      message: 'Document uploaded successfully',
      document: savedDoc,
    })
  } catch (err) {
    console.error('err: ', err)
    res.status(500).json({
      error: 'Internal server error',
      message: err.message,
    })
  }
})
// Runs a knnBeta $search against the `embedding` field of the collection and
// returns the matches with their description and search score.
// Errors are deliberately NOT caught here so the route handler can report
// them instead of continuing with an undefined result.
async function findSimilarDocuments(embedding) {
  return collection
    .aggregate([
      {
        $search: {
          knnBeta: {
            vector: embedding,
            // path is the path to the embedding field in the mongodb collection documentupload
            path: 'embedding',
            // change k to the number of documents you want to be returned
            k: 5,
          },
        },
      },
      {
        $project: {
          description: 1,
          score: { $meta: 'searchScore' },
        },
      },
    ])
    .toArray()
}

// POST /query-embedding
// Turns `req.body.query` into an embedding, compares it with the embeddings
// stored in MongoDB, and asks OpenAI to answer the query using the
// best-scoring document's description as context.
//
// Responses:
//   200 - the answer text
//   400 - `query` is missing or empty
//   404 - no similar documents were found
//   500 - embedding, search or completion failed
router.post('/query-embedding', async (req, res) => {
  try {
    // req.body is undefined when no body parser ran; treat that as "no query".
    const { query } = req.body ?? {}
    if (typeof query !== 'string' || query.trim() === '') {
      return res.status(400).json({
        error: 'Bad request',
        message: '`query` must be a non-empty string',
      })
    }

    const embedding = await createEmbedding(query)
    const similarDocuments = await findSimilarDocuments(embedding)
    console.log('similarDocuments: ', similarDocuments)

    // reduce() without an initial value throws on an empty array, so answer
    // explicitly when the search produced nothing.
    if (similarDocuments.length === 0) {
      return res.status(404).json({
        error: 'Not found',
        message: 'No similar documents found',
      })
    }

    // gets the document with the highest score
    const highestScoreDoc = similarDocuments.reduce((highest, current) => {
      return highest.score > current.score ? highest : current
    })
    console.log('highestScoreDoc', highestScoreDoc)

    const prompt = `Based on this context: ${highestScoreDoc.description} \n\n Query: ${query} \n\n Answer:`
    const answer = await hitOpenAiApi(prompt)
    console.log('answer: ', answer)
    res.send(answer)
  } catch (err) {
    console.error('err: ', err)
    res.status(500).json({
      error: 'Internal server error',
      message: err.message,
    })
  }
})
module.exports = router
const { Configuration, OpenAIApi } = require('openai')
require('dotenv').config()
// OpenAI client shared by this module; the API key is read from the
// OPENAI_API_KEY environment variable when the module is loaded.
const configuration = new Configuration({ apiKey: process.env.OPENAI_API_KEY })
const openai = new OpenAIApi(configuration)
/**
 * Sends `prompt` as the user message of a chat completion
 * (model `gpt-3.5-turbo-16k`, temperature 0.5, non-streaming).
 *
 * @param {string} prompt - Content of the user message.
 * @returns {Promise<string|undefined>} Content of the first choice, or
 *   `undefined` when the response carries no choices.
 */
async function hitOpenAiApi(prompt) {
  const response = await openai.createChatCompletion({
    model: 'gpt-3.5-turbo-16k',
    stream: false,
    temperature: 0.5,
    messages: [
      {
        role: 'system',
        content: 'You are a helpful assistant.',
      },
      {
        role: 'user',
        content: prompt,
      },
    ],
  })

  // `choices?.[0]` keeps the whole chain optional; a plain `choices[0]`
  // would throw when the payload has no `choices` array.
  return response?.data?.choices?.[0]?.message?.content
}
module.exports = { hitOpenAiApi }
// Use ChatGPT or Stack Overflow for help writing this part.
// This is just a simple web scraper using Puppeteer.
const puppeteer = require('puppeteer')
// import screenshot from '../screenshot.js'
// Puppeteer launch options selected when NODE_ENV is 'production'.
const PROD_CONFIG = {
  headless: true,
  ignoreHTTPSErrors: true,
  args: [
    '--no-sandbox',
    '--disable-setuid-sandbox',
  ],
  ignoreDefaultArgs: [
    '--disable-extensions',
  ],
}
// Puppeteer launch options used outside production: a visible (non-headless)
// locally installed Chrome.
// The executable path defaults to the macOS Chrome location; set the
// PUPPETEER_EXECUTABLE_PATH environment variable to point at your own browser
// instead of editing this file.
const DEV_CONFIG = {
  // `||` rather than `??`: an empty variable is not a usable path either.
  executablePath:
    process.env.PUPPETEER_EXECUTABLE_PATH ||
    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
  headless: false,
  ignoreHTTPSErrors: true,
}
/**
 * Loads `url` in a Puppeteer-controlled browser and returns the text obtained
 * by selecting the first element matching `*` and reading the selection.
 *
 * PROD_CONFIG is used when NODE_ENV is 'production', DEV_CONFIG otherwise.
 * The elapsed time is reported under the 'puppeteer' console timer.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<{text: string, url_from_chunk: string}>} The selected
 *   text together with the url it came from.
 * @throws Propagates any error raised while launching, navigating or
 *   evaluating; the browser is closed before the error reaches the caller.
 */
const runWebScraper = async (url) => {
  const browser = await puppeteer.launch(
    process.env.NODE_ENV === 'production' ? PROD_CONFIG : DEV_CONFIG
  )
  console.time('puppeteer')

  try {
    const page = await browser.newPage()
    // Only wait for the DOM to be parsed, not for every network request.
    await page.goto(url, { waitUntil: 'domcontentloaded' })

    // Runs inside the page: select the matched element and read the
    // selection back as a string.
    const content = await page.$eval('*', (el) => {
      const selection = window.getSelection()
      const range = document.createRange()
      range.selectNode(el)
      selection.removeAllRanges()
      selection.addRange(range)
      return window.getSelection().toString()
    })

    await page.close()
    return { text: content, url_from_chunk: url }
  } finally {
    // Always release the browser process, including when navigation or
    // evaluation throws; otherwise every failed scrape leaks a Chrome instance.
    await browser.close()
    console.timeEnd('puppeteer')
  }
}
// FOR TESTING
// const URL = 'https://www.npmjs.com/package/html-to-text'
// runWebScraper(URL)
module.exports = { runWebScraper }
@gagan1228
Copy link

Error: Configuration is not a constructor

@arshadazaad3
Copy link

arshadazaad3 commented May 10, 2024

Error: Configuration is not a constructor

"openai": "^4.44.0",

Change the import as shown below and configure the client using the OpenAI class:

const { OpenAI } = require("openai");

const openai = new OpenAI({
  apiKey: <your_key>,
});

Also change the call that creates the embedding, as follows:

openai.createEmbedding -> openai.embeddings.create

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment