Created
August 16, 2023 21:06
-
-
Save toshvelaga/2bd8b5efb14c145892a14bcb663c7342 to your computer and use it in GitHub Desktop.
Create vector embeddings using MongoDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { Configuration, OpenAIApi } = require('openai') | |
const configuration = new Configuration({ | |
apiKey: process.env.OPENAI_API_KEY, | |
}) | |
const openai = new OpenAIApi(configuration) | |
/**
 * Turns text into a vector embedding using OpenAI's
 * `text-embedding-ada-002` model.
 *
 * @param {string} text - Text to embed; forwarded as the request `input`.
 * @returns {Promise<number[]>} Embedding of the first item in the response.
 * @throws {TypeError} If `text` is null, undefined, or an empty string.
 * @throws {Error} If the API response does not contain an embedding.
 */
const createEmbedding = async (text) => {
  // Fail fast instead of spending an API round-trip on input that cannot be embedded.
  if (text == null || text === '') {
    throw new TypeError('createEmbedding: text must be a non-empty string')
  }

  const embeddingResponse = await openai.createEmbedding({
    model: 'text-embedding-ada-002',
    input: text,
  })

  // Read the first embedding defensively: destructuring a missing or empty
  // `data` array would otherwise throw an opaque "not iterable" TypeError.
  const embedding = embeddingResponse?.data?.data?.[0]?.embedding
  if (!Array.isArray(embedding)) {
    throw new Error('createEmbedding: OpenAI response contained no embedding')
  }

  return embedding
}
module.exports = { createEmbedding } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const mongoose = require('mongoose') | |
const Schema = mongoose.Schema | |
// Schema for an uploaded document together with its vector embedding.
const DocumentUploadSchema = new Schema({
  title: { type: String },
  description: { type: String },
  fileName: { type: String },
  // Filled in with the current time when the document is created.
  uploadDate: { type: Date, default: Date.now },
  // The vector embedding, stored as a plain array of numbers
  // (1536 entries when using OpenAI ada embeddings).
  embedding: { type: [Number] },
  // Add further fields here as needed.
})

// Model built from the schema above.
const UploadedDocument = mongoose.model('UploadedDocument', DocumentUploadSchema)
module.exports = UploadedDocument |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const express = require('express') | |
const UploadedDocument = require('../models/DocumentUpload.js') | |
const { createEmbedding } = require('../utils/createEmbedding.js') | |
const db = require('../MongoDB.js') | |
const { runWebScraper } = require('../utils/runWebScraper.js') | |
const { hitOpenAiApi } = require('../utils/hitOpenAiApi.js') | |
const router = express.Router() | |
const collection = db.collection('uploadeddocuments') // Replace with your collection's name | |
// POST /document
// Body: { url: string }
// Scrapes the page at `url`, creates an embedding of its text with OpenAI,
// and stores the text plus the embedding (an array of numbers) in MongoDB.
// Responds 201 with the saved document, 400 for a missing/invalid url,
// 422 when no text could be scraped, and 500 for any other failure.
router.post('/document', async (req, res) => {
  try {
    const { url } = req.body ?? {}

    // `url` is untrusted and is handed to a headless browser, so accept only
    // absolute http(s) URLs (rejects file:, chrome:, javascript:, ...).
    let parsedUrl = null
    try {
      parsedUrl = new URL(url)
    } catch {
      parsedUrl = null
    }
    if (!parsedUrl || !['http:', 'https:'].includes(parsedUrl.protocol)) {
      return res.status(400).json({
        error: 'Bad request',
        message: 'Body must include an absolute http(s) "url"',
      })
    }

    const { text } = await runWebScraper(parsedUrl.href)
    if (typeof text !== 'string' || text.trim() === '') {
      return res.status(422).json({
        error: 'Unprocessable entity',
        message: 'No text could be extracted from the page',
      })
    }

    // TODO: there is a limit to text length, need to split text
    const embedding = await createEmbedding(text)

    const newDoc = new UploadedDocument({
      description: text,
      embedding,
    })
    const savedDoc = await newDoc.save()

    res.status(201).json({
      message: 'Document uploaded successfully',
      document: savedDoc,
    })
  } catch (err) {
    console.error('err: ', err)
    res.status(500).json({
      error: 'Internal server error',
      message: err.message,
    })
  }
})
// POST /query-embedding
// Body: { query: string }
// Turns the query text into an embedding, looks up the most similar stored
// documents in MongoDB, and asks OpenAI to answer the query using the
// best-scoring document's description as context.
// Responds with the answer text, 400 for a missing/empty query, 404 when no
// similar document exists, and 500 for any other failure.
router.post('/query-embedding', async (req, res) => {
  try {
    const { query } = req.body ?? {}
    if (typeof query !== 'string' || query.trim() === '') {
      return res.status(400).json({
        error: 'Bad request',
        message: 'Body must include a non-empty "query" string',
      })
    }

    const embedding = await createEmbedding(query)

    // Search errors are deliberately NOT caught here: they must reach the
    // outer catch so the client gets the real failure instead of a
    // follow-on TypeError from working with `undefined` results.
    // NOTE(review): `knnBeta` appears to be superseded by `$vectorSearch`
    // in newer Atlas releases - confirm against your cluster version.
    const similarDocuments = await collection
      .aggregate([
        {
          $search: {
            knnBeta: {
              vector: embedding,
              // path to the embedding field in the collection's documents
              path: 'embedding',
              // number of documents to return
              k: 5,
            },
          },
        },
        {
          $project: {
            description: 1,
            score: { $meta: 'searchScore' },
          },
        },
      ])
      .toArray()

    // `reduce` without an initial value throws on an empty array.
    if (similarDocuments.length === 0) {
      return res.status(404).json({
        error: 'Not found',
        message: 'No similar documents were found for this query',
      })
    }

    // Pick the document with the highest score (the later one wins a tie).
    const highestScoreDoc = similarDocuments.reduce((highest, current) =>
      highest.score > current.score ? highest : current
    )

    const prompt = `Based on this context: ${highestScoreDoc.description} \n\n Query: ${query} \n\n Answer:`
    const answer = await hitOpenAiApi(prompt)

    res.send(answer)
  } catch (err) {
    console.error('err: ', err)
    res.status(500).json({
      error: 'Internal server error',
      message: err.message,
    })
  }
})
module.exports = router |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { Configuration, OpenAIApi } = require('openai') | |
require('dotenv').config() | |
const configuration = new Configuration({ | |
apiKey: process.env.OPENAI_API_KEY, | |
}) | |
const openai = new OpenAIApi(configuration) | |
/**
 * Sends `prompt` as a single user message to the `gpt-3.5-turbo-16k` chat
 * completion endpoint and returns the text of the first choice.
 *
 * @param {string} prompt - Content of the user message.
 * @returns {Promise<string|undefined>} The first choice's message content,
 *   or `undefined` when the response carries no choices.
 */
async function hitOpenAiApi(prompt) {
  const response = await openai.createChatCompletion({
    model: 'gpt-3.5-turbo-16k',
    stream: false,
    temperature: 0.5,
    messages: [
      {
        role: 'system',
        content: 'You are a helpful assistant.',
      },
      {
        role: 'user',
        content: prompt,
      },
    ],
  })

  // `choices` itself may be absent, so index it with `?.[0]`; a plain `[0]`
  // would throw and defeat the rest of the optional chain.
  return response?.data?.choices?.[0]?.message?.content
}
module.exports = { hitOpenAiApi } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// use chatGPT or StackOverflow for this | |
// this is just a simple web scraper using puppeteer
const puppeteer = require('puppeteer') | |
// import screenshot from '../screenshot.js' | |
const PROD_CONFIG = { | |
headless: true, | |
ignoreHTTPSErrors: true, | |
args: ['--no-sandbox', '--disable-setuid-sandbox'], | |
ignoreDefaultArgs: ['--disable-extensions'], | |
} | |
// this is for my computer, you will have to change this to your own path or just not use it on your computer | |
const DEV_CONFIG = { | |
executablePath: | |
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', | |
headless: false, | |
ignoreHTTPSErrors: true, | |
} | |
/**
 * Opens `url` in a puppeteer-controlled browser and returns the page's text,
 * obtained by selecting the first element matched by `*` and reading the
 * selection as a string.
 *
 * Uses PROD_CONFIG when NODE_ENV is 'production', DEV_CONFIG otherwise.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<{ text: string, url_from_chunk: string }>} The extracted
 *   text and the url it came from.
 */
const runWebScraper = async (url) => {
  const browser = await puppeteer.launch(
    process.env.NODE_ENV === 'production' ? PROD_CONFIG : DEV_CONFIG
  )
  console.time('puppeteer')

  try {
    const page = await browser.newPage()
    // 'domcontentloaded' returns sooner than 'networkidle0', at the cost of
    // possibly missing content that is rendered later by scripts.
    await page.goto(url, { waitUntil: 'domcontentloaded' })

    const content = await page.$eval('*', (el) => {
      const selection = window.getSelection()
      const range = document.createRange()
      range.selectNode(el)
      selection.removeAllRanges()
      selection.addRange(range)
      return window.getSelection().toString()
    })

    return { text: content, url_from_chunk: url }
  } finally {
    // Always shut the browser down, even when navigation or extraction
    // fails, so no browser process is leaked. Closing the browser also
    // closes its pages.
    await browser.close()
    console.timeEnd('puppeteer')
  }
}
// FOR TESTING | |
// const URL = 'https://www.npmjs.com/package/html-to-text' | |
// runWebScraper(URL) | |
module.exports = { runWebScraper } |
Error: Configuration is not a constructor
"openai": "^4.44.0",
Change the import as shown below and configure the client using the OpenAI class:
const { OpenAI } = require("openai");
const openai = new OpenAI({
apiKey: <your_key>,
});
Also replace the old embedding call with its new equivalent:
openai.createEmbedding
-> openai.embeddings.create
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Error: Configuration is not a constructor