Skip to content

Instantly share code, notes, and snippets.

@N8python
Last active May 27, 2024 19:53
Show Gist options
  • Save N8python/ed615825bff19e57ee180baec5efe67b to your computer and use it in GitHub Desktop.
It's pretty simple to make a half-decent search agent.
import googleIt from 'google-it';
import axios from 'axios';
import cheerio from 'cheerio';
import OpenAI from 'openai';
import readlineSync from 'readline-sync';
// OpenAI-compatible client pointed at a local inference server
// (e.g. LM Studio); the API key is a placeholder the local server ignores.
const openai = new OpenAI({
    apiKey: 'My API Key',
    baseURL: "http://localhost:1234/v1"
});
/**
 * GET `url` with axios, rejecting with "Request timed out" if no response
 * arrives within `timeout` ms.
 *
 * Fix over the original: the timeout timer is captured and cleared once the
 * race settles. The original never called clearTimeout, so every successful
 * (or quickly-failed) request still held the event loop open for up to
 * `timeout` ms via the pending timer.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [timeout=5000] - Milliseconds to wait before rejecting.
 * @returns {Promise<import('axios').AxiosResponse>} the axios response.
 */
function fetchWithTimeout(url, timeout = 5000) {
    let timer;
    const timeoutPromise = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('Request timed out')), timeout);
    });
    return Promise.race([axios.get(url), timeoutPromise])
        .finally(() => clearTimeout(timer));
}
// Function to perform Google search and scrape URLs
/**
 * Searches Google for `query`, scrapes the text of each result page, and
 * asks the local LLM to answer the query using the scraped text as context.
 * Prints the answer (or an error message) to the console.
 *
 * Fix over the original: the original filtered `responses` down to fulfilled
 * entries before building `htmls`/`titles`/`texts`, then indexed those arrays
 * by position in the UNFILTERED `urls` array. Whenever any request failed or
 * timed out, every later URL was paired with the wrong page's title and text.
 * Here each URL is paired with its own settled response BEFORE filtering, so
 * alignment is preserved.
 *
 * @param {string} query - The user's question.
 * @returns {Promise<void>}
 */
async function answerQuery(query) {
    try {
        // Search using google-it
        const results = await googleIt({ 'query': query, 'disableConsole': true });
        const urls = results.map(result => result.link);
        if (urls.length > 0) {
            // Fetch all result pages in parallel; allSettled so one slow or
            // broken site doesn't sink the whole batch.
            const responses = await Promise.allSettled(urls.map(url => fetchWithTimeout(url, 5000)));
            const pages = urls
                .map((url, index) => ({ url, response: responses[index] }))
                .filter(({ response }) => response.status === 'fulfilled')
                .map(({ url, response }) => {
                    const $ = cheerio.load(response.value.data);
                    // Join the text of every element matching the selector.
                    const extractText = selector => $(selector)
                        .map((index, element) => $(element).text())
                        .get()
                        .join(' ');
                    return {
                        url,
                        title: extractText('title'),
                        text: extractText('p')
                    };
                })
                .filter(page => page.text && page.text.length > 0);
            let finalString = pages
                .map(page => `***${page.title.replace(/\t/g, '')}**: \n\n${page.text.replace(/\t/g, '')}`)
                .join('\n\n');
            // Replace mis-decoded replacement characters from badly-encoded pages.
            finalString = finalString.replace(/�/g, '?');
            // Keep the prompt within the local model's context budget.
            if (finalString.length > 48000) {
                finalString = finalString.substring(0, 48000) + "[Truncated]";
            }
            const prompt = `Extract & summarize relevant information from this content: \n\n${finalString}\n\n In order to answer this query: ${query} \n \n When answering, use common sense, report only high-quality information, and if the answer is not clear, go off what you innately know. If the information provided is nonsensical or unrelated, ignore it and simply answer the query. \n \n Answer:`;
            const completion = await openai.chat.completions.create({
                messages: [{ role: 'user', content: prompt }],
                model: 'phi-3',
            });
            console.log("Answer:");
            console.log(completion.choices[0].message.content);
        }
    } catch (error) {
        console.log(`Error during search or scraping: ${error}`);
    }
}
// Simple REPL: read a question, answer it, repeat until the process is killed.
for (;;) {
    await answerQuery(readlineSync.question('Enter a question: '));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment