Skip to content

Instantly share code, notes, and snippets.

@N8python
Last active May 27, 2024 19:53
Show Gist options
  • Save N8python/ed615825bff19e57ee180baec5efe67b to your computer and use it in GitHub Desktop.
It's pretty simple to make a half-decent search agent.
import googleIt from 'google-it';
import axios from 'axios';
import cheerio from 'cheerio';
import OpenAI from 'openai';
import readlineSync from 'readline-sync';
// OpenAI-compatible client pointed at a local inference server
// (e.g. LM Studio); the API key is a placeholder the local server ignores.
const openai = new OpenAI({
    apiKey: 'My API Key',
    baseURL: "http://localhost:1234/v1"
});
/**
 * GET `url` with axios, rejecting with "Request timed out" if no response
 * arrives within `timeout` ms.
 *
 * Fix over the original: the timeout timer is captured and cleared once the
 * race settles. The original never called clearTimeout, so every successful
 * (or quickly-failed) request still held the event loop open for up to
 * `timeout` ms via the pending timer.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [timeout=5000] - Milliseconds to wait before rejecting.
 * @returns {Promise<import('axios').AxiosResponse>} the axios response.
 */
function fetchWithTimeout(url, timeout = 5000) {
    let timer;
    const timeoutPromise = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('Request timed out')), timeout);
    });
    return Promise.race([axios.get(url), timeoutPromise])
        .finally(() => clearTimeout(timer));
}
// Function to perform Google search and scrape URLs
/**
 * Searches Google for `query`, scrapes the text of each result page, and
 * asks the local LLM to answer the query using the scraped text as context.
 * Prints the answer (or an error message) to the console.
 *
 * Fix over the original: the original filtered `responses` down to fulfilled
 * entries before building `htmls`/`titles`/`texts`, then indexed those arrays
 * by position in the UNFILTERED `urls` array. Whenever any request failed or
 * timed out, every later URL was paired with the wrong page's title and text.
 * Here each URL is paired with its own settled response BEFORE filtering, so
 * alignment is preserved.
 *
 * @param {string} query - The user's question.
 * @returns {Promise<void>}
 */
async function answerQuery(query) {
    try {
        // Search using google-it
        const results = await googleIt({ 'query': query, 'disableConsole': true });
        const urls = results.map(result => result.link);
        if (urls.length > 0) {
            // Fetch all result pages in parallel; allSettled so one slow or
            // broken site doesn't sink the whole batch.
            const responses = await Promise.allSettled(urls.map(url => fetchWithTimeout(url, 5000)));
            const pages = urls
                .map((url, index) => ({ url, response: responses[index] }))
                .filter(({ response }) => response.status === 'fulfilled')
                .map(({ url, response }) => {
                    const $ = cheerio.load(response.value.data);
                    // Join the text of every element matching the selector.
                    const extractText = selector => $(selector)
                        .map((index, element) => $(element).text())
                        .get()
                        .join(' ');
                    return {
                        url,
                        title: extractText('title'),
                        text: extractText('p')
                    };
                })
                .filter(page => page.text && page.text.length > 0);
            let finalString = pages
                .map(page => `***${page.title.replace(/\t/g, '')}**: \n\n${page.text.replace(/\t/g, '')}`)
                .join('\n\n');
            // Replace mis-decoded replacement characters from badly-encoded pages.
            finalString = finalString.replace(/�/g, '?');
            // Keep the prompt within the local model's context budget.
            if (finalString.length > 48000) {
                finalString = finalString.substring(0, 48000) + "[Truncated]";
            }
            const prompt = `Extract & summarize relevant information from this content: \n\n${finalString}\n\n In order to answer this query: ${query} \n \n When answering, use common sense, report only high-quality information, and if the answer is not clear, go off what you innately know. If the information provided is nonsensical or unrelated, ignore it and simply answer the query. \n \n Answer:`;
            const completion = await openai.chat.completions.create({
                messages: [{ role: 'user', content: prompt }],
                model: 'phi-3',
            });
            console.log("Answer:");
            console.log(completion.choices[0].message.content);
        }
    } catch (error) {
        console.log(`Error during search or scraping: ${error}`);
    }
}
// Simple REPL: read a question, answer it, repeat until the process is killed.
for (;;) {
    await answerQuery(readlineSync.question('Enter a question: '));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment