Skip to content

Instantly share code, notes, and snippets.

@SrJSDev
Last active March 5, 2024 07:14
Show Gist options
  • Save SrJSDev/1e4e4cdc3c6209d468cf9ce148d7d2a7 to your computer and use it in GitHub Desktop.
Save SrJSDev/1e4e4cdc3c6209d468cf9ce148d7d2a7 to your computer and use it in GitHub Desktop.
GPT-4 Vision API + Puppeteer = Easy Web Scraping
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import OpenAI from 'openai';
import readline from 'readline';
import fs from 'fs';
// Register the stealth plugin on the puppeteer-extra instance before main()
// launches any browser.
puppeteer.use(StealthPlugin());
// OpenAI client constructed with no explicit options.
// NOTE(review): presumably credentials come from the environment — confirm
// against the SDK configuration.
// `timeout` is the upper bound, in milliseconds, handed to sleep() when the
// navigation helpers race it against the page 'load' event.
const openai = new OpenAI();
const timeout = 8000;
// Start the main function. The returned promise is neither awaited nor given a
// rejection handler here.
// NOTE(review): main() is invoked before the module-level consts declared
// further down (systemMessage, sleep, ...) are initialised; this relies on
// main() not touching them before its first await — confirm before reordering.
main();
/**
 * Interactive entry point.
 *
 * Launches a browser page, then loops forever: it sends either the user's text
 * prompt or the latest screenshot to the model, prints the reply, and lets
 * handleAssistantResponseSS act on it. When the reply triggers no browser
 * action (or the action failed), the next iteration asks the user for a new
 * text prompt.
 *
 * @returns {Promise<void>} Only settles if something inside the loop throws.
 */
async function main() {
  console.log("###########################################");
  console.log("# GPT4V-Browsing by Unconventional Coding #");
  console.log("###########################################\n");

  // Instructions that accompany every screenshot sent to the model.
  const screenshotInstructions = [
    "Here's the screenshot of the website you are on right now.",
    'You can click on links with {"click": "Link text"}.',
    'Or you can crawl to another URL if this one is incorrect with {"url": "url goes here"}.',
    "If you find the answer to the user's question, you can respond normally.",
  ].join("\n");

  const browser = await puppeteer.launch({ headless: "new" });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1200, height: 1200, deviceScaleFactor: 1.75 });

    const messages = [{ role: "system", content: systemMessage }];
    console.log("GPT: How can I assist you today?");
    let userPrompt = "";

    while (true) {
      // Decide which user prompt to provide, text or screenshot.
      if (!userPrompt) {
        userPrompt = await input("You: ");
        console.log();
        messages.push({ role: "user", content: userPrompt });
      } else {
        const base64Image = await imageToBase64("screenshot.jpg");
        messages.push({
          role: "user",
          content: [
            // The Chat Completions API documents image_url as an object
            // carrying the (data) URL, not a bare string.
            { type: "image_url", image_url: { url: base64Image } },
            { type: "text", text: screenshotInstructions },
          ],
        });
      }

      const response = await openai.chat.completions.create({
        model: "gpt-4-vision-preview",
        max_tokens: 1024,
        messages,
      });
      const responseText = response.choices[0].message.content;
      messages.push({ role: "assistant", content: responseText });
      console.log("GPT: " + responseText);

      // Declared locally: assigning to an undeclared name throws in an ES module.
      const screenShotOf = await handleAssistantResponseSS(page, messages, responseText);
      if (!screenShotOf) {
        // The model answered in plain text (logged above) or the requested
        // action failed, so the next iteration needs a text prompt from the
        // user. To start a clean conversation instead, truncate `messages`
        // back to the system message here (messages.length = 1) and print the
        // greeting again.
        userPrompt = "";
      }
    }
  } finally {
    // Do not leave the browser process running when the loop throws.
    await browser.close();
  }
}
/**
 * Read a file from disk and return its contents as a JPEG data URL.
 *
 * @param {string} imageFile - Path of the file to read.
 * @returns {Promise<string>} `data:image/jpeg;base64,` followed by the
 *   base64-encoded file contents.
 * @throws Re-throws the read error after logging it.
 */
async function imageToBase64(imageFile) {
  let encoded;
  try {
    const fileBytes = await fs.promises.readFile(imageFile);
    encoded = fileBytes.toString('base64');
  } catch (error) {
    console.error('Error reading the file:', error);
    throw error;
  }
  return `data:image/jpeg;base64,${encoded}`;
}
/**
 * Ask one question on the terminal and return the typed answer.
 *
 * A fresh readline interface on stdin/stdout is created for each call and
 * closed as soon as the answer arrives.
 *
 * @param {string} text - Prompt shown to the user.
 * @returns {Promise<string>} The line the user entered.
 */
async function input(text) {
  const prompter = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  const askOnce = (resolve) => {
    prompter.question(text, (answer) => {
      prompter.close();
      resolve(answer);
    });
  };
  return new Promise(askOnce);
}
/**
 * Resolve after a delay.
 *
 * @param {number} milliseconds - Delay handed to setTimeout.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
const sleep = (milliseconds) => {
  return new Promise((wake) => {
    setTimeout(wake, milliseconds);
  });
};
/**
 * Remove one attribute from an element.
 *
 * @param {{ removeAttribute: (name: string) => void }} element - Target element.
 * @param {string} attributeName - Name of the attribute to drop.
 * @returns {undefined}
 */
const removeAttribute = (element, attributeName) => void element.removeAttribute(attributeName);
/**
 * Decide whether an element counts as visible.
 *
 * An element is visible when neither it nor any ancestor has a hiding computed
 * style, and its bounding rectangle lies entirely inside the viewport. Relies
 * on the browser globals `window` and `document`.
 *
 * @param {Element|null|undefined} el - Element to test.
 * @returns {boolean} `false` for a missing element or any failed check.
 */
const isElementVisible = (el) => {
  if (!el) {
    return false;
  }

  // NOTE(review): width/height are compared with the literal string '0';
  // getComputedStyle usually reports lengths with a unit (e.g. '0px'), so these
  // two comparisons may never reject anything — confirm the intent.
  const hasVisibleStyle = (node) => {
    const { width, height, opacity, display, visibility } = window.getComputedStyle(node);
    const isHidden =
      width === '0' ||
      height === '0' ||
      opacity === '0' ||
      display === 'none' ||
      visibility === 'hidden';
    return !isHidden;
  };

  // Walk from the element itself up through every ancestor.
  for (let node = el; node; node = node.parentElement) {
    if (!hasVisibleStyle(node)) {
      return false;
    }
  }

  // Finally, the whole rectangle has to fit inside the viewport.
  const { top, left, bottom, right } = el.getBoundingClientRect();
  return (
    top >= 0 &&
    left >= 0 &&
    bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
    right <= (window.innerWidth || document.documentElement.clientWidth)
  );
};
/**
 * Outline one element in red and, when it is big enough and visible, tag it
 * with a `gpt-link-text` attribute holding its text reduced to letters, digits
 * and spaces.
 *
 * The callback handed to page.evaluate runs inside the page, not in Node, so
 * it cannot reach module-level bindings such as isElementVisible. The
 * visibility check is therefore defined inside the callback; it applies the
 * same rules as the module-level isElementVisible.
 *
 * @param {object} page - Puppeteer page that owns the element.
 * @param {object} element - Element handle to highlight.
 * @returns {Promise<void>}
 */
async function highlightElement(page, element) {
  await page.evaluate((node) => {
    // Style-level visibility of a single node (kept identical to the checks in
    // the module-level isElementVisible).
    const hasVisibleStyle = (candidate) => {
      const style = window.getComputedStyle(candidate);
      return style.width !== '0' &&
        style.height !== '0' &&
        style.opacity !== '0' &&
        style.display !== 'none' &&
        style.visibility !== 'hidden';
    };

    // Visible = no hidden node on the ancestor chain and fully inside the viewport.
    const isVisible = (target) => {
      if (!target) return false;
      for (let current = target; current; current = current.parentElement) {
        if (!hasVisibleStyle(current)) return false;
      }
      const rect = target.getBoundingClientRect();
      return (
        rect.top >= 0 &&
        rect.left >= 0 &&
        rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
        rect.right <= (window.innerWidth || document.documentElement.clientWidth)
      );
    };

    node.style.border = "1px solid red";
    const position = node.getBoundingClientRect();
    if (position.width > 5 && position.height > 5 && isVisible(node)) {
      const linkText = node.textContent.replace(/[^a-zA-Z0-9 ]/g, '');
      node.setAttribute("gpt-link-text", linkText);
    }
  }, element);
}
/**
 * Click the tagged element whose `gpt-link-text` attribute matches `linkText`.
 *
 * An exact match wins and stops the search; otherwise the last element whose
 * attribute contains `linkText` is clicked.
 *
 * @param {object} page - Puppeteer page to search.
 * @param {string} linkText - Sanitized link text to look for.
 * @returns {Promise<void>}
 * @throws {Error} "Can't find link" when no tagged element matches.
 */
async function clickElement(page, linkText) {
  const elements = await page.$$('[gpt-link-text]');
  let partial;
  let exact;
  for (const element of elements) {
    // Puppeteer element handles do not expose getAttribute(); the attribute
    // has to be read inside the page.
    const attributeValue = await page.evaluate(
      (node) => node.getAttribute('gpt-link-text'),
      element,
    );
    if (attributeValue === linkText) {
      exact = element;
      break;
    }
    if (attributeValue.includes(linkText)) {
      partial = element;
    }
  }

  const target = exact ?? partial;
  if (!target) {
    throw new Error("Can't find link");
  }
  await target.click();
}
/**
 * Carry out a `{"click": "..."}` instruction found in the model's reply and
 * capture a fresh screenshot.
 *
 * The link text is cut out of the reply and reduced to letters, digits and
 * spaces, the same sanitisation used for the `gpt-link-text` attribute.
 *
 * NOTE(review): highlightLinks and waitForEvent are called here but are not
 * defined in the part of the file visible to this review — confirm they exist.
 *
 * @param {object} page - Puppeteer page to act on.
 * @param {Array<object>} messages - Conversation; receives an error message
 *   for the model when the click fails.
 * @param {string} messageText - Model reply containing the click instruction.
 * @returns {Promise<string|false>} The sanitized link text on success,
 *   `false` when any step failed.
 */
async function handleLinkClickSS(page, messages, messageText) {
  const linkText = messageText.split('{"click": "')[1].split('"}')[0].replace(/[^a-zA-Z0-9 ]/g, '');
  console.log("Clicking on " + linkText);
  try {
    await clickElement(page, linkText);
    // Wait for the page 'load' event, but never longer than `timeout` ms.
    await Promise.race([waitForEvent(page, 'load'), sleep(timeout)]);
    await highlightLinks(page);
    await page.screenshot({ path: "screenshot.jpg", quality: 100 });
    return linkText;
  } catch (error) {
    console.log("ERROR: Clicking failed");
    // Keep the underlying cause visible; otherwise every failure in the steps
    // above looks like a missing link.
    console.error(error);
    messages.push({ "role": "user", "content": "ERROR: I was unable to click that element" });
    return false;
  }
}
/**
 * Open `url` in the page, highlight its links and capture a screenshot.
 *
 * Links are highlighted once after DOMContentLoaded and again after the
 * 'load' event (or after `timeout` ms, whichever comes first). A failure in
 * any step is reported to the model through `messages`, in the same way the
 * click handler reports a failed click, instead of aborting the program.
 *
 * NOTE(review): highlightLinks and waitForEvent are called here but are not
 * defined in the part of the file visible to this review — confirm they exist.
 *
 * @param {object} page - Puppeteer page to navigate.
 * @param {Array<object>} messages - Conversation; receives an error message
 *   for the model when navigation fails.
 * @param {string} url - Address to open (supplied by the model).
 * @returns {Promise<string|false>} The url on success, `false` on failure.
 */
async function navigateToUrl(page, messages, url) {
  console.log("Crawling " + url);
  try {
    await page.goto(url, { waitUntil: "domcontentloaded" });
    await highlightLinks(page);
    await Promise.race([waitForEvent(page, 'load'), sleep(timeout)]);
    await highlightLinks(page);
    await page.screenshot({ path: "screenshot.jpg", quality: 100 });
    return url;
  } catch (error) {
    console.log("ERROR: Navigation failed");
    console.error(error);
    messages.push({ "role": "user", "content": "ERROR: I was unable to open that URL" });
    return false;
  }
}
/**
 * Carry out a `{"url": "..."}` instruction found in the model's reply.
 *
 * The address is the text between the first `{"url": "` marker and the next
 * `"}`; it is handed unchanged to navigateToUrl.
 *
 * @param {object} page - Puppeteer page to navigate.
 * @param {Array<object>} messages - Conversation, forwarded to navigateToUrl.
 * @param {string} messageText - Model reply containing the url instruction.
 * @returns {Promise<*>} Whatever navigateToUrl resolves to.
 */
async function handleUrlNavigationSS(page, messages, messageText) {
  const [, afterMarker] = messageText.split('{"url": "');
  const [targetUrl] = afterMarker.split('"}');
  const outcome = await navigateToUrl(page, messages, targetUrl);
  return outcome;
}
/**
 * Inspect the model's reply and perform the browser action it asks for.
 *
 * A `{"click": "` instruction takes precedence over a `{"url": "` instruction
 * when both appear. The handlers defined in this file carry the `SS` suffix
 * (handleLinkClickSS / handleUrlNavigationSS); the un-suffixed names are not
 * defined here.
 *
 * @param {object} page - Puppeteer page to act on.
 * @param {Array<object>} messages - Conversation, forwarded to the handlers.
 * @param {string} responseText - Text of the model's reply.
 * @returns {Promise<string|false>} The handler's result, or `false` when the
 *   reply contains no instruction (i.e. it is a plain answer).
 */
async function handleAssistantResponseSS(page, messages, responseText) {
  if (responseText.includes('{"click": "')) {
    return await handleLinkClickSS(page, messages, responseText);
  }
  if (responseText.includes('{"url": "')) {
    return await handleUrlNavigationSS(page, messages, responseText);
  }
  return false;
}
// System prompt placed as the first message of the conversation built in main().
// It defines the two JSON action formats the response handlers search for,
// `{"url": "` and `{"click": "`. The handlers match these with plain substring
// checks, so the exact spelling and spacing shown here must be kept in sync
// with them.
const systemMessage = `
You are a website crawler. You will be given instructions on what to do by browsing.
You are connected to a web browser and you will be given the screenshot of the website you are on.
The links on the website will be highlighted in red in the screenshot. Always read exactly what is in the screenshot.
Don't guess link names.
You can go to a specific URL by answering with the following JSON format:
{"url": "url goes here"}
You can click links on the website by referencing the text inside of the link/button, by answering in the following JSON format:
{"click": "Text in link"}
Once you are on a URL and you have found the answer to the user's question, you can answer with a regular message.
In the beginning, go directly to URL that you think might contain the answer to the user's question.
Prefer to go directly to sub-urls like 'https://google.com/search?q=search' if possible.
Prefer to use Google for simple queries.
If the user message provides a direct URL, always answer by going to that one instead.`
@SrJSDev
Copy link
Author

SrJSDev commented Mar 5, 2024

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment