Skip to content

Instantly share code, notes, and snippets.

@josiahbryan
Created May 23, 2024 18:33
Show Gist options
  • Save josiahbryan/35d7534234f2652648c10e4b13a0730e to your computer and use it in GitHub Desktop.
Save josiahbryan/35d7534234f2652648c10e4b13a0730e to your computer and use it in GitHub Desktop.
Dockerfile/K8 Setup example for running Puppeteer
/* eslint-disable no-unused-vars */
/* eslint-disable no-continue */
/* eslint-disable no-restricted-syntax */
/* eslint-disable no-shadow */
/* eslint-disable no-use-before-define */
/* eslint-disable no-console */
/* global document, HTMLElement, window */
import puppeteer from 'puppeteer-extra';
import AppConfig from 'shared/config';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// import { Browser, Page } from 'puppeteer';
import Logger from 'shared/utils/Logger.js';
import {
isPageExplicitlyLoading,
isValidURL,
waitTillHTMLRendered,
} from './utils.js';
import { highlightInteractiveElements } from './element-annotator.js';
/**
* Maximum time, in milliseconds, to wait for a page to load (60 seconds).
* Used as the default `timeout` passed to `page.goto` by `screenshot`.
*/
export const TIMEOUT = 60_000;
// Every screenshot is written to this one fixed path on disk
const imagePath = '/tmp/web-agent-screenshot.jpg';
// const browserWindowSize = { width: 900, height: 1600 };
// Browser window size in pixels, handed to Chrome through the `--window-size` launch argument
const browserWindowSize = { width: 1300, height: 900 };
/**
 * Launches a browser through puppeteer-extra (with the stealth plugin) and opens one page tab.
 * @param {Object} [options] - The options object
 * @param {Object} [options.logger=Logger] - Logger used for debug output
 * @param {number} [options.defaultTimeout=120000] - Default navigation timeout in ms applied to the
 *   new page; a falsy value skips the call and leaves Puppeteer's own default in place
 * @param {boolean|string} [options.headless] - Whether to run the browser in headless mode. Defaults
 *   to the `BROWSER_SERVICE_HEADLESS_DEV` env var when it is set, otherwise to "headless unless
 *   `AppConfig.buildEnv` is 'dev'"
 * @returns {Promise<{ browser: Browser, page: Page }>} An object containing the browser and the page
 */
export const initController = async (options = {}) => {
  const {
    logger = Logger,
    defaultTimeout = 2 * 60 * 1000, // ~2 min
    headless = process.env.BROWSER_SERVICE_HEADLESS_DEV ||
      AppConfig.buildEnv !== 'dev',
  } = options;

  const stealthPuppeteer = puppeteer.default.use(StealthPlugin());

  const { width, height } = browserWindowSize;
  const launchOptions = {
    headless,
    executablePath: process.env.GOOGLE_CHROME_CANARY_PATH,
    userDataDir: process.env.GOOGLE_CHROME_CANARY_USER_DATA_DIR,
    args: [`--window-size=${width},${height}`],
  };

  logger.debug(`...Launching browser with options:`, launchOptions);
  const browser = await stealthPuppeteer.launch(launchOptions);
  const page = await browser.newPage();

  // Only the navigation timeout is adjusted; the general per-operation timeout is left untouched
  if (defaultTimeout) {
    await page.setDefaultNavigationTimeout(defaultTimeout);
  }

  return { browser, page };
};
/**
 * Navigates the given page to a URL and captures it via `waitAndScreenshot`.
 * @param {Object} options - The options object
 * @param {string} options.url - The URL to take a screenshot of
 * @param {Page} options.page - The page object
 * @param {boolean} [options.dataOnly=false] - Whether to return only the html/text and not do the screenshot
 * @param {boolean} [options.disableAnnotations=false] - Whether to disable the annotations
 * @param {number} [options.defaultTimeout=TIMEOUT] - Navigation timeout in ms passed to `page.goto`
 * @param {Object} [options.logger=Logger] - Logger to use
 * @returns {Promise<Object>} Whatever `waitAndScreenshot` resolves to on success, or `{ error }` when
 *   navigation or capture fails
 * @throws {Error} When `url` does not pass `isValidURL` (thrown before the try block, so it is NOT
 *   converted into an `{ error }` result)
 */
export const screenshot = async ({
  url,
  page,
  dataOnly = false,
  disableAnnotations = false,
  defaultTimeout: timeout = TIMEOUT,
  // Defaulted like the sibling helpers so that omitting it no longer crashes on the first log call
  logger = Logger,
}) => {
  logger.debug(`...Opening ${url}`);
  if (!isValidURL(url)) {
    throw new Error(`Invalid URL: ${url}`);
  }
  try {
    logger.debug(`...waiting for networkidle0`);
    // TODO: What is the best way to wait for the page to load completely for a screenshot?
    // `networkidle0` resolves once there have been no network connections for at least 500 ms
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout,
    });
    logger.debug(`...going to screenshot`);
    // waitUntil alone is not enough for dynamically loaded content, so the extra waiting logic
    // lives in waitAndScreenshot
    return await waitAndScreenshot({
      page,
      dataOnly,
      disableAnnotations,
      logger,
    });
  } catch (error) {
    logger.error(`Error taking screenshot:`, error);
    return { error };
  }
};
/**
 * Clicks on a navigation link and takes a screenshot of the page that results.
 * @param {Object} options - The options object
 * @param {string} options.linkText - The text of the link to click on
 * @param {Page} options.page - The page object
 * @param {Browser} options.browser - The browser object
 * @param {boolean} [options.dataOnly] - Whether to return only the html/text and not do the screenshot
 * @param {boolean} [options.disableAnnotations] - Whether to disable the annotations
 * @param {Logger} [options.logger=Logger] - The logger object
 * @returns {Promise<Object>} Whatever `waitAndScreenshot` resolves to on success, or `{ error }`
 *   when the link cannot be clicked, navigation fails, or the new tab cannot be opened
 */
export const clickNavigationAndScreenshot = async ({
  linkText,
  page,
  browser,
  logger = Logger,
  dataOnly,
  disableAnnotations,
}) => {
  try {
    // Promise.all([waitForNavigation, click]) cannot be used because we must first learn whether
    // the link opens in a new tab, so the navigation wait is started up front and awaited later.
    const navigationPromise = page.waitForNavigation();
    // Register a no-op handler immediately: if the link opens in a new tab (or the click fails)
    // this promise is never awaited and would otherwise surface as an unhandled rejection when it
    // times out. Awaiting `navigationPromise` below still observes the rejection.
    navigationPromise.catch(() => undefined);

    const clickResponse = await clickOnLink({ linkText, page, logger });
    // clickOnLink reports failures as a (truthy) `{ error }` object rather than throwing
    if (clickResponse?.error) {
      throw clickResponse.error;
    }

    let targetPage = page;
    if (!clickResponse) {
      // Same-tab link: it has been clicked, wait for the navigation it triggered
      await navigationPromise;
    } else {
      // New-tab link: clickResponse is the `gpt-link-text` value of the element to open.
      // newTabNavigation reads it from the `linkText` key.
      const newPage = await newTabNavigation({
        linkText: clickResponse,
        page,
        browser,
        logger,
      });
      if (newPage?.error) {
        throw newPage.error;
      }
      if (!newPage) {
        throw new Error('The new page cannot be opened');
      }
      targetPage = newPage;
    }

    return await waitAndScreenshot({
      page: targetPage,
      logger,
      dataOnly,
      disableAnnotations,
    });
  } catch (error) {
    logger.error(`Error clicking link and taking screenshot:`, error);
    return { error };
  }
};
/**
 * Finds the first annotated element whose `gpt-link-text` attribute contains the given text and
 * clicks it, unless that element opens in a new tab.
 * @param {Object} options - The options object
 * @param {string} options.linkText - The text of the link to click on
 * @param {Page} options.page - The page object
 * @param {Logger} [options.logger=Logger] - The logger object (currently not used by this helper)
 * @returns {Promise<string | undefined | { error: Error }>} The element's `gpt-link-text` value when
 *   it has `target="_blank"` (the element is NOT clicked in that case), `undefined` after a same-tab
 *   click, or `{ error }` when no element matches or the evaluation fails
 */
const clickOnLink = async ({ linkText, page, logger = Logger }) => {
  // Runs inside the browser context, so it may only use browser globals and its own argument
  const findAndClick = (wantedText) => {
    const annotated = Array.from(document.querySelectorAll('[gpt-link-text]'));
    const match = annotated.find(
      (candidate) =>
        candidate instanceof HTMLElement &&
        // lower-cased to align with how the annotator writes the attribute
        candidate
          .getAttribute('gpt-link-text')
          ?.includes(wantedText.trim().toLowerCase()),
    );

    if (match === undefined) {
      throw new Error(`Link with text not found: "${wantedText}"`);
    }
    if (match.getAttribute('target') === '_blank') {
      // Let the caller drive the new-tab flow; hand back the exact attribute value
      return match.getAttribute('gpt-link-text');
    }
    match.style.backgroundColor = 'rgba(255,255,0,0.25)';
    match.click();
    return undefined;
  };

  try {
    return await page.evaluate(findAndClick, linkText);
  } catch (error) {
    // Failures are returned, not thrown; callers must check for `.error`
    return { error };
  }
};
/**
 * Escapes a value so it can be embedded inside a double-quoted CSS attribute selector.
 * Quotes and backslashes are backslash-escaped; line breaks use CSS hex escapes because a raw
 * newline is not allowed inside a CSS string.
 */
const escapeCssAttributeValue = (value) =>
  String(value)
    .replace(/["\\]/g, '\\$&')
    .replace(/\n/g, '\\a ')
    .replace(/\r/g, '\\d ')
    .replace(/\f/g, '\\c ');

/**
 * Clicks the element that opens a new tab and returns the page object of that new tab.
 * @param {Object} options - The options object
 * @param {string} [options.linkText] - The exact `gpt-link-text` value of the element to click
 * @param {string} [options.clickResponse] - Alias for `linkText`, used when `linkText` is not given
 * @param {Page} options.page - The page object that contains the link
 * @param {Browser} options.browser - The browser object
 * @param {Logger} [options.logger=Logger] - The logger object
 * @returns {Promise<Page | undefined>} The new page once its `body` exists, or `undefined` (after
 *   logging the reason) if it cannot be opened
 */
const newTabNavigation = async ({
  linkText,
  clickResponse,
  page,
  browser,
  logger = Logger,
}) => {
  const gptLinkText = linkText ?? clickResponse;
  try {
    if (typeof gptLinkText !== 'string') {
      throw new Error('No link text was provided for the new-tab navigation');
    }
    // store the target of original page to know that this was the opener:
    const currentPageTarget = page.target();
    // the link text comes from page content, so it must be escaped before going into a selector
    const element = await page.$(
      `[gpt-link-text="${escapeCssAttributeValue(gptLinkText)}"]`,
    );
    if (element === null) {
      throw new Error('The element is null');
    }
    // awaited so that a failed click is caught here instead of becoming an unhandled rejection
    await element.click();
    // check if the new page is opened by the current page:
    const newPageTarget = await browser.waitForTarget(
      (target) => target.opener() === currentPageTarget,
    );
    // switch to the new page:
    const newPage = await newPageTarget.page();
    if (newPage === null) {
      throw new Error('The new page is null');
    }
    // wait for page to be loaded (briefly)
    await newPage.waitForSelector('body');
    return newPage;
  } catch (error) {
    logger.error(`Error opening link in a new tab:`, error);
    return undefined;
  }
};
/**
 * Waits for the page to settle, optionally annotates and screenshots it, then collects its metadata.
 * @param {Object} options - The options object
 * @param {Page} options.page - The page object
 * @param {boolean} [options.dataOnly=false] - When true, skip both annotation and screenshot
 * @param {boolean} [options.disableAnnotations=false] - When true, screenshot without highlighting
 * @param {Logger} [options.logger=Logger] - The logger object
 * @returns {Promise<Object>} `imagePath` (always the fixed module-level path, even when no
 *   screenshot was taken) merged with whatever `pageMetadataHelper` returns
 */
const waitAndScreenshot = async ({
  page,
  dataOnly = false,
  disableAnnotations = false,
  logger = Logger,
}) => {
  // `waitUntil` in the goto options is not enough for dynamically loaded content, so when the page
  // reports that it is still loading we additionally wait for the rendered HTML to settle
  if (await isPageExplicitlyLoading(page)) {
    await waitTillHTMLRendered(page);
  }

  const shouldCapture = !dataOnly;
  if (shouldCapture && !disableAnnotations) {
    logger.debug(`...Highlight all interactive elements`);
    await highlightInteractiveElements(page);
  }
  if (shouldCapture) {
    logger.debug(`...Taking screenshot`);
    await page.screenshot({ path: imagePath, fullPage: true });
  }

  logger.debug(`...Reading Text and HTML for the page`);
  const metadata = await pageMetadataHelper(page);
  return Object.assign({ imagePath }, metadata);
};
/**
 * Collects the HTML, title, visible text, URL and outgoing links of the given page.
 *
 * Links are returned as `[absoluteUrl, linkText]` pairs, restricted to http(s) URLs, sorted by URL
 * length and then alphabetically, and de-duplicated by URL. Fragment-only, `javascript:` and
 * pdf/jpg/jpeg/png hrefs are skipped.
 * @param {Page} page - The page object
 * @returns {Promise<{ html: string, text: string, links: Array, title: string, url: string }>}
 */
export const pageMetadataHelper = async (page) => {
  const html = await page.content();
  // Everything inside this callback runs in the browser context, so all helpers are defined inline
  const { title, text, links, url } = await page.evaluate(() => {
    const pageUrl = window.location.href;
    const skippedExtensions = ['.pdf', '.jpg', '.jpeg', '.png'];

    const isCandidateHref = (href) =>
      Boolean(href) &&
      !href.startsWith('#') &&
      // eslint-disable-next-line no-script-url
      !href.startsWith('javascript:') &&
      !skippedExtensions.some((extension) => href.endsWith(extension));

    const toAbsoluteUrl = (rawHref) => {
      const href = decodeURIComponent(rawHref);
      if (href.startsWith('?')) {
        // Query-only links are merged into the current page's query string
        const merged = new URL(pageUrl);
        new URLSearchParams(href).forEach((value, key) => {
          merged.searchParams.set(key, value);
        });
        return merged.toString();
      }
      // Resolving against the page URL handles absolute, root-relative ("/a?b#c"),
      // protocol-relative ("//host/a") and plain relative ("a.html") hrefs alike. Assigning a
      // root-relative href to `pathname` would percent-encode its "?" and "#".
      return new URL(href, pageUrl).toString();
    };

    const byLengthThenAlphabet = ([a], [b]) => {
      if (a.length !== b.length) {
        return a.length - b.length;
      }
      if (a < b) {
        return -1;
      }
      return a > b ? 1 : 0;
    };

    // A Map keeps the position of the first occurrence and the value of the last one
    const uniqueLinks = new Map();
    Array.from(document.querySelectorAll('a[href]'))
      .map((anchor) => [anchor.getAttribute('href'), anchor.textContent?.trim()])
      .filter(([href]) => isCandidateHref(href))
      .flatMap(([href, ...attribs]) => {
        try {
          return [[toAbsoluteUrl(href), ...attribs]];
        } catch (err) {
          console.warn(`Cannot parse URL '${href}': ${err.message}`);
          return [];
        }
      })
      .filter(([absoluteUrl]) => absoluteUrl.startsWith('http'))
      .sort(byLengthThenAlphabet)
      .forEach((entry) => {
        uniqueLinks.set(entry[0], entry);
      });

    return {
      title: document.title,
      // A document without a <body> yields empty text instead of throwing
      text: document.body?.innerText ?? '',
      url: pageUrl,
      links: Array.from(uniqueLinks.values()),
    };
  });
  return { html, text, links, title, url };
};
# #######
#
# This file contains all the things needed to run a copy of the browser-service in k8s:
# - service - Exposes the deployment on port 80 to the cluster
# - deployment - Creates pods for the docker image containing the actual backend
#
# Ingress is handled in backend.yml with `browser-service.vaya.to` pointing to the service `browser-service` defined here.
#
# `browser-service` is built and pushed to the registry by the `/browser-service/build-and-deploy.sh` script, manually on demand.
#
# #######
# # Add a disruption budget to attempt to keep the backend "safe" during
# # k8 updates via DO
# # ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
# # ref: https://docs.digitalocean.com/products/kubernetes/how-to/upgrade-cluster/#disruption-free-upgrades
# apiVersion: policy/v1
# kind: PodDisruptionBudget
# metadata:
# name: browser-service-pdb
# spec:
# minAvailable: 1
# selector:
# matchLabels:
# app: browser-service
# ---
## Service: browser-service
#
# Exposes port 80 inside the cluster and forwards it to port 8091 on the pods
# selected by the `app: browser-service` label.
apiVersion: v1
kind: Service
metadata:
  name: browser-service
  namespace: vaya-prod
  labels:
    app: browser-service
    tags.datadoghq.com/env: "prod"
spec:
  ports:
    - port: 80
      targetPort: 8091
  selector:
    app: browser-service
---
## Deployment: Worker
#
apiVersion: apps/v1
kind: Deployment
metadata:
  name: browser-service
  namespace: vaya-prod
  labels:
    app: browser-service
    tags.datadoghq.com/env: "prod"
spec:
  replicas: 1
  # https://stackoverflow.com/a/37258369/1119559
  # Right now, I don't have a plan/handle to rollback with K8s-specific RS history.
  # If something goes wrong in a prod deployment, I'll change
  # the ":latest" below to a specific SHA/version and redeploy.
  revisionHistoryLimit: 0
  # Guidance around this timing and the strategy below
  # is from https://alexklibisz.com/2021/07/20/speed-limits-for-rolling-restarts-in-kubernetes.html
  # minReadySeconds: 3 # TBD - need to qualify to see if this helps
  # strategy:
  #   rollingUpdate:
  #     maxUnavailable: 1 # New! 2x the default of 25% (1/4)
  #     maxSurge: 2 # Moved Surge to 2 since replicas is at 6 to keep max nodes at 8 for next update
  selector:
    matchLabels:
      app: browser-service
  template:
    metadata:
      labels:
        app: browser-service
        # DD_ENV below is read from this label through the downward API, which exposes the
        # pod's own labels (not the Deployment's), so the label has to be set on the pod template.
        tags.datadoghq.com/env: "prod"
    spec:
      containers:
        - name: browser-service
          image: registry.digitalocean.com/vaya/browser-service:latest
          imagePullPolicy: Always
          resources:
            requests:
              # Not sure how much browser-service will need, shall have to test
              memory: "500M"
              cpu: "100m"
            # No limits imposed for now...
            # limits:
            #   # api2-6g node pool now has 6GB usable, so this is roughly 6GB
            #   memory: "6000M"
            #   # Current k8 pod has 2vCPU
            #   # New api2-6gb has 4vCPU
            #   cpu: "3500m"
          ports:
            - containerPort: 8091
          # startupProbe:
          #   initialDelaySeconds: 300
          #   # Period * Failure = 5 minutes
          #   # Protects slow start-up containers from getting killed
          #   # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes
          #   periodSeconds: 10
          #   failureThreshold: 30
          #   timeoutSeconds: 1
          #   successThreshold: 1
          #   httpGet:
          #     path: /api/version?k8_probe=startup
          #     port: 8091
          # readinessProbe:
          #   httpGet:
          #     path: /api/version?k8_probe=readiness
          #     port: 8091
          # livenessProbe:
          #   # Updated liveliness settings 2022-12-24 due to some reports taking long time to run (e.g golds billing)
          #   # and causing previous liveliness settings to timeout, resulting in pod restarts when the report was
          #   # running, never letting the report complete. These settings have been updated to compensate
          #   # (or attempt to compensate) for the temporary load
          #   initialDelaySeconds: 600
          #   # Period * Failure = 7.5 minutes
          #   # Protects containers that have temporary load from getting killed
          #   # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes
          #   periodSeconds: 15
          #   failureThreshold: 30
          #   # Higher timeout in case the server is under temporary load
          #   timeoutSeconds: 5
          #   successThreshold: 1
          #   httpGet:
          #     path: /api/version?k8_probe=liveness
          #     port: 8091
          env:
            - name: ENVIRONMENT
              value: production
            # Ref: https://docs.datadoghq.com/developers/dogstatsd/?tab=kubernetes#send-statsd-metrics-to-the-agent
            - name: DD_AGENT_HOST
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            # Ref: https://docs.datadoghq.com/developers/dogstatsd/?tab=kubernetes#origin-detection-over-udp
            - name: DD_ENTITY_ID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.uid
            # TBD if this is needed
            - name: DD_ENV
              valueFrom:
                fieldRef:
                  fieldPath: metadata.labels['tags.datadoghq.com/env']
            # This SHOULD work ...
            # ref: https://docs.datadoghq.com/tracing/guide/ignoring_apm_resources/?tab=datadogyaml
            - name: DD_APM_FILTER_TAGS_REJECT
              value: http.url_details.path:/api/version
            # # TBD if this is needed
            # - name: DD_VERSION
            #   valueFrom:
            #     fieldRef:
            #       fieldPath: metadata.labels['tags.datadoghq.com/version']
            - name: DD_APM_ENABLED
              value: "true"
            - name: DD_HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # Using this in LoggingAdapter for system info
            - name: K8_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # Using this in LoggingAdapter for system info
            - name: K8_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            # Using this in LoggingAdapter for system info
            - name: K8_POD_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
FROM --platform=linux/amd64 node:20.11.1 AS base
# Note: The --platform arg above is required for building on Apple Silicon (Mx) Macs
# Thanks to <https://dev.to/docker/unable-to-locate-package-google-chrome-stable-b62> for the tip.
USER root
WORKDIR /root
# Most of this apt-get setup is based on https://blog.apify.com/puppeteer-docker/
# Install the stable Chrome package plus the fonts and libraries it needs, a few network
# debugging tools, and create the unprivileged `vaya` user the service runs as
RUN apt-get update \
    && apt-get install -y wget gnupg \
    && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/googlechrome-linux-keyring.gpg \
    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/googlechrome-linux-keyring.gpg] https://dl-ssl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google.list \
    && apt-get update \
    && apt-get install -y google-chrome-stable fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-khmeros fonts-kacst fonts-freefont-ttf libxss1 dbus dbus-x11 \
    --no-install-recommends \
    && apt-get install -qqy --no-install-recommends \
    iproute2 iputils-ping telnet net-tools ssh \
    && rm -rf /var/lib/apt/lists/* \
    && groupadd -r vaya && useradd -rm -g vaya -G audio,video vaya
# Determine the path of the installed Google Chrome
# Mostly for debugging the set of PUPPETEER_EXECUTABLE_PATH later
RUN which google-chrome-stable || true
# Use dumb-init to help prevent zombie processes. Without this, node doesn't seem able to kill
# zombie processes on dev. Should also ensure everything is killed on prod when the entire pod
# receives a SIGTERM.
# See https://www.elastic.io/nodejs-as-pid-1-under-docker-images/
ADD https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1.2.0_amd64 /usr/local/bin/dumb-init
RUN chmod +x /usr/local/bin/dumb-init
ENTRYPOINT ["dumb-init", "--"]
# Install our shared module because package.json has a dep to '../shared'
RUN mkdir /shared
WORKDIR /shared
# Have to use the .tar.gz because COPY refuses to copy from ../shared
# NOTE: You have to pre-build shared.tar.gz - see package.json and the docker:prebuild:shared script in that file
COPY buildfiles/shared.tar.gz .
RUN tar zxvf ./shared.tar.gz && \
    rm -f ./shared.tar.gz && \
    cd /shared && \
    npm ci --legacy-peer-deps && \
    chown -R vaya:vaya /shared
# Install the nodejs side of the app
RUN mkdir /app && chown vaya:vaya /app
WORKDIR /app
ENV NODE_ENV=production
# Install our app src and node modules
COPY --chown=vaya:vaya . .
# Use the Chrome we installed above instead of letting puppeteer download its own browser
# PUPPETEER_EXECUTABLE_PATH must match the output of `which google-chrome-stable` above
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
    PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable
# Install node modules
# Yes, included in package.json, but installed nodemon globally here
# so we can run via commands in bash run files
# Also, clean cache after all that installing
RUN npm ci --legacy-peer-deps && \
    npm install nodemon@1.18.10 -g && \
    npm cache clean --force && \
    chown -R vaya:vaya /app
# Inform docker of the port we're using - not sure that this is required,
# but it keeps the metadata clean anyway
EXPOSE 8091
# Switch to the non-root user
USER vaya
CMD [ "npm", "run", "start:docker-internal" ]
/* eslint-disable no-unused-vars */
/* eslint-disable no-restricted-syntax */
/* global document, window, HTMLElement */
// for reference, this variable must be defined in the browser context (inside the pageFunction)
// const UNIQUE_IDENTIFIER_ATTRIBUTE = "gpt-link-text";
// CSS selectors for the elements treated as interactive; they are joined with ', ' into a single
// selector list when the page is annotated.
const INTERACTIVE_ELEMENTS = [
'a',
'button',
// NOTE(review): the original note here read "to avoid clicking on the google search input",
// yet 'input' IS part of the selector list below - confirm the intent.
'input',
'textarea',
'[role=button]',
'[role=treeitem]',
// only elements whose inline onclick handler is non-empty
'[onclick]:not([onclick=""])',
];
/**
 * Removes the `gpt-link-text` attribute from every element on the page that currently has it.
 * Note that only the attribute is cleared; outlines added by earlier annotation runs are left alone.
 * @param page - The page object
 */
const resetUniqueIdentifierAttribute = async (page) => {
  // Serialized and executed in the browser context, so the attribute name is defined inside it
  const stripIdentifierAttribute = () => {
    const attributeName = 'gpt-link-text';
    document.querySelectorAll(`[${attributeName}]`).forEach((node) => {
      node.removeAttribute(attributeName);
    });
  };
  await page.evaluate(stripIdentifierAttribute);
};
/**
 * Outlines every interactive element on the page and tags the visible ones with a
 * `gpt-link-text` attribute holding their trimmed, lower-cased text content.
 * That attribute is what later code uses to locate the element to interact with.
 * @param page - The page object
 */
const annotateAllInteractiveElements = async (page) => {
  // $$eval runs Array.from(document.querySelectorAll(selector)) within the page and passes the
  // result to the pageFunction; the array is empty when nothing matches.
  await page.$$eval(
    INTERACTIVE_ELEMENTS.join(', '), // the selector can be built outside the browser context
    function annotate(elements) {
      // Everything below runs in the browser context: helpers must be defined in here, and any
      // console output shows up in the browser console rather than the node terminal.
      const UNIQUE_IDENTIFIER_ATTRIBUTE = 'gpt-link-text';

      if (elements.length === 0) {
        return;
      }

      // Only HTMLElement instances are given an outline through their `style` property
      const isHTMLElement = (element) => element instanceof HTMLElement;

      const isElementStyleVisible = (element) => {
        const style = window.getComputedStyle(element);
        return (
          style.display !== 'none' &&
          style.visibility !== 'hidden' &&
          style.opacity !== '0' &&
          style.width !== '0px' &&
          style.height !== '0px'
        );
      };

      // An element counts as visible only if it and all of its ancestors are style-visible.
      // (Being inside the viewport is deliberately not required.)
      const isElementVisible = (element) => {
        if (element === null || element === undefined) {
          return false;
        }
        for (
          let current = element;
          current;
          current = current.parentElement
        ) {
          if (!isElementStyleVisible(current)) {
            return false;
          }
        }
        return true;
      };

      const setUniqueIdentifierBasedOnTextContent = (element) => {
        const { textContent } = element;
        // if the node is a document or doctype, textContent will be null
        if (textContent === null) {
          return;
        }
        // TODO: elements without text (e.g. an <a> that only has a title attribute, or an icon
        // button) end up with an empty identifier and cannot be targeted by text
        element.setAttribute(
          UNIQUE_IDENTIFIER_ATTRIBUTE,
          textContent.trim().toLowerCase(),
        );
      };

      for (const element of elements) {
        if (isHTMLElement(element)) {
          // highlight all the interactive elements with a red bounding box
          element.style.outline = '3px solid red';
        }
        if (isElementVisible(element)) {
          setUniqueIdentifierBasedOnTextContent(element);
        }
      }
    },
  );
};
/**
 * Re-annotates the page: first clears identifiers left by a previous run, then outlines and tags
 * the interactive elements that are on the page now.
 * @param page - The page object
 */
export const highlightInteractiveElements = async (page) => {
  const clearStaleIdentifiers = () => resetUniqueIdentifierAttribute(page);
  const annotateCurrentElements = () => annotateAllInteractiveElements(page);

  // Order matters: stale identifiers must be gone before fresh ones are written
  await clearStaleIdentifiers();
  await annotateCurrentElements();
};
/* eslint-disable no-unused-vars */
/* global document, window, HTMLElement */
import Logger from 'shared/utils/Logger';
import { jsonSafeStringify } from 'shared/utils/jsonSafeStringify';
import { jsonSafeParse } from 'shared/utils/jsonSafeParse';
import { assertRequired } from 'shared/utils/assertRequired';
import { readFile } from 'fs/promises';
import { normalizeDecimals } from 'shared/utils/normalizeDecimals';
import { getSystemInfo } from 'shared/utils/getSystemInfo';
import {
screenshotUrl,
clickNavigationAndScreenshotUrl,
getSession,
startSession,
endSession,
getSessionList,
} from './utils/session-controller';
import { pageMetadataHelper } from './utils/browser-controller';
export async function usePuppeteerControllerRoutes(app) {
// TODO: Use shared/utils/appPlugin instead!!!!!!!!
const jsonApiWrapper = (fn) => async (req, res) => {
try {
const { body: bodyBuffer } = req;
const body = jsonSafeParse(
bodyBuffer?.toString === 'function'
? bodyBuffer.toString()
: bodyBuffer,
);
const json = await fn({ ...req, body }, res);
const system = getSystemInfo();
res.header('X-Vaya-Server', system.host.hostname);
res.header('X-Vaya-Service', system.host.serviceName);
res.header('X-Vaya-Namespace', system.host.k8sNamespace);
res.header('X-Vaya-Version', system.version);
if (system.appName) {
res.header('X-Vaya-App', system.appName);
}
// This is the value from the same-named header the client sent (if any)
// It's attached to the request by our logRequest middleware
res.header('X-Client-Tx', req.clientTransactionId);
// if (tenantAppAnnotation) {
// // Possible metrics correlation for tenant apps - TBD
// res.header('X-Vaya-Tenant-App', tenantAppAnnotation);
// }
// For latency measurements (combined with TimeService on the client side)
res.header('X-Vaya-Epoch', Date.now());
// Tell client what we measured in our logRequest.js handler on what they sent us
if (req.receiveLatency) {
res.header('X-Receive-Latency', req.receiveLatency);
res.header(
'X-Receive-KBps',
normalizeDecimals(req.receiveBytesPerSecond / 1024),
);
}
if (json) {
res.json(json);
}
} catch (err) {
const { logger = Logger } = req;
logger.error(`Error:`, err);
res.status(err.statusCode || 500).send(
jsonSafeStringify({
error: {
message: err.message,
stack: err?.stack,
data: err?.statusData,
},
}),
);
}
};
app.get(
'/api/version',
jsonApiWrapper(async (req) => {
// Just for the fun of it, log startup probes.
// The other probes happen so often, we don't want to clutter logs
if (req.query.k8_probe === 'startup') {
req.logger.info(`✅ Received startup probe from k8s ✨`);
}
return getSystemInfo();
}),
);
app.post(
'/api/screenshot',
jsonApiWrapper(async (req, res) => {
const { logger, body } = req;
const {
url,
traceId,
sessionId,
dataOnly,
imageOnly,
lockToken,
disableAnnotations,
} = body;
assertRequired({ url }, 'screenshot', logger);
const img = await screenshotUrl({
url,
traceId,
sessionId,
lockToken,
dataOnly,
disableAnnotations,
logger,
});
const { imagePath, ...json } = img || {};
logger.debug(`Got screenshot result:`, {
input: body,
output: img,
});
if (dataOnly) {
return json;
}
const bytes = await readFile(imagePath);
if (imageOnly) {
// res.json(result);
res.header('Content-Type', 'image/jpeg');
res.send(bytes);
}
const jsonWithBytes = {
imageBase64: bytes.toString('base64'),
...json,
};
return jsonWithBytes;
}),
);
app.post(
'/api/click_and_screenshot',
jsonApiWrapper(async (req, res) => {
const { logger = Logger, body } = req;
const {
linkText,
traceId,
sessionId,
dataOnly,
imageOnly,
lockToken,
disableAnnotations,
} = body;
// logger.debug(`click_and_screenshot body:`, {
// parsed: body,
// raw: bodyBuffer.toString(),
// });
assertRequired({ sessionId, linkText }, 'click_and_screenshot', logger);
const props = {
linkText,
traceId,
sessionId,
lockToken,
dataOnly,
disableAnnotations,
logger,
};
// logger.debug(`Calling click with props`, props);
const img = await clickNavigationAndScreenshotUrl(props);
const { imagePath, ...json } = img || {};
logger.debug(`Got click and screenshot result:`, {
input: body,
output: img,
});
if (dataOnly) {
return json;
}
const bytes = await readFile(imagePath);
if (imageOnly) {
// res.json(result);
res.header('Content-Type', 'image/jpeg');
res.send(bytes);
}
const jsonWithBytes = {
imageBase64: bytes.toString('base64'),
...json,
};
return jsonWithBytes;
}),
);
app.post(
'/api/current_page_content',
jsonApiWrapper(async (req, res) => {
const { logger = Logger, body } = req;
const { sessionId, lockToken, customPageFunction } = body;
assertRequired({ sessionId }, 'current_page_content', logger);
// Makes a factory...
const customPageFn =
// eslint-disable-next-line no-new-func
customPageFunction && new Function(`return ${customPageFunction}`)();
// if (customEvaluationString || customPageFunction) {
// logger.warn(`Custom function strings:`, {
// customEvaluationString,
// customPageFunction,
// customPageFn,
// customPageFnAsString: customPageFn.toString(),
// // fnOut: customPageFn
// });
// }
const { page } = await getSession({ sessionId, lockToken, logger });
const [metadata, functionResults] = await Promise.all([
pageMetadataHelper(page),
...(customPageFunction
? [customPageFn({ page, logger })]
: [() => undefined]),
]).catch((ex) => {
logger.error(`Error getting page content:`, ex);
});
const currentUrl = page.url();
const results = {
...metadata,
functionResults,
};
logger.debug(`Got current_page_content results:`, {
input: body,
output: results,
});
return results;
}),
);
app.post(
'/api/start_session',
jsonApiWrapper(async (req, res) => {
const { logger = Logger, body } = req;
const { traceId, sessionId: sessionIdInput, lockToken, metadata } = body;
const { sessionId } = await startSession({
sessionId: sessionIdInput,
lockToken,
traceId,
metadata,
logger,
});
logger.debug(`Started session`, {
sessionId,
traceId,
lockToken,
metadata,
});
return { sessionId };
}),
);
app.post(
  '/api/end_session',
  // Ends the session named in the request body and echoes its ID back.
  jsonApiWrapper(async (req, res) => {
    const { logger: log = Logger, body: payload } = req;
    const { sessionId, lockToken } = payload;

    await endSession({ sessionId, lockToken, logger: log });

    log.debug(`Ended session`, { sessionId, lockToken });

    return { ended: sessionId };
  }),
);
app.post(
  '/api/session_list',
  // Lists the sessions returned by `getSessionList` for the given lock token,
  // exposing only their public summary fields (no browser/page handles).
  jsonApiWrapper(async (req, res) => {
    // The body is optional for this route, hence the `{}` fallback.
    const { logger: log = Logger, body: payload = {} } = req;
    const { lockToken } = payload;

    const visibleSessions = await getSessionList({ lockToken, logger: log });
    const sessions = visibleSessions.map((entry) => {
      const { sessionId, createdAt, metadata, traceId } = entry;
      return { sessionId, createdAt, metadata, traceId };
    });

    log.debug(`Got session list`, sessions, { lockToken });

    return sessions;
  }),
);
}
/* eslint-disable no-unused-vars */
import Logger from 'shared/utils/Logger';
import StatusCodeError from 'shared/utils/StatusCodeError';
import nanoid from 'shared/utils/nanoid';
import { assertRequired } from 'shared/utils/assertRequired';
import {
initController,
screenshot,
clickNavigationAndScreenshot,
} from './browser-controller';
/**
 * In-memory registry of live sessions, keyed by session ID.
 *
 * Created without a prototype so that caller-supplied IDs such as
 * `constructor`, `toString` or `__proto__` can never resolve to inherited
 * `Object.prototype` members and be mistaken for a stored session.
 * @type {Object<string, Object>}
 */
const SessionLookup = Object.create(null);
/**
 * Lists the sessions visible to a caller.
 *
 * A session is included when it was created without a lock token, or when its
 * lock token equals `options.lockToken`. Sessions locked with a different
 * token are left out, so calling this without a token yields only the
 * unlocked sessions.
 *
 * @param {Object} [options] - The options for retrieving the session list.
 * @param {string} [options.lockToken] - Lock token used to match locked sessions.
 * @param {Object} [options.logger=Logger] - Logger (accepted but not used here).
 * @returns {Promise<Array>} The matching session objects, in registry order.
 */
export async function getSessionList({ lockToken, logger = Logger } = {}) {
  const visible = [];
  for (const session of Object.values(SessionLookup)) {
    const isUnlocked = !session.lockToken;
    if (isUnlocked || session.lockToken === lockToken) {
      visible.push(session);
    }
  }
  return visible;
}
/**
 * Looks up a stored session and enforces its lock token, if it has one.
 *
 * @param {Object} options - The options for retrieving the session.
 * @param {string} options.sessionId - ID of the session to fetch (required).
 * @param {string} [options.lockToken] - Must equal the token the session was created with, when it has one.
 * @param {Object} [options.logger=Logger] - Logger (accepted but not used here).
 * @returns {Promise<Object>} The stored session object.
 * @throws {StatusCodeError.BadRequest} If no session is stored under `sessionId`.
 * @throws {StatusCodeError.ExpectationFailed} If the session is locked and `lockToken` is missing or different.
 */
export async function getSession({ sessionId, lockToken, logger = Logger }) {
  assertRequired({ sessionId });

  const session = SessionLookup[sessionId];
  if (!session) {
    throw new StatusCodeError.BadRequest(`Session not found: ${sessionId}`);
  }

  // Unlocked sessions are available to any caller.
  const requiredToken = session.lockToken;
  if (!requiredToken) {
    return session;
  }

  if (!lockToken) {
    throw new StatusCodeError.ExpectationFailed(
      `Session previously created with lockToken, but no lockToken given to 'getSession'`,
    );
  }
  if (requiredToken !== lockToken) {
    throw new StatusCodeError.ExpectationFailed(
      `Session previously created with lockToken, but lockToken given does not match`,
    );
  }
  return session;
}
/**
 * Creates a session, or hands back the existing one when `sessionId` is
 * already registered.
 *
 * Re-use goes through `getSession`, so a session that was created with a lock
 * token can only be re-used by a caller presenting the same token. A new
 * session gets a fresh browser/page pair from `initController` and is stored
 * in the registry before being returned.
 *
 * @param {Object} options - The options for starting the session.
 * @param {string} [options.sessionId] - Explicit ID to use; a `browser_svc_`-prefixed ID is generated when omitted.
 * @param {string} [options.traceId] - Arbitrary ID stored on the session for tracing.
 * @param {string} [options.lockToken] - When set, later calls touching this session must present the same token.
 * @param {Object} [options.metadata] - Free-form metadata stored on the session.
 * @param {number} [options.defaultTimeout] - Passed to `initController` and stored on the session.
 * @param {Logger} [options.logger=Logger] - Logger used here and stored on the session.
 * @returns {Promise<Object>} The new or re-used session object.
 */
export async function startSession({
  sessionId = `browser_svc_${nanoid()}`,
  traceId,
  lockToken,
  metadata,
  defaultTimeout,
  logger = Logger,
}) {
  const alreadyRegistered = Boolean(SessionLookup[sessionId]);
  if (alreadyRegistered) {
    // getSession performs the lockToken check for us.
    const existing = await getSession({ sessionId, lockToken, logger });
    logger.info(`Re-using existing session '${sessionId}'`);
    return existing;
  }

  const controller = await initController({ logger, defaultTimeout });

  const created = {
    browser: controller.browser,
    page: controller.page,
    sessionId,
    traceId,
    // Stored so getSession can reject callers that omit or mismatch the token.
    lockToken,
    createdAt: Date.now(),
    metadata,
    logger,
    defaultTimeout,
  };

  logger.info(`Started session '${sessionId}':`, created);
  SessionLookup[sessionId] = created;
  return created;
}
/**
 * Closes a session's browser and removes the session from the registry.
 *
 * The session is resolved through `getSession`, so the same not-found and
 * lock-token errors apply. A failure while closing the browser is logged as a
 * warning and does not prevent the session from being removed.
 *
 * @param {Object} options - The options for ending the session.
 * @param {string} options.sessionId - ID of the session to end (required).
 * @param {string} [options.lockToken] - Lock token of the session, if it has one.
 * @param {Object} [options.logger=Logger] - Logger used for the close-failure warning.
 * @returns {Promise<void>} Resolves once the session has been removed.
 */
export async function endSession({ sessionId, lockToken, logger = Logger }) {
  assertRequired({ sessionId });

  const session = await getSession({ sessionId, lockToken, logger });
  const { browser } = session;

  const reportCloseFailure = (ex) => {
    logger.warn(`Error closing browser for session '${sessionId}':`, ex);
  };
  await browser.close().catch(reportCloseFailure);

  delete SessionLookup[sessionId];
}
/**
 * Takes a screenshot of a URL, inside an existing session or a throwaway one.
 *
 * With `sessionId`, the existing session is used. If the screenshot fails
 * with an error whose message contains `Session closed`, the session is ended,
 * started again under the same ID and the screenshot is retried.
 * Without `sessionId`, a temporary session is started for this call only and
 * is always ended afterwards, whether the screenshot succeeded or failed.
 *
 * @param {Object} options - The options for taking a screenshot.
 * @param {string} options.url - The URL to take a screenshot of.
 * @param {boolean} [options.dataOnly] - Forwarded to `screenshot`.
 * @param {boolean} [options.disableAnnotations] - Forwarded to `screenshot`.
 * @param {string} [options.sessionId] - ID of the session to use; omit for a temporary session.
 * @param {string} [options.lockToken] - Lock token of the session (or for the temporary one).
 * @param {Object} [options.metadata] - Metadata stored on a newly started temporary session.
 * @param {Object} [options.logger=Logger] - The logger to use for logging.
 * @param {number} [options.retries=0] - Internal retry counter; once it exceeds 2 the error is thrown instead of retrying.
 * @param {number} [options.defaultTimeout] - Timeout for a newly started temporary session.
 * @returns {Promise<Object>} Whatever `screenshot` resolved with (when it carries no `error`).
 * @throws The `error` reported by `screenshot`, or an Error if the session could not be re-started.
 */
export async function screenshotUrl({
  url,
  dataOnly,
  disableAnnotations,
  sessionId,
  lockToken,
  metadata,
  logger = Logger,
  retries = 0,
  defaultTimeout,
}) {
  // Without a caller-supplied session we create one and own its cleanup.
  const isEphemeral = !sessionId;
  const session = isEphemeral
    ? await startSession({ metadata, lockToken, defaultTimeout, logger })
    : await getSession({ sessionId, lockToken, logger });

  try {
    const data = await screenshot({
      url,
      page: session.page,
      dataOnly,
      disableAnnotations,
      defaultTimeout: session.defaultTimeout,
      logger,
    });
    if (!data.error) {
      return data;
    }

    // Only a closed browser session on a caller-owned session is retryable.
    const isSessionClosed = data.error?.message?.includes('Session closed');
    if (!isSessionClosed || isEphemeral) {
      throw data.error;
    }
    if (retries > 2) {
      logger.error(
        `Failed to take screenshot after ${retries} retries:`,
        data.error,
      );
      throw data.error;
    }

    // Best effort: the old session is already broken, so a failure to end it
    // must not stop us from starting a replacement.
    await endSession(session).catch((ex) => {
      logger.warn(`Error ending session '${sessionId}' before retry:`, ex);
    });
    const result = await startSession({ ...session, logger });
    if (!result.sessionId) {
      throw new Error(
        `Failed to re-start session after original error: ${data.error?.message}`,
      );
    }
    return await screenshotUrl({
      url,
      dataOnly,
      disableAnnotations,
      sessionId,
      lockToken,
      metadata,
      logger,
      retries: retries + 1,
    });
  } finally {
    // Always release the temporary session, including on failure, so its
    // browser is not leaked. Cleanup problems are logged rather than thrown so
    // they cannot mask the screenshot result or its error.
    if (isEphemeral) {
      await endSession(session).catch((ex) => {
        logger.warn(
          `Error ending temporary session '${session.sessionId}':`,
          ex,
        );
      });
    }
  }
}
/**
 * Clicks a link in an existing session's page and screenshots the result.
 *
 * The work itself is delegated to `clickNavigationAndScreenshot`. If that
 * reports an error whose message contains `Session closed`, the session is
 * ended, started again under the same ID and the call is retried.
 *
 * @param {Object} options - The options for clicking the link and taking the screenshot.
 * @param {string} options.linkText - The text of the navigation link to click (required).
 * @param {boolean} [options.dataOnly=false] - Forwarded to `clickNavigationAndScreenshot`.
 * @param {string} options.sessionId - The ID of the session (required).
 * @param {string} [options.lockToken] - The lock token of the session, if it has one.
 * @param {Logger} [options.logger=Logger] - The logger instance to use for logging.
 * @param {number} [options.retries=0] - Internal retry counter; once it exceeds 2 the error is thrown instead of retrying.
 * @returns {Promise<Object>} Whatever `clickNavigationAndScreenshot` resolved with (when it carries no `error`).
 * @throws The errors of `getSession`, the `error` reported by the click helper, or an Error if the session could not be re-started.
 */
export async function clickNavigationAndScreenshotUrl({
  linkText,
  dataOnly = false,
  sessionId,
  lockToken,
  logger = Logger,
  retries = 0,
}) {
  assertRequired(
    { linkText, sessionId },
    'clickNavigationAndScreenshotUrl',
    logger,
  );

  // getSession throws when the session is unknown or the lock token is wrong,
  // so no separate "invalid session" check is needed here.
  const session = await getSession({ sessionId, lockToken, logger });
  const { browser, page } = session;

  const data = await clickNavigationAndScreenshot({
    linkText,
    page,
    browser,
    logger,
    dataOnly,
  });
  if (!data.error) {
    return data;
  }

  // Only a closed browser session is worth a restart-and-retry.
  const isSessionClosed = data.error?.message?.includes('Session closed');
  if (!isSessionClosed || !sessionId) {
    throw data.error;
  }
  if (retries > 2) {
    logger.error(
      `Failed to take screenshot after ${retries} retries:`,
      data.error,
    );
    throw data.error;
  }

  // Best effort: the old session is already broken, so a failure to end it
  // must not stop us from starting a replacement. Log it instead of hiding it.
  await endSession(session).catch((ex) => {
    logger.warn(`Error ending session '${sessionId}' before retry:`, ex);
  });
  const result = await startSession({ ...session, logger });
  if (!result.sessionId) {
    throw new Error(
      `Failed to re-start session after original error: ${data.error?.message}`,
    );
  }
  return clickNavigationAndScreenshotUrl({
    linkText,
    dataOnly,
    sessionId,
    lockToken,
    logger,
    retries: retries + 1,
  });
}
/* eslint-disable no-console */
/* eslint-disable no-shadow */
/* global document, window , HTMLElement */
import fs from 'fs';
/**
 * Upper-cases the first character of a string and leaves the rest untouched.
 * @param {string} str - The input string
 * @returns {string} The string with its first character upper-cased
 */
export const capitalize = (str) => {
  const firstChar = str.charAt(0);
  const remainder = str.slice(1);
  return `${firstChar.toUpperCase()}${remainder}`;
};
/**
 * Pauses the calling async flow for the given number of milliseconds.
 * @param {number} delayMillis - How long to wait, in milliseconds
 * @returns {Promise<string>} Resolves after the delay with a message stating the time waited in seconds
 */
export const sleep = async (delayMillis) => {
  const seconds = delayMillis / 1000;
  return new Promise((done) => {
    // Extra setTimeout arguments are handed to the callback when it fires.
    setTimeout(done, delayMillis, `Waited for ${seconds} seconds`);
  });
};
/**
 * Tells whether a file path ends with a supported image extension:
 * `.jpg`, `.jpeg` or `.png`, compared case-insensitively.
 * @param {string} filePath - The file path to check
 * @returns {boolean} `true` when the path ends with one of the supported extensions
 */
export const isValidImagePath = (filePath) =>
  /\.(?:jpe?g|png)$/i.test(filePath);
/**
 * Checks whether a string looks like an absolute `http://` or `https://` URL.
 *
 * Accepted shape: protocol, then either a dotted domain name ending in a
 * 2+ letter extension or a dotted-quad IPv4 address, then an optional port,
 * path, query string and fragment (restricted character sets for each).
 *
 * Each host label is matched as "alphanumeric, optionally followed by
 * alphanumerics/hyphens ending in an alphanumeric". That accepts the same
 * labels as a nested-repetition pattern would, but cannot backtrack
 * exponentially on long hosts that do not match.
 *
 * @param {string | undefined} txt - The string to check
 * @returns {boolean} `true` when `txt` is a string matching the URL pattern; `false` otherwise (including for non-string input)
 */
export const isValidURL = (txt) => {
  if (typeof txt !== 'string') {
    return false;
  }
  const pattern =
    /^(https?:\/\/)((([a-z\d]([a-z\d-]*[a-z\d])?)\.)+[a-z]{2,}|((\d{1,3}\.){3}\d{1,3}))(:\d+)?(\/[-a-z\d%_.~+]*)*(\?[;&a-z\d%_.~+=-]*)?(#[-a-z\d_]*)?$/i;
  return pattern.test(txt);
};
/**
 * Tells whether a string can be parsed by `JSON.parse`.
 * @param {string} string - The text to check
 * @returns {boolean} `true` when parsing succeeds, `false` when it throws
 */
export const isValidJson = (string) => {
  let parsedOk = true;
  try {
    JSON.parse(string);
  } catch {
    parsedOk = false;
  }
  return parsedOk;
};
/**
 * Tells whether an element's own computed style lets it be seen.
 *
 * The element counts as hidden when its computed style has `display: none`,
 * `visibility: hidden`, an opacity of `0`, or a width or height of `0px`.
 * @param {Element} element - The element to check
 * @returns {boolean} `true` when none of the hiding conditions apply
 */
export const isElementStyleVisible = (element) => {
  // Computed style reflects stylesheets, inline styles and script-applied
  // styles, and reports width/height as pixel values.
  const { display, visibility, opacity, width, height } =
    window.getComputedStyle(element);
  const isHidden =
    display === 'none' ||
    visibility === 'hidden' ||
    opacity === '0' ||
    width === '0px' ||
    height === '0px';
  return !isHidden;
};
/**
 * Tells whether an element's bounding box lies entirely inside the viewport.
 * @param {Element} element - The element to check
 * @returns {boolean} `true` when the top/left edges are non-negative and the bottom/right edges do not exceed the viewport size
 */
export const isElementInViewport = (element) => {
  const box = element.getBoundingClientRect();

  const startsInside = box.top >= 0 && box.left >= 0;
  if (!startsInside) {
    return false;
  }

  // window.inner* is preferred; documentElement.client* is the fallback when
  // the former is missing or zero.
  const viewportHeight =
    window.innerHeight || document.documentElement.clientHeight;
  if (!(box.bottom <= viewportHeight)) {
    return false;
  }

  const viewportWidth =
    window.innerWidth || document.documentElement.clientWidth;
  return box.right <= viewportWidth;
};
/**
 * Tells whether an element is visible to the user: the element and every
 * ancestor must be style-visible, and the element itself must be inside the
 * viewport.
 * @param {Element | undefined | null} element - The element to check
 * @returns {boolean} `true` when the element is visible
 * @throws {Error} If `element` is null or undefined
 */
export const isElementVisible = (element) => {
  if (element == null) {
    throw new Error('isElementVisible: Element is null or undefined');
  }

  // Walk from the element up through its ancestors (e.g. button -> div ->
  // body -> html); a single hidden node hides the element.
  for (let node = element; node; node = node.parentElement) {
    if (!isElementStyleVisible(node)) {
      return false;
    }
  }

  // Ancestor positions do not matter; only the element's own box is checked.
  return isElementInViewport(element);
};
/**
 * Tells whether a value is an instance of `HTMLElement`.
 * @param {Element} element - The value to check
 * @returns {boolean} `true` when `element instanceof HTMLElement`
 */
export const isHTMLElement = (element) => element instanceof HTMLElement;
/**
 * Strips every character that is not an ASCII letter, an ASCII digit or a
 * space from the input text.
 * @param {string} text - The input text
 * @returns {string} The text with all other characters removed
 * @todo Non-English letters are not supported; they are removed as well.
 */
export const cleanUpTextContent = (text) => {
  const disallowedRuns = /[^A-Za-z\d ]+/g;
  return text.replace(disallowedRuns, '');
};
/**
 * Waits for the next occurrence of a DOM event on the page's `document`.
 *
 * Note: there is no timeout here; the returned promise stays pending until
 * the event fires.
 * @param {Page} page - The Puppeteer page
 * @param {keyof DocumentEventMap} eventType - The type of event to wait for
 * @returns {Promise<string>} Resolves with `Event: <eventType> occurred` once the event fires
 */
export const waitForEvent = async (page, eventType) => {
  // Node-side variables are not visible inside the page function, so the event
  // type is handed over as an argument to `page.evaluate`; referencing the
  // outer variable directly would fail with a ReferenceError in the browser.
  return page.evaluate(
    (type) =>
      new Promise((resolve) => {
        document.addEventListener(
          type,
          () => {
            resolve(`Event: ${type} occurred`);
          },
          // The promise can only settle once, so drop the listener after the
          // first event instead of leaving it attached to the document.
          { once: true },
        );
      }),
    eventType,
  );
};
/**
 * Polls the page until the size of its HTML stops changing.
 *
 * The HTML size is sampled once per interval. The page is considered rendered
 * once the size is non-zero and has been identical to the previous sample
 * three times in a row. If that never happens, the function simply returns
 * after roughly `timeout` milliseconds' worth of checks (it does not throw).
 *
 * @param {Page} page - The Puppeteer page
 * @param {number} timeout - The maximum time to wait in milliseconds (default: 30000)
 * @param {boolean} checkOnlyHTMLBody - Compare the size of `document.body.innerHTML` instead of the full page content (default: false)
 * @param {number} waitTimeBetweenChecks - Delay between two samples in milliseconds (default: 1000); values below 1 are treated as 1
 * @returns {Promise<void>}
 */
export const waitTillHTMLRendered = async (
  page,
  timeout = 30000,
  checkOnlyHTMLBody = false,
  waitTimeBetweenChecks = 1000,
) => {
  const COUNT_THRESHOLD = 3;
  // Guard against a zero/negative interval, which would make the check budget unbounded.
  const interval = Math.max(1, waitTimeBetweenChecks);
  const maximumChecks = timeout / interval; // assuming the check itself takes no time
  let lastHTMLSize = 0;
  let stableSizeCount = 0;

  for (let i = 0; i < maximumChecks; i++) {
    // eslint-disable-next-line no-await-in-loop
    const html = await page.content();
    const currentHTMLSize = html.length;
    // eslint-disable-next-line no-await-in-loop
    const currentBodyHTMLSize = await page.evaluate(
      // document.body can be null very early in a page load.
      () => document.body?.innerHTML.length ?? 0,
    );
    const currentSize = checkOnlyHTMLBody
      ? currentBodyHTMLSize
      : currentHTMLSize;
    console.log(
      'last: ',
      lastHTMLSize,
      ' <> curr: ',
      currentHTMLSize,
      ' body html size: ',
      currentBodyHTMLSize,
    );

    // Stable = unchanged since the last sample AND not empty (an empty page
    // that stays empty has failed to render, not finished rendering).
    const isStable = currentSize !== 0 && currentSize === lastHTMLSize;
    stableSizeCount = isStable ? stableSizeCount + 1 : 0;
    console.log(`Stable size count: ${stableSizeCount}`);
    if (stableSizeCount >= COUNT_THRESHOLD) {
      console.log('Page rendered fully..');
      break;
    }

    lastHTMLSize = currentSize;
    // A plain timer is used instead of `page.waitForTimeout`, which newer
    // Puppeteer releases no longer provide.
    // eslint-disable-next-line no-await-in-loop
    await new Promise((resolve) => {
      setTimeout(resolve, interval);
    });
  }
};
/**
 * Heuristically detects whether the page is showing a loading state.
 *
 * The page counts as loading when `document.readyState` is `loading`, or when
 * the first element whose class attribute contains one of the words
 * loading / progress / spinner / wait (lower-case, capitalized or upper-case)
 * does not have an inline `display: none`.
 * @param {Page} page - The Puppeteer page
 * @returns {Promise<boolean>} Resolves with `true` when the page looks like it is still loading
 */
export const isPageExplicitlyLoading = async (page) => {
  // One selector group per keyword, covering its three casings.
  const selectors = [];
  for (const className of ['loading', 'progress', 'spinner', 'wait']) {
    const casings = [
      className,
      capitalize(className),
      className.toUpperCase(),
    ];
    selectors.push(casings.map((name) => `[class*="${name}"]`).join(', '));
  }

  // readyState alone is not enough: it can already be `complete` while the
  // page still shows a spinner.
  return page.evaluate((selectorGroups) => {
    const indicator = document.querySelector(selectorGroups.join(', '));
    if (document.readyState === 'loading') {
      return true;
    }
    if (indicator === null) {
      return false;
    }
    return indicator.style.display !== 'none';
  }, selectors);
};
/**
 * Reads an image file from disk and returns it as a base64 data URI.
 *
 * The MIME type in the URI follows the file extension: `image/png` for `.png`
 * files and `image/jpeg` for `.jpg` / `.jpeg` files.
 * @param {string} imageFilePath - The path to the image file (must end with .jpg, .jpeg or .png)
 * @returns {Promise<string>} Resolves with `data:<mime>;base64,<data>`
 * @throws {Error} If the path does not have a supported extension, or if the file cannot be read (the underlying error is attached as `cause`)
 */
export const imageToBase64String = async (imageFilePath) => {
  if (!isValidImagePath(imageFilePath)) {
    throw new Error('Invalid image file path');
  }
  // Only jpg/jpeg/png get past the check above, so "not png" means JPEG.
  const mimeType = /\.png$/i.test(imageFilePath) ? 'image/png' : 'image/jpeg';

  let data;
  try {
    // Promise-based read; no callback needed.
    data = await fs.promises.readFile(imageFilePath);
  } catch (err) {
    throw new Error(`Error reading file from disk: ${err}`, { cause: err });
  }

  return `data:${mimeType};base64,${data.toString('base64')}`;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment