Created
May 23, 2024 18:33
-
-
Save josiahbryan/35d7534234f2652648c10e4b13a0730e to your computer and use it in GitHub Desktop.
Dockerfile/K8 Setup example for running Puppeteer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable no-unused-vars */ | |
/* eslint-disable no-continue */ | |
/* eslint-disable no-restricted-syntax */ | |
/* eslint-disable no-shadow */ | |
/* eslint-disable no-use-before-define */ | |
/* eslint-disable no-console */ | |
/* global document, HTMLElement, window */ | |
import puppeteer from 'puppeteer-extra'; | |
import AppConfig from 'shared/config'; | |
import StealthPlugin from 'puppeteer-extra-plugin-stealth'; | |
// import { Browser, Page } from 'puppeteer'; | |
import Logger from 'shared/utils/Logger.js'; | |
import { | |
isPageExplicitlyLoading, | |
isValidURL, | |
waitTillHTMLRendered, | |
} from './utils.js'; | |
import { highlightInteractiveElements } from './element-annotator.js'; | |
/**
 * Maximum time to wait for a page to load: 60 seconds, expressed in milliseconds.
 * `screenshot` uses it as the default `page.goto` timeout.
 */
export const TIMEOUT = 60_000;
// Every screenshot is written to this one fixed path.
const imagePath = '/tmp/web-agent-screenshot.jpg';
// const browserWindowSize = { width: 900, height: 1600 };
// Handed to Chrome through the `--window-size` launch argument.
const browserWindowSize = { width: 1300, height: 900 };
/**
 * Launches a browser (puppeteer-extra with the stealth plugin applied) and
 * opens a first page tab in it.
 *
 * @param {Object} [options] - The options object
 * @param {Logger} [options.logger] - Logger used for debug output
 * @param {number} [options.defaultTimeout] - Default navigation timeout (ms) applied to the new page; a falsy value leaves Puppeteer's own default in place
 * @param {boolean} [options.headless] - Whether to run the browser in headless mode. Defaults to the `BROWSER_SERVICE_HEADLESS_DEV` env var when it is set, otherwise to `true` for every build except `dev`
 * @returns {Promise<{ browser: Browser, page: Page }>} An object containing the browser and the page
 * @throws Re-throws any error raised while opening the page, after closing the browser
 */
export const initController = async ({
  logger = Logger,
  defaultTimeout = 2 * 60 * 1000, // ~2 min
  // NOTE(review): env vars are strings, so any non-empty value (even "false")
  // is truthy here — confirm that is the intended behavior.
  headless = process.env.BROWSER_SERVICE_HEADLESS_DEV ||
    AppConfig.buildEnv !== 'dev',
} = {}) => {
  const pup = puppeteer.default.use(StealthPlugin());
  const pupOpts = {
    headless,
    executablePath: process.env.GOOGLE_CHROME_CANARY_PATH,
    userDataDir: process.env.GOOGLE_CHROME_CANARY_USER_DATA_DIR,
    args: [
      // `--profile-directory=${process.env.PROFILE}`,
      // '--disable-setuid-sandbox',
      // '--no-sandbox',
      // '--no-zygote',
      `--window-size=${browserWindowSize.width},${browserWindowSize.height}`,
    ],
  };

  logger.debug(`...Launching browser with options:`, pupOpts);
  const browser = await pup.launch(pupOpts);

  try {
    const page = await browser.newPage();
    if (defaultTimeout) {
      // Only the navigation timeout is raised; other waits keep their defaults.
      await page.setDefaultNavigationTimeout(defaultTimeout);
    }
    return { browser, page };
  } catch (error) {
    // The browser process is already running at this point: close it so a
    // failed tab setup does not leak a Chrome instance.
    await browser.close().catch(() => undefined);
    throw error;
  }
};
/**
 * Navigates `page` to `url`, waits for it to settle and captures it.
 *
 * @param {Object} options - The options object
 * @param {string} options.url - The URL to take a screenshot of
 * @param {Page} options.page - The page object
 * @param {boolean} [options.dataOnly=false] - Whether to return only the html/text and not do the screenshot
 * @param {boolean} [options.disableAnnotations=false] - Whether to disable the annotations
 * @param {number} [options.defaultTimeout=TIMEOUT] - Navigation timeout in ms for `page.goto`
 * @param {Logger} [options.logger=Logger] - The logger object
 * @returns {Promise<Object>} Whatever `waitAndScreenshot` resolves to on success, or `{ error }` when navigation or capture fails
 * @throws {Error} When `url` does not pass `isValidURL` (raised before the try block, so it is not folded into `{ error }`)
 */
export const screenshot = async ({
  url,
  page,
  dataOnly = false,
  disableAnnotations = false,
  defaultTimeout: timeout = TIMEOUT,
  // Same fallback the sibling helpers use, so a missing logger cannot crash the call.
  logger = Logger,
}) => {
  logger.debug(`...Opening ${url}`);
  if (!isValidURL(url)) {
    throw new Error(`Invalid URL: ${url}`);
  }

  try {
    logger.debug(`...waiting for networkidle0`);
    // TODO: What is the best way to wait for the page to load completely for a screenshot?
    // `networkidle0` resolves once there have been no network connections for at least 500 ms.
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout,
    });

    logger.debug(`...going to screenshot`);
    // `waitUntil` alone is not enough for dynamically loaded content, so the
    // extra waiting logic lives in `waitAndScreenshot`.
    const screenshotResults = await waitAndScreenshot({
      page,
      dataOnly,
      disableAnnotations,
      logger,
    });
    return screenshotResults;
  } catch (error) {
    logger.error(`Error taking screenshot:`, error);
    return { error };
  }
};
/**
 * Clicks on a navigation link and takes a screenshot of the page it leads to,
 * whether the link navigates the current tab or opens a new one.
 *
 * @param {Object} options - The options object
 * @param {string} options.linkText - The text of the link to click on
 * @param {Page} options.page - The page object
 * @param {Browser} options.browser - The browser object
 * @param {boolean} [options.dataOnly] - Whether to return only the html/text and not do the screenshot
 * @param {boolean} [options.disableAnnotations] - Whether to disable the annotations
 * @param {Logger} [options.logger=Logger] - The logger object
 * @returns {Promise<Object>} Whatever `waitAndScreenshot` resolves to on success, or `{ error }` when the click, the navigation or the capture fails
 */
export const clickNavigationAndScreenshot = async ({
  linkText,
  page,
  browser,
  logger = Logger,
  dataOnly,
  disableAnnotations,
}) => {
  try {
    // The navigation wait must be armed BEFORE the click, and the branch below
    // depends on the click result, so Promise.all cannot be used here.
    const navigationPromise = page.waitForNavigation();
    // Mark a later rejection as handled: when the click fails or the link opens
    // a new tab nobody awaits this promise, and its timeout would otherwise
    // surface as an unhandled rejection. Awaiting it below still throws.
    navigationPromise.catch(() => undefined);

    const clickResponse = await clickOnLink({ linkText, page, logger });
    // `clickOnLink` reports failures as `{ error }` rather than throwing.
    if (clickResponse?.error) {
      throw clickResponse.error;
    }

    if (!clickResponse) {
      // Same-tab navigation
      await navigationPromise;
      return await waitAndScreenshot({
        page,
        logger,
        dataOnly,
        disableAnnotations,
      });
    }

    // The link opens in a new tab: `clickResponse` is the element's
    // `gpt-link-text` value. The navigation promise is deliberately NOT awaited
    // here, there is no navigation in this tab and it would only time out.
    const newPage = await newTabNavigation({
      linkText: clickResponse,
      page,
      browser,
      logger,
    });
    if (!newPage || newPage.error) {
      throw newPage?.error ?? new Error('The new page cannot be opened');
    }

    return await waitAndScreenshot({
      page: newPage,
      logger,
      dataOnly,
      disableAnnotations,
    });
  } catch (error) {
    logger.error(`Error clicking link and taking screenshot:`, error);
    return { error };
  }
};
/**
 * Clicks the first annotated element whose `gpt-link-text` attribute contains
 * the given text (compared trimmed and lower-cased).
 *
 * Elements with `target="_blank"` are NOT clicked here; their identifier is
 * returned instead so the caller can handle the new tab.
 *
 * @param {Object} options - The options object
 * @param {string} options.linkText - The text of the link to click on
 * @param {Page} options.page - The page object
 * @param {Logger} [options.logger=Logger] - The logger object
 * @returns {Promise<string | undefined | { error: Error }>} The element's `gpt-link-text` value if the link opens in a new tab, `undefined` after a same-tab click, or `{ error }` when no element matches or the evaluation fails
 */
const clickOnLink = async ({ linkText, page, logger = Logger }) => {
  try {
    // The callback runs in the browser context, so it must be self-contained.
    return await page.evaluate((targetText) => {
      // align with the lower-cased identifier written by `element-annotator`
      const needle = targetText.trim().toLowerCase();
      if (needle === '') {
        // ''.includes('') is true for every element: never click an arbitrary one
        throw new Error(`Link with text not found: "${targetText}"`);
      }

      for (const element of document.querySelectorAll('[gpt-link-text]')) {
        if (!(element instanceof HTMLElement)) {
          continue;
        }
        const identifier = element.getAttribute('gpt-link-text');
        if (!identifier?.includes(needle)) {
          continue;
        }
        if (element.getAttribute('target') === '_blank') {
          return identifier;
        }
        element.style.backgroundColor = 'rgba(255,255,0,0.25)';
        element.click();
        return undefined;
      }

      // only if the loop ends without returning
      throw new Error(`Link with text not found: "${targetText}"`);
    }, linkText);
  } catch (error) {
    // Failures are handed back to the caller as a value, but no longer silently.
    logger.error(`Error clicking on link "${linkText}":`, error);
    return { error };
  }
};
/**
 * Clicks the element identified by its `gpt-link-text` value, waits for the
 * tab it opens and returns that tab's page object.
 *
 * @param {Object} options - The options object
 * @param {string} options.linkText - The exact `gpt-link-text` value of the element that opens the new tab
 * @param {string} [options.clickResponse] - Alternative key for the same value (this is the key `clickNavigationAndScreenshot` passes); used only when `linkText` is not given
 * @param {Page} options.page - The page object
 * @param {Browser} options.browser - The browser object
 * @param {Logger} [options.logger=Logger] - The logger object
 * @returns {Promise<Page | undefined>} The new page object if it is successfully opened, or `undefined` (after logging the cause) if it cannot be opened
 */
const newTabNavigation = async ({
  clickResponse,
  linkText: gptLinkText = clickResponse,
  page,
  browser,
  logger = Logger,
}) => {
  // Link text is arbitrary page content: escape it so quotes, backslashes and
  // line breaks cannot break out of the quoted CSS attribute value.
  const escapeCssString = (value) =>
    value
      .replace(/["\\]/g, '\\$&')
      .replace(/[\n\r\f]/g, (ch) => `\\${ch.charCodeAt(0).toString(16)} `);

  try {
    if (typeof gptLinkText !== 'string') {
      throw new Error('The link text is missing');
    }

    // store the target of original page to know that this was the opener:
    const currentPageTarget = page.target();

    const element = await page.$(
      `[gpt-link-text="${escapeCssString(gptLinkText)}"]`,
    );
    if (element === null) {
      throw new Error('The element is null');
    }

    // Start watching for the new tab, then click. Both promises are awaited
    // together so a failed click is reported instead of left floating.
    const [newPageTarget] = await Promise.all([
      browser.waitForTarget((target) => target.opener() === currentPageTarget),
      element.click(),
    ]);

    // switch to the new page:
    const newPage = await newPageTarget.page();
    if (newPage === null) {
      throw new Error('The new page is null');
    }

    // wait for page to be loaded (briefly)
    await newPage.waitForSelector('body');
    return newPage;
  } catch (error) {
    logger.error(`Error opening link in a new tab:`, error);
    // The caller checks for `undefined`, as documented above.
    return undefined;
  }
};
/**
 * Lets the page finish rendering, optionally annotates and captures it, then
 * collects the page metadata.
 *
 * @param {Object} options - The options object
 * @param {Page} options.page - The page object
 * @param {boolean} [options.dataOnly=false] - Skip both the annotations and the screenshot; only read the page data
 * @param {boolean} [options.disableAnnotations=false] - Take the screenshot without highlighting interactive elements first
 * @param {Logger} [options.logger=Logger] - The logger object
 * @returns {Promise<Object>} The screenshot path (`imagePath`, always present, even when no screenshot was taken) merged with everything `pageMetadataHelper` returns
 */
const waitAndScreenshot = async ({
  page,
  dataOnly = false,
  disableAnnotations = false,
  logger = Logger,
}) => {
  // `waitUntil` on navigation does not cover dynamically loaded content, so
  // give a page that still reports itself as loading time to render.
  if (await isPageExplicitlyLoading(page)) {
    await waitTillHTMLRendered(page);
  }

  const wantsImage = !dataOnly;

  if (wantsImage && !disableAnnotations) {
    logger.debug(`...Highlight all interactive elements`);
    await highlightInteractiveElements(page);
  }

  if (wantsImage) {
    logger.debug(`...Taking screenshot`);
    await page.screenshot({ path: imagePath, fullPage: true });
  }

  logger.debug(`...Reading Text and HTML for the page`);
  const pageMetadata = await pageMetadataHelper(page);

  return { imagePath, ...pageMetadata };
};
/**
 * Reads the serialized HTML of the page plus its title, visible text, URL and
 * a de-duplicated list of outgoing http(s) links.
 *
 * Links are resolved against the document base URL the same way the browser
 * resolves them, so relative paths, query-only hrefs and hrefs carrying their
 * own query/hash all produce the URL a click would navigate to. In-page
 * anchors (`#...`), non-http(s) schemes and links to pdf/jpg/jpeg/png files
 * are left out.
 *
 * @param {Page} page - The page object
 * @returns {Promise<{ html: string, text: string, links: Array<[string, string | undefined]>, title: string, url: string }>}
 *   `links` holds `[absoluteUrl, linkText]` pairs sorted by URL length, then
 *   alphabetically; when a URL appears several times the text of its last
 *   occurrence is kept.
 */
export const pageMetadataHelper = async (page) => {
  const html = await page.content();

  // Everything inside this callback runs in the browser context and must not
  // reference anything from the surrounding module.
  const { title, text, links, url } = await page.evaluate(() => {
    const pageUrl = window.location.href;
    const baseUrl = document.baseURI || pageUrl;
    const skippedExtensions = ['.pdf', '.jpg', '.jpeg', '.png'];

    // Keyed by absolute URL: a later duplicate overwrites the text of an earlier one
    const linksByUrl = new Map();

    for (const anchor of document.querySelectorAll('a[href]')) {
      const href = anchor.getAttribute('href')?.trim();
      if (!href || href.startsWith('#')) {
        continue;
      }

      let resolved;
      try {
        resolved = new URL(href, baseUrl);
      } catch (err) {
        console.warn(`Cannot parse URL '${href}': ${err.message}`);
        continue;
      }

      // Drops javascript:, mailto:, tel:, ...
      if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') {
        continue;
      }

      // Test the parsed path so a query string or upper-case extension does
      // not hide the file type.
      const path = resolved.pathname.toLowerCase();
      if (skippedExtensions.some((ext) => path.endsWith(ext))) {
        continue;
      }

      const absoluteUrl = resolved.toString();
      linksByUrl.set(absoluteUrl, [absoluteUrl, anchor.textContent?.trim()]);
    }

    // Shortest URLs first, ties broken alphabetically
    const links = [...linksByUrl.values()].sort(([a], [b]) => {
      if (a.length !== b.length) {
        return a.length - b.length;
      }
      if (a < b) {
        return -1;
      }
      return a > b ? 1 : 0;
    });

    return {
      title: document.title,
      text: document.body.innerText,
      url: pageUrl,
      links,
    };
  });

  return { html, text, links, title, url };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ####### | |
# | |
# This file contains all the things needed to run a copy of the browser-service in k8s: | |
# - service - Exposes the deployment on port 80 to the cluster | |
# - deployment - Creates pods for the docker image containing the actual backend | |
# | |
# Ingress is handled in backend.yml with `browser-service.vaya.to` pointing to the service `browser-service` defined here. | |
# | |
# `browser-service` is built and pushed to the registry by the `/browser-service/build-and-deploy.sh` script, manually on demand. | |
# | |
# ####### | |
# # Add a disruption budget to attempt to keep the backend "safe" during | |
# # k8 updates via DO | |
# # ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ | |
# # ref: https://docs.digitalocean.com/products/kubernetes/how-to/upgrade-cluster/#disruption-free-upgrades | |
# apiVersion: policy/v1 | |
# kind: PodDisruptionBudget | |
# metadata: | |
# name: browser-service-pdb | |
# spec: | |
# minAvailable: 1 | |
# selector: | |
# matchLabels: | |
# app: browser-service | |
# --- | |
## Service: browser-service
#
# Exposes the pods labeled `app: browser-service` inside the cluster:
# service port 80 forwards to container port 8091.
apiVersion: v1
kind: Service
metadata:
  name: browser-service
  namespace: vaya-prod
  labels:
    app: browser-service
    tags.datadoghq.com/env: "prod"
spec:
  ports:
    - port: 80
      targetPort: 8091
  selector:
    app: browser-service
--- | |
## Deployment: browser-service
#
# Runs the browser-service container, which listens on port 8091.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: browser-service
  namespace: vaya-prod
  labels:
    app: browser-service
    tags.datadoghq.com/env: "prod"
spec:
  replicas: 1
  # https://stackoverflow.com/a/37258369/1119559
  # Right now, I don't have a plan/handle to rollback with K8s-specific RS history.
  # If something goes wrong in a prod deployment, I'll change
  # the ":latest" below to a specific SHA/version and redeploy.
  revisionHistoryLimit: 0
  # Guidance around this timing and the strategy below
  # is from https://alexklibisz.com/2021/07/20/speed-limits-for-rolling-restarts-in-kubernetes.html
  # minReadySeconds: 3 # TBD - need to qualify to see if this helps
  # strategy:
  #   rollingUpdate:
  #     maxUnavailable: 1 # New! 2x the default of 25% (1/4)
  #     maxSurge: 2 # Moved Surge to 2 since replicas is at 6 to keep max nodes at 8 for next update
  selector:
    matchLabels:
      app: browser-service
  template:
    metadata:
      labels:
        app: browser-service
        # Needed on the POD (not only on the Deployment above): DD_ENV below is
        # read from this pod label through the downward API.
        tags.datadoghq.com/env: "prod"
    spec:
      containers:
        - name: browser-service
          image: registry.digitalocean.com/vaya/browser-service:latest
          imagePullPolicy: Always
          resources:
            requests:
              # Not sure how much browser-service will need, shall have to test
              memory: "500M"
              cpu: "100m"
            # No limits imposed for now...
            # limits:
            #   # api2-6g node pool now has 6GB usable, so this is roughly 6GB
            #   memory: "6000M"
            #   # Current k8 pod has 2vCPU
            #   # New api2-6gb has 4vCPU
            #   cpu: "3500m"
          ports:
            - containerPort: 8091
          # startupProbe:
          #   initialDelaySeconds: 300
          #   # Period * Failure = 5 minutes
          #   # Protects slow start-up containers from getting killed
          #   # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes
          #   periodSeconds: 10
          #   failureThreshold: 30
          #   timeoutSeconds: 1
          #   successThreshold: 1
          #   httpGet:
          #     path: /api/version?k8_probe=startup
          #     port: 8091
          # readinessProbe:
          #   httpGet:
          #     path: /api/version?k8_probe=readiness
          #     port: 8091
          # livenessProbe:
          #   # Updated liveness settings 2022-12-24 due to some reports taking long time to run (e.g golds billing)
          #   # and causing previous liveness settings to timeout, resulting in pod restarts when the report was
          #   # running, never letting the report complete. These settings have been updated to compensate
          #   # (or attempt to compensate) for the temporary load
          #   initialDelaySeconds: 600
          #   # Period * Failure = 7.5 minutes
          #   # Protects containers that have temporary load from getting killed
          #   # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes
          #   periodSeconds: 15
          #   failureThreshold: 30
          #   # Higher timeout in case the server is under temporary load
          #   timeoutSeconds: 5
          #   successThreshold: 1
          #   httpGet:
          #     path: /api/version?k8_probe=liveness
          #     port: 8091
          env:
            - name: ENVIRONMENT
              value: production
            # Ref: https://docs.datadoghq.com/developers/dogstatsd/?tab=kubernetes#send-statsd-metrics-to-the-agent
            - name: DD_AGENT_HOST
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            # Ref: https://docs.datadoghq.com/developers/dogstatsd/?tab=kubernetes#origin-detection-over-udp
            - name: DD_ENTITY_ID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.uid
            # TBD if this is needed
            - name: DD_ENV
              valueFrom:
                fieldRef:
                  fieldPath: metadata.labels['tags.datadoghq.com/env']
            # This SHOULD work ...
            # ref: https://docs.datadoghq.com/tracing/guide/ignoring_apm_resources/?tab=datadogyaml
            - name: DD_APM_FILTER_TAGS_REJECT
              value: http.url_details.path:/api/version
            # # TBD if this is needed
            # - name: DD_VERSION
            #   valueFrom:
            #     fieldRef:
            #       fieldPath: metadata.labels['tags.datadoghq.com/version']
            - name: DD_APM_ENABLED
              value: "true"
            - name: DD_HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # Using this in LoggingAdapter for system info
            - name: K8_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # Using this in LoggingAdapter for system info
            - name: K8_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            # Using this in LoggingAdapter for system info
            - name: K8_POD_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
FROM --platform=linux/amd64 node:20.11.1 AS base
# Note: The --platform arg above is required for building on Apple Silicon (Mx) Macs
# Thanks to <https://dev.to/docker/unable-to-locate-package-google-chrome-stable-b62> for the tip.

USER root
WORKDIR /root

# Most of this apt-get setup is based on https://blog.apify.com/puppeteer-docker/
# Install the Chrome stable package plus the fonts and libraries it needs,
# a few network debugging tools, and create the unprivileged `vaya` user.
RUN apt-get update \
    && apt-get install -y wget gnupg \
    && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/googlechrome-linux-keyring.gpg \
    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/googlechrome-linux-keyring.gpg] https://dl-ssl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google.list \
    && apt-get update \
    && apt-get install -y google-chrome-stable fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-khmeros fonts-kacst fonts-freefont-ttf libxss1 dbus dbus-x11 \
       --no-install-recommends \
    && apt-get install -qqy --no-install-recommends \
       iproute2 iputils-ping telnet net-tools ssh \
    && rm -rf /var/lib/apt/lists/* \
    && groupadd -r vaya && useradd -rm -g vaya -G audio,video vaya

# Determine the path of the installed Google Chrome
# Mostly for debugging the set of PUPPETEER_EXECUTABLE_PATH later
RUN which google-chrome-stable || true

# Use dumb-init to help prevent zombie processes. Without this, node doesn't seem able to kill
# zombie processes on dev. Should also ensure everything is killed on prod when the entire pod
# receives a SIGTERM.
# See https://www.elastic.io/nodejs-as-pid-1-under-docker-images/
ADD https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1.2.0_amd64 /usr/local/bin/dumb-init
RUN chmod +x /usr/local/bin/dumb-init
ENTRYPOINT ["dumb-init", "--"]

# Install our shared module because package.json has a dep to '../shared'
RUN mkdir /shared
WORKDIR /shared

# Have to use the .tar.gz because COPY refuses to copy from ../shared
# NOTE: You have to pre-build shared.tar.gz - see package.json and the docker:prebuild:shared script in that file
COPY buildfiles/shared.tar.gz .
RUN tar zxvf ./shared.tar.gz && \
    rm -f ./shared.tar.gz && \
    cd /shared && \
    npm ci --legacy-peer-deps && \
    chown -R vaya:vaya /shared

# Install the nodejs side of the app
RUN mkdir /app && chown vaya:vaya /app
WORKDIR /app
ENV NODE_ENV=production

# Install our app src and node modules
COPY --chown=vaya:vaya . .

# Use the Chrome we installed above instead of letting Puppeteer download Chromium.
# PUPPETEER_EXECUTABLE_PATH must match the output of `which google-chrome-stable` above.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
    PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable

# Install node modules
# nodemon is installed globally here (in addition to whatever package.json
# provides) so we can run it via commands in bash run files.
# Also, clean cache after all that installing
RUN npm ci --legacy-peer-deps && \
    npm install nodemon@1.18.10 -g && \
    npm cache clean --force && \
    chown -R vaya:vaya /app

# Inform docker of the port we're using - not sure that this is required,
# but it keeps the metadata clean anyway
EXPOSE 8091

# Switch to the non-root user
USER vaya

CMD [ "npm", "run", "start:docker-internal" ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable no-unused-vars */ | |
/* eslint-disable no-restricted-syntax */ | |
/* global document, window, HTMLElement */ | |
// for reference, this variable must be defined in the browser context (inside the pageFunction) | |
// const UNIQUE_IDENTIFIER_ATTRIBUTE = "gpt-link-text"; | |
// CSS selectors that together match every element treated as interactive;
// they are joined into a single selector list before querying the page.
const INTERACTIVE_ELEMENTS = [
  // native controls
  'a',
  'button',
  'input', // original note: "to avoid clicking on the google search input"
  'textarea',
  // ARIA roles
  '[role=button]',
  '[role=treeitem]',
  // anything carrying a non-empty inline click handler
  '[onclick]:not([onclick=""])',
];
/**
 * Strips the `gpt-link-text` attribute from every element of the page that
 * currently carries it. Only the attribute is removed; element styles are not
 * touched.
 * @param page
 */
const resetUniqueIdentifierAttribute = async (page) => {
  await page.evaluate(() => {
    // Must be declared here: the callback runs in the browser context
    const attributeName = 'gpt-link-text';
    document.querySelectorAll(`[${attributeName}]`).forEach((node) => {
      node.removeAttribute(attributeName);
    });
  });
};
/**
 * Outlines every interactive element of the page in red and tags the visible
 * ones with a `gpt-link-text` attribute holding their trimmed, lower-cased
 * text content. That attribute is what later identifies the element to click.
 * @param page
 */
const annotateAllInteractiveElements = async (page) => {
  // `$$eval` runs the selector inside the page and passes the matches (an empty
  // array when nothing matches) to the callback. The selector may be built out
  // here, but the callback executes in the browser context: everything it uses
  // has to be defined inside it, and its console output shows up in the browser
  // console rather than in the node terminal.
  await page.$$eval(INTERACTIVE_ELEMENTS.join(', '), (matches) => {
    const IDENTIFIER_ATTRIBUTE = 'gpt-link-text';

    // An element's own computed style hides it
    const hasVisibleStyle = (node) => {
      const { display, visibility, opacity, width, height } =
        window.getComputedStyle(node);
      return !(
        display === 'none' ||
        visibility === 'hidden' ||
        opacity === '0' ||
        width === '0px' ||
        height === '0px'
      );
    };

    // Visible only if the element and every ancestor are visible.
    // (There is deliberately no in-viewport check.)
    const isVisible = (node) => {
      if (node === null || node === undefined) {
        return false;
      }
      for (let current = node; current; current = current.parentElement) {
        if (!hasVisibleStyle(current)) {
          return false;
        }
      }
      return true;
    };

    matches.forEach((node) => {
      // every match gets the red bounding box, visible or not
      if (node instanceof HTMLElement) {
        node.style.outline = '3px solid red';
      }

      if (!isVisible(node)) {
        return;
      }

      // textContent is null for document/doctype nodes: nothing to identify by
      const { textContent } = node;
      if (textContent === null) {
        return;
      }

      // TODO: elements without text (e.g. an <a> that only has a `title`, or an
      // icon button) currently end up with an empty identifier.
      node.setAttribute(IDENTIFIER_ATTRIBUTE, textContent.trim().toLowerCase());
    });
  });
};
/**
 * Refreshes the annotations of a page: identifiers left over from a previous
 * run are cleared first, then all interactive elements are highlighted and
 * tagged again.
 * @param page
 */
export const highlightInteractiveElements = async (page) => {
  // Order matters: stale identifiers must be gone before new ones are written
  await resetUniqueIdentifierAttribute(page);

  await annotateAllInteractiveElements(page);
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable no-unused-vars */ | |
/* global document, window, HTMLElement */ | |
import Logger from 'shared/utils/Logger'; | |
import { jsonSafeStringify } from 'shared/utils/jsonSafeStringify'; | |
import { jsonSafeParse } from 'shared/utils/jsonSafeParse'; | |
import { assertRequired } from 'shared/utils/assertRequired'; | |
import { readFile } from 'fs/promises'; | |
import { normalizeDecimals } from 'shared/utils/normalizeDecimals'; | |
import { getSystemInfo } from 'shared/utils/getSystemInfo'; | |
import { | |
screenshotUrl, | |
clickNavigationAndScreenshotUrl, | |
getSession, | |
startSession, | |
endSession, | |
getSessionList, | |
} from './utils/session-controller'; | |
import { pageMetadataHelper } from './utils/browser-controller'; | |
export async function usePuppeteerControllerRoutes(app) { | |
// TODO: Use shared/utils/appPlugin instead!!!!!!!!
/**
 * Wraps a route handler so that it receives the parsed request body and so
 * that its return value (if truthy) is sent as the JSON response together
 * with the standard `X-Vaya-*` headers.
 *
 * A handler may also write the response itself (e.g. raw image bytes); in that
 * case the wrapper leaves the already-sent response alone.
 *
 * Errors thrown by the handler are logged and answered with
 * `err.statusCode || 500` and a JSON-encoded `{ error }` body.
 *
 * @param {Function} fn - `async (req, res) => json`; `req.body` is the parsed body
 * @returns {Function} An `async (req, res)` route handler
 */
const jsonApiWrapper = (fn) => async (req, res) => {
  try {
    const { body: rawBody } = req;
    // Only a raw Buffer needs decoding first; anything else goes to the
    // parser unchanged.
    const body = jsonSafeParse(
      Buffer.isBuffer(rawBody) ? rawBody.toString() : rawBody,
    );

    const json = await fn({ ...req, body }, res);

    if (res.headersSent) {
      // The handler already responded: setting headers or sending JSON now
      // would throw.
      return;
    }

    const system = getSystemInfo();
    res.header('X-Vaya-Server', system.host.hostname);
    res.header('X-Vaya-Service', system.host.serviceName);
    res.header('X-Vaya-Namespace', system.host.k8sNamespace);
    res.header('X-Vaya-Version', system.version);
    if (system.appName) {
      res.header('X-Vaya-App', system.appName);
    }

    // This is the value from the same-named header the client sent (if any)
    // It's attached to the request by our logRequest middleware
    res.header('X-Client-Tx', req.clientTransactionId);

    // For latency measurements (combined with TimeService on the client side)
    res.header('X-Vaya-Epoch', Date.now());

    // Tell client what we measured in our logRequest.js handler on what they sent us
    if (req.receiveLatency) {
      res.header('X-Receive-Latency', req.receiveLatency);
      res.header(
        'X-Receive-KBps',
        normalizeDecimals(req.receiveBytesPerSecond / 1024),
      );
    }

    if (json) {
      res.json(json);
    }
  } catch (err) {
    const { logger = Logger } = req;
    logger.error(`Error:`, err);

    if (res.headersSent) {
      // Too late to turn this into an error response
      return;
    }

    res.status(err.statusCode || 500).send(
      jsonSafeStringify({
        error: {
          message: err.message,
          stack: err?.stack,
          data: err?.statusData,
        },
      }),
    );
  }
};
app.get( | |
'/api/version', | |
jsonApiWrapper(async (req) => { | |
// Just for the fun of it, log startup probes. | |
// The other probes happen so often, we don't want to clutter logs | |
if (req.query.k8_probe === 'startup') { | |
req.logger.info(`✅ Received startup probe from k8s ✨`); | |
} | |
return getSystemInfo(); | |
}), | |
); | |
app.post( | |
'/api/screenshot', | |
jsonApiWrapper(async (req, res) => { | |
const { logger, body } = req; | |
const { | |
url, | |
traceId, | |
sessionId, | |
dataOnly, | |
imageOnly, | |
lockToken, | |
disableAnnotations, | |
} = body; | |
assertRequired({ url }, 'screenshot', logger); | |
const img = await screenshotUrl({ | |
url, | |
traceId, | |
sessionId, | |
lockToken, | |
dataOnly, | |
disableAnnotations, | |
logger, | |
}); | |
const { imagePath, ...json } = img || {}; | |
logger.debug(`Got screenshot result:`, { | |
input: body, | |
output: img, | |
}); | |
if (dataOnly) { | |
return json; | |
} | |
const bytes = await readFile(imagePath); | |
if (imageOnly) { | |
// res.json(result); | |
res.header('Content-Type', 'image/jpeg'); | |
res.send(bytes); | |
} | |
const jsonWithBytes = { | |
imageBase64: bytes.toString('base64'), | |
...json, | |
}; | |
return jsonWithBytes; | |
}), | |
); | |
app.post( | |
'/api/click_and_screenshot', | |
jsonApiWrapper(async (req, res) => { | |
const { logger = Logger, body } = req; | |
const { | |
linkText, | |
traceId, | |
sessionId, | |
dataOnly, | |
imageOnly, | |
lockToken, | |
disableAnnotations, | |
} = body; | |
// logger.debug(`click_and_screenshot body:`, { | |
// parsed: body, | |
// raw: bodyBuffer.toString(), | |
// }); | |
assertRequired({ sessionId, linkText }, 'click_and_screenshot', logger); | |
const props = { | |
linkText, | |
traceId, | |
sessionId, | |
lockToken, | |
dataOnly, | |
disableAnnotations, | |
logger, | |
}; | |
// logger.debug(`Calling click with props`, props); | |
const img = await clickNavigationAndScreenshotUrl(props); | |
const { imagePath, ...json } = img || {}; | |
logger.debug(`Got click and screenshot result:`, { | |
input: body, | |
output: img, | |
}); | |
if (dataOnly) { | |
return json; | |
} | |
const bytes = await readFile(imagePath); | |
if (imageOnly) { | |
// res.json(result); | |
res.header('Content-Type', 'image/jpeg'); | |
res.send(bytes); | |
} | |
const jsonWithBytes = { | |
imageBase64: bytes.toString('base64'), | |
...json, | |
}; | |
return jsonWithBytes; | |
}), | |
); | |
app.post( | |
'/api/current_page_content', | |
jsonApiWrapper(async (req, res) => { | |
const { logger = Logger, body } = req; | |
const { sessionId, lockToken, customPageFunction } = body; | |
assertRequired({ sessionId }, 'current_page_content', logger); | |
// Makes a factory... | |
const customPageFn = | |
// eslint-disable-next-line no-new-func | |
customPageFunction && new Function(`return ${customPageFunction}`)(); | |
// if (customEvaluationString || customPageFunction) { | |
// logger.warn(`Custom function strings:`, { | |
// customEvaluationString, | |
// customPageFunction, | |
// customPageFn, | |
// customPageFnAsString: customPageFn.toString(), | |
// // fnOut: customPageFn | |
// }); | |
// } | |
const { page } = await getSession({ sessionId, lockToken, logger }); | |
const [metadata, functionResults] = await Promise.all([ | |
pageMetadataHelper(page), | |
...(customPageFunction | |
? [customPageFn({ page, logger })] | |
: [() => undefined]), | |
]).catch((ex) => { | |
logger.error(`Error getting page content:`, ex); | |
}); | |
const currentUrl = page.url(); | |
const results = { | |
...metadata, | |
functionResults, | |
}; | |
logger.debug(`Got current_page_content results:`, { | |
input: body, | |
output: results, | |
}); | |
return results; | |
}), | |
); | |
app.post( | |
'/api/start_session', | |
jsonApiWrapper(async (req, res) => { | |
const { logger = Logger, body } = req; | |
const { traceId, sessionId: sessionIdInput, lockToken, metadata } = body; | |
const { sessionId } = await startSession({ | |
sessionId: sessionIdInput, | |
lockToken, | |
traceId, | |
metadata, | |
logger, | |
}); | |
logger.debug(`Started session`, { | |
sessionId, | |
traceId, | |
lockToken, | |
metadata, | |
}); | |
return { sessionId }; | |
}), | |
); | |
app.post( | |
'/api/end_session', | |
jsonApiWrapper(async (req, res) => { | |
const { logger = Logger, body } = req; | |
const { sessionId, lockToken } = body; | |
await endSession({ sessionId, lockToken, logger }); | |
logger.debug(`Ended session`, { | |
sessionId, | |
lockToken, | |
}); | |
return { ended: sessionId }; | |
}), | |
); | |
app.post( | |
'/api/session_list', | |
jsonApiWrapper(async (req, res) => { | |
const { logger = Logger, body: { lockToken } = {} } = req; | |
const sessions = (await getSessionList({ lockToken, logger })).map( | |
({ sessionId, createdAt, metadata, traceId }) => ({ | |
sessionId, | |
createdAt, | |
metadata, | |
traceId, | |
}), | |
); | |
logger.debug(`Got session list`, sessions, { | |
lockToken, | |
}); | |
return sessions; | |
}), | |
); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable no-unused-vars */ | |
import Logger from 'shared/utils/Logger'; | |
import StatusCodeError from 'shared/utils/StatusCodeError'; | |
import nanoid from 'shared/utils/nanoid'; | |
import { assertRequired } from 'shared/utils/assertRequired'; | |
import { | |
initController, | |
screenshot, | |
clickNavigationAndScreenshot, | |
} from './browser-controller'; | |
/**
 * In-memory registry of active sessions, keyed by session ID.
 *
 * Created without a prototype so that caller-supplied IDs such as
 * "constructor" or "__proto__" never resolve to inherited Object.prototype
 * members when used as keys.
 * @type {Object<string, Object>}
 */
const SessionLookup = Object.create(null);
/**
 * Lists the sessions visible to the caller.
 *
 * Sessions stored without a lock token are always returned. Sessions stored
 * with a lock token are returned only when `lockToken` matches it, so calling
 * without a token yields the unlocked sessions only.
 *
 * @param {Object} [options] - The options for retrieving the session list.
 * @param {string} [options.lockToken] - Lock token used to also include matching locked sessions.
 * @param {Object} [options.logger=Logger] - Accepted for signature parity with the other helpers; not used here.
 * @returns {Promise<Array<Object>>} The stored session objects visible to the caller.
 */
export async function getSessionList({ lockToken, logger = Logger } = {}) {
  return Object.values(SessionLookup).filter(
    (session) => !session.lockToken || session.lockToken === lockToken,
  );
}
/**
 * Retrieves a stored session by ID, enforcing its lock token if it has one.
 *
 * @param {Object} options - The options for retrieving the session.
 * @param {string} options.sessionId - The ID of the session to retrieve.
 * @param {string} [options.lockToken] - Required (and must match) when the session was stored with a lock token.
 * @param {Object} [options.logger=Logger] - Accepted for signature parity; not used here.
 * @returns {Promise<Object>} The stored session object.
 * @throws {StatusCodeError.BadRequest} If no session is stored under `sessionId`.
 * @throws {StatusCodeError.ExpectationFailed} If the session has a lock token and `lockToken` is missing or different.
 */
export async function getSession({ sessionId, lockToken, logger = Logger }) {
  assertRequired({ sessionId });

  // Own-property check: IDs like "constructor" must not resolve to inherited
  // members of the registry object.
  const data = Object.prototype.hasOwnProperty.call(SessionLookup, sessionId)
    ? SessionLookup[sessionId]
    : undefined;
  if (!data) {
    throw new StatusCodeError.BadRequest(`Session not found: ${sessionId}`);
  }

  if (data.lockToken) {
    if (!lockToken) {
      throw new StatusCodeError.ExpectationFailed(
        `Session previously created with lockToken, but no lockToken given to 'getSession'`,
      );
    }
    if (data.lockToken !== lockToken) {
      throw new StatusCodeError.ExpectationFailed(
        `Session previously created with lockToken, but lockToken given does not match`,
      );
    }
  }

  return data;
}
/**
 * Starts a session, or returns the existing one when `sessionId` is already registered.
 *
 * If the existing session was stored with a lock token, the same `lockToken`
 * must be given here (enforced by `getSession`). For a new session, a browser
 * and page are created via `initController` and the session is stored in
 * `SessionLookup`.
 *
 * @param {Object} options - The options for starting the session.
 * @param {string} [options.sessionId] - Explicit session ID; when missing, null or empty a `browser_svc_…` ID is generated.
 * @param {string} [options.traceId] - An arbitrary ID to help trace the session.
 * @param {string} [options.lockToken] - If given, later calls that use the session must present the same token.
 * @param {Object} [options.metadata] - Any metadata to store with the session.
 * @param {number} [options.defaultTimeout] - Forwarded to `initController` and stored on the session.
 * @param {Object} [options.logger=Logger] - The logger to use.
 * @returns {Promise<Object>} The session object (new or re-used).
 */
export async function startSession({
  sessionId: requestedSessionId,
  traceId,
  lockToken,
  metadata,
  defaultTimeout,
  logger = Logger,
}) {
  // `null` (e.g. from a JSON body) and '' must also trigger ID generation;
  // a plain default parameter only covers `undefined`.
  const sessionId = requestedSessionId || `browser_svc_${nanoid()}`;

  if (Object.prototype.hasOwnProperty.call(SessionLookup, sessionId)) {
    const existing = await getSession({ sessionId, lockToken, logger });
    logger.info(`Re-using existing session '${sessionId}'`);
    return existing;
  }

  const { browser, page } = await initController({ logger, defaultTimeout });
  const session = {
    browser,
    page,
    sessionId,
    traceId,
    // When set, callers that end or use the session must present the same
    // token; `getSession` rejects a missing or mismatching token.
    lockToken,
    createdAt: Date.now(),
    metadata,
    logger,
    defaultTimeout,
  };

  // Log descriptive fields only: the lock token is a secret and the
  // browser/page/logger handles are not useful in a log line.
  logger.info(`Started session '${sessionId}':`, {
    sessionId,
    traceId,
    createdAt: session.createdAt,
    metadata,
    defaultTimeout,
    hasLockToken: Boolean(lockToken),
  });

  SessionLookup[sessionId] = session;
  return session;
}
/**
 * Ends a session: closes its browser and removes it from `SessionLookup`.
 *
 * Closing is best-effort — a failure to close is logged as a warning and the
 * session is removed from the lookup regardless.
 *
 * @param {Object} options - The options for ending the session.
 * @param {string} options.sessionId - The ID of the session to end.
 * @param {string} [options.lockToken] - The lock token associated with the session, if any.
 * @param {Object} [options.logger=Logger] - The logger to use for logging.
 * @returns {Promise<void>} Resolves once the session has been removed.
 * @throws Whatever `getSession` throws when the session is unknown or the lock token does not match.
 */
export async function endSession({ sessionId, lockToken, logger = Logger }) {
  assertRequired({ sessionId });
  const { browser } = await getSession({ sessionId, lockToken, logger });
  try {
    // Inside `try` so that a synchronous throw (e.g. no browser on the
    // session) is handled the same way as a rejected close().
    await browser.close();
  } catch (ex) {
    logger.warn(`Error closing browser for session '${sessionId}':`, ex);
  } finally {
    delete SessionLookup[sessionId];
  }
}
/**
 * Takes a screenshot of a URL, either in an existing session or in a
 * temporary one that is always ended before this function settles.
 *
 * When the screenshot of an existing session fails with a "Session closed"
 * error, the session is ended, started again under the same ID and the
 * screenshot is retried; after more than 2 retries the error is thrown.
 *
 * @param {Object} options - The options for taking a screenshot.
 * @param {string} options.url - The URL to take a screenshot of.
 * @param {boolean} [options.dataOnly] - Forwarded to `screenshot`.
 * @param {boolean} [options.disableAnnotations] - Forwarded to `screenshot`.
 * @param {string} [options.sessionId] - Existing session to use. If not provided, a temporary session is started.
 * @param {string} [options.lockToken] - The lock token associated with the session.
 * @param {Object} [options.metadata] - Metadata stored with a temporary session.
 * @param {Object} [options.logger=Logger] - The logger to use for logging.
 * @param {number} [options.retries=0] - Internal retry counter for the "Session closed" recovery.
 * @param {number} [options.defaultTimeout] - Forwarded to `startSession` for a temporary session.
 * @returns {Promise<Object>} The result object of `screenshot` (callers read
 *   e.g. `imagePath` from it — exact shape is defined by `screenshot`).
 * @throws The `error` reported by `screenshot`, or an Error if the session could not be re-started.
 */
export async function screenshotUrl({
  url,
  dataOnly,
  disableAnnotations,
  sessionId,
  lockToken,
  metadata,
  logger = Logger,
  retries = 0,
  defaultTimeout,
}) {
  const isTemporary = !sessionId;
  const session = isTemporary
    ? await startSession({ metadata, lockToken, defaultTimeout, logger })
    : await getSession({ sessionId, lockToken, logger });

  try {
    const data = await screenshot({
      url,
      page: session.page,
      dataOnly,
      disableAnnotations,
      defaultTimeout: session.defaultTimeout,
      logger,
    });
    if (!data?.error) {
      return data;
    }

    const isSessionClosed = data.error?.message?.includes('Session closed');
    if (!isSessionClosed || isTemporary) {
      throw data.error;
    }
    if (retries > 2) {
      logger.error(
        `Failed to take screenshot after ${retries} retries:`,
        data.error,
      );
      throw data.error;
    }

    // Best-effort teardown of the dead session before re-creating it.
    await endSession(session).catch((ex) => {
      logger.warn(`Error ending closed session '${sessionId}':`, ex);
    });
    // Re-create under the same ID/token/metadata; extra fields of the old
    // session object are ignored by startSession.
    const restarted = await startSession({ ...session, logger });
    if (!restarted.sessionId) {
      throw new Error(
        `Failed to re-start session after original error: ${data.error?.message}`,
      );
    }
    return screenshotUrl({
      url,
      dataOnly,
      disableAnnotations,
      sessionId,
      lockToken,
      metadata,
      logger,
      retries: retries + 1,
      defaultTimeout,
    });
  } finally {
    // A temporary session must not outlive this call, success or failure,
    // otherwise its browser process is leaked.
    if (isTemporary) {
      await endSession(session).catch((ex) => {
        logger.warn(`Error ending temporary screenshot session:`, ex);
      });
    }
  }
}
/**
 * Clicks a navigation link in an existing session and returns the result of
 * `clickNavigationAndScreenshot` for the resulting page.
 *
 * When the helper reports a "Session closed" error, the session is ended,
 * started again under the same ID and the click is retried; after more than
 * 2 retries the error is thrown.
 * NOTE(review): a re-started session presumably begins on a fresh page, so
 * the retried click may not find `linkText` — confirm the retry is useful here.
 *
 * @param {Object} options - The options for clicking the link and taking the screenshot.
 * @param {string} options.linkText - The text of the navigation link to click.
 * @param {boolean} [options.dataOnly=false] - Forwarded to `clickNavigationAndScreenshot`.
 * @param {string} options.sessionId - The ID of the session.
 * @param {string} [options.lockToken] - The lock token of the session.
 * @param {Object} [options.logger=Logger] - The logger instance to use for logging.
 * @param {number} [options.retries=0] - Internal retry counter for the "Session closed" recovery.
 * @returns {Promise<Object>} The result object of `clickNavigationAndScreenshot`
 *   (callers read e.g. `imagePath` from it — exact shape is defined by that helper).
 * @throws The `error` reported by the helper, whatever `getSession` throws, or an
 *   Error if the session could not be re-started.
 */
export async function clickNavigationAndScreenshotUrl({
  linkText,
  dataOnly = false,
  sessionId,
  lockToken,
  logger = Logger,
  retries = 0,
}) {
  assertRequired(
    { linkText, sessionId },
    'clickNavigationAndScreenshotUrl',
    logger,
  );

  // getSession throws for unknown sessions, so no extra null check is needed.
  const session = await getSession({ sessionId, lockToken, logger });
  const { browser, page } = session;

  const data = await clickNavigationAndScreenshot({
    linkText,
    page,
    browser,
    logger,
    dataOnly,
  });
  if (!data?.error) {
    return data;
  }

  if (!data.error?.message?.includes('Session closed')) {
    throw data.error;
  }
  if (retries > 2) {
    logger.error(
      `Failed to take screenshot after ${retries} retries:`,
      data.error,
    );
    throw data.error;
  }

  // Best-effort teardown of the dead session before re-creating it.
  await endSession(session).catch((ex) => {
    logger.warn(`Error ending closed session '${sessionId}':`, ex);
  });
  const restarted = await startSession({ ...session, logger });
  if (!restarted.sessionId) {
    throw new Error(
      `Failed to re-start session after original error: ${data.error?.message}`,
    );
  }

  return clickNavigationAndScreenshotUrl({
    linkText,
    dataOnly,
    sessionId,
    lockToken,
    logger,
    retries: retries + 1,
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable no-console */ | |
/* eslint-disable no-shadow */ | |
/* global document, window , HTMLElement */ | |
import fs from 'fs'; | |
/**
 * Upper-cases the first character of a string and leaves the rest unchanged.
 * @param {string} str - The input string
 * @returns {string} The capitalized string (an empty string stays empty)
 */
export const capitalize = (str) =>
  `${str.charAt(0).toUpperCase()}${str.slice(1)}`;
/**
 * Pauses for the given amount of time.
 * @param {number} delayMillis - The amount of time to sleep in milliseconds
 * @returns {Promise<string>} A promise that resolves, after the delay, with a
 *   message stating the time waited in seconds
 */
export const sleep = (delayMillis) =>
  new Promise((resolve) => {
    setTimeout(() => {
      resolve(`Waited for ${delayMillis / 1000} seconds`);
    }, delayMillis);
  });
/**
 * Checks whether a file path ends with .jpg, .jpeg, or .png (case-insensitive).
 * Only the extension is inspected; the file itself is not accessed.
 * @param {string} filePath - The file path to check
 * @returns {boolean} `true` when the path has one of the supported image extensions
 */
export const isValidImagePath = (filePath) => {
  const supportedImageExtension = /\.(jpg|jpeg|png)$/i;
  return supportedImageExtension.test(filePath);
};
/**
 * Checks if the given string is an absolute http(s) URL.
 *
 * The string must start with `http://` or `https://`, contain no whitespace
 * and be accepted by the platform `URL` parser. Parsing with `URL` instead of
 * a hand-written pattern avoids regex backtracking on long inputs and accepts
 * URLs with characters such as parentheses or commas in the path/query.
 *
 * @param {string | undefined} txt - The string to check
 * @returns {boolean} A boolean indicating whether the string is a valid http(s) URL
 */
export const isValidURL = (txt) => {
  // Cheap, linear pre-check: explicit http(s) scheme and no whitespace at all.
  if (typeof txt !== 'string' || !/^https?:\/\/\S+$/i.test(txt)) {
    return false;
  }
  try {
    const { protocol } = new URL(txt);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    // `new URL` throws a TypeError for anything it cannot parse.
    return false;
  }
};
/**
 * Checks if the given string can be parsed by `JSON.parse`.
 * @param {string} string - The string to check
 * @returns {boolean} `true` when parsing succeeds, `false` when it throws
 */
export const isValidJson = (string) => {
  try {
    JSON.parse(string);
    return true;
  } catch {
    return false;
  }
};
/**
 * Checks if the given element is visible as far as its own computed style goes.
 * Ancestors are not considered here.
 * @param {Element} element - The element to check
 * @returns {boolean} `false` when the computed style has display "none",
 *   visibility "hidden", opacity "0", width "0px" or height "0px"; `true` otherwise
 */
export const isElementStyleVisible = (element) => {
  // The computed style merges stylesheet, inline and script-applied styles,
  // and reports width/height as pixel values.
  const { display, visibility, opacity, width, height } =
    window.getComputedStyle(element);
  return (
    display !== 'none' &&
    visibility !== 'hidden' &&
    opacity !== '0' &&
    width !== '0px' &&
    height !== '0px'
  );
};
/**
 * Checks if the given element lies entirely inside the viewport.
 * @param {Element} element - The element to check
 * @returns {boolean} `true` when the element's bounding rectangle is fully
 *   within the viewport bounds
 */
export const isElementInViewport = (element) => {
  const { top, left, bottom, right } = element.getBoundingClientRect();
  // Fall back to documentElement.client* when window.inner* is unavailable.
  const viewportHeight =
    window.innerHeight || document.documentElement.clientHeight;
  const viewportWidth =
    window.innerWidth || document.documentElement.clientWidth;
  return (
    top >= 0 && left >= 0 && bottom <= viewportHeight && right <= viewportWidth
  );
};
/**
 * Checks if the given element is visible to the user: the element and every
 * ancestor must pass `isElementStyleVisible`, and the element itself must be
 * inside the viewport.
 * @param {Element | undefined | null} element - The element to check
 * @returns {boolean} A boolean indicating whether the element is visible
 * @throws {Error} An error if the element is null or undefined
 */
export const isElementVisible = (element) => {
  if (element == null) {
    throw new Error('isElementVisible: Element is null or undefined');
  }

  // Walk from the element up to the root (button -> div -> body -> html -> null);
  // a single hidden node anywhere on that chain hides the element.
  for (let node = element; node; node = node.parentElement) {
    if (!isElementStyleVisible(node)) {
      return false;
    }
  }

  // Only the element's own position matters for the viewport check.
  return isElementInViewport(element);
};
/**
 * Checks if the given value is an instance of the global `HTMLElement`.
 * @param {Element} element - The element to check
 * @returns {boolean} A boolean indicating whether the element is an HTMLElement
 */
export const isHTMLElement = (element) => element instanceof HTMLElement;
/**
 * Removes every character that is not a letter, a digit or a space.
 * Letters and digits are matched by Unicode category, so non-English text
 * (e.g. "é", "東京") is kept; punctuation, symbols and other whitespace are removed.
 * @param {string} text - The input text
 * @returns {string} The cleaned up text
 */
export const cleanUpTextContent = (text) =>
  text.replace(/[^\p{L}\p{N} ]/gu, '');
/**
 * Waits for a specific event to occur on the page's document.
 *
 * The promise stays pending until the event fires; there is no timeout.
 *
 * @param {Page} page - The Puppeteer page
 * @param {keyof DocumentEventMap} eventType - The type of event to wait for
 * @returns {Promise<string>} A promise that resolves with a message once the event occurs
 */
export const waitForEvent = async (page, eventType) => {
  // The page function runs in the browser context and cannot see Node-side
  // variables, so the event type is handed over as an explicit argument.
  return page.evaluate(
    (type) =>
      new Promise((resolve) => {
        document.addEventListener(
          type,
          () => {
            resolve(`Event: ${type} occurred`);
          },
          // Remove the listener after the first event so it does not linger on the page.
          { once: true },
        );
      }),
    eventType,
  );
};
/**
 * Waits until the page's HTML size stops changing.
 *
 * The size is sampled once per interval. The page counts as rendered when the
 * sampled size is non-zero and equal to the previous sample 3 times in a row.
 * The function also returns, without error, once `timeout / waitTimeBetweenChecks`
 * samples have been taken.
 *
 * @param {Page} page - The Puppeteer page
 * @param {number} timeout - The maximum time to wait in milliseconds (default: 30000)
 * @param {boolean} checkOnlyHTMLBody - Whether to compare only the body's innerHTML size (default: false)
 * @param {number} waitTimeBetweenChecks - Pause between samples in milliseconds (default: 1000);
 *   non-positive values fall back to 1000
 * @returns {Promise<void>}
 */
export const waitTillHTMLRendered = async (
  page,
  timeout = 30000,
  checkOnlyHTMLBody = false,
  waitTimeBetweenChecks = 1000,
) => {
  const COUNT_THRESHOLD = 3;
  // Guard against a zero/negative interval, which would make the check count unbounded.
  const interval = waitTimeBetweenChecks > 0 ? waitTimeBetweenChecks : 1000;
  const maximumChecks = timeout / interval; // assuming check itself does not take time
  let lastHTMLSize = 0;
  let stableSizeCount = 0;

  for (let i = 0; i < maximumChecks; i++) {
    // eslint-disable-next-line no-await-in-loop
    const html = await page.content();
    const currentHTMLSize = html.length;
    // eslint-disable-next-line no-await-in-loop
    const currentBodyHTMLSize = await page.evaluate(
      // document.body can be null while the document is still being built
      () => document.body?.innerHTML.length ?? 0,
    );
    const currentSize = checkOnlyHTMLBody
      ? currentBodyHTMLSize
      : currentHTMLSize;
    console.log(
      'last: ',
      lastHTMLSize,
      ' <> curr: ',
      currentHTMLSize,
      ' body html size: ',
      currentBodyHTMLSize,
    );

    // Unchanged and non-empty counts as stable; a size of 0 means nothing rendered yet.
    const isStable = currentSize === lastHTMLSize && currentSize !== 0;
    stableSizeCount = isStable ? stableSizeCount + 1 : 0;
    console.log(`Stable size count: ${stableSizeCount}`);
    if (stableSizeCount >= COUNT_THRESHOLD) {
      console.log('Page rendered fully..');
      break;
    }

    lastHTMLSize = currentSize;
    // Plain timer instead of page.waitForTimeout, which newer Puppeteer
    // versions no longer provide.
    // eslint-disable-next-line no-await-in-loop
    await new Promise((resolve) => {
      setTimeout(resolve, interval);
    });
  }
};
/**
 * Checks if the page is explicitly loading.
 *
 * The page counts as loading when `document.readyState` is "loading", or when
 * the first element whose class attribute contains "loading", "progress",
 * "spinner" or "wait" (lower-case, capitalized or upper-case) does not have an
 * inline `display: none`.
 *
 * @param {Page} page - The Puppeteer page
 * @returns {Promise<boolean>} A promise that resolves with a boolean indicating whether the page is explicitly loading
 */
export const isPageExplicitlyLoading = async (page) => {
  const targetClassNames = ['loading', 'progress', 'spinner', 'wait'];
  // One selector matching each class name in its three casings.
  const loadingSelector = targetClassNames
    .map(
      (className) =>
        `[class*="${className}"], [class*="${capitalize(
          className,
        )}"], [class*="${className.toUpperCase()}"]`,
    )
    .join(', ');

  // readyState can already be `complete` while the page still shows a loader,
  // hence the additional element check. The selector is passed as an argument
  // because the page function runs in the browser context.
  return page.evaluate((selector) => {
    const loadingElement = document.querySelector(selector);
    return (
      document.readyState === 'loading' ||
      (loadingElement !== null && loadingElement.style.display !== 'none')
    );
  }, loadingSelector);
};
/**
 * Reads an image file and returns it as a base64 data URI, a format that can
 * be handed to the GPT model.
 *
 * The MIME type in the URI follows the file extension: `image/png` for .png,
 * `image/jpeg` for .jpg/.jpeg.
 *
 * @param {string} imageFilePath - The path to the image file (.jpg, .jpeg or .png)
 * @returns {Promise<string>} A promise that resolves to the `data:<mime>;base64,…` string
 * @throws {Error} If the path does not have a supported extension, or the file cannot be read
 */
export const imageToBase64String = async (imageFilePath) => {
  if (!isValidImagePath(imageFilePath)) {
    throw new Error('Invalid image file path');
  }

  // The path is known to end in .jpg, .jpeg or .png at this point.
  const mimeType = /\.png$/i.test(imageFilePath) ? 'image/png' : 'image/jpeg';

  try {
    const data = await fs.promises.readFile(imageFilePath);
    return `data:${mimeType};base64,${data.toString('base64')}`;
  } catch (err) {
    // Keep the original error reachable for callers that want the details.
    throw new Error(`Error reading file from disk: ${err}`, { cause: err });
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment