// Skip to content
//
// Instantly share code, notes, and snippets.
//
// Created December 27, 2024 13:51
import { chromium } from 'playwright';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';
dotenv.config({ path: '.env.local' });
/**
 * Scrapes content from a given URL using Playwright with a CDP connection.
 *
 * The CDP endpoint is read from the BRIGHT_PLAYWRIGHT_URL environment variable.
 * The browser connection is closed before this function settles, whether the
 * scrape succeeded or failed.
 *
 * @param {string} url - The URL to scrape
 * @returns {Promise<{ html: string }>} Object whose `html` property holds the page content
 * @throws {Error} If BRIGHT_PLAYWRIGHT_URL is not set; errors raised while
 *   scraping are logged and rethrown unchanged
 */
export async function scrapeUrl(url) {
  const cdpEndpoint = process.env.BRIGHT_PLAYWRIGHT_URL;
  if (!cdpEndpoint) {
    throw new Error('BRIGHT_PLAYWRIGHT_URL environment variable is not set');
  }

  const browser = await chromium.connectOverCDP(cdpEndpoint);
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    // Navigation gets a 60 s budget; afterwards wait until the page reports network idle
    await page.goto(url, { timeout: 60000 });
    await page.waitForLoadState('networkidle');

    // Get HTML content
    const html = await page.content();
    return { html };
  } catch (error) {
    console.error('Error during scraping:', error);
    throw error;
  } finally {
    // Release the remote browser even when scraping failed
    await browser.close();
  }
}
/**
 * Shows the HTML content of a given URL by printing it to standard output.
 *
 * @param {string} url - The URL to show content from
 * @returns {Promise<void>}
 * @throws Rethrows, after logging, any error raised by scrapeUrl
 */
async function showContent(url) {
  try {
    const { html } = await scrapeUrl(url);
    // Print the scraped markup so the command actually produces output
    console.log(html);
  } catch (error) {
    console.error('Error showing content:', error);
    throw error;
  }
}
/**
 * Main function to handle command line usage.
 *
 * Reads the command and its arguments from process.argv. The only supported
 * command is "show-content", which takes the URL as its first argument.
 * On a usage error or a failed operation the problem is reported on stderr and
 * process.exitCode is set to 1; the function itself never rejects for those cases.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const [command, ...args] = process.argv.slice(2);

  if (!command || args.length === 0) {
    console.error('Please provide a URL to scrape.');
    console.error('Usage: pnpm run scrape show-content <url>');
    process.exitCode = 1;
    // Stop here: without a URL there is nothing to scrape
    return;
  }

  if (command !== 'show-content') {
    console.error('Invalid command. Only "show-content" is supported.');
    console.error('Usage: pnpm run scrape show-content <url>');
    process.exitCode = 1;
    // Stop here so an unknown command never triggers a scrape
    return;
  }

  try {
    await showContent(args[0]);
  } catch (error) {
    console.error('Operation failed:', error);
    // Signal failure to the calling shell without cutting off pending output
    process.exitCode = 1;
  }
}
// Run main function if this file is run directly
// (process.argv[1] is the script path Node was started with; it equals this
// module's own path only when the file is the entry point, not when imported).
if (process.argv[1] === fileURLToPath(import.meta.url)) {
  main().catch((error) => {
    // Last-resort handler so the promise returned by main() is never left floating
    console.error('Unexpected error:', error);
    process.exitCode = 1;
  });
}
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment