Skip to content

Instantly share code, notes, and snippets.

@benyaminbeyzaie
Created November 13, 2023 20:09
Show Gist options
  • Save benyaminbeyzaie/0c93e1058143289fa145616918a2d409 to your computer and use it in GitHub Desktop.
Save benyaminbeyzaie/0c93e1058143289fa145616918a2d409 to your computer and use it in GitHub Desktop.
Minimal scrape and prettify
import playwright from "playwright";
import fs from "fs";
import pretty from "pretty";
import * as cheerio from "cheerio";
import path from "path";
// Folder (relative to the current working directory) that receives every saved file.
const saveFolder = "generated";

/**
 * Write `content` to `filename` inside `saveFolder`.
 *
 * Missing directories are created first, including any sub-directories that
 * are part of `filename` (e.g. `"pages/home.html"`).
 *
 * This is a best-effort helper: failures are logged with `console.error` and
 * never rethrown, so the returned promise always resolves. The write itself
 * is synchronous, so the file is already on disk when the call returns even
 * if the caller does not await the promise.
 *
 * @param {string} filename - File name or relative path below `saveFolder`.
 * @param {string | NodeJS.ArrayBufferView} content - Data to write.
 * @returns {Promise<void>} Resolves once the attempt has finished.
 */
async function saveFile(filename, content) {
  try {
    const targetPath = path.join(saveFolder, filename);
    // `recursive: true` succeeds when the directory already exists, which
    // removes the exists-then-create race and also covers nested filenames.
    fs.mkdirSync(path.dirname(targetPath), { recursive: true });
    fs.writeFileSync(targetPath, content);
    console.log(`File saved: ${filename}`);
  } catch (error) {
    console.error("Error saving file:", error);
  }
}
// Fixed pause after navigation before the DOM is captured — presumably to let
// client-side rendering finish; TODO confirm whether a targeted wait would do.
const renderSettleMs = 10000;

// Elements dropped from the cleaned copy: the document head, executable or
// styling resources, embedded frames, and SVG path data.
const strippedSelectors = [
  "head",
  "script",
  "style",
  "link",
  "noscript",
  "iframe",
  "path",
];

// Scrape the page, then save both the raw HTML and a cleaned, prettified copy.
const browser = await playwright.chromium.launch({ headless: false });
try {
  const context = await browser.newContext();
  const page = await context.newPage();
  await page.goto("https://digikala.com");
  await page.waitForTimeout(renderSettleMs);
  const content = await page.content();

  const $ = cheerio.load(content);
  $(strippedSelectors.join(", ")).remove();
  // Keep <img> elements but drop their sources, and strip all inline styles.
  $("img").removeAttr("src");
  $("*").removeAttr("style");
  const prettifiedHtml = pretty($.html());

  // Await both writes so no promise is left floating.
  await saveFile("modified_page.html", prettifiedHtml);
  await saveFile("page_content.html", content);
} finally {
  // Always release the browser process, even when navigation or parsing throws.
  await browser.close();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment