Skip to content

Instantly share code, notes, and snippets.

@ehzawad
Created December 5, 2023 11:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ehzawad/149326926d38ab2ae63a087a9ab56aba to your computer and use it in GitHub Desktop.
Save ehzawad/149326926d38ab2ae63a087a9ab56aba to your computer and use it in GitHub Desktop.
Webpage to PDF converter
```js
// const puppeteer = require('puppeteer');
// async function convertWebpageToPDF(url, outputFilename) {
// const browser = await puppeteer.launch({ headless: "new" });
// const page = await browser.newPage();
// // Increase the timeout to 60 seconds (60000 milliseconds)
// await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
// // Replace '.cookie-accept-button' with the actual selector for the cookie acceptance button
// const cookiesButtonSelector = '.cookie-accept-button';
// if (await page.$(cookiesButtonSelector) !== null) {
// await page.click(cookiesButtonSelector);
// }
// // Scroll through the entire webpage
// await autoScroll(page);
// // Capture the full webpage as PDF
// await page.pdf({ path: outputFilename, format: 'A4', printBackground: true });
// await browser.close();
// console.log(`PDF created: ${outputFilename}`);
// }
// // Function to automatically scroll through the entire webpage
// async function autoScroll(page){
// await page.evaluate(async () => {
// await new Promise((resolve, reject) => {
// let totalHeight = 0;
// let distance = 100;
// let timer = setInterval(() => {
// let scrollHeight = document.body.scrollHeight;
// window.scrollBy(0, distance);
// totalHeight += distance;
// if(totalHeight >= scrollHeight){
// clearInterval(timer);
// resolve();
// }
// }, 100);
// });
// });
// }
// // Example usage
// convertWebpageToPDF('https://rasa.com/docs/rasa/rules', 'rasa-rules.pdf');
// const puppeteer = require('puppeteer');
// async function convertWebpageToPDF(url, outputFilename) {
// const browser = await puppeteer.launch({ headless: "new" });
// const page = await browser.newPage();
// // Set a standard viewport size
// await page.setViewport({ width: 1366, height: 768 });
// // Navigate to the page with an increased timeout
// await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
// // Handle cookie consent - replace the selector with the actual one
// const cookiesButtonSelector = 'YOUR_COOKIE_CONSENT_SELECTOR'; // Replace with the actual selector
// if (await page.$(cookiesButtonSelector) !== null) {
// await page.click(cookiesButtonSelector);
// console.log('Cookie consent accepted.');
// }
// // Scroll through the entire webpage
// await autoScroll(page);
// // Optional: Add a delay to ensure all dynamic content has stabilized
// await page.waitForTimeout(5000); // Wait for 5 seconds
// // Capture the full webpage as PDF
// await page.pdf({ path: outputFilename, format: 'A4', printBackground: true });
// await browser.close();
// console.log(`PDF created: ${outputFilename}`);
// }
// // Function to automatically scroll through the entire webpage
// async function autoScroll(page) {
// await page.evaluate(async () => {
// await new Promise((resolve, reject) => {
// let totalHeight = 0;
// let distance = 100;
// let timer = setInterval(() => {
// let scrollHeight = document.body.scrollHeight;
// window.scrollBy(0, distance);
// totalHeight += distance;
// if(totalHeight >= scrollHeight){
// clearInterval(timer);
// resolve();
// }
// }, 100);
// });
// });
// }
// // Example usage
// convertWebpageToPDF('https://rasa.com/docs/rasa/rules', 'rasa-rules.pdf');
const puppeteer = require('puppeteer');
/**
 * Render a web page to an A4 PDF file using headless Chromium via Puppeteer.
 *
 * The page is loaded in a 1920x1080 viewport, an optional cookie-consent
 * button is clicked, the page is scrolled to the bottom (so lazily loaded
 * content gets a chance to render), and the result is printed to PDF.
 *
 * Failures after the browser has launched are logged, not thrown; the browser
 * is always closed. A failure to launch the browser still rejects.
 *
 * @param {string} url - Address of the page to convert.
 * @param {string} outputFilename - Path of the PDF file to write.
 * @param {object} [options]
 * @param {?string} [options.cookiesButtonSelector=null] - CSS selector of a
 *   cookie-consent button to click if present; `null` skips the step.
 * @param {number} [options.settleDelayMs=5000] - Pause after scrolling, in
 *   milliseconds, to let dynamic content stabilize before printing.
 * @returns {Promise<boolean>} `true` when the PDF was written, `false` when
 *   the conversion failed (the error is reported on stderr).
 */
async function convertWebpageToPDF(url, outputFilename, options = {}) {
  const { cookiesButtonSelector = null, settleDelayMs = 5000 } = options;
  const browser = await puppeteer.launch({ headless: 'new' });
  let succeeded = false;

  try {
    // Page setup lives inside the try so the browser is closed even if it fails.
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // timeout: 0 disables Puppeteer's navigation timeout for slow pages.
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 0 });

    if (
      cookiesButtonSelector !== null &&
      (await page.$(cookiesButtonSelector)) !== null
    ) {
      await page.click(cookiesButtonSelector);
      console.log('Cookie consent accepted.');
    }

    await autoScroll(page);

    // page.waitForTimeout() was removed from recent Puppeteer releases,
    // so wait with a plain timer instead.
    await new Promise((resolve) => {
      setTimeout(resolve, settleDelayMs);
    });

    await page.pdf({ path: outputFilename, format: 'A4', printBackground: true });
    succeeded = true;
  } catch (error) {
    // Any step above may have failed, not only navigation.
    console.error(`Failed to convert ${url} to PDF: ${error.message}`);
  } finally {
    await browser.close();
  }

  // Report success only when the PDF was actually produced.
  if (succeeded) {
    console.log(`PDF created: ${outputFilename}`);
  }
  return succeeded;
}
/**
 * Scroll the given page from top to bottom in small steps.
 *
 * The scrolling function is handed to `page.evaluate`, so it must stay
 * self-contained (no references to variables outside its own body).
 * It scrolls 100px every 100ms and finishes once the distance scrolled
 * reaches the height of `document.body` read on that tick.
 *
 * @param {object} page - Object exposing `evaluate(fn)`, e.g. a Puppeteer page.
 * @returns {Promise<void>} Settles when the scroll loop has finished.
 */
async function autoScroll(page) {
  const scrollToBottom = () =>
    new Promise((done) => {
      const stepPx = 100;
      let scrolledPx = 0;

      const ticker = setInterval(() => {
        // Read the height on every tick: it can grow as content loads.
        const pageHeight = document.body.scrollHeight;
        window.scrollBy(0, stepPx);
        scrolledPx += stepPx;

        if (scrolledPx < pageHeight) {
          return;
        }
        clearInterval(ticker);
        done();
      }, 100);
    });

  await page.evaluate(scrollToBottom);
}
// Example usage. The returned promise can reject (for instance when the
// browser fails to launch), so handle the rejection instead of leaving it
// floating, and signal the failure through the process exit code.
convertWebpageToPDF('https://rasa.com/docs/rasa/rules', 'rasa-rules.pdf').catch((error) => {
  console.error(`PDF conversion failed: ${error.message}`);
  process.exitCode = 1;
});
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment