Last active
November 14, 2022 12:56
-
-
Save mnmkng/0ace1f4e1035f92d8da4f4ae1080f026 to your computer and use it in GitHub Desktop.
Code example: How to scrape the web with Playwright 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Import the playwright library into our scraper.
const playwright = require('playwright');

/**
 * Demo scraper: opens a visible Chromium window, loads the GitHub
 * "javascript" topic page, clicks "Load more" and waits until more than
 * 20 repository cards are present in the page.
 *
 * The browser is always closed, even when one of the steps throws
 * (for example when a navigation or wait times out).
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function main() {
    // Open a Chromium browser. We use headless: false
    // to be able to watch what's going on.
    const browser = await playwright.chromium.launch({
        headless: false,
    });

    try {
        // Open a new page / tab in the browser.
        const page = await browser.newPage({
            bypassCSP: true, // This is needed to enable JavaScript execution on GitHub.
        });

        // Tell the tab to navigate to the GitHub Topics page.
        await page.goto('https://github.com/topics/javascript');

        // Click and tell Playwright to keep watching for more than
        // 20 repository cards to appear in the page.
        await page.click('text=Load more');
        await page.waitForFunction(() => {
            const repoCards = document.querySelectorAll('article.border');
            return repoCards.length > 20;
        });

        // Pause for 10 seconds, to see what's going on.
        await page.waitForTimeout(10000);
    } finally {
        // Turn off the browser to clean up after ourselves. Doing this in
        // `finally` prevents a leaked browser process when a step above fails.
        await browser.close();
    }
}

// Report failures explicitly instead of leaving a floating promise,
// and signal the failure to the calling shell through the exit code.
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment