Skip to content

Instantly share code, notes, and snippets.

@kyletaylored
Created July 12, 2021 14:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kyletaylored/f9bc5f0d82942687038040b1a5cdacdd to your computer and use it in GitHub Desktop.
Save kyletaylored/f9bc5f0d82942687038040b1a5cdacdd to your computer and use it in GitHub Desktop.
Traffic simulator in a Node.js script
{
"name": "traffic-simulator",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"puppeteer": "^10.1.0",
"sitemapper": "^3.1.12",
"user-agents": "^1.0.710"
}
}
const puppeteer = require('puppeteer')
const UserAgent = require('user-agents');
const Sitemapper = require('sitemapper');
// Build a pool of 500 user agents up front by invoking the UserAgent instance once per slot.
const userAgent = new UserAgent();
const userAgents = Array.from({ length: 500 }, () => userAgent());
/**
 * Give a page a randomly chosen browser identity.
 *
 * Roughly half of the calls emulate one of the device descriptors exposed by
 * `puppeteer.devices`; the rest take an entry from the pre-generated
 * `userAgents` pool and apply its user agent string and viewport.
 *
 * @param {object} page - Puppeteer page to configure.
 * @returns {Promise<void>} Resolves once the emulation settings are applied.
 */
const assignRandomAgent = async (page) => {
  // Coin flip between a built-in device profile and a generated user agent.
  if (Math.random() < 0.5) {
    const deviceName = randomArrayItem(Object.keys(puppeteer.devices));
    console.log("user agent: ", deviceName);
    await page.emulate(puppeteer.devices[deviceName]);
    return;
  }

  const { data } = randomArrayItem(userAgents);
  const isMobile = data.deviceCategory === 'mobile';
  console.log("user agent: ", data.userAgent);
  // NOTE(review): only `platform` and `mobile` are supplied as user-agent
  // metadata; confirm the browser accepts a partial metadata object.
  await page.setUserAgent(data.userAgent, {
    platform: data.platform,
    mobile: isMobile,
  });
  await page.setViewport({
    width: data.viewportWidth,
    height: data.viewportHeight,
    isMobile,
    // Landscape when the reported screen is wider than it is tall.
    isLandscape: data.screenHeight < data.screenWidth,
  });
};
/**
 * Pick one element of an array at a random index.
 * @param {Array} arr - Array to draw from.
 * @returns {*} The element at the chosen index; `undefined` when `arr` is empty.
 */
const randomArrayItem = (arr) => {
  const index = Math.floor(Math.random() * arr.length);
  return arr[index];
};
/**
 * Get pages to crawl by fetching a sitemap.
 * @param {string} [sitemapUrl] - Sitemap URL to fetch; defaults to the site
 *   this script was written for.
 * @returns {Promise<object>} Whatever `Sitemapper#fetch` resolves with; the
 *   main routine reads its `sites` property.
 */
const getPosts = async (sitemapUrl = 'https://www.thepaperpeople.work/sitemap_index.xml') => {
  const sitemap = new Sitemapper();
  return sitemap.fetch(sitemapUrl);
};
/**
 * Clear browser cookies through a temporary DevTools protocol session opened
 * on the page's target.
 * @param {object} page - Puppeteer page whose target hosts the session.
 * @returns {Promise<void>} Resolves after the cookies command has completed
 *   and the session has been detached.
 */
const clearBrowser = async (page) => {
  const client = await page.target().createCDPSession();
  try {
    await client.send('Network.clearBrowserCookies');
  } finally {
    // Every call opens a new session; release it so sessions do not pile up.
    await client.detach();
  }
};
/**
 * Open a fresh page with a random identity, visit each URL in order, then
 * clear cookies and close the page.
 * @param {object} browser - Puppeteer browser used to open the page.
 * @param {string[]} paths - URLs to visit sequentially.
 * @returns {Promise<void>} Resolves once the page has been closed.
 */
const visitPage = async (browser, paths) => {
  const page = await browser.newPage();
  try {
    await assignRandomAgent(page);
    // Go to paths
    for (const url of paths) {
      try {
        await page.goto(url, { waitUntil: 'networkidle2' });
      } catch (err) {
        // A failed navigation is logged and the remaining URLs are still visited.
        console.log("err: ", err);
      }
      console.log(await page.title());
    }
    // For debugging, a full-page screenshot can be taken here:
    // await page.screenshot({ path: 'full.png', fullPage: true });

    // Clear cookies before the page goes away.
    await clearBrowser(page);
  } finally {
    // A new page is opened per visit, so close it to avoid accumulating tabs.
    await page.close();
  }
};
/**
 * Generate random subset of posts.
 *
 * Between 1 and 6 entries are drawn independently, so the same entry may
 * appear more than once.
 * @param {array} postList - Candidate entries to draw from.
 * @returns {array} The drawn entries; empty when `postList` is empty or not
 *   an array.
 */
const getRandomPosts = (postList) => {
  // Nothing to draw from: return no pages instead of a list of `undefined`.
  if (!Array.isArray(postList) || postList.length === 0) {
    return [];
  }
  // Grab up to 6 pages to crawl.
  const limit = Math.floor(Math.random() * 6) + 1;
  return Array.from({ length: limit }, () => randomArrayItem(postList));
};
/**
 * Crawl pages: start one visit over a random selection of posts and record
 * its promise.
 * @param {object} browser - Browser handed through to `visitPage`.
 * @param {array} posts - Candidate URLs handed to `getRandomPosts`.
 * @param {array} postPromises - Array the visit's promise is appended to.
 */
const crawlPage = (browser, posts, postPromises) => {
  const visit = visitPage(browser, getRandomPosts(posts));
  // Attach a handler so a failed visit is logged rather than surfacing as an
  // unhandled rejection; the original promise is still what gets stored.
  visit.catch((err) => {
    console.log("err: ", err);
  });
  postPromises.push(visit);
};
// Main: load the sitemap, launch the browser, then start a new crawl every 5 seconds.
(async () => {
  const postsSitemap = await getPosts();
  const posts = postsSitemap.sites;
  // Without any URLs every scheduled crawl would have nothing valid to visit.
  if (!Array.isArray(posts) || posts.length === 0) {
    throw new Error('No URLs found in sitemap; nothing to crawl.');
  }
  const browser = await puppeteer.launch();
  const postPromises = [];
  let crawlCount = 0;
  setInterval(() => {
    console.log("Crawl Count: ", crawlCount);
    crawlCount++;
    crawlPage(browser, posts, postPromises);
  }, 5000);
  // browser.close() is not called here: the interval is never cleared, so
  // crawling continues until the process is stopped.
})().catch((err) => {
  // Startup failures (sitemap fetch, browser launch) end up here.
  console.log("err: ", err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment