Skip to content

Instantly share code, notes, and snippets.

@advanceboy
Created December 3, 2023 17:22
Show Gist options
  • Save advanceboy/5ad3f80eb31e1b26d3ecff2ee6d51487 to your computer and use it in GitHub Desktop.
Save advanceboy/5ad3f80eb31e1b26d3ecff2ee6d51487 to your computer and use it in GitHub Desktop.
ブラウザ操作中の通信の内容をローカルファイルに自動で保存する
// Run the Chrome Devtools Protocol via puppeteer-core to automatically save the contents of network communications to a local file.
const puppeteer = require('puppeteer-core');
const fs = require('fs/promises');
const path = require('path');
// process.argv starts with the node executable and the script path;
// every remaining entry is a user-supplied argument.
const [nodePath, scriptPath, ...args] = process.argv;

// Usage text that is appended to each argument-validation message.
const how2useMsg = " node intercept-requests.js outputDir [urlPattern] [matchRegExp]\n outputDir: Output Directory\n urlPattern: If specified, only URLs matched with wildcards will be fetched. see https://chromedevtools.github.io/devtools-protocol/tot/Fetch/#type-RequestPattern . It's better to specify this for performance reasons.\n matchRegExp: If specified, only URLs that match regular expressions will be saved.";

// The output directory is mandatory: print the usage text and stop when it is missing.
const hasOutputDir = args.length >= 1;
if (!hasOutputDir) {
  console.log(`one or more arguments are required.\n\n${how2useMsg}`);
  process.exit(1);
}
// Entry point: validates the command-line arguments, launches Chrome through
// puppeteer-core, and writes the body of every intercepted response that passes
// the optional filters into the output directory. Runs until the first page is closed.
(async () => {
  const outputDir = args[0];

  // The first argument must name an existing directory.
  try {
    const dirStat = await fs.stat(outputDir);
    if (!dirStat.isDirectory()) { throw new Error('not a directory'); }
  } catch {
    console.log(`invalid directory: ${outputDir}\n\n${how2useMsg}`);
    process.exit(1);
  }

  // Optional second argument: wildcard pattern handed to Fetch.enable.
  const urlPattern = args.length >= 2 ? args[1] : '*';

  // Optional third argument: only URLs matching this expression are saved.
  // A malformed expression is reported instead of silently disabling the filter.
  let matchRegExp = null;
  if (args.length >= 3) {
    try {
      matchRegExp = new RegExp(args[2]);
    } catch (err) {
      console.log(`invalid regular expression: ${args[2]} (${err.message})\n\n${how2useMsg}`);
      process.exit(1);
    }
  }

  // File extensions appended when the resolved file name has none.
  const extensionByContentType = new Map([
    ['text/plain', '.txt'],
    ['text/javascript', '.js'],
    ['text/css', '.css'],
    ['text/xml', '.xml'],
    ['text/html', '.html'],
    ['application/json', '.json'],
    ['application/zip', '.zip'],
  ]);

  // Returns the extension (with leading dot) for a lower-cased media type, or '' when unknown.
  const extensionFor = (contentType) => {
    const known = extensionByContentType.get(contentType);
    if (known !== undefined) { return known; }
    // image/<subtype>[+suffix] -> .<subtype>, e.g. image/svg+xml -> .svg
    const imageMatch = /^image\/([a-z0-9.-]+?)(?:\+.*)?$/.exec(contentType);
    return imageMatch ? `.${imageMatch[1]}` : '';
  };

  // Extracts the file name parameter from a Content-Disposition header value,
  // or returns null when the header carries none.
  const parseDispositionFileName = (headerValue) => {
    // The extended form (filename*=charset'lang'percent-encoded) takes precedence.
    const extended = /filename\*\s*=\s*[\w-]+'[^']*'([^;\s]+)/i.exec(headerValue);
    if (extended) {
      try {
        return decodeURIComponent(extended[1]);
      } catch {
        // Malformed percent-encoding: fall back to the plain parameter below.
      }
    }
    const plain = /filename\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^;\s]+))/i.exec(headerValue);
    if (!plain) { return null; }
    // Quoted form: undo backslash escapes. Token form: use as is.
    return plain[1] !== undefined ? plain[1].replace(/\\(.)/g, '$1') : plain[2];
  };

  // Reduces a server-supplied name to a single safe path component so the file
  // can never be written outside the output directory.
  const sanitizeFileName = (rawName) => {
    const lastComponent = String(rawName || '')
      .split(/[\\/]/)
      .pop()
      .replace(/[<>:"|?*\x00-\x1f]/g, '_')
      .trim();
    // An empty name (URL ending in '/') or a dot entry would resolve to a directory.
    return lastComponent === '' || lastComponent === '.' || lastComponent === '..' ? 'index' : lastComponent;
  };

  // Creates a new file in the output directory without overwriting an existing one:
  // name.ext, name~1.ext, name~2.ext, ... The exclusive 'wx' flag makes the
  // existence check and the creation a single step.
  const openUniqueFile = async (fileName) => {
    const ext = path.extname(fileName);
    const stem = fileName.substring(0, fileName.length - ext.length);
    for (let counter = 0; ; counter++) {
      const candidate = path.join(outputDir, counter === 0 ? fileName : `${stem}~${counter}${ext}`);
      try {
        return await fs.open(candidate, 'wx');
      } catch (err) {
        if (err.code !== 'EEXIST') { throw err; }
      }
    }
  };

  // Resolves the target file name from the response headers and the request URL.
  const resolveFileName = (requestUrl, responseHeaders) => {
    let dispositionName = null;
    let contentType = null;
    for (const header of responseHeaders || []) {
      switch (header.name.toLowerCase()) {
        case 'content-disposition':
          dispositionName = parseDispositionFileName(header.value);
          break;
        case 'content-type':
          contentType = header.value.toLowerCase().split(';')[0].trim();
          break;
      }
    }
    let fileName = sanitizeFileName(dispositionName || new URL(requestUrl).pathname.split('/').pop());
    if (!path.extname(fileName) && contentType) {
      fileName += extensionFor(contentType);
    }
    return fileName;
  };

  const browser = await puppeteer.launch({
    args: ['--guest'],
    channel: 'chrome',
    defaultViewport: null,
    headless: false,
    product: 'chrome'
  });
  // if use firefox
  // const browser = await puppeteer.launch({
  //   args: ['-private'],
  //   defaultViewport: null,
  //   executablePath: String.raw`C:\Program Files\Mozilla Firefox\firefox.exe`,
  //   headless: false,
  //   product: 'firefox'
  // });

  try {
    const page = (await browser.pages())[0];
    const client = await page.target().createCDPSession();
    await client.send('Fetch.enable', { patterns: [{ urlPattern, requestStage: 'Response' }] });

    client.on('Fetch.requestPaused', async (requestEvent) => {
      const { request, requestId, responseStatusCode, responseHeaders } = requestEvent;
      try {
        // Without a status code there is no response to save.
        if (responseStatusCode && (!matchRegExp || matchRegExp.test(request.url))) {
          // Fetch the body before creating the file so a failed fetch leaves no empty file behind.
          const response = await client.send('Fetch.getResponseBody', { requestId });
          const buff = Buffer.from(response.body, response.base64Encoded ? 'base64' : 'utf-8');
          const file = await openUniqueFile(resolveFileName(request.url, responseHeaders));
          try {
            await file.writeFile(buff);
          } finally {
            await file.close();
          }
        }
      } catch (err) {
        // A failed save must not block the page; report it and carry on.
        console.error(`failed to save ${request.url}: ${(err && err.message) || err}`);
      }
      // The response is only observed, never modified, so it is always released unchanged.
      try {
        await client.send('Fetch.continueRequest', { requestId });
      } catch (err) {
        // Rejections in an event handler would otherwise go unhandled.
        console.error(`failed to continue ${request.url}: ${(err && err.message) || err}`);
      }
    });

    // wait for close
    await new Promise((resolve) => page.once('close', resolve));
  } finally {
    // Shutdown is best-effort: these calls may reject if the browser has already gone away.
    const openPages = await browser.pages().catch(() => []);
    await Promise.allSettled(openPages.map((p) => p.close()));
    await browser.close().catch(() => {});
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
{
"dependencies": {
"puppeteer-core": ">=21.0.0"
},
"engines": {
"node": ">=14.8"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment