Skip to content

Instantly share code, notes, and snippets.

@kaizhu256
Last active January 17, 2024 05:44
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save kaizhu256/8c91818ca8db033fc9dee4e48277c0ba to your computer and use it in GitHub Desktop.
Save kaizhu256/8c91818ca8db033fc9dee4e48277c0ba to your computer and use it in GitHub Desktop.
this zero-dependency nodejs-script demonstrates how to programmatically script chrome-browser to do tasks using chrome-devtools-protocol commands over websocket
/*
* chrome-devtools-client-example.js
*
* this zero-dependency nodejs-script demonstrates how to programmatically
* script chrome-browser to do tasks using chrome-devtools-protocol commands
* over websocket
*
* tldr - jump down to section "quickstart-example" to see an example task
*
* Chrome DevTools Protocol Documentation:
* https://chromedevtools.github.io/devtools-protocol/
*/
/* jslint utility2:true */
function assertOrThrow(passed, msg) {
/*
 * this function will return silently if <passed> is truthy,
 * else it will throw
 * - <msg> itself, if it looks like an error
 *   (has string-properties "message" and "stack")
 * - new Error(<msg>), if <msg> is a string
 * - new Error(JSON.stringify(<msg>, undefined, 4)), otherwise
 */
    let isErrorLike;
    let text;
    if (!passed) {
        isErrorLike = (
            Boolean(msg)
            && typeof msg.message === "string"
            && typeof msg.stack === "string"
        );
        // error-like values are re-thrown untouched to keep their stack
        if (isErrorLike) {
            throw msg;
        }
        // anything that is not already a string is serialized for display
        text = msg;
        if (typeof text !== "string") {
            text = JSON.stringify(text, undefined, 4);
        }
        throw new Error(text);
    }
}
function noop() {
/*
 * this function is an intentionally empty placeholder-callback,
 * it ignores its arguments and returns undefined
 */
}
function chromeDevtoolsClientCreate({
    chromeBin,
    modeMockProcessPlatform,
    modeSilent,
    processPlatform,
    timeout
}) {
/*
 * this function will create chrome-devtools-client from <chromeBin>
 *
 * it will
 * 1. create <chromeClient> (a stream.Duplex with extra method .rpc())
 * 2. create <wsReader> that decodes websocket-frames
 * 3. spawn headless chrome with a temporary user-data-dir
 * 4. connect to the websocket-url that chrome prints to stderr
 * 5. create and attach to a blank webpage
 * 6. enable Page / Performance domains and set an 800x600 viewport
 *
 * options:
 * <chromeBin> - path to chrome-executable
 *     (default depends on <processPlatform>)
 * <modeMockProcessPlatform> - coverage-hack, if truthy then cleanup right
 *     after spawning chrome and reject the returned promise with undefined
 * <modeSilent> - if truthy then ignore chrome's stdout and do not forward
 *     its stderr to process.stderr
 * <processPlatform> - override for process.platform used to pick
 *     <chromeBin>, the "--no-sandbox" flag, and the kill-strategy
 * <timeout> - milliseconds (default 30000) after which the client is
 *     cleaned-up and "error" is emitted on <chromeClient>;
 *     the timer starts at creation and is only cleared by cleanup
 *
 * returns promise that resolves to <chromeClient>
 */
    let chromeCleanup;
    let chromeClient;
    let chromeProcess;
    let chromeSessionId;
    let chromeUserDataDir;
    let websocket;
    let wsReader;
    return Promise.resolve().then(function () {
/*
 * this function will init <chromeCleanup> and <chromeClient>
 */
        let callbackDict;
        let callbackId;
        let timerTimeout;
        // dict of pending rpc-calls, keyed by rpc-id
        callbackDict = {};
        callbackId = 0;
        chromeCleanup = function () {
/*
 * this function will
 * kill <chromeProcess>
 * rm -rf <chromeUserDataDir>
 * destroy <chromeClient>, <websocket>, <wsReader>
 *
 * it can run more than once (it is re-entered through
 * chromeClient.destroy() -> _destroy, and is also registered on
 * process "exit" and signals), so every step must tolerate
 * resources that were already released
 */
            let fs;
            let rmSync;
            // cleanup timerTimeout
            clearTimeout(timerTimeout);
            // kill <chromeProcess>
            try {
                if (processPlatform === "win32") {
                    require("child_process").spawnSync("taskkill", [
                        "/pid", chromeProcess.pid, "/T", "/F"
                    ], {
                        stdio: "ignore"
                    });
                } else {
                    // kill child process tree with ".kill(-pid)" command.
                    process.kill(-chromeProcess.pid, "SIGKILL");
                }
            } catch (ignore) {}
            // rm -rf <chromeUserDataDir>
            // prefer fs.rmSync with force, so that removing an
            // already-removed directory on a repeated cleanup does not throw,
            // and fall back to the legacy recursive fs.rmdirSync on old nodejs
            if (chromeUserDataDir) {
                fs = require("fs");
                rmSync = fs.rmSync || fs.rmdirSync;
                rmSync(chromeUserDataDir, {
                    force: true,
                    recursive: true
                });
            }
            // destroy <chromeClient>, <websocket>, <wsReader>
            chromeClient.destroy();
            // <websocket> is undefined if cleanup runs before connecting
            try {
                websocket.destroy();
            } catch (ignore) {}
            wsReader.destroy();
        };
        // init timerTimeout
        timeout = timeout || 30000;
        timerTimeout = setTimeout(function () {
            chromeCleanup();
            chromeClient.emit("error", new Error(
                "chrome-devtools - timeout " + timeout + " ms"
            ));
        }, timeout);
        function ChromeClient() {
/*
 * this function will construct <chromeClient>
 */
            require("stream").Duplex.call(this);
        }
        require("util").inherits(ChromeClient, require("stream").Duplex);
        chromeClient = new ChromeClient();
        ChromeClient.prototype._destroy = chromeCleanup;
        ChromeClient.prototype._read = function () {
/*
 * this function will implement stream.Duplex.prototype._read
 */
            if (websocket && websocket.readable) {
                websocket.resume();
            }
        };
        ChromeClient.prototype._write = function (payload, ignore, callback) {
/*
 * this function will implement stream.Duplex.prototype._write
 * by sending <payload> as a single, masked, websocket-text-frame
 * note - <payload> is masked in-place
 */
            // console.error("SEND \u25ba " + payload.slice(0, 256).toString());
            let header;
            let maskKey;
            let result;
            // init header
            // 2 bytes flags + up to 8 bytes extended-length + 4 bytes maskKey
            header = Buffer.alloc(2 + 8 + 4);
            // init fin = true
            header[0] |= 0x80;
            // init opcode = text-frame
            header[0] |= 1;
            // init mask = true
            header[1] |= 0x80;
            // init payload.length
            if (payload.length < 126) {
                header = header.slice(0, 2 + 0 + 4);
                header[1] |= payload.length;
            } else if (payload.length < 65536) {
                header = header.slice(0, 2 + 2 + 4);
                header[1] |= 126;
                header.writeUInt16BE(payload.length, 2);
            } else {
                header[1] |= 127;
                // write low 32-bits of the 64-bit length,
                // the high 32-bits (bytes 2-5) stay zero
                header.writeUInt32BE(payload.length, 6);
            }
            // init maskKey
            maskKey = require("crypto").randomBytes(4);
            maskKey.copy(header, header.length - 4);
            // send header
            websocket.cork();
            websocket.write(header);
            // send payload ^ maskKey
            payload.forEach(function (ignore, ii) {
                payload[ii] ^= maskKey[ii & 3];
            });
            // return write-result
            result = websocket.write(payload, callback);
            websocket.uncork();
            return result;
        };
        chromeClient.on("data", function (payload) {
/*
 * this function will handle callback for <payload>
 * received from chrome-browser using chrome-devtools-protocol
 * - a response to a pending rpc-call resolves that call's promise
 * - anything else is re-emitted on <chromeClient> as event <method>
 */
            // console.error("\u25c0 RECV " + payload.slice(0, 256).toString());
            let callback;
            let {
                method,
                id,
                error,
                params,
                result
            } = JSON.parse(payload);
            assertOrThrow(!method || (
                /^[A-Z]\w*?\.[a-z]\w*?$/
            ).test(method), new Error(
                "chrome-devtools - invalid method " + method
            ));
            // init callback
            callback = callbackDict[id];
            delete callbackDict[id];
            // callback.resolve
            if (callback) {
                // preserve stack-trace
                callback.err.message = "chrome-devtools - "
                + JSON.stringify(error);
                assertOrThrow(!error, callback.err);
                callback.resolve(result);
                return;
            }
            // stringify <error>, because it is an object
            // that would otherwise print as "[object Object]"
            assertOrThrow(!error, "chrome-devtools - " + JSON.stringify(error));
            chromeClient.emit(method, params);
        });
        chromeClient.rpc = function (method, params) {
/*
 * this function will message-pass
 * JSON.stringify({
 *     id: <callbackId>,
 *     method: <method>,
 *     params: <params>,
 *     sessionId: <chromeSessionId>
 * })
 * to chrome-browser using chrome-devtools-protocol
 * and return a promise that resolves with the matching response's result
 * note - <callbackId> cycles through 1..256
 */
            callbackId = (callbackId % 256) + 1;
            chromeClient.write(Buffer.from(JSON.stringify({
                id: callbackId,
                method,
                params,
                sessionId: chromeSessionId
            })));
            return new Promise(function (resolve) {
                callbackDict[callbackId] = {
                    // error is created here to capture the caller's stack
                    err: new Error(),
                    method,
                    resolve
                };
            });
        };
    }).then(function () {
/*
 * this function will init <wsReader>
 * that can read websocket-frames from <websocket>
 */
        let WS_PAYLOAD_LENGTH_MAX;
        let WS_READ_HEADER;
        let WS_READ_LENGTH16;
        let WS_READ_LENGTH63;
        let WS_READ_PAYLOAD;
        let wsBufList;
        let wsPayloadLength;
        let wsReadState;
        WS_PAYLOAD_LENGTH_MAX = 10000000;
        WS_READ_HEADER = 0;
        WS_READ_LENGTH16 = 1;
        WS_READ_LENGTH63 = 2;
        WS_READ_PAYLOAD = 3;
        wsBufList = [];
        wsPayloadLength = 0;
        wsReadState = WS_READ_HEADER;
/*
https://tools.ietf.org/html/draft-ietf-hybi-thewebsocketprotocol-13#section-5.2
+---------------------------------------------------------------+
|0               1               2               3              |
|0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f|
+-+-+-+-+-------+-+-------------+-------------------------------+
|F|R|R|R| opcode|M| Payload len |    Extended payload length    |
|I|S|S|S|  (4)  |A|     (7)     |            (16/63)            |
|N|V|V|V|       |S|             |   (if payload len==126/127)   |
| |1|2|3|       |K|             |                               |
+-+-+-+-+-------+-+-------------+ - - - - - - - - - - - - - - - +
|    Extended payload length continued, if payload len == 127   |
+ - - - - - - - - - - - - - - - +-------------------------------+
|                               |Masking-key, if MASK set to 1  |
+-------------------------------+-------------------------------+
| Masking-key (continued)       |         Payload Data          |
+-------------------------------- - - - - - - - - - - - - - - - +
:                    Payload Data continued ...                 :
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
|                    Payload Data continued ...                 |
+---------------------------------------------------------------+
FIN: 1 bit
    Indicates that this is the final fragment in a message.
RSV1, RSV2, RSV3: 1 bit each
    MUST be 0 unless an extension is negotiated.
Opcode: 4 bits
    * %x0 denotes a continuation frame
    * %x1 denotes a text frame
    * %x2 denotes a binary frame
    * %x3-7 are reserved for further non-control frames
    * %x8 denotes a connection close
    * %x9 denotes a ping
    * %xA denotes a pong
    * %xB-F are reserved for further control frames
Mask: 1 bit
    If set to 1, a masking key is present and is used to unmask the
    payload data. All frames sent from client to server have this bit
    set to 1.
Payload length: 7 bits, 7+16 bits, or 7+64 bits
    If 0-125, that is the payload length. If 126, the following 2 bytes
    interpreted as a 16 bit unsigned integer are the payload length.
    If 127, the following 8 bytes interpreted as a 64-bit unsigned
    integer (the most significant bit MUST be 0) are the payload length.
    Multibyte length quantities are expressed in network byte order.
Masking-key: 0 or 4 bytes
    Present if the mask bit is set to 1, absent if it is set to 0.
Payload data: (x+y) bytes
    Extension data (x bytes, 0 unless an extension has been negotiated)
    concatenated with application data (y bytes).
*/
        function wsBufListRead(nn) {
/*
 * this function will read <nn> bytes from <wsBufList>
 * and leave the remaining bytes as the only element of <wsBufList>
 */
            let buf;
            wsBufList = (
                wsBufList.length === 1
                ? wsBufList[0]
                : Buffer.concat(wsBufList)
            );
            buf = wsBufList.slice(0, nn);
            wsBufList = [
                wsBufList.slice(nn)
            ];
            return buf;
        }
        function wsFrameRead() {
/*
 * this function will read websocket-data-frame
 * it advances the frame-parser by one state, and returns
 * true if it made progress, or undefined if more bytes are needed
 */
            let buf;
            let opcode;
            // wait until enough bytes are buffered for the current state
            if (wsBufList.reduce(function (aa, bb) {
                return aa + bb.length;
            }, 0) < (
                wsReadState === WS_READ_PAYLOAD
                ? Math.max(wsPayloadLength, 1)
                : wsReadState === WS_READ_LENGTH63
                ? 8
                : 2
            )) {
                return;
            }
            switch (wsReadState) {
            // read frame-header
            case WS_READ_HEADER:
                buf = wsBufListRead(2);
                // validate opcode
                opcode = buf[0] & 0x0f;
                assertOrThrow(
                    opcode === 0x01,
                    "chrome-devtools - opcode must be 0x01, not 0x0"
                    + opcode.toString(16)
                );
                wsPayloadLength = buf[1] & 0x7f;
                wsReadState = (
                    wsPayloadLength === 126
                    ? WS_READ_LENGTH16
                    : wsPayloadLength === 127
                    ? WS_READ_LENGTH63
                    : WS_READ_PAYLOAD
                );
                break;
            // read frame-payload-length-16
            case WS_READ_LENGTH16:
                wsPayloadLength = wsBufListRead(2).readUInt16BE(0);
                wsReadState = WS_READ_PAYLOAD;
                break;
            // read frame-payload-length-63
            case WS_READ_LENGTH63:
                buf = wsBufListRead(8);
                wsPayloadLength = (
                    buf.readUInt32BE(0) * 0x100000000 + buf.readUInt32BE(4)
                );
                wsReadState = WS_READ_PAYLOAD;
                break;
            // read frame-payload-data
            case WS_READ_PAYLOAD:
                // report the limit that is actually enforced
                assertOrThrow(
                    (
                        0 <= wsPayloadLength
                        && wsPayloadLength <= WS_PAYLOAD_LENGTH_MAX
                    ),
                    "chrome-devtools - "
                    + "payload-length must be between 0 and "
                    + WS_PAYLOAD_LENGTH_MAX + " bytes, not "
                    + wsPayloadLength
                );
                buf = wsBufListRead(wsPayloadLength);
                wsReadState = WS_READ_HEADER;
                chromeClient.push(buf);
                break;
            }
            return true;
        }
        function WsReader() {
/*
 * this function will construct <wsReader>
 */
            require("stream").Transform.call(this);
        }
        require("util").inherits(WsReader, require("stream").Transform);
        wsReader = new WsReader();
        WsReader.prototype._transform = function (chunk, ignore, callback) {
/*
 * this function will implement Transform.prototype._transform
 * by buffering <chunk> and parsing as many frames as are available
 */
            wsBufList.push(chunk);
            while (true) {
                if (!wsFrameRead()) {
                    break;
                }
            }
            callback();
        };
    }).then(function () {
/*
 * this function will init <chromeProcess>
 * and resolve with the websocket-url printed by chrome to stderr
 */
        processPlatform = processPlatform || process.platform;
        chromeUserDataDir = require("fs").mkdtempSync(require("path").join(
            require("os").tmpdir(),
            "puppeteer_dev_profile-"
        ));
        chromeBin = chromeBin || (
            processPlatform === "darwin"
            ? "/Applications/Google Chrome.app/Contents/MacOS/"
            + "Google Chrome"
            : processPlatform === "win32"
            ? "C:\\Program Files\\Google\\Chrome\\Application\\"
            + "chrome.exe"
            : "/usr/bin/google-chrome-stable"
        );
        console.error("\nchrome-devtools - spawning " + chromeBin);
        chromeProcess = require("child_process").spawn((
            chromeBin
        ), [
            "--headless",
            "--incognito",
            (
                processPlatform === "linux"
                ? "--no-sandbox"
                : ""
            ),
            "--remote-debugging-port=0",
            "--user-data-dir=" + chromeUserDataDir
        ], {
            // On non-windows platforms, `detached: true` makes child process
            // a leader of a new process group, making it possible to kill
            // child process tree with `.kill(-pid)` command.
            // https://nodejs.org/api/child_process.html#child_process_options_detached
            detached: process.platform !== "win32",
            stdio: [
                "ignore", (
                    !modeSilent
                    ? 1
                    : "ignore"
                ), "pipe"
            ]
        });
        if (!modeSilent) {
            chromeProcess.on("error", noop);
            chromeProcess.stderr.pipe(process.stderr, {
                end: false
            });
        }
        process.on("exit", chromeCleanup);
        process.on("SIGINT", chromeCleanup);
        process.on("SIGTERM", chromeCleanup);
        process.on("SIGHUP", chromeCleanup);
        return new Promise(function (resolve, reject) {
            let stderr;
            // coverage-hack
            if (modeMockProcessPlatform) {
                chromeCleanup();
                reject();
                return;
            }
            stderr = "";
            chromeProcess.stderr.on("data", function onData(chunk) {
                // give up if the url did not show up in first 64 KiB of stderr
                assertOrThrow(
                    stderr.length < 65536,
                    "chrome-devtools - cannot connect to chrome"
                );
                stderr += chunk;
                // .replace() is used only to run the callback on a match
                stderr.replace((
                    /^DevTools\u0020listening\u0020on\u0020(ws:\/\/.*)$/m
                ), function (ignore, url) {
                    chromeProcess.stderr.removeListener("data", onData);
                    resolve(url);
                    return "";
                });
            });
        });
    }).then(function (websocketUrl) {
/*
 * this function will init <websocket>
 * by performing the http-upgrade handshake with <websocketUrl>
 */
        let secWebsocketKey;
        console.error(
            "chrome-devtools - connecting to websocket " + websocketUrl
        );
        secWebsocketKey = require("crypto").randomBytes(16).toString("base64");
        return new Promise(function (resolve) {
            require("http").get(Object.assign(require("url").parse(
                websocketUrl
            ), {
                "createConnection": function (opt) {
                    // <opt>.path holds the url-path of the http-request,
                    // replace it with <opt>.socketPath before net.connect
                    opt.path = opt.socketPath;
                    return require("net").connect(opt);
                },
                "headers": {
                    "Connection": "Upgrade",
                    "Sec-WebSocket-Key": secWebsocketKey,
                    "Sec-WebSocket-Version": 13,
                    "Upgrade": "websocket"
                },
                "protocol": "http:",
                "protocolVersion": 13
            })).once("upgrade", function (res, _websocket, head) {
                // validate handshake
                // sec-websocket-accept = base64(sha1(key + magic-guid))
                assertOrThrow(
                    (
                        res.headers["sec-websocket-accept"]
                        === require("crypto").createHash("sha1").update(
                            secWebsocketKey
                            + "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
                        ).digest("base64")
                    ),
                    "chrome-devtools - invalid sec-websocket-accept header"
                );
                websocket = _websocket;
                // put back bytes that arrived together with the handshake
                websocket.unshift(head);
                // websocket - disable timeout
                websocket.setTimeout(0);
                // websocket - disable nagle's algorithm
                websocket.setNoDelay();
                websocket.on("end", websocket.end.bind(websocket));
                // pipe websocket to wsReader
                websocket.pipe(wsReader);
                resolve();
            });
        });
    }).then(function () {
/*
 * this function will init <chromeSessionId>
 */
        console.error("\nchrome-devtools - creating blank webpage");
        return chromeClient.rpc("Target.createTarget", {
            url: "about:blank"
        }).then(function (data) {
            return chromeClient.rpc("Target.attachToTarget", {
                targetId: data.targetId,
                flatten: true
            });
        }).then(function ({
            sessionId
        }) {
            chromeSessionId = sessionId;
        });
    }).then(function () {
/*
 * this function will init screensize and enable page-events
 * note - these rpc-calls are sent without waiting for their responses
 */
        // init screensize
        chromeClient.rpc("Emulation.setDeviceMetricsOverride", {
            deviceScaleFactor: 1,
            height: 600,
            mobile: false,
            screenOrientation: {
                angle: 0,
                type: "portraitPrimary"
            },
            width: 800
        });
        // init page
        chromeClient.rpc("Page.enable", undefined);
        chromeClient.rpc("Page.setLifecycleEventsEnabled", {
            enabled: true
        });
        chromeClient.rpc("Performance.enable", undefined);
    }).then(function () {
/*
 * this function will resolve <chromeClient>
 */
        return chromeClient;
    });
}
/*
* quickstart-example
*
* this example uses chromeClient from chromeDevtoolsClientCreate()
* to direct chrome-browser to
* 1. navigate https://www.amazon.com
* 2. save screenshot of webpage
* 3. save entire rendered html-content
* 4. save all href-links
*/
(async function () {
/*
 * this function will run the quickstart-example
 * 1. start chrome-browser with blank-webpage
 * 2. navigate to <url> and wait for its "load" event
 * 3. write screenshot.png, content.html, links.txt to current directory
 * 4. cleanup chrome-browser, even if one of the steps above has failed
 */
    let chromeClient;
    let chromeFrameId;
    let data;
    let fs;
    let url;
    fs = require("fs").promises;
    // init chromeClient and start chrome-browser with blank-webpage
    chromeClient = await chromeDevtoolsClientCreate({
        timeout: 30000
    });
    // try-finally guarantees chrome-process is killed and its temporary
    // profile removed right away if any step below throws
    try {
        /*
        // disable https-certificate if behind firewall
        chromeClient.rpc("Security.setIgnoreCertificateErrors", {
            ignore: true
        });
        */
        // navigate webpage to url
        url = "https://www.amazon.com/?foo=1&bar=2";
        console.error("chrome-devtools - navigate webpage to url " + url);
        chromeClient.rpc("Page.navigate", {
            url
        });
        // wait for "load" event to fire in webpage
        data = await chromeClient.rpc("Page.getFrameTree");
        chromeFrameId = data.frameTree.frame.id;
        await new Promise(function (resolve) {
            chromeClient.on("Page.lifecycleEvent", function onLoad({
                frameId,
                name
            }) {
                // ignore lifecycle-events from other frames
                if (frameId === chromeFrameId && name === "load") {
                    chromeClient.removeListener(
                        "Page.lifecycleEvent",
                        onLoad
                    );
                    resolve();
                }
            });
        });
        // wait 1000 ms for webpage to render
        await new Promise(function (resolve) {
            setTimeout(resolve, 1000);
        });
        // save screenshot of webpage to screenshot.png
        data = await chromeClient.rpc("Page.captureScreenshot", {
            format: "png"
        });
        assertOrThrow(!data.exceptionDetails, data.exceptionDetails);
        await fs.writeFile(
            "screenshot.png",
            Buffer.from(data.data, "base64")
        );
        console.error("chrome-devtools - wrote file screenshot.png");
        // save rendered html-content to content.html
        data = await chromeClient.rpc("Runtime.evaluate", {
            awaitPromise: true,
            expression: "document.documentElement.outerHTML",
            returnByValue: false
        });
        assertOrThrow(!data.exceptionDetails, data.exceptionDetails);
        await fs.writeFile("content.html", data.result.value);
        console.error("chrome-devtools - wrote file content.html");
        // use css-selector to scrape-and-save href-links in webpage
        // to links.txt
        data = await chromeClient.rpc("Runtime.evaluate", {
            awaitPromise: true,
            expression: (
                "JSON.stringify(Array.from("
                + " document.querySelectorAll(\"[href]\")"
                + ").map(function (elem) {"
                + " return elem.href;"
                + "}), undefined, 4);"
            ),
            returnByValue: false
        });
        assertOrThrow(!data.exceptionDetails, data.exceptionDetails);
        await fs.writeFile("links.txt", data.result.value);
        console.error("chrome-devtools - wrote file links.txt");
    } finally {
        // cleanup chromeClient, kill chrome-process, destroy websocket
        chromeClient.destroy();
    }
/* jslint ignore:start */
/*
* stderr output
C:\temp>node.exe chrome-devtools-client.js
chrome-devtools - spawning C:\Program Files (x86)\Google\Chrome\Application\chrome.exe
DevTools listening on ws://127.0.0.1:63138/devtools/browser/1ee66054-71db-428d-be13-016c072d60b5
chrome-devtools - connecting to websocket ws://127.0.0.1:63138/devtools/browser/1ee66054-71db-428d-be13-016c072d60b5
chrome-devtools - creating blank webpage
chrome-devtools - navigate webpage to url https://www.amazon.com/?foo=1&bar=2
...
chrome-devtools - wrote file screenshot.png
chrome-devtools - wrote file content.html
chrome-devtools - wrote file links.txt
C:\temp>*/
/* jslint ignore:end */
}());
@kaizhu256
Copy link
Author

screenshot of script running in windows (works in linux as well -- and most likely macos too)

screenshot

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment