Last active
October 12, 2023 23:15
-
-
Save Semmu/06c2a808ff93ba47d7118a95371c53ae to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. replace `!!!` everywhere with meaningful values | |
- your cookie in the fetch headers | |
- your tokens in the fetch URLs | |
grab them from the Network tab of your browser's devtools; copying the requests as Node.js fetch commands makes this easy | |
2. first run `node scrape_archived.js` | |
3. then run `./scrape.sh` | |
FYI: it only scrapes the archived messages among your saved items (a saved message has 3 possible states: in-progress, complete, and archived) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"type": "module", | |
"name": "slack-scrape", | |
"version": "1.0.0", | |
"description": "", | |
"main": "scrape.js", | |
"scripts": { | |
"test": "echo \"Error: no test specified\" && exit 1" | |
}, | |
"author": "", | |
"license": "ISC" | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/zsh
# Run the message scraper and, whenever it exits with a non-zero status,
# wait 60 seconds and run it again until it finally succeeds.
until node scrape_msgs.js; do
    echo sleeping...
    sleep 60
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from "fs" | |
// Short alias for console.log, used for all progress output in this script.
const { log: d } = console

// Request headers sent with every Slack API call made by this script.
// The multipart boundary in "content-type" has to match the boundary used
// when the request body is assembled, and the "cookie" value is a `!!!`
// placeholder that must be replaced with a real session cookie before running.
const HEADERS = {
    accept: '*/*',
    'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,hu;q=0.7',
    'content-type': 'multipart/form-data; boundary=----WebKitFormBoundary7VmY11kTIBACOPhl',
    'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'sec-gpc': '1',
    cookie: '!!!',
}
/**
 * Fetch one page of the Slack saved-items list via the `saved.list` endpoint.
 *
 * The request is a multipart/form-data POST. The body is assembled from a
 * list of form fields with CRLF line endings and a blank line between each
 * part's header and its value, so no stray characters can end up in the
 * payload. The `!!!` placeholders (URL query string and token) must be
 * replaced with real values before running.
 *
 * @param {number} [limit=5] - Maximum number of items requested per page.
 * @param {?string} [cursor=null] - Pagination cursor from a previous
 *   response; the `cursor` field is omitted from the request when falsy.
 * @param {string} [filter='saved'] - Value sent as the `filter` form field
 *   (this script calls it with 'archived').
 * @returns {Promise<object>} The parsed JSON body of the response.
 */
const getSavedItems = async (limit = 5, cursor = null, filter = 'saved') => {
    // Must be the same boundary that HEADERS["content-type"] announces.
    const boundary = "----WebKitFormBoundary7VmY11kTIBACOPhl"

    // Form fields, in the order they are sent.
    const fields = [
        ["token", "!!!"],
        ["limit", limit],
        ["filter", filter],
        ...(cursor ? [["cursor", cursor]] : []),
        ["include_tombstones", "true"],
        ["_x_reason", "saved-api/savedList"],
        ["_x_mode", "online"],
        ["_x_sonic", "true"],
        ["_x_app_name", "client"],
    ]

    // Each part: delimiter line, part header, blank line, value.
    const parts = fields.map(([name, value]) =>
        `--${boundary}\r\nContent-Disposition: form-data; name="${name}"\r\n\r\n${value}\r\n`)
    const body = `${parts.join("")}--${boundary}--\r\n`

    const response = await fetch("https://cppftw.slack.com/api/saved.list?!!!", {
        "headers": HEADERS,
        "referrerPolicy": "no-referrer",
        "body": body,
        "method": "POST"
    })
    return await response.json()
}
/**
 * Page through the saved-items list and return every distinct item.
 *
 * Items are de-duplicated by their JSON serialization, so an item that shows
 * up on more than one page is kept only once (first occurrence wins).
 *
 * @param {Function} fetchPage - Called as `fetchPage(pageSize, cursor, filter)`;
 *   must resolve to a response object with `ok`, `saved_items` and
 *   (optionally) `response_metadata.next_cursor`.
 * @param {string} [filter='archived'] - Filter value forwarded to `fetchPage`.
 * @param {number} [pageSize=50] - Page size forwarded to `fetchPage`.
 * @returns {Promise<object[]>} The unique items in the order first seen.
 * @throws {Error} When a page is not `ok` or has no `saved_items` array.
 */
const collectSavedItems = async (fetchPage, filter = 'archived', pageSize = 50) => {
    const uniqueItems = new Map()
    let cursor = null
    do {
        d('call with', {cursor})
        const response = await fetchPage(pageSize, cursor, filter)
        // Fail with a readable message instead of a TypeError further down
        // when the API reports an error (e.g. expired cookie or token).
        if (!response?.ok || !Array.isArray(response['saved_items'])) {
            throw new Error(`saved.list failed: ${response?.error ?? 'unexpected response shape'}`)
        }
        d(`got ${response['saved_items'].length} msgs:`, response['saved_items'])
        for (const item of response['saved_items']) {
            const key = JSON.stringify(item)
            if (!uniqueItems.has(key)) {
                uniqueItems.set(key, item)
            }
        }
        // An empty or missing next_cursor means this was the last page.
        cursor = response['response_metadata']?.['next_cursor']
    } while (cursor)
    return Array.from(uniqueItems.values())
}

// Collect all archived saved items, store them for the second script,
// and print how many distinct items were found.
const msgs = await collectSavedItems(getSavedItems)
fs.writeFileSync('archived.json', JSON.stringify(msgs, null, 2));
d(msgs.length)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from "fs" | |
import {execSync} from "child_process" | |
// Request headers sent with every Slack API call made by this script.
// The multipart boundary in "content-type" has to match the boundary used
// when the request body is assembled, and the "cookie" value is a `!!!`
// placeholder that must be replaced with a real session cookie before running.
const HEADERS = {
    accept: '*/*',
    'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,hu;q=0.7',
    'content-type': 'multipart/form-data; boundary=----WebKitFormBoundaryuCFwDhhrnEFlx8a8',
    'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'sec-gpc': '1',
    cookie: '!!!',
}

// Short alias for console.log, used for all progress output in this script.
const { log: d } = console
// Load the saved-item list from archived.json in the current directory
// (the README says to produce it by running scrape_archived.js first).
const archivedJson = fs.readFileSync('archived.json', 'utf8')
let msgs = JSON.parse(archivedJson)
// Debug aid: uncomment to cap the list at 50 entries for a short trial run.
// msgs.length = 50
/**
 * Fetch a single message via the Slack `messages.list` endpoint.
 *
 * The request is a multipart/form-data POST. The body is assembled from a
 * list of form fields with CRLF line endings and a blank line between each
 * part's header and its value, and the `message_ids` value is produced with
 * JSON.stringify so unusual characters in the arguments cannot break the
 * JSON. The `!!!` placeholders (URL query string and token) must be replaced
 * with real values before running.
 *
 * @param {string} channel - Sent as `channel` inside `message_ids`.
 * @param {string} timestamp - Sent as the only entry of `timestamps`.
 * @returns {Promise<object>} The parsed JSON body of the response.
 */
const getMessage = async (channel, timestamp) => {
    // Must be the same boundary that HEADERS["content-type"] announces.
    const boundary = "----WebKitFormBoundaryuCFwDhhrnEFlx8a8"

    // Form fields, in the order they are sent.
    const fields = [
        ["token", "!!!"],
        ["message_ids", JSON.stringify([{channel, timestamps: [timestamp]}])],
        ["org_wide_aware", "true"],
        ["_x_reason", "messages-ufm"],
        ["_x_mode", "online"],
        ["_x_sonic", "true"],
        ["_x_app_name", "client"],
    ]

    // Each part: delimiter line, part header, blank line, value.
    const parts = fields.map(([name, value]) =>
        `--${boundary}\r\nContent-Disposition: form-data; name="${name}"\r\n\r\n${value}\r\n`)
    const body = `${parts.join("")}--${boundary}--\r\n`

    const response = await fetch("https://cppftw.slack.com/api/messages.list?!!!", {
        "headers": HEADERS,
        "referrerPolicy": "no-referrer",
        "body": body,
        "method": "POST"
    })
    return await response.json()
}
// Download every saved item of type 'message' into its own JSON file named
// `<item_id>---<ts>.json`, skipping files that already exist so the script
// can be re-run after a failure and resume where it stopped.
//
// The items are processed strictly one after another: each request is awaited
// before the next one starts, followed by a one-second pause. (An async
// callback passed to forEach is not awaited, which would start all requests
// at once and print the final count before any of them finished.)
let count = 0
for (const msg of msgs) {
    if (msg['item_type'] !== 'message') {
        continue
    }

    const filename = `${msg['item_id']}---${msg['ts']}.json`
    if (fs.existsSync(filename)) {
        d(`- skipping ${filename}`)
        continue
    }

    const resp = await getMessage(msg['item_id'], msg['ts'])
    if (!resp['ok']) {
        d(`error for ${filename}`, resp)
        // Uncaught at top level, so the process exits non-zero and the
        // wrapper script can sleep and retry.
        throw new Error(`resp not ok for ${filename}`)
    }

    fs.writeFileSync(filename, JSON.stringify(resp, null, 2))
    count += 1
    d({count, filename})
    // Pause between requests without blocking the event loop.
    await new Promise(resolve => setTimeout(resolve, 1000))
}
d({count})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment