Skip to content

Instantly share code, notes, and snippets.

@Semmu
Last active October 12, 2023 23:15
Show Gist options
  • Save Semmu/06c2a808ff93ba47d7118a95371c53ae to your computer and use it in GitHub Desktop.
Save Semmu/06c2a808ff93ba47d7118a95371c53ae to your computer and use it in GitHub Desktop.
1. replace every `!!!` placeholder with your own values:
- your cookie in the fetch headers
- your tokens in the fetch URLs and in the `token` field of the request bodies
To find them, open the network tab of your browser devtools and copy the relevant requests as Node.js fetch commands.
2. first run `node scrape_archived.js`
3. then run `./scrape.sh`
FYI: this only scrapes the archived messages among your saved items (a saved message has three possible states: in-progress, complete, and archived).
{
"type": "module",
"name": "slack-scrape",
"version": "1.0.0",
"description": "",
"main": "scrape.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC"
}
#!/bin/zsh
# Keep re-running the message scraper until it exits with status 0,
# announcing and waiting 60 seconds after every failed attempt.
until node scrape_msgs.js; do
  echo sleeping...
  sleep 60
done
import fs from "fs"
// Short alias used for all diagnostic output in this script.
const d = console.log

// Request headers sent with every API call. The `cookie` value is a `!!!`
// placeholder that has to be filled in by hand, and the boundary named in
// `content-type` is the one the request body is framed with.
const HEADERS = {
  accept: '*/*',
  'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,hu;q=0.7',
  'content-type': 'multipart/form-data; boundary=----WebKitFormBoundary7VmY11kTIBACOPhl',
  'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
  'sec-ch-ua-mobile': '?0',
  'sec-ch-ua-platform': '"macOS"',
  'sec-fetch-dest': 'empty',
  'sec-fetch-mode': 'cors',
  'sec-fetch-site': 'same-site',
  'sec-gpc': '1',
  cookie: '!!!',
}
/**
 * Fetch one page of saved items by POSTing a multipart form to the
 * `saved.list` endpoint.
 *
 * The form body is assembled from a field list so that every part is framed
 * the way multipart/form-data requires: boundary line, part header, an empty
 * line, then the value, each terminated by CRLF.
 *
 * @param {number} [limit=5] - value sent as the `limit` form field.
 * @param {?string} [cursor=null] - pagination cursor; the `cursor` field is
 *   only sent when this is truthy.
 * @param {string} [filter='saved'] - value sent as the `filter` form field
 *   (the script in this file passes 'archived').
 * @returns {Promise<object>} the parsed JSON body of the response.
 */
const getSavedItems = async (limit = 5, cursor = null, filter = 'saved') => {
  // Must stay identical to the boundary announced in HEADERS['content-type'].
  const boundary = '----WebKitFormBoundary7VmY11kTIBACOPhl'

  const fields = [
    ['token', '!!!'],
    ['limit', limit],
    ['filter', filter],
    ...(cursor ? [['cursor', cursor]] : []),
    ['include_tombstones', 'true'],
    ['_x_reason', 'saved-api/savedList'],
    ['_x_mode', 'online'],
    ['_x_sonic', 'true'],
    ['_x_app_name', 'client'],
  ]

  const parts = fields.map(
    ([name, value]) =>
      `--${boundary}\r\nContent-Disposition: form-data; name="${name}"\r\n\r\n${value}\r\n`
  )
  const body = `${parts.join('')}--${boundary}--\r\n`

  const response = await fetch("https://cppftw.slack.com/api/saved.list?!!!", {
    headers: HEADERS,
    referrerPolicy: "no-referrer",
    body,
    method: "POST",
  })
  return response.json()
}
// Walk through every page of archived saved items and write the de-duplicated
// list to archived.json.
//
// Items are stored in the Set as JSON strings so that an item returned on more
// than one page is only kept once.
const msgs = new Set()
let cursor = null
do {
  d('call with', {cursor})
  const response = await getSavedItems(50, cursor, 'archived')
  // d(response)

  // Fail with a readable error instead of a TypeError on `.length` when the
  // API reports a failure or the payload has no item list.
  if (response?.ok === false || !Array.isArray(response?.['saved_items'])) {
    d('unexpected response', response)
    throw new Error(`saved.list request failed: ${response?.error ?? 'no saved_items in response'}`)
  }

  d(`got ${response['saved_items'].length} msgs:`, response['saved_items'])
  for (const item of response['saved_items']) {
    msgs.add(JSON.stringify(item))
  }

  // A missing or empty cursor ends the pagination.
  cursor = response['response_metadata']?.['next_cursor']
} while (cursor)

// d(JSON.stringify(Array.from(msgs)))
fs.writeFileSync('archived.json', JSON.stringify(Array.from(msgs, (msg) => JSON.parse(msg)), null, 2))
d(msgs.size)
import fs from "fs"
import {execSync} from "child_process"
// Request headers sent with every API call. The `cookie` value is a `!!!`
// placeholder that has to be filled in by hand, and the boundary named in
// `content-type` is the one the request body is framed with.
const HEADERS = {
  accept: '*/*',
  'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,hu;q=0.7',
  'content-type': 'multipart/form-data; boundary=----WebKitFormBoundaryuCFwDhhrnEFlx8a8',
  'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
  'sec-ch-ua-mobile': '?0',
  'sec-ch-ua-platform': '"macOS"',
  'sec-fetch-dest': 'empty',
  'sec-fetch-mode': 'cors',
  'sec-fetch-site': 'same-site',
  'sec-gpc': '1',
  cookie: '!!!',
}

// Short alias used for all diagnostic output in this script.
const d = console.log
// Load the saved-item records from archived.json in the working directory
// (the README says to produce it by running scrape_archived.js first).
const archivedText = fs.readFileSync('archived.json', 'utf8')
let msgs = JSON.parse(archivedText)
// Debug aid: uncomment to truncate the run to the first 50 records.
// msgs.length = 50
/**
 * Fetch a single message by POSTing a multipart form to the `messages.list`
 * endpoint.
 *
 * The form body is assembled from a field list so that every part is framed
 * the way multipart/form-data requires: boundary line, part header, an empty
 * line, then the value, each terminated by CRLF. The `message_ids` value is
 * produced with JSON.stringify so the channel and timestamp are always
 * correctly quoted and escaped.
 *
 * @param {string} channel - channel id placed in the `message_ids` field.
 * @param {string} timestamp - message timestamp placed in the `message_ids` field.
 * @returns {Promise<object>} the parsed JSON body of the response.
 */
const getMessage = async (channel, timestamp) => {
  // Must stay identical to the boundary announced in HEADERS['content-type'].
  const boundary = '----WebKitFormBoundaryuCFwDhhrnEFlx8a8'

  const messageIds = JSON.stringify([
    {channel: String(channel), timestamps: [String(timestamp)]},
  ])

  const fields = [
    ['token', '!!!'],
    ['message_ids', messageIds],
    ['org_wide_aware', 'true'],
    ['_x_reason', 'messages-ufm'],
    ['_x_mode', 'online'],
    ['_x_sonic', 'true'],
    ['_x_app_name', 'client'],
  ]

  const parts = fields.map(
    ([name, value]) =>
      `--${boundary}\r\nContent-Disposition: form-data; name="${name}"\r\n\r\n${value}\r\n`
  )
  const body = `${parts.join('')}--${boundary}--\r\n`

  const response = await fetch("https://cppftw.slack.com/api/messages.list?!!!", {
    headers: HEADERS,
    referrerPolicy: "no-referrer",
    body,
    method: "POST",
  })
  return response.json()
}
// Download every saved item of type 'message' into its own JSON file, named
// `<item_id>---<ts>.json`, skipping files that already exist so an interrupted
// run can be resumed.
//
// The loop is deliberately sequential (for...of + await): each request
// finishes before the next one starts, and the one-second pause after each
// download actually spaces the requests out. A failed response throws, which
// rejects the module's top-level await and makes the process exit non-zero.
let count = 0
for (const msg of msgs) {
  if (msg['item_type'] !== 'message') {
    continue
  }

  const filename = `${msg['item_id']}---${msg['ts']}.json`
  if (fs.existsSync(filename)) {
    d(`- skipping ${filename}`)
    continue
  }

  const resp = await getMessage(msg['item_id'], msg['ts'])
  if (!resp['ok']) {
    d(`error for ${filename}`, resp)
    throw new Error(`resp not ok for ${filename}`)
  }

  fs.writeFileSync(filename, JSON.stringify(resp, null, 2))
  count += 1
  d({count, filename})

  // Non-blocking one-second pause between downloads.
  await new Promise((resolve) => setTimeout(resolve, 1000))
}
d({count})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment