Skip to content

Instantly share code, notes, and snippets.

@ninjastic
Created January 13, 2024 05:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ninjastic/bc568895778132e1414564147d3a9aaf to your computer and use it in GitHub Desktop.
Save ninjastic/bc568895778132e1414564147d3a9aaf to your computer and use it in GitHub Desktop.
Downloads a JSON file containing your BitcoinTalk private messages (PMs)
// Scrapes BitcoinTalk private-message folders page by page and offers the
// result as a JSON download. Uses browser-only APIs (DOMParser, document).
// NOTE(review): presumably meant to be pasted into the browser console on
// bitcointalk.org while logged in — confirm.
(async () => {
// Response bodies are read as raw bytes and decoded with this decoder.
const decoder = new TextDecoder('windows-1252')
// Turns the decoded HTML text into a queryable document.
const parser = new DOMParser()
// Folder switches: set one to false to skip scraping that folder.
let inbox = true
let outbox = true
// Pagination state shared by the folder loops below:
// pageToFetch is the 1-based page number requested next,
// shouldFinish is set from the last-page flag returned for the fetched page.
let pageToFetch
let shouldFinish
// Returns a promise that settles (with no value) once `ms` milliseconds have
// elapsed. The delay only takes effect for callers that await the promise.
const sleep = (ms) => new Promise((wake) => {
  setTimeout(wake, ms)
})
/**
 * Converts one message row of the PM folder table into a plain object.
 *
 * @param {Element} pm - A table row that contains a message anchor.
 * @param {string} type - Folder name stored verbatim on the result.
 * @returns {Object} Message fields, in order: type, title, date, to, toUrl,
 *   responded, message, author, authorUrl.
 */
const parsePMRow = (pm, type) => {
  const header = pm.querySelector("td > table > tbody > tr > td > table > tbody > tr > td > table")
  // First link inside the info line under the title; it feeds both `to` and `toUrl`.
  const infoLink = header.querySelector('td[align=left] > div.smalltext > a')
  const data = {
    type,
    title: header.querySelector('td[align=left] > b').textContent,
    // The info line contains "on: <date> "; the capture group is the date text.
    date: header.querySelector('td[align=left] > div.smalltext').textContent.match(/on:\s(.*)\s/).at(1),
    to: infoLink.textContent,
    toUrl: infoLink.getAttribute('href'),
    responded: header.querySelector('tr > td:first-child > div.smalltext:nth-child(3)')?.textContent === '« You have forwarded or responded to this message. »',
    // Kept as HTML so the message formatting survives in the export.
    message: pm.querySelector('.personalmessage').innerHTML,
  }
  // Guest senders are rendered with a bold name instead of a profile link.
  const isGuestMessage = pm.querySelector("td > table > tbody > tr:nth-child(1) > td:nth-child(1) > div.smalltext")?.textContent.trim() === 'Guest'
  if (isGuestMessage) {
    data.author = pm.querySelector("td > table > tbody > tr:nth-child(1) > td:nth-child(1) > b").textContent
    data.authorUrl = ''
  } else {
    const profileLink = pm.querySelector('a[title*="View the profile of"]')
    data.author = profileLink.textContent
    data.authorUrl = profileLink.getAttribute('href')
  }
  return data
}

/**
 * Fetches one page of a private-message folder and parses its messages.
 *
 * @param {string} type - Folder name placed in the `f=` query parameter.
 * @param {number} index - 1-based page number; page N is requested with
 *   `start=(N - 1) * 20`.
 * @returns {Promise<{isLastPage: boolean, pms: Object[]}>} The parsed messages
 *   and a flag telling the caller whether to stop paginating.
 * @throws {Error} When the server answers with a non-2xx HTTP status.
 */
const fetchPMs = async (type, index) => {
  console.log('fetching', type, 'INDEX', index)
  const response = await fetch(`https://bitcointalk.org/index.php?action=pm;f=${type};sort=date;start=${(index - 1) * 20}`)
  // An error page has no pagination links, so it would be mistaken for a
  // single-page folder and end the export early without any warning.
  if (!response.ok) {
    throw new Error(`Failed to fetch ${type} page ${index}: HTTP ${response.status}`)
  }
  // Decode the raw bytes with the shared decoder instead of response.text().
  const html = decoder.decode(await response.arrayBuffer())
  const $ = parser.parseFromString(html, 'text/html')
  const isFirstPage = index === 1
  const hasSinglePage = $.querySelectorAll('#bodyarea > table > tbody > tr > td:nth-child(2) > form > div.bordercolor > table > tbody > tr > td a.navPages').length === 0
  const hasNextPageAnchor = [...$.querySelectorAll('span.prevnext > a.navPages')].some(element => element.textContent === '»')
  // The first page of a multi-page folder is never treated as the last one.
  const isLastPage = hasSinglePage || (!isFirstPage && !hasNextPageAnchor)
  // Only rows holding a message anchor are messages; other rows are skipped.
  const pms = [...$.querySelectorAll("form[name=pmFolder] tr")]
    .filter(row => row.querySelector('a[name*=msg]'))
    .map(row => parsePMRow(row, type))
  return { isLastPage, pms }
}
/**
 * Triggers a browser download of `data` serialized as JSON, saved as "pms.json".
 *
 * The payload is handed over as a Blob object URL instead of a `data:` URL:
 * a `data:` URL must be percent-encoded (inflating the payload) and browsers
 * cap how long such URLs may be, which a large export can exceed.
 *
 * @param {*} data - Any JSON-serializable value.
 * @returns {void}
 */
const downloadJson = (data) => {
  const blob = new Blob([JSON.stringify(data)], { type: 'application/json;charset=utf-8' });
  const blobUrl = URL.createObjectURL(blob);
  const downloadAnchorNode = document.createElement('a');
  downloadAnchorNode.setAttribute("href", blobUrl);
  downloadAnchorNode.setAttribute("download", "pms.json");
  // The anchor is attached only for the duration of the synthetic click.
  document.body.appendChild(downloadAnchorNode);
  downloadAnchorNode.click();
  downloadAnchorNode.remove();
  // Release the object URL after the click has been dispatched so the blob
  // is not kept alive for the lifetime of the page.
  setTimeout(() => URL.revokeObjectURL(blobUrl), 0);
};
// Collect every message from the enabled folders, then offer them as a download.
const pms = []
const folders = [inbox && 'inbox', outbox && 'outbox'].filter(Boolean)
// Tracks whether a request has already been sent, so that every request after
// the very first one (across both folders) is preceded by a pause.
let hasRequested = false
for (const folder of folders) {
  pageToFetch = 1
  shouldFinish = false
  do {
    // The pause has to be awaited: calling sleep() without `await` merely
    // starts a timer and lets the next request fire immediately.
    if (hasRequested) {
      await sleep(1500)
    }
    hasRequested = true
    const { isLastPage, pms: parsedPms } = await fetchPMs(folder, pageToFetch)
    pms.push(...parsedPms)
    shouldFinish = isLastPage
    pageToFetch += 1
  } while (!shouldFinish)
}
console.log('Scraped', pms.length, 'PMs.')
downloadJson(pms)
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment