Last active
January 21, 2024 20:38
-
-
Save renoirb/8d255b9b4a90e83332134dd63af34d1f to your computer and use it in GitHub Desktop.
Extract data from Proton Mail
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Extract all messages from the left panel (the message list).
 *
 * After clicking on the "next" page, re-run the extraction to collect that page too.
 */
/**
 * .item-firstline > .item-senders > [data-testid="message-column:sender-address"][title],
 * .item-firstline > .item-senders > [data-testid="message-column:sender-address"].textContent,
 *
 * .item-secondline > .item-subject > [data-testid="message-column:subject"][title][role="heading"],
 * .item-secondline > .item-icons > .item-meta-infos > ul > li,
 */
/**
 * Field extraction table, consumed by `appendToRows`.
 *
 * Each entry is a 4-tuple `[fieldName, rowSelector, detailSelector, extract]`:
 * - fieldName: key under which the value is stored in the resulting record.
 * - rowSelector: selector, relative to one message node, of the line holding the field.
 * - detailSelector: selector, relative to that line, of the node carrying the value.
 * - extract: receives the detail node and returns the value to store.
 *
 * Declared with `var` so the snippet can be pasted again in the Dev-Tools console.
 */
var LINES = [
  [
    // e.g. "Yesterday"
    'sentDateText',
    '.delight-item-firstline',
    '.delight-item-firstline-infos time[datetime]',
    (n) => n.textContent,
  ],
  [
    // e.g. "Thursday, August 19th, 2021 at 20:11"
    'sentDateTime',
    '.delight-item-firstline',
    '.delight-item-firstline-infos time[datetime]',
    (n) => n.getAttribute('datetime'),
  ],
  [
    'senderEmails',
    '.delight-item-firstline',
    '.item-senders [data-testid="message-column:sender-address"][title]',
    // Split on the comma alone and trim, so "a, b" and "a,b" give the same list.
    (n) => n.getAttribute('title').split(',').map((email) => email.trim()),
  ],
  [
    'senderNames',
    '.delight-item-firstline',
    '.item-senders [data-testid="message-column:sender-address"][title]',
    // Trim each name: splitting "Alice, Bob" on "," alone used to yield " Bob".
    (n) => n.textContent.split(',').map((name) => name.trim()),
  ],
  [
    'subject',
    '.delight-item-secondline',
    '.item-subject [role="heading"][data-testid="message-column:subject"]',
    (n) => n.textContent,
  ],
  [
    'folder',
    '.delight-item-secondline',
    // Alternative, to get one or many, but would need more work:
    //   '.item-subject > span:not([role="heading"])'
    // The selector below just gets the first one, even though there can be many.
    '.item-subject span.flex span[data-testid^="item-location"]',
    (n) => {
      // Empty string when the node is missing or is not a location marker.
      const folder = n?.hasAttribute('data-testid') ? n.textContent : ''
      console.debug('folder', { element: n, folder })
      return folder
    },
  ],
  /**
   * This won't work, the DOM does not have a list of labels, only the first
   *
   * ['labels', '.item-secondline > .item-icons > .item-meta-infos > ul', (n) => Array.from(n.childNodes)],
   */
]
// CSS selector of the element whose child nodes are the message rows.
// Reassigned by tryMessageListParentSelector once a selector has been validated.
let MESSAGE_LIST_PARENT_SELECTOR = '.delight-items-column-list-inner.delight-items-column-list-inner--mail .delight-items-column-list-container div'
// Accumulates one record per message across pages; appendToRows pushes into it.
let rows = []
// First element matching MESSAGE_LIST_PARENT_SELECTOR, or null when nothing matches.
let MESSAGE_LIST = document.querySelector(MESSAGE_LIST_PARENT_SELECTOR)
/**
 * Validate a candidate selector for the message list container and, when it
 * matches an element that has child nodes, adopt it by updating
 * `MESSAGE_LIST` and `MESSAGE_LIST_PARENT_SELECTOR`.
 *
 * @param {string} [selector] - Candidate selector; falls back to the current
 *   `MESSAGE_LIST_PARENT_SELECTOR` when omitted.
 * @throws {Error} When the selector matches nothing or the match has no child
 *   nodes. The module state is left untouched in that case.
 */
let tryMessageListParentSelector = (selector) => {
  // Resolve the fallback once, so the value validated is also the value stored.
  const effectiveSelector = selector ?? MESSAGE_LIST_PARENT_SELECTOR
  const parent = document.querySelector(effectiveSelector)
  const shouldBeNonZero = parent?.childNodes.length ?? 0
  const passed = shouldBeNonZero > 0
  // Logged before throwing so that a failed attempt can be diagnosed too.
  console.log('tryMessageListParent', { selector: effectiveSelector, parent, shouldBeNonZero, passed })
  if (!passed) {
    const message = `Selector did not find a div with many children for messages`
    throw new Error(message)
  }
  MESSAGE_LIST = parent
  MESSAGE_LIST_PARENT_SELECTOR = effectiveSelector
}
// Validate the default selector right away, so that a selector that no longer
// matches the page fails loudly before any extraction is attempted.
tryMessageListParentSelector(MESSAGE_LIST_PARENT_SELECTOR)
/**
 * Extract one record per message currently displayed in `MESSAGE_LIST` and
 * append the records to `rows`.
 *
 * Run the following for each page. Click manually, then invoke this, filling the "rows" array.
 *
 * For every element child of `MESSAGE_LIST` and every entry of `LINES`, the
 * row-level node is looked up, then the detail node inside it, and the entry's
 * extractor is called with the row node as `this` and the detail node as
 * argument. A field whose detail node is absent is left out of the record; a
 * field whose extractor throws is stored as `null`.
 *
 * The records of a page are only committed to `rows` once the whole page has
 * been processed, so a failure does not leave half a page behind (which a
 * re-run would duplicate).
 *
 * @throws {Error} When `MESSAGE_LIST` is not set, or when a message node lacks
 *   one of the row-level nodes listed in `LINES`.
 */
var appendToRows = () => {
  if (!MESSAGE_LIST) {
    throw new Error('MESSAGE_LIST is not set, run tryMessageListParentSelector(selector) first')
  }
  const pageRows = []
  Array.from(MESSAGE_LIST.childNodes).forEach((MESSAGE_DOM_NODE, i) => {
    // childNodes also lists text and comment nodes, which have no
    // querySelector. 1 is Node.ELEMENT_NODE.
    if (MESSAGE_DOM_NODE.nodeType !== 1) {
      return
    }
    console.debug(`row ${i}.0`, { MESSAGE_DOM_NODE, i })
    const data = Object.create(null)
    for (const [fieldName, rowSel, detailSel, closure] of LINES) {
      const rowNode = MESSAGE_DOM_NODE.querySelector(rowSel)
      console.debug(`row ${fieldName} ${i}.1`, { rowSel, detailSel, rowNode })
      if (!rowNode) {
        throw new Error(`Error at row ${i} for rowNode (${rowSel})`)
      }
      const detailRowNode = rowNode.querySelector(detailSel)
      console.debug(`row ${fieldName} ${i}.2`, { rowNode, detailRowNode, closure })
      if (detailRowNode == null) {
        // Nothing to read for this field on this message; the key stays absent.
        continue
      }
      let value = null
      try {
        value = closure.call(rowNode, detailRowNode)
      } catch (e) {
        // Best effort: one broken field must not lose the whole message.
        console.error(`Error at row ${fieldName} ${i}.3: ${e}`, { value })
      }
      data[fieldName] = value
      console.debug(`row ${fieldName} ${i}.3`, { value })
    }
    pageRows.push(data)
  })
  rows.push(...pageRows)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * From a message, click on "more" and "message headers"
 *
 * Run this command.
 *
 * Reads the text of the first element matching `selector` and parses it as a
 * block of e-mail headers, stopping at the first empty line.
 *
 * - Header names are lower-cased and trimmed; values are trimmed.
 * - Folded headers (continuation lines starting with whitespace) are unfolded:
 *   each continuation is appended to the value, separated by a single space.
 * - Headers whose name starts with "x-pm" are left out.
 * - Lines that are neither a header nor a continuation are ignored.
 * - When a header appears more than once, the last occurrence wins.
 *
 * @param {string} [selector='pre'] - Selector of the element holding the raw headers.
 * @returns {Map<string, string>} Header name to value; empty when the element
 *   is missing or its text does not start with a header line.
 */
var extractEmailHeaders = (selector = 'pre') => {
  const headers = new Map()
  const textContent = document.querySelector(selector)?.textContent ?? ''
  if (!/^[A-Z-]+:/i.test(textContent)) {
    return headers
  }
  // Name of the header that continuation lines belong to; null while the
  // current header is being skipped.
  let currentName = null
  for (const line of textContent.split(/\r?\n/)) {
    if (line === '') {
      // An empty line ends the header section.
      break
    }
    if (/^\s/.test(line)) {
      if (currentName !== null) {
        const unfolded = `${headers.get(currentName)} ${line.trim()}`.trim()
        headers.set(currentName, unfolded)
      }
      continue
    }
    // Only the first ":" separates name and value, e.g. "Subject: Re: Fooo".
    const separatorIndex = line.indexOf(':')
    const headerName = separatorIndex > 0 ? line.slice(0, separatorIndex).trim().toLowerCase() : ''
    if (headerName === '' || headerName.startsWith('x-pm')) {
      currentName = null
      continue
    }
    currentName = headerName
    headers.set(headerName, line.slice(separatorIndex + 1).trim())
  }
  return headers
}
/**
 * From the result of extractEmailHeaders,
 * use this to format the headers you want to extract.
 *
 * Produces one "name: value" line per requested header, in the order the
 * names are given. Headers that are absent or have an empty value are left
 * out, and a name listed twice is only emitted once. The input is not
 * modified.
 *
 * @param {Map<string, string>|Iterable<[string, string]>|null|undefined} map -
 *   Header name to value, with names spelled as they should be looked up
 *   (extractEmailHeaders produces lower-case names).
 * @param {Iterable<string>} [headerNames] - Names of the headers to output.
 * @returns {string} The selected headers joined with "\n"; '' when none match.
 */
var stringifyUsefulHeaders = (
  map,
  headerNames = ['from', 'subject', 'to', 'reply-to', 'x-original-to', 'delivered-to', 'x-attached'],
) => {
  // Copying accepts any iterable of entries, and null/undefined as "no headers".
  const available = new Map(map)
  return [...new Set(headerNames)]
    .filter((headerName) => available.get(headerName))
    .map((headerName) => `${headerName}: ${available.get(headerName)}`)
    .join('\n')
}
jq queries
Finding based on name in one of the senders
jq '.[] | select(.senderNames[0] | contains("amazon"))'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Process
- Run appendToRows() for each page.
- Inspect rows in Dev-Tools.
- Run copy(rows) in Dev-Tools and paste the contents into an empty file; that'll be useful to see the subject, sender, etc.
- Run extractEmailHeaders() on an opened message:
var headers = extractEmailHeaders()
stringifyUsefulHeaders(headers)
With all of this, it should help to figure out all the variations of headers from the same sender but for different purposes.
Example output
Extract from message list
Extract from message headers