Skip to content

Instantly share code, notes, and snippets.

@renoirb
Last active January 21, 2024 20:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save renoirb/8d255b9b4a90e83332134dd63af34d1f to your computer and use it in GitHub Desktop.
Save renoirb/8d255b9b4a90e83332134dd63af34d1f to your computer and use it in GitHub Desktop.
Extract data from Proton Mail
/**
* Extract all messages from left panel
*
* We can click on "next" page, and re-run
*/
/**
 * Field extractors applied to every message row by appendToRows().
 *
 * Each entry is a 4-tuple `[fieldName, rowSelector, detailSelector, extract]`:
 *   - fieldName:      key under which the extracted value is stored on the row object
 *   - rowSelector:    selector resolved against the message node
 *   - detailSelector: selector resolved against the node matched by rowSelector
 *   - extract:        receives the node matched by detailSelector, returns the value
 *
 * DOM notes from the original author. The class names in these notes lack the
 * `delight-` prefix used by the selectors below.
 * NOTE(review): confirm which naming the current DOM uses.
 *
 * .item-firstline > .item-senders > [data-testid="message-column:sender-address"][title],
 * .item-firstline > .item-senders > [data-testid="message-column:sender-address"].textContent,
 *
 * .item-secondline > .item-subject > [data-testid="message-column:subject"][title][role="heading"],
 * .item-secondline > .item-icons > .item-meta-infos > ul > li,
 */
var LINES = [
  [
    // Human-readable date, e.g. "Yesterday"
    'sentDateText',
    '.delight-item-firstline',
    '.delight-item-firstline-infos time[datetime]',
    (n) => n.textContent,
  ],
  [
    // Value of the datetime attribute, e.g. "Thursday, August 19th, 2021 at 20:11"
    'sentDateTime',
    '.delight-item-firstline',
    '.delight-item-firstline-infos time[datetime]',
    (n) => n.getAttribute('datetime'),
  ],
  [
    // Comma-separated addresses from the title attribute; entries are trimmed so
    // both "a, b" and "a,b" yield clean values.
    'senderEmails',
    '.delight-item-firstline',
    '.item-senders [data-testid="message-column:sender-address"][title]',
    (n) => n.getAttribute('title').split(',').map((email) => email.trim()),
  ],
  [
    // Comma-separated display names; trimmed so that "Alice, Bob" does not
    // produce " Bob" with a leading space.
    'senderNames',
    '.delight-item-firstline',
    '.item-senders [data-testid="message-column:sender-address"][title]',
    (n) => n.textContent.split(',').map((name) => name.trim()),
  ],
  [
    'subject',
    '.delight-item-secondline',
    '.item-subject [role="heading"][data-testid="message-column:subject"]',
    (n) => n.textContent,
  ],
  [
    'folder',
    '.delight-item-secondline',
    // '.item-subject > span:not([role="heading"])' /* To get one or many, but would need more work */,
    '.item-subject span.flex span[data-testid^="item-location"]' /* Just get the first one, even though there can be many*/,
    (n) => {
      // Empty string when there is no node or it carries no data-testid attribute.
      // Errors are left to propagate untouched: appendToRows() already catches
      // and reports extractor failures, so re-wrapping them here only hid the
      // original error object.
      const folder = n?.hasAttribute('data-testid') ? n.textContent : ''
      console.debug('folder', { element: n, folder })
      return folder
    },
  ],
  /**
   * This won't work, the DOM does not have a list of labels, only the first
   *
   * ['labels', '.item-secondline > .item-icons > .item-meta-infos > ul', (n) => Array.from(n.childNodes);],
   */
]
// Accumulator for extracted messages; appendToRows() pushes one object per message.
let rows = []
// Descendant selector for the element whose child nodes are the message rows.
let MESSAGE_LIST_PARENT_SELECTOR = [
  '.delight-items-column-list-inner.delight-items-column-list-inner--mail',
  '.delight-items-column-list-container',
  'div',
].join(' ')
// First element matching the selector above, or null when nothing matches.
let MESSAGE_LIST = document.querySelector(MESSAGE_LIST_PARENT_SELECTOR)
/**
 * Point MESSAGE_LIST and MESSAGE_LIST_PARENT_SELECTOR at the first element
 * matching `selector`, provided that element has at least one child node.
 *
 * The effective selector (after falling back to the current
 * MESSAGE_LIST_PARENT_SELECTOR when `selector` is null/undefined) is used for
 * the lookup AND for the stored state, so calling without an argument simply
 * re-validates the current selector instead of storing `undefined`.
 *
 * @param {string} [selector] CSS selector of the candidate message-list parent.
 * @throws {Error} when nothing matches or the match has no child nodes; the
 *   module-level state is left untouched in that case.
 */
let tryMessageListParentSelector = (selector) => {
  const effectiveSelector = selector ?? MESSAGE_LIST_PARENT_SELECTOR
  const parent = document.querySelector(effectiveSelector)
  const shouldBeNonZero = parent?.childNodes.length ?? 0
  const passed = shouldBeNonZero > 0
  // Logged before the throw so that a failing selector can be diagnosed too.
  console.log('tryMessageListParent', { selector: effectiveSelector, parent, shouldBeNonZero, passed })
  if (!passed) {
    const message = `Selector did not find a div with many children for messages`
    throw new Error(message)
  }
  MESSAGE_LIST = parent
  MESSAGE_LIST_PARENT_SELECTOR = effectiveSelector
}
// Validate the default selector as soon as the snippet is pasted: this throws
// when the selector matches nothing or the matched element has no child nodes.
tryMessageListParentSelector(MESSAGE_LIST_PARENT_SELECTOR)
/**
 * Run the following for each page. Click manually, then invoke this, filling the "rows" array.
 *
 * For every element among the child nodes of MESSAGE_LIST, each LINES entry
 * `[fieldName, rowSel, detailSel, closure]` is applied: `rowSel` is resolved
 * inside the message node, `detailSel` inside that match, and the closure's
 * return value is stored under `fieldName`. A field whose detail node is
 * missing is left off the row; a closure that throws yields `null` for its
 * field and is reported with console.error.
 *
 * Non-element child nodes (text, comments) are skipped, and a null
 * MESSAGE_LIST appends nothing.
 *
 * @returns {undefined}
 * @throws {Error} when an element child has no node matching a `rowSel`.
 */
var appendToRows = () => {
  // `?? []` completes what the optional chaining started: without it,
  // Array.from(undefined) throws when MESSAGE_LIST is null.
  const messageNodes = Array.from(MESSAGE_LIST?.childNodes ?? [])
  messageNodes.forEach((MESSAGE_DOM_NODE, i) => {
    // childNodes also yields text and comment nodes, which have no
    // querySelector. 1 === Node.ELEMENT_NODE.
    if (MESSAGE_DOM_NODE.nodeType !== 1) {
      return
    }
    console.debug(`row ${i}.0`, { MESSAGE_DOM_NODE, i })
    const data = Object.create(null)
    for (const [fieldName, rowSel, detailSel, closure] of LINES) {
      console.debug(`\n\nrow ${fieldName} ${i}.`)
      const rowNode = MESSAGE_DOM_NODE.querySelector(rowSel)
      // Only used for tracing; the lookup below is relative to rowNode.
      const localSel = `${MESSAGE_LIST_PARENT_SELECTOR} ${detailSel}`
      console.debug(`row ${fieldName} ${i}.1`, { rowSel, localSel, detailSel, rowNode })
      if (!rowNode) {
        const message = `Error at row ${i} for rowNode`
        throw new Error(message)
      }
      const detailRowNode = rowNode.querySelector(detailSel) ?? null
      console.debug(`row ${fieldName} ${i}.2`, { rowNode, detailRowNode, closure, aweile: detailRowNode !== null })
      if (detailRowNode === null) {
        continue
      }
      let value = null
      try {
        value = closure.call(rowNode, detailRowNode)
        console.debug(`row ${fieldName} ${i}.2a`, { value })
      } catch (e) {
        // A failing extractor must not abort the whole page: report and store null.
        console.error(`Error at row ${fieldName} ${i}.3: ${e}`, { value })
        value = null
      }
      data[fieldName] = value
      console.debug(`row ${fieldName} ${i}.3`, { value })
    }
    rows.push(data)
  })
}
/**
 * From a message, click on "more" and "message headers"
 *
 * Run this command.
 *
 * Parses the raw header text found in the first element matching `selector`.
 *
 * - Parsing stops at the first empty line (end of the header section).
 * - Folded headers (continuation lines starting with whitespace) are unfolded
 *   into a single value, joined with one space.
 * - Header names are lower-cased; values are trimmed.
 * - Headers whose name starts with "x-pm" are left out.
 * - Lines without a header name before a ":" are ignored.
 * - When a header name occurs more than once, the last occurrence wins.
 *
 * @param {string} [selector='pre'] CSS selector of the element holding the raw headers.
 * @returns {Map<string, string>} lower-cased header name to header value; empty
 *   when the element is missing or its text does not start with a header line.
 */
var extractEmailHeaders = (selector = 'pre') => {
  const headers = new Map()
  const elRef = document.querySelector(selector)
  if (!elRef) {
    return headers
  }
  const textContent = elRef.textContent
  if (!/^[A-Z-]+:/i.test(textContent)) {
    return headers
  }

  // Pass 1: unfold continuation lines onto the header line they belong to.
  const logicalLines = []
  for (const line of textContent.split(/\r?\n/)) {
    if (line === '') {
      // An empty line ends the header section
      break
    }
    if (/^\s/.test(line)) {
      const continuation = line.trim()
      if (logicalLines.length > 0 && continuation !== '') {
        logicalLines[logicalLines.length - 1] += ` ${continuation}`
      }
      continue
    }
    logicalLines.push(line)
  }

  // Pass 2: split each logical line at the FIRST ":" only, so values such as
  // "Re: Fooo" keep their own colons, with or without a space after the name.
  for (const line of logicalLines) {
    const separatorIndex = line.indexOf(':')
    if (separatorIndex < 1) {
      continue
    }
    // toLowerCase rather than toLocaleLowerCase: header names are ASCII
    // protocol tokens and must not depend on the browser locale.
    const headerName = line.slice(0, separatorIndex).trim().toLowerCase()
    if (/^x-pm/i.test(headerName)) {
      continue
    }
    headers.set(headerName, line.slice(separatorIndex + 1).trim())
  }
  return headers
}
/**
 * From the result of extractEmailHeaders,
 * use this to format the headers you want to extract.
 *
 * Emits one "name: value" line per selected header, in the fixed order listed
 * below, skipping headers that are absent or whose value is falsy.
 *
 * @param {Map<string, string>} map header name to header value.
 * @returns {string} the selected headers joined with "\n" ('' when none match).
 */
var stringifyUsefulHeaders = (map) => {
  const wantedHeaderNames = [
    'from',
    'subject',
    'to',
    'reply-to',
    'x-original-to',
    'delivered-to',
    'x-attached',
  ]
  // Work on a copy so the caller's Map is never touched.
  const source = new Map(map)
  return wantedHeaderNames
    .filter((headerName) => Boolean(source.get(headerName)))
    .map((headerName) => `${headerName}: ${source.get(headerName)}`)
    .join('\n')
}
@renoirb
Copy link
Author

renoirb commented Jan 21, 2024

jq queries

Find messages whose first sender name contains a given string (case-sensitive)

jq '.[] | select(.senderNames[0] | contains("amazon"))'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment