Skip to content

Instantly share code, notes, and snippets.

Last active January 21, 2024 20:38
Show Gist options
  • Save renoirb/8d255b9b4a90e83332134dd63af34d1f to your computer and use it in GitHub Desktop.
Save renoirb/8d255b9b4a90e83332134dd63af34d1f to your computer and use it in GitHub Desktop.
Extract data from Proton Mail
/**
 * Extract all messages from left panel
 *
 * We can click on "next" page, and re-run
 *
 * Markup the selectors below were written against:
 *
 * - .item-firstline > .item-senders > [data-testid="message-column:sender-address"][title],
 * - .item-firstline > .item-senders > [data-testid="message-column:sender-address"].textContent,
 * - .item-secondline > .item-subject > [data-testid="message-column:subject"][title][role="heading"],
 * - .item-secondline > .item-icons > .item-meta-infos > ul > li,
 *
 * Each entry is a tuple `[fieldName, rowSel, detailSel, closure]`, which is
 * the shape appendToRows destructures:
 *
 * - fieldName: key under which the extracted value is stored on the row object
 * - rowSel:    selector resolved against one message node
 * - detailSel: selector resolved against the node matched by rowSel
 * - closure:   receives the node matched by detailSel, returns the value
 *
 * NOTE(review): the fieldName and rowSel elements were missing from this copy
 * of the file. Row selectors are taken from the markup notes above; field
 * names other than "senderNames" (used by the jq example) and "folder" (used
 * by the log label) are assumptions. Confirm against the live DOM and against
 * any previously exported JSON before relying on them.
 */
var LINES = [
  [
    'dateLabel',
    '.item-firstline',
    // Yesterday
    '.delight-item-firstline-infos time[datetime]',
    (n) => n.textContent,
  ],
  [
    'dateTime',
    '.item-firstline',
    // Thursday, August 19th, 2021 at 20:11
    '.delight-item-firstline-infos time[datetime]',
    (n) => n.getAttribute('datetime'),
  ],
  [
    'senderAddresses',
    '.item-firstline',
    '.item-senders [data-testid="message-column:sender-address"][title]',
    (n) => n.getAttribute('title').split(', '),
  ],
  [
    'senderNames',
    '.item-firstline',
    '.item-senders [data-testid="message-column:sender-address"][title]',
    // Split on "," and trim, so entries do not keep the space that follows each comma.
    (n) => n.textContent.split(',').map((name) => name.trim()),
  ],
  [
    'subject',
    '.item-secondline',
    '.item-subject [role="heading"][data-testid="message-column:subject"]',
    (n) => n.textContent,
  ],
  [
    'folder',
    '.item-secondline',
    // '.item-subject > span:not([role="heading"])' /* To get one or many, but would need more work */,
    '.item-subject span.flex span[data-testid^="item-location"]' /* Just get the first one, even though there can be many*/,
    (n) => {
      // No try/catch here: appendToRows already catches closure errors and
      // stores null, so wrapping and rethrowing only discarded the stack.
      const folder = n?.hasAttribute('data-testid') ? n.textContent : ''
      console.debug('folder', { element: n, folder })
      return folder
    },
  ],
]

/*
 * This won't work, the DOM does not have a list of labels, only the first
 * ['labels', '.item-secondline > .item-icons > .item-meta-infos > ul', (n) => Array.from(n.childNodes);],
 */
// Default selector for the element whose child nodes are the message rows.
let MESSAGE_LIST_PARENT_SELECTOR = '.delight-items-column-list-inner.delight-items-column-list-inner--mail .delight-items-column-list-container div'
// Element holding the message rows; set by tryMessageListParentSelector().
// Declared explicitly so the assignment below is not an implicit global
// (which is a ReferenceError in strict mode / ES modules).
let MESSAGE_LIST = null
// Accumulates one object per message across pages (see appendToRows).
let rows = []

/**
 * Resolve the message-list container and remember it in MESSAGE_LIST.
 *
 * @param {string} [selector] CSS selector to try; falls back to
 *   MESSAGE_LIST_PARENT_SELECTOR when null or undefined.
 * @returns {Element} the element that was stored in MESSAGE_LIST.
 * @throws {Error} when nothing matches or the match has no child nodes;
 *   MESSAGE_LIST is left untouched in that case.
 */
let tryMessageListParentSelector = (selector) => {
  // Resolve the fallback once, so the element that is validated is also the
  // element that is stored (previously the raw, possibly undefined, argument
  // was queried a second time).
  const effectiveSelector = selector ?? MESSAGE_LIST_PARENT_SELECTOR
  const parent = document.querySelector(effectiveSelector)
  const shouldBeNonZero = parent?.childNodes.length ?? 0
  const passed = shouldBeNonZero > 0
  // Logged before the check so a failing selector can be diagnosed too.
  console.log('tryMessageListParent', { selector: effectiveSelector, parent, shouldBeNonZero, passed })
  if (!passed) {
    const message = `Selector did not find a div with many children for messages`
    throw new Error(message)
  }
  MESSAGE_LIST = parent
  return parent
}
/**
 * Run the following for each page. Click manually, then invoke this, filling the "rows" array.
 *
 * For every element child of MESSAGE_LIST, builds one prototype-less object by
 * walking LINES (tuples of `[fieldName, rowSel, detailSel, closure]`):
 * rowSel is resolved against the message node, detailSel against that match,
 * and closure(detailNode) yields the value stored under fieldName.
 *
 * - A field whose detail node is absent is left off the row object.
 * - A closure that throws is logged and the field is stored as null.
 * - A message node without a match for rowSel aborts the page with an Error.
 *
 * Rows are collected locally and appended to `rows` only once the whole page
 * has been read, so a failing page never leaves a partial batch behind.
 *
 * @returns {number} how many rows were appended for this page.
 * @throws {Error} when MESSAGE_LIST is unset, or a row has no rowSel match.
 */
var appendToRows = () => {
  if (typeof MESSAGE_LIST === 'undefined' || MESSAGE_LIST === null) {
    throw new Error('MESSAGE_LIST is not set, run tryMessageListParentSelector() first')
  }
  // childNodes may include text/comment nodes, which have no querySelector.
  const messageNodes = Array.from(MESSAGE_LIST.childNodes).filter(
    (node) => typeof node.querySelector === 'function',
  )
  const pageRows = []
  messageNodes.forEach((MESSAGE_DOM_NODE, i) => {
    console.debug(`row ${i}.0`, { MESSAGE_DOM_NODE, i })
    const data = Object.create(null)
    for (const [fieldName, rowSel, detailSel, closure] of LINES) {
      console.debug(`\n\nrow ${fieldName} ${i}.`)
      const rowNode = MESSAGE_DOM_NODE.querySelector(rowSel)
      // Only used in the debug output, to paste into the console when a selector misses.
      const localSel = `${MESSAGE_LIST_PARENT_SELECTOR} ${detailSel}`
      console.debug(`row ${fieldName} ${i}.1`, { rowSel, localSel, detailSel, rowNode })
      if (!rowNode) {
        const message = `Error at row ${i} for rowNode`
        throw new Error(message)
      }
      const detailRowNode = rowNode.querySelector(detailSel) ?? null
      console.debug(`row ${fieldName} ${i}.2`, { rowNode, detailRowNode, closure, aweile: detailRowNode !== null })
      if (detailRowNode === null) {
        continue
      }
      let value = null
      try {
        value = closure(detailRowNode)
        console.debug(`row ${fieldName} ${i}.2a`, { value })
      } catch (e) {
        // Best effort: one unreadable field must not lose the whole row.
        console.error(`Error at row ${fieldName} ${i}.3: ${e}`, { value })
        value = null
      }
      Object.assign(data, { [fieldName]: value })
      console.debug(`row ${fieldName} ${i}.3`, { value })
    }
    pageRows.push(data)
  })
  rows.push(...pageRows)
  return pageRows.length
}
/**
 * From a message, click on "more" and "message headers"
 * Run this command.
 *
 * Reads the text of the element matched by `selector` and parses it as a
 * block of e-mail headers, stopping at the first empty line.
 *
 * - Header names are lower-cased and trimmed; the value is everything after
 *   the first ":" (so "Subject: Re: Fooo" yields "Re: Fooo").
 * - Lines starting with whitespace are folded continuations and are appended
 *   to the previous header's value, separated by a single space.
 * - Headers whose name starts with "x-pm" are skipped, including their
 *   continuation lines.
 * - When a header name occurs more than once, the last occurrence wins.
 *
 * @param {string} [selector='pre'] CSS selector of the element holding the raw headers.
 * @returns {Map<string, string>} lower-cased header name to value; empty when
 *   the element is missing or its text does not start with a "Name:" line.
 */
var extractEmailHeaders = (selector = 'pre') => {
  const headers = new Map()
  const elRef = document.querySelector(selector)
  const textContent = elRef?.textContent ?? ''
  // Cheap sanity check that we are looking at headers and not at something else.
  if (!/^[^\s:]+:/.test(textContent)) {
    return headers
  }
  // Name of the header that continuation lines belong to; null while the
  // current header is being skipped or no header has been read yet.
  let currentName = null
  for (const line of textContent.split(/\r?\n/)) {
    if (line === '') {
      // An empty line ends the header block.
      break
    }
    if (/^\s/.test(line)) {
      if (currentName !== null) {
        const unfolded = [headers.get(currentName), line.trim()].filter(Boolean).join(' ')
        headers.set(currentName, unfolded)
      }
      continue
    }
    // Split on the first ":" only; the value may contain more of them.
    const sepIndex = line.indexOf(':')
    if (sepIndex < 1) {
      // Not a "Name: value" line; do not attach later continuations to it.
      currentName = null
      continue
    }
    // toLowerCase (not toLocaleLowerCase): the key must not depend on the browser locale.
    const headerName = line.slice(0, sepIndex).trim().toLowerCase()
    if (/^x-pm/i.test(headerName)) {
      currentName = null
      continue
    }
    headers.set(headerName, line.slice(sepIndex + 1).trim())
    currentName = headerName
  }
  return headers
}
/**
 * From the result of extractEmailHeaders,
 * use this to format the headers you want to extract.
 *
 * Emits one "name: value" line per requested header, in the order requested,
 * skipping headers that are absent or have an empty value.
 *
 * NOTE(review): the list of headers the original picked is not visible in
 * this copy of the file; the default below is an assumption. Confirm, or
 * pass `headerNames` explicitly.
 *
 * @param {Map<string, string>|Iterable<[string, string]>} map header name to value,
 *   with lower-cased names as produced by extractEmailHeaders.
 * @param {string[]} [headerNames] names to output; matched case-insensitively
 *   against the lower-cased keys.
 * @returns {string} the selected headers joined with "\n" ('' when none match).
 */
var stringifyUsefulHeaders = (
  map,
  headerNames = ['date', 'from', 'to', 'cc', 'reply-to', 'subject', 'message-id'],
) => {
  // Copying also lets callers pass any iterable of [name, value] pairs.
  const copy = new Map(map)
  const lines = []
  for (const requested of headerNames) {
    const headerName = String(requested).toLowerCase()
    const headerData = copy.get(headerName)
    if (headerData) {
      lines.push(`${headerName}: ${headerData}`)
    }
  }
  return lines.join('\n')
}
Copy link

renoirb commented Jan 21, 2024

jq queries

Find messages whose first sender name contains a given string (jq's `contains` is case-sensitive)

jq '.[] | select(.senderNames[0] | contains("amazon"))'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment