Skip to content

Instantly share code, notes, and snippets.

@arthurtsang
Last active August 30, 2019 18:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arthurtsang/557b32c568af6fdfd411312b0850a710 to your computer and use it in GitHub Desktop.
Save arthurtsang/557b32c568af6fdfd411312b0850a710 to your computer and use it in GitHub Desktop.
Extracting HTML from Gmail
/**
 * Search the authorized user's mailbox with a Gmail search query
 * (e.g. "to: someone@gmail.com subject: spam") and fetch the raw content
 * of every message in the result.
 *
 * Only a single `messages.list` call is issued, so at most one page of
 * results is processed; `nextPageToken` is not followed.
 *
 * @param gmailSearchQuery query string passed as the `q` parameter of `users.messages.list`
 * @returns one entry per listed message, as produced by `getRawMessage`;
 *          an empty array when the response lists no messages
 * @throws rejects if authorization, the list call, or any per-message fetch fails
 */
async searchEmail(gmailSearchQuery: string): Promise<any[]> {
  const auth = await this.authorize();
  const gmail = google.gmail({ version: 'v1', auth });

  // Use the promise-returning form of the API so every failure rejects the
  // promise returned by this method, instead of being thrown inside a
  // callback where nothing can observe it.
  const res = await gmail.users.messages.list({
    userId: 'me',
    q: gmailSearchQuery
  });

  // `messages` is absent when nothing matches; `== null` covers both null and undefined.
  const messages = res.data.messages;
  if (messages == null) {
    return [];
  }

  return Promise.all(messages.map(m => this.getRawMessage(gmail, m.id)));
}
/**
 * Fetch one message by id in `raw` format.
 *
 * @param gmail Gmail API client used to issue the request
 * @param message_id id of the message to fetch
 * @returns an object holding the message id and the `raw` field of the response
 * @throws rejects with the API error if the request fails
 */
private async getRawMessage(gmail: gmail_v1.Gmail, message_id: string) {
  // Awaiting the promise-returning form means a failed request rejects here
  // and the response is never dereferenced when it is undefined.
  const res = await gmail.users.messages.get({
    userId: 'me',
    id: message_id,
    format: 'raw'
  });

  return {
    id: message_id,
    raw: res.data.raw
  };
}
/**
 * Extract the HTML part from a raw, base64-encoded mail and load it into cheerio.
 *
 * The decoded text is scanned line by line. Collection starts at the first
 * line containing "DOCTYPE html" and stops at the first subsequent line that
 * starts with "----" (treated as the next part boundary). Blank lines are
 * skipped. Lines ending in "=" are treated as quoted-printable soft line
 * breaks and joined with the following line; "=3D" is decoded to "=".
 * No other quoted-printable escape sequences are decoded.
 *
 * @param raw base64-encoded raw message content
 * @returns the result of `cheerio.load` on the extracted HTML
 *          (an empty document if no "DOCTYPE html" line is found)
 */
getHtmlFromEmailBody(raw: string) {
  const lines = Buffer.from(raw, 'base64')
    .toString()
    .split(/\r\n|\n|\r/);

  const htmlLines: string[] = [];
  let pending = ''; // text accumulated across soft line breaks
  let continuing = false; // true when the previous line ended with a soft break
  let started = false;

  for (const rawLine of lines) {
    const trimmed = rawLine.trim();
    if (trimmed.length === 0) {
      continue;
    }
    if (trimmed.includes('DOCTYPE html')) {
      started = true;
    }
    if (!started) {
      continue;
    }
    if (trimmed.startsWith('----')) {
      break;
    }

    // After a soft break the next line is a direct continuation, so its
    // leading whitespace is content and must be kept; trailing whitespace
    // is transport padding either way.
    const line = continuing ? rawLine.replace(/\s+$/, '') : trimmed;

    // Detect the soft break on the still-encoded line: a line ending in
    // "=3D" ends with an encoded "=" character, not with a soft break.
    if (line.endsWith('=')) {
      pending += line.slice(0, -1);
      continuing = true;
    } else {
      htmlLines.push((pending + line).replace(/=3D/g, '='));
      pending = '';
      continuing = false;
    }
  }

  // Keep a trailing fragment whose soft break was never completed.
  if (pending.length > 0) {
    htmlLines.push(pending.replace(/=3D/g, '='));
  }

  // Join with newlines: the default separator of join() is ",", which
  // would inject commas into the markup.
  return cheerio.load(htmlLines.join('\n'));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment