Skip to content

Instantly share code, notes, and snippets.

@davecra
Created July 6, 2024 04:14
Show Gist options
  • Save davecra/40c6b42237919eb5ef7a9dbf0afcfbef to your computer and use it in GitHub Desktop.
Save davecra/40c6b42237919eb5ef7a9dbf0afcfbef to your computer and use it in GitHub Desktop.
Outlook Email Body Parser
/**
 * Heuristic parser that extracts the newest reply from an email body.
 *
 * The body is scanned line by line for signs of a quoted-message header:
 * - Outlook style: an optional separator line ("-----Original Message-----",
 *   "________") followed by header lines such as "From: ..." / "Sent: ...".
 * - Gmail style: a single line such as
 *   "On Sat, Jul 6, 2024 at 4:14 AM Dave <dave@example.com> wrote:".
 *
 * Everything before the first detected header is treated as the latest response.
 */
export default class OutlookEmailBodyParser {
  // Any single line terminator: CRLF, lone CR or lone LF.
  static #LINE_TERMINATOR = /\r\n|\r|\n/g;
  // Line starts with a short word, a colon and whitespace: "From: ", "To: ", "Cc: ", "Date: ".
  static #PROLOG_LINE = /^[A-Z]{2,8}:\s/i;
  // Line ends with a colon (Gmail's "... wrote:").
  static #ENDS_WITH_COLON = /^.+:$/;
  // General pattern of first.last@email.domain.
  static #EMAIL = /[a-zA-Z0-9._-]+@[a-zA-Z0-9-]+\.[a-zA-Z]{2,6}/;
  // Separator lines: -------, _______, -----original message----, _______PREV______.
  static #SEPARATOR_LINE = /^([-_]+(\w{0,15}(\s|\s{0})){0,3}[-_]+)$/;
  // A 4 digit number preceded by "/" or whitespace; only the digits are captured
  // so that both " 2024" and "/2024" can be parsed as a number.
  static #YEAR = /[/\s]([0-9]{4})/;
  // A time such as 4:44, 16:44, 4:44 AM, 4:44 PM; groups 1 and 2 are hour and minute.
  static #TIME = /\s([0-9]{1,2}):([0-9]{2})(\w{2}|\s\w{2}|:[0-9]{2}|.{0})/;

  /** @type {String} */
  #body = null;

  /**
   * Creates an instance of the Outlook Email Body Parser
   * Next you call:
   * - getLatestResponse() to get the most recent message
   * @param {String} body
   */
  constructor(body) {
    this.#body = body;
  }

  /**
   * Splits the body into lines, remembering where each line starts so the body
   * can later be cut at an exact line rather than at a text search hit.
   * @param {String} body
   * @returns {{text: String, start: Number}[]} lines without their terminators
   */
  static #splitLines(body) {
    const lines = [];
    let start = 0;
    for (const match of body.matchAll(OutlookEmailBodyParser.#LINE_TERMINATOR)) {
      lines.push({ text: body.slice(start, match.index), start });
      start = match.index + match[0].length;
    }
    lines.push({ text: body.slice(start), start });
    return lines;
  }

  /**
   * Counts how many Gmail-style and Outlook-style header hints a line contains.
   * @param {String} text a single line without its terminator
   * @returns {{gmail: Number, outlook: Number}}
   */
  static #scoreLine(text) {
    let gmail = 0;
    let outlook = 0;
    if (OutlookEmailBodyParser.#EMAIL.test(text)) {
      gmail++;
      outlook++;
    }
    if (OutlookEmailBodyParser.#PROLOG_LINE.test(text)) outlook++;
    if (OutlookEmailBodyParser.#ENDS_WITH_COLON.test(text)) gmail++;
    const yearMatch = OutlookEmailBodyParser.#YEAR.exec(text);
    if (yearMatch) {
      // only accept numbers that look like a plausible year (1901 - 2099)
      const year = Number.parseInt(yearMatch[1], 10);
      if (year > 1900 && year < 2100) {
        gmail++;
        outlook++;
      }
    }
    const timeMatch = OutlookEmailBodyParser.#TIME.exec(text);
    if (timeMatch) {
      // the groups are digits only, so only the upper bounds need checking
      const hour = Number.parseInt(timeMatch[1], 10);
      const minute = Number.parseInt(timeMatch[2], 10);
      if (hour <= 23 && minute <= 59) {
        gmail++;
        outlook++;
      }
    }
    return { gmail, outlook };
  }

  /**
   * Offset at which to cut the body for a boundary line, or -1 for "do not cut".
   * A missing or empty boundary line never produces a cut.
   * @param {{text: String, start: Number} | null} line
   * @returns {Number}
   */
  static #cutOffset(line) {
    return line?.text ? line.start : -1;
  }

  /**
   * Returns the latest response: the part of the body that precedes the first
   * detected quoted-message header, or the whole body when none is detected.
   * A null/undefined body yields an empty string.
   * @returns {String}
   */
  getLatestResponse = () => {
    const body = this.#body == null ? "" : String(this.#body);
    let candidateLines = 0; // header-looking lines seen so far (see reset rules below)
    let followsSeparator = false; // true only for the line right after a separator line
    let boundary = null; // line at which the body is cut if the next lines confirm a header
    let cutAt = -1;
    for (const line of OutlookEmailBodyParser.#splitLines(body)) {
      if (OutlookEmailBodyParser.#SEPARATOR_LINE.test(line.text)) {
        followsSeparator = true;
        candidateLines++;
        boundary = line;
        continue;
      }
      const score = OutlookEmailBodyParser.#scoreLine(line.text);
      const gmailFoundCount = score.gmail;
      const outlookFoundCount = score.outlook + (followsSeparator ? 1 : 0);
      ////////////////////////////////////
      // VALIDATE OUTLOOK
      ////////////////////////////////////
      if (candidateLines >= 1 && outlookFoundCount > 2) {
        cutAt = OutlookEmailBodyParser.#cutOffset(boundary);
        break;
      }
      ////////////////////////////////////
      // VALIDATE GMAIL
      ////////////////////////////////////
      if (gmailFoundCount >= 4) {
        cutAt = OutlookEmailBodyParser.#cutOffset(line);
        break;
      }
      if (candidateLines === 1 && gmailFoundCount === 1) {
        cutAt = OutlookEmailBodyParser.#cutOffset(boundary);
        break;
      }
      ////////////////////////////////////
      // reset
      ////////////////////////////////////
      if (candidateLines === 3) candidateLines = 0;
      if (outlookFoundCount >= 2) candidateLines++;
      if (gmailFoundCount === 3) candidateLines++;
      if (candidateLines === 1) boundary = line;
      followsSeparator = false; // must be cleared here, after the checks above used it
    }
    return cutAt >= 0 ? body.substring(0, cutAt) : body;
  };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment