Created
July 6, 2024 04:14
-
-
Save davecra/40c6b42237919eb5ef7a9dbf0afcfbef to your computer and use it in GitHub Desktop.
Outlook Email Body Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Heuristically separates the newest reply in an email body from the quoted
 * thread underneath it (Outlook-style "-----Original Message-----" / header
 * blocks and Gmail-style "On <date> ... wrote:" attribution lines).
 */
export default class OutlookEmailBodyParser {
  /**
   * Raw email body exactly as passed to the constructor.
   * @type {String}
   */
  #body = null;

  /**
   * Creates an instance of the Outlook Email Body Parser.
   * Next you call:
   * - getLatestResponse() to get the most recent message
   * @param {String} body - plain-text email body; LF, CRLF and CR line endings are accepted
   */
  constructor(body) {
    this.#body = body;
  }

  /**
   * Returns the latest response: everything in the body that precedes the
   * first line recognised as the start of a quoted/forwarded message.
   *
   * Each line is scored against two profiles (Outlook and Gmail) using the
   * signals below; when a profile's threshold is reached, the body is cut at
   * the line that started the match.
   *
   * @returns {String} the text before the detected break line, the whole body
   *   when no break is detected, or "" when the body is not a string.
   */
  getLatestResponse = () => {
    const body = this.#body;
    // A missing body has no latest response; avoid a TypeError on null/undefined.
    if (typeof body !== "string") return "";

    // Normalize CRLF and lone CR to LF. CRLF must collapse to ONE newline:
    // turning it into two would insert blank lines that reset the
    // separator-line tracking below.
    const lines = body.replace(/\r\n?/g, "\n").split("\n");

    // any line that starts with a word, a colon and a space, like From: , To: , Cc: , Date:
    const prologLine = /(^[A-Z]{2,8}:\s)/i;
    // any line that ends with a colon
    const lineEndsWithColon = /^.+(:)$/;
    // covers general patterns of first.last@email.domain
    const emailRegex = /[a-zA-Z0-9._-]+@[a-zA-Z0-9-]+\.[a-zA-Z]{2,6}/;
    // covers -------, _______, -----original message----, _______PREV______
    const breakRegex = /^([-_]+(\w{0,15}(\s|\s{0})){0,3}[-_]+)$/;
    // a 4 digit number preceded by "/" or whitespace; only the digits are
    // captured so that "7/5/2024" parses as 2024 instead of NaN
    const containsYear = /[/\s]([0-9]{4})/;
    // looks for a time in formats 4:44, 16:44, 4:44 AM, 4:44 PM
    // the capture groups are then range-checked
    const containsTime = /\s([0-9]{1,2}):([0-9]{2})(\w{2}|\s\w{2}|:[0-9]{2}|.{0})/;

    /** True when the line holds a 4 digit number between 1901 and 2099. */
    const hasPlausibleYear = (line) => {
      const match = containsYear.exec(line);
      if (match === null) return false;
      const year = Number.parseInt(match[1], 10);
      return year > 1900 && year < 2100;
    };

    /** True when the line holds an h:mm / hh:mm time with hour <= 23 and minute <= 59. */
    const hasValidTime = (line) => {
      const match = containsTime.exec(line);
      if (match === null) return false;
      // Both groups are digit-only, so the values are never negative or NaN.
      const hour = Number.parseInt(match[1], 10);
      const minute = Number.parseInt(match[2], 10);
      return hour <= 23 && minute <= 59;
    };

    // -- START --
    let breakOnLine = "";
    let foundLineBreak = false; // previous line was a separator (----- / _____)
    let candidateLines = 0;
    let prevLine = "";

    for (const line of lines) {
      let gmailFoundCount = 0;
      let outlookFoundCount = 0;

      if (breakRegex.test(line)) {
        // Separator line: remember it and let the NEXT line confirm it.
        foundLineBreak = true;
        candidateLines++;
        prevLine = line;
        continue;
      }

      if (emailRegex.test(line)) {
        gmailFoundCount++;
        outlookFoundCount++;
      }
      if (prologLine.test(line)) outlookFoundCount++;
      if (lineEndsWithColon.test(line)) gmailFoundCount++;
      if (hasPlausibleYear(line)) {
        gmailFoundCount++;
        outlookFoundCount++;
      }
      if (hasValidTime(line)) {
        gmailFoundCount++;
        outlookFoundCount++;
      }
      if (foundLineBreak) outlookFoundCount++;

      ////////////////////////////////////
      // VALIDATE OUTLOOK
      ////////////////////////////////////
      if (candidateLines >= 1 && outlookFoundCount > 2) {
        breakOnLine = prevLine;
        break;
      }

      ////////////////////////////////////
      // VALIDATE GMAIL
      ////////////////////////////////////
      if (gmailFoundCount >= 4) {
        breakOnLine = line;
        break;
      }
      if (candidateLines === 1 && gmailFoundCount === 1) {
        breakOnLine = prevLine;
        break;
      }

      ////////////////////////////////////
      // reset
      ////////////////////////////////////
      if (candidateLines === 3) candidateLines = 0;
      if (outlookFoundCount >= 2) candidateLines++;
      if (gmailFoundCount === 3) candidateLines++;
      if (candidateLines === 1) prevLine = line;
      foundLineBreak = false; // must be cleared here: it only applies to the line right after a separator
    }

    // NOTE: the cut position is the FIRST occurrence of the break line's text
    // in the original (un-normalized) body; an empty break line means "no cut".
    if (!breakOnLine) return body;
    return body.substring(0, body.indexOf(breakOnLine));
  };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment