Last active
October 1, 2019 00:52
-
-
Save humphd/8dcfcadd45befbca0c5c6240f770ed52 to your computer and use it in GitHub Desktop.
Sample Email Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// NOTE: updated to fix some bugs on Mon, Sept 30, 2019
/**
 * Constructor function for an Email object.
 *
 * Parses the raw text of an email into its headers and body. The header
 * section ends at the first blank line; everything after that line is the
 * body, including any later blank lines. If there is no blank line, the whole
 * text is treated as headers and the body is the empty string.
 *
 * The resulting instance has:
 *   - `headers`: an object mapping lower-cased header names to their values.
 *     When a header name occurs more than once, the last occurrence wins.
 *   - `body`: the body text.
 *   - `from`, `to`, `subject`, `date`: copies of those headers, set only when
 *     the header is present.
 *
 * @param {String} rawEmail - text of a raw email, with headers and body
 * @throws {TypeError} if rawEmail is not a string
 */
function Email(rawEmail) {
  if (typeof rawEmail !== 'string') {
    throw new TypeError('Email expects the raw email text as a string');
  }

  // Step 1: split the raw text into header and body at the FIRST blank line
  // only. Splitting on every blank line would drop all but the first
  // paragraph of the body. Both \n and \r\n line endings are accepted.
  const separator = /\r?\n\r?\n/.exec(rawEmail);
  const headerString = separator
    ? rawEmail.slice(0, separator.index)
    : rawEmail;
  const bodyString = separator
    ? rawEmail.slice(separator.index + separator[0].length)
    : '';

  // Step 2: walk the header lines and collect name/value pairs.
  const headers = {};
  let lastName = null;
  headerString.split(/\r?\n/).forEach((line) => {
    // A line that starts with whitespace continues the previous header
    // (a "folded" header), so append it to that header's value.
    if (lastName !== null && /^[ \t]/.test(line)) {
      headers[lastName] = `${headers[lastName]} ${line.trim()}`.trim();
      return;
    }

    // Split on the first colon only: values such as dates ("09:12:30") or
    // subjects ("Re: hello") legitimately contain colons of their own.
    const colonIndex = line.indexOf(':');
    if (colonIndex === -1) {
      // Not a "name: value" line; ignore it rather than store `undefined`.
      return;
    }

    // Header names are lower-cased to make them easier to request later.
    const name = line.slice(0, colonIndex).trim().toLowerCase();
    if (name === '') {
      return;
    }
    headers[name] = line.slice(colonIndex + 1).trim();
    lastName = name;
  });

  // Step 3: copy the commonly needed headers onto the instance. The arrow
  // function keeps `this` bound to the instance inside forEach().
  ['from', 'to', 'subject', 'date'].forEach((keyword) => {
    // Only bother if this header is present on the email (an empty value
    // still counts as present).
    if (headers[keyword] !== undefined) {
      this[keyword] = headers[keyword];
    }
  });

  // Step 4: store the body and headers on the instance so that we don't
  // lose them when the constructor function exits.
  this.body = bodyString;
  this.headers = headers;
}
/**
 * The shared toRaw() method takes an email object and turns it back
 * into a raw email string.
 *
 * Each entry of `this.headers` becomes one "name: value" line, in the
 * object's own key order. The header lines are followed by a blank line and
 * then `this.body`. Headers whose value is undefined or null are left out,
 * and a missing body is written as the empty string, so the text
 * "undefined" never leaks into the output.
 *
 * @returns {String} the raw text of the email
 */
Email.prototype.toRaw = function() {
  // Step 1: turn every [name, value] entry into a "name: value" line,
  // skipping entries that carry no value.
  const headerLines = Object.entries(this.headers)
    .filter(([, value]) => value !== undefined && value !== null)
    .map(([name, value]) => `${name}: ${value}`);

  // Step 2: join the header lines back into a single string separated by \n
  const headerString = headerLines.join('\n');

  // Step 3: combine the header string and body with a blank line between
  const hasBody = this.body !== undefined && this.body !== null;
  const body = hasBody ? this.body : '';
  return `${headerString}\n\n${body}`;
};
/**
 * Email Sample 1
 *
 * A short message with five headers. The blank line separates the headers
 * from the body.
 */
const rawEmail1 = `Return-Path: <DUCKWARE@oregon.uoregon.edui>
Date: Mon, 21 Oct 2002 09:12:30 -1000
From: taylors27888p70@hotmail.com
Subject: *****SPAM***** #1 DIET PILL! LOSE 10-15 LBS PER WEEK
Sender: DUCKWARE@oregon.uoregon.edui

The text of the e-mail message is normally shown here`;

// Parse the sample, then print the parsed object and the text rebuilt from it.
const email1 = new Email(rawEmail1);
console.log(email1);
console.log(email1.toRaw());
/**
 * Email Sample 2
 *
 * A message with MIME headers. The blank line separates the headers from
 * the body.
 */
const rawEmail2 = `To: <bc.person@bc.edu>
Subject: NERCOMP - Security Training and Risk Assessment - Feb 10
MIME-Version: 1.0
From: <events@nercomp.org>
Content-Type: text/html; charset="ISO-8859-1"
Content-Transfer-Encoding: quoted-printable
Message-ID: <E1W1GyA-0008Uc-2Q@prod3.jjcbigideas.com>
Date: Thu, 9 Jan 2014 08:52:26 -0600
Return-Path: bounces+836485-6dca-bc.person=bc.edu@outreach.nercomp.org

This is another example email.`;

// Parse the sample, then print the parsed object and the text rebuilt from it.
const email2 = new Email(rawEmail2);
console.log(email2);
console.log(email2.toRaw());
/**
 * Email Sample 3
 *
 * A message with a long header section, including repeated "Received"
 * headers and signature headers. The blank line separates the headers from
 * the body.
 */
const rawEmail3 = `From: Media Temple user (mt.kb.user@gmail.com)
Subject: article: How to Trace a Email
Date: January 25, 2011 3:30:58 PM PDT
To: user@example.com
Return-Path: <mt.kb.user@gmail.com>
Envelope-To: user@example.com
Delivery-Date: Tue, 25 Jan 2011 15:31:01 -0700
Received: from po-out-1718.google.com ([72.14.252.155]:54907) by cl35.gs01.gridserver.com with esmtp (Exim 4.63) (envelope-from <mt.kb.user@gmail.com>) id 1KDoNH-0000f0-RL for user@example.com; Tue, 25 Jan 2011 15:31:01 -0700
Received: by po-out-1718.google.com with SMTP id y22so795146pof.4 for <user@example.com>; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Received: by 10.141.116.17 with SMTP id t17mr3929916rvm.251.1214951458741; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Received: by 10.140.188.3 with HTTP; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Dkim-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:message-id:date:from:to :subject:mime-version:content-type; bh=+JqkmVt+sHDFIGX5jKp3oP18LQf10VQjAmZAKl1lspY=; b=F87jySDZnMayyitVxLdHcQNL073DytKRyrRh84GNsI24IRNakn0oOfrC2luliNvdea LGTk3adIrzt+N96GyMseWz8T9xE6O/sAI16db48q4Iqkd7uOiDvFsvS3CUQlNhybNw8m CH/o8eELTN0zbSbn5Trp0dkRYXhMX8FTAwrH0=
Domainkey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=message-id:date:from:to:subject:mime-version:content-type; b=wkbBj0M8NCUlboI6idKooejg0sL2ms7fDPe1tHUkR9Ht0qr5lAJX4q9PMVJeyjWalH 36n4qGLtC2euBJY070bVra8IBB9FeDEW9C35BC1vuPT5XyucCm0hulbE86+uiUTXCkaB 6ykquzQGCer7xPAcMJqVfXDkHo3H61HM9oCQM=
Message-Id: <c8f49cec0807011530k11196ad4p7cb4b9420f2ae752@mail.gmail.com>
Mime-Version: 1.0
Content-Type: multipart/alternative; boundary="----=_Part_3927_12044027.1214951458678"
X-Spam-Status: score=3.7 tests=DNS_FROM_RFC_POST, HTML_00_10, HTML_MESSAGE, HTML_SHORT_LENGTH version=3.1.7
X-Spam-Level: ***

This is a KnowledgeBase article that provides information on how to find email headers and use the data to trace a email. See https://mediatemple.net/community/products/dv/204643950/understanding-an-email-header.`;

// Parse the sample, then print the parsed object and the text rebuilt from it.
const email3 = new Email(rawEmail3);
console.log(email3);
console.log(email3.toRaw());
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Email Sample 1
 *
 * Raw text of a short message with five headers. The blank line separates
 * the headers from the body.
 */
const email1 = `Return-Path: <DUCKWARE@oregon.uoregon.edui>
Date: Mon, 21 Oct 2002 09:12:30 -1000
From: taylors27888p70@hotmail.com
Subject: *****SPAM***** #1 DIET PILL! LOSE 10-15 LBS PER WEEK
Sender: DUCKWARE@oregon.uoregon.edui

The text of the e-mail message is normally shown here`;
/**
 * Email Sample 2
 *
 * Raw text of a message with MIME headers. The blank line separates the
 * headers from the body.
 */
const email2 = `To: <bc.person@bc.edu>
Subject: NERCOMP - Security Training and Risk Assessment - Feb 10
MIME-Version: 1.0
From: <events@nercomp.org>
Content-Type: text/html; charset="ISO-8859-1"
Content-Transfer-Encoding: quoted-printable
Message-ID: <E1W1GyA-0008Uc-2Q@prod3.jjcbigideas.com>
Date: Thu, 9 Jan 2014 08:52:26 -0600
Return-Path: bounces+836485-6dca-bc.person=bc.edu@outreach.nercomp.org

This is another example email.`;
/**
 * Email Sample 3
 *
 * Raw text of a message with a long header section, including repeated
 * "Received" headers and signature headers. The blank line separates the
 * headers from the body.
 */
const email3 = `From: Media Temple user (mt.kb.user@gmail.com)
Subject: article: How to Trace a Email
Date: January 25, 2011 3:30:58 PM PDT
To: user@example.com
Return-Path: <mt.kb.user@gmail.com>
Envelope-To: user@example.com
Delivery-Date: Tue, 25 Jan 2011 15:31:01 -0700
Received: from po-out-1718.google.com ([72.14.252.155]:54907) by cl35.gs01.gridserver.com with esmtp (Exim 4.63) (envelope-from <mt.kb.user@gmail.com>) id 1KDoNH-0000f0-RL for user@example.com; Tue, 25 Jan 2011 15:31:01 -0700
Received: by po-out-1718.google.com with SMTP id y22so795146pof.4 for <user@example.com>; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Received: by 10.141.116.17 with SMTP id t17mr3929916rvm.251.1214951458741; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Received: by 10.140.188.3 with HTTP; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Dkim-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:message-id:date:from:to :subject:mime-version:content-type; bh=+JqkmVt+sHDFIGX5jKp3oP18LQf10VQjAmZAKl1lspY=; b=F87jySDZnMayyitVxLdHcQNL073DytKRyrRh84GNsI24IRNakn0oOfrC2luliNvdea LGTk3adIrzt+N96GyMseWz8T9xE6O/sAI16db48q4Iqkd7uOiDvFsvS3CUQlNhybNw8m CH/o8eELTN0zbSbn5Trp0dkRYXhMX8FTAwrH0=
Domainkey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=message-id:date:from:to:subject:mime-version:content-type; b=wkbBj0M8NCUlboI6idKooejg0sL2ms7fDPe1tHUkR9Ht0qr5lAJX4q9PMVJeyjWalH 36n4qGLtC2euBJY070bVra8IBB9FeDEW9C35BC1vuPT5XyucCm0hulbE86+uiUTXCkaB 6ykquzQGCer7xPAcMJqVfXDkHo3H61HM9oCQM=
Message-Id: <c8f49cec0807011530k11196ad4p7cb4b9420f2ae752@mail.gmail.com>
Mime-Version: 1.0
Content-Type: multipart/alternative; boundary="----=_Part_3927_12044027.1214951458678"
X-Spam-Status: score=3.7 tests=DNS_FROM_RFC_POST, HTML_00_10, HTML_MESSAGE, HTML_SHORT_LENGTH version=3.1.7
X-Spam-Level: ***

This is a KnowledgeBase article that provides information on how to find email headers and use the data to trace a email. See https://mediatemple.net/community/products/dv/204643950/understanding-an-email-header.`;
/**
 * Simple Email Class
 *
 * Starter skeleton: the constructor accepts the raw text of an email but
 * does not parse or store anything yet. The remaining work is listed in the
 * TODO comments below.
 *
 * @param {String} text - raw text of an email, with headers and body
 */
function Email(text) {
  // TODO: parse text into header and body
}

// TODO: add methods for reading the headers and body.
// TODO: also add common things like From, To, Subject, Body
// TODO: add method for parsing/validating a simple email address
// TODO: add method to get back a raw dump of the text of the email
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment