Skip to content

Instantly share code, notes, and snippets.

@humphd
Last active October 1, 2019 00:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save humphd/8dcfcadd45befbca0c5c6240f770ed52 to your computer and use it in GitHub Desktop.
Save humphd/8dcfcadd45befbca0c5c6240f770ed52 to your computer and use it in GitHub Desktop.
Sample Email Parser
// NOTE: updated to fix some bugs on Mon Sept 30, 2019
/**
 * Constructor function for an Email object.
 *
 * Parses a raw email into the following instance properties:
 *   - `headers`: plain object mapping lower-cased header names to their
 *     string values. If a header name occurs more than once (for example
 *     `Received`), the last occurrence wins.
 *   - `body`: everything after the first blank line, verbatim. It is
 *     `undefined` when the raw text contains no blank line at all.
 *   - `from`, `to`, `subject`, `date`: convenience copies of the matching
 *     headers, set only when the header is present and non-empty.
 *
 * Both `\n` and `\r\n` line endings are accepted.
 *
 * @param {String} rawEmail - text of a raw email, with headers and body
 * @throws {TypeError} if `rawEmail` is not a string
 */
function Email(rawEmail) {
  if (typeof rawEmail !== 'string') {
    throw new TypeError('Email: rawEmail must be a string');
  }

  // Step 1: split the raw email text into its two main parts: header and body.
  // The split occurs at the FIRST empty line only, so that blank lines inside
  // the body (e.g., between paragraphs) stay part of the body.
  const separator = /\r?\n\r?\n/.exec(rawEmail);
  let headerString = rawEmail;
  let bodyString; // stays undefined when there is no blank line (no body)
  if (separator) {
    headerString = rawEmail.slice(0, separator.index);
    bodyString = rawEmail.slice(separator.index + separator[0].length);
  }

  // Step 2: loop over all the header lines, and break each of them into a
  // header name and a header value. Store these key/value pairs on an
  // empty object.
  const headers = {};
  let lastName = null;
  headerString.split(/\r?\n/).forEach((line) => {
    // A line that starts with a space or tab continues ("folds") the previous
    // header, so glue it back onto that header's value.
    if (lastName !== null && /^[ \t]/.test(line)) {
      headers[lastName] += line;
      return;
    }

    // Split at the FIRST colon only: values such as dates ("09:12:30") or
    // subjects ("Re: hello") legitimately contain colons of their own.
    const colonIndex = line.indexOf(':');
    if (colonIndex === -1) {
      // Not a "name: value" line, so there is nothing meaningful to store.
      return;
    }

    // Whitespace around the separating colon is not part of the name or the
    // value. Header names are lower-cased to make them easier to request later.
    const name = line.slice(0, colonIndex).replace(/\s+$/, '').toLowerCase();
    const value = line.slice(colonIndex + 1).replace(/^\s+/, '');
    headers[name] = value;
    lastName = name;
  });

  // Step 3: add some convenience properties to the email object, copying
  // things we commonly need from the headers onto the email. Arrow functions
  // keep the constructor's `this`, so no alias for the instance is needed.
  ['from', 'to', 'subject', 'date'].forEach((keyword) => {
    // Only bother if this header is present on the email
    if (headers[keyword]) {
      this[keyword] = headers[keyword];
    }
  });

  // Step 4: store the body and headers on the instance object so that we
  // don't lose them when the constructor function exits.
  this.body = bodyString;
  this.headers = headers;
}

/**
 * The shared toRaw() method takes an email object and turns it back
 * into a raw email string.
 *
 * Header names are written in the lower-cased form stored on `headers`,
 * one "name: value" pair per line, followed by a blank line and the body.
 * When the email has no body (`body` is `undefined` or `null`), only the
 * header lines are returned.
 *
 * @returns {String} the raw text of the email
 */
Email.prototype.toRaw = function() {
  // Step 1: turn every [name, value] entry of the headers object into a
  // "name: value" line, and join the lines with \n.
  const headerString = Object.entries(this.headers)
    .map(([name, value]) => `${name}: ${value}`)
    .join('\n');

  // Step 2: without a body there is nothing to append. Returning early avoids
  // writing the literal text "undefined" after the headers.
  if (this.body === undefined || this.body === null) {
    return headerString;
  }

  // Step 3: combine the header lines and the body with a blank line
  return `${headerString}\n\n${this.body}`;
};
/**
 * Email Sample 1
 *
 * The empty line after the last header is required: it is the separator
 * at which the Email constructor splits the headers from the body.
 */
const rawEmail1 = `Return-Path: <DUCKWARE@oregon.uoregon.edui>
Date: Mon, 21 Oct 2002 09:12:30 -1000
From: taylors27888p70@hotmail.com
Subject: *****SPAM***** #1 DIET PILL! LOSE 10-15 LBS PER WEEK
Sender: DUCKWARE@oregon.uoregon.edui

The text of the e-mail message is normally shown here`;
// Parse the sample, then print the parsed object and its raw form.
let email1 = new Email(rawEmail1);
console.log(email1);
console.log(email1.toRaw());
/**
 * Email Sample 2
 *
 * The empty line after the last header is required: it is the separator
 * at which the Email constructor splits the headers from the body.
 */
const rawEmail2 = `To: <bc.person@bc.edu>
Subject: NERCOMP - Security Training and Risk Assessment - Feb 10
MIME-Version: 1.0
From: <events@nercomp.org>
Content-Type: text/html; charset="ISO-8859-1"
Content-Transfer-Encoding: quoted-printable
Message-ID: <E1W1GyA-0008Uc-2Q@prod3.jjcbigideas.com>
Date: Thu, 9 Jan 2014 08:52:26 -0600
Return-Path: bounces+836485-6dca-bc.person=bc.edu@outreach.nercomp.org

This is another example email.`;
// Parse the sample, then print the parsed object and its raw form.
let email2 = new Email(rawEmail2);
console.log(email2);
console.log(email2.toRaw());
/**
 * Email Sample 3
 *
 * A larger sample with many headers, including a repeated `Received`
 * header. The empty line after the last header is required: it is the
 * separator at which the Email constructor splits the headers from the body.
 */
const rawEmail3 = `From: Media Temple user (mt.kb.user@gmail.com)
Subject: article: How to Trace a Email
Date: January 25, 2011 3:30:58 PM PDT
To: user@example.com
Return-Path: <mt.kb.user@gmail.com>
Envelope-To: user@example.com
Delivery-Date: Tue, 25 Jan 2011 15:31:01 -0700
Received: from po-out-1718.google.com ([72.14.252.155]:54907) by cl35.gs01.gridserver.com with esmtp (Exim 4.63) (envelope-from <mt.kb.user@gmail.com>) id 1KDoNH-0000f0-RL for user@example.com; Tue, 25 Jan 2011 15:31:01 -0700
Received: by po-out-1718.google.com with SMTP id y22so795146pof.4 for <user@example.com>; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Received: by 10.141.116.17 with SMTP id t17mr3929916rvm.251.1214951458741; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Received: by 10.140.188.3 with HTTP; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Dkim-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:message-id:date:from:to :subject:mime-version:content-type; bh=+JqkmVt+sHDFIGX5jKp3oP18LQf10VQjAmZAKl1lspY=; b=F87jySDZnMayyitVxLdHcQNL073DytKRyrRh84GNsI24IRNakn0oOfrC2luliNvdea LGTk3adIrzt+N96GyMseWz8T9xE6O/sAI16db48q4Iqkd7uOiDvFsvS3CUQlNhybNw8m CH/o8eELTN0zbSbn5Trp0dkRYXhMX8FTAwrH0=
Domainkey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=message-id:date:from:to:subject:mime-version:content-type; b=wkbBj0M8NCUlboI6idKooejg0sL2ms7fDPe1tHUkR9Ht0qr5lAJX4q9PMVJeyjWalH 36n4qGLtC2euBJY070bVra8IBB9FeDEW9C35BC1vuPT5XyucCm0hulbE86+uiUTXCkaB 6ykquzQGCer7xPAcMJqVfXDkHo3H61HM9oCQM=
Message-Id: <c8f49cec0807011530k11196ad4p7cb4b9420f2ae752@mail.gmail.com>
Mime-Version: 1.0
Content-Type: multipart/alternative; boundary="----=_Part_3927_12044027.1214951458678"
X-Spam-Status: score=3.7 tests=DNS_FROM_RFC_POST, HTML_00_10, HTML_MESSAGE, HTML_SHORT_LENGTH version=3.1.7
X-Spam-Level: ***

This is a KnowledgeBase article that provides information on how to find email headers and use the data to trace a email. See https://mediatemple.net/community/products/dv/204643950/understanding-an-email-header.`;
// Parse the sample, then print the parsed object and its raw form.
let email3 = new Email(rawEmail3);
console.log(email3);
console.log(email3.toRaw());
/**
 * Email Sample 1
 *
 * Raw email text: header lines, then an empty line, then the body.
 * The empty line is what separates the headers from the body.
 */
const email1 = `Return-Path: <DUCKWARE@oregon.uoregon.edui>
Date: Mon, 21 Oct 2002 09:12:30 -1000
From: taylors27888p70@hotmail.com
Subject: *****SPAM***** #1 DIET PILL! LOSE 10-15 LBS PER WEEK
Sender: DUCKWARE@oregon.uoregon.edui

The text of the e-mail message is normally shown here`;
/**
 * Email Sample 2
 *
 * Raw email text: header lines, then an empty line, then the body.
 * The empty line is what separates the headers from the body.
 */
const email2 = `To: <bc.person@bc.edu>
Subject: NERCOMP - Security Training and Risk Assessment - Feb 10
MIME-Version: 1.0
From: <events@nercomp.org>
Content-Type: text/html; charset="ISO-8859-1"
Content-Transfer-Encoding: quoted-printable
Message-ID: <E1W1GyA-0008Uc-2Q@prod3.jjcbigideas.com>
Date: Thu, 9 Jan 2014 08:52:26 -0600
Return-Path: bounces+836485-6dca-bc.person=bc.edu@outreach.nercomp.org

This is another example email.`;
/**
 * Email Sample 3
 *
 * Raw email text with many headers (including a repeated `Received`
 * header): header lines, then an empty line, then the body. The empty
 * line is what separates the headers from the body.
 */
const email3 = `From: Media Temple user (mt.kb.user@gmail.com)
Subject: article: How to Trace a Email
Date: January 25, 2011 3:30:58 PM PDT
To: user@example.com
Return-Path: <mt.kb.user@gmail.com>
Envelope-To: user@example.com
Delivery-Date: Tue, 25 Jan 2011 15:31:01 -0700
Received: from po-out-1718.google.com ([72.14.252.155]:54907) by cl35.gs01.gridserver.com with esmtp (Exim 4.63) (envelope-from <mt.kb.user@gmail.com>) id 1KDoNH-0000f0-RL for user@example.com; Tue, 25 Jan 2011 15:31:01 -0700
Received: by po-out-1718.google.com with SMTP id y22so795146pof.4 for <user@example.com>; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Received: by 10.141.116.17 with SMTP id t17mr3929916rvm.251.1214951458741; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Received: by 10.140.188.3 with HTTP; Tue, 25 Jan 2011 15:30:58 -0700 (PDT)
Dkim-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:message-id:date:from:to :subject:mime-version:content-type; bh=+JqkmVt+sHDFIGX5jKp3oP18LQf10VQjAmZAKl1lspY=; b=F87jySDZnMayyitVxLdHcQNL073DytKRyrRh84GNsI24IRNakn0oOfrC2luliNvdea LGTk3adIrzt+N96GyMseWz8T9xE6O/sAI16db48q4Iqkd7uOiDvFsvS3CUQlNhybNw8m CH/o8eELTN0zbSbn5Trp0dkRYXhMX8FTAwrH0=
Domainkey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=message-id:date:from:to:subject:mime-version:content-type; b=wkbBj0M8NCUlboI6idKooejg0sL2ms7fDPe1tHUkR9Ht0qr5lAJX4q9PMVJeyjWalH 36n4qGLtC2euBJY070bVra8IBB9FeDEW9C35BC1vuPT5XyucCm0hulbE86+uiUTXCkaB 6ykquzQGCer7xPAcMJqVfXDkHo3H61HM9oCQM=
Message-Id: <c8f49cec0807011530k11196ad4p7cb4b9420f2ae752@mail.gmail.com>
Mime-Version: 1.0
Content-Type: multipart/alternative; boundary="----=_Part_3927_12044027.1214951458678"
X-Spam-Status: score=3.7 tests=DNS_FROM_RFC_POST, HTML_00_10, HTML_MESSAGE, HTML_SHORT_LENGTH version=3.1.7
X-Spam-Level: ***

This is a KnowledgeBase article that provides information on how to find email headers and use the data to trace a email. See https://mediatemple.net/community/products/dv/204643950/understanding-an-email-header.`;
/**
 * Simple Email Class
 *
 * Starter stub: the constructor body is intentionally empty and still has
 * to be implemented. As written, `new Email(text)` ignores `text` and
 * returns an object with no properties of its own.
 *
 * @param {String} text - presumably the raw text of an email (headers and
 *   body), like the samples in this file; the parameter is currently unused
 */
function Email(text) {
  // TODO: Parse text into header and body
}
// TODO: Add methods for reading the headers and body.
// TODO: Also add common things like From, To, Subject, Body
// TODO: Add method for parsing/validating a simple email address
// TODO: Add method to get back a raw dump of the text of the email
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment