Created
May 29, 2014 22:33
-
-
Save whitelynx/2e44e2af82bb9f51230d to your computer and use it in GitHub Desktop.
Parse (mostly) RFC822-compliant headers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//---------------------------------------------------------------------------------------------------------------------
// Parse (mostly) RFC822-compliant headers.
//
// This will parse header blocks that conform to [RFC822 Section 3][], as well as a few variations of that standard.
// One major deviation is that, unless strict mode is requested, this library treats '\r\n' and '\n' as the same, so
// headers separated by either will be parsed.
//
// [RFC822 Section 3]: https://tools.ietf.org/html/rfc822#section-3
//
// @module header-parse
//---------------------------------------------------------------------------------------------------------------------
/**
 * A parsed document.
 *
 * @typedef {object} HeaderParseDocument
 *
 * @property {?string} headerBlock - the raw, unparsed header block of the document, if one was present
 * @property {?Object.<string, string>} headers - the parsed headers, if headers were present and parsing was performed
 * @property {string} body - the body of the document
 */
//---------------------------------------------------------------------------------------------------------------------
// Regular expressions used by the parser, grouped by mode.
//
// `strict` only recognises CRLF as a line break; `loose` additionally accepts a bare LF.
//
// Field names are matched with `[^\s:]+` (no whitespace, no colon) so that a value containing further colons, such as
// `Host:localhost:8080`, is split at the FIRST colon instead of the last one.
//
// NOTE: `header` and `fold` carry the `g` flag and therefore keep `lastIndex` state between `exec()`/`test()` calls.
var regexes = {
    strict: {
        // Zero or more (possibly folded) header lines followed by an empty line; group 1 is the raw header block.
        headerBlock: /^((?:[^\s:]+:(?:.*\r\n[ \t])*.*\r\n)*)\r\n/,
        // A single (unfolded) header line; group 1 is the field name, group 2 the untrimmed field body.
        header: /^([^\s:]+):(.*)$/gm,
        // A line break followed by a space or tab (a "fold"); group 1 is the whitespace character to keep.
        fold: /\r\n([ \t])/g,
        // Matches a whole value; group 1 is the value without leading/trailing whitespace (unset if all blank).
        trim: /^\s*(.*\S)?\s*$/,
    },
    loose: {
        // Same as `strict.headerBlock`, but the `\r` of every line break is optional.
        headerBlock: /^((?:[^\s:]+:(?:.*\r?\n[ \t])*.*\r?\n)*)\r?\n/,
        // A single (unfolded) header line; group 1 is the field name, group 2 the untrimmed field body.
        header: /^([^\s:]+):(.*)$/gm,
        // Same as `strict.fold`, but the `\r` is optional.
        fold: /\r?\n([ \t])/g,
        // Matches a whole value; group 1 is the value without leading/trailing whitespace (unset if all blank).
        trim: /^\s*(.*\S)?\s*$/,
    },
};
//---------------------------------------------------------------------------------------------------------------------
/**
 * If the given data contains a header block, separate the headers and body.
 *
 * A header block is recognised only at the very start of the data and must be terminated by an empty line. When no
 * header block is found, the returned document contains only `body` (the whole input as a string).
 *
 * @param {(string|Buffer)} data - the document; converted with `toString()` before matching
 * @param {boolean} strict - `true` for strict RFC822 compliance (don't treat `\n` without `\r` as line breaks)
 * @param {boolean} parse - `false` to disable actually parsing headers (just separate the header block and the body);
 *          headers are parsed when this is omitted or truthy
 * @returns {HeaderParseDocument}
 */
function extractHeaderBlock(data, strict, parse)
{
    var text = data.toString();
    var re = strict ? regexes.strict : regexes.loose;

    var match = re.headerBlock.exec(text);
    if(!match)
    {
        // No header block at the start of the data: everything is body.
        return {body: text};
    } // end if

    var doc = {
        headerBlock: match[1],
        // match[0] also covers the empty line that terminates the header block, so the body starts right after it.
        body: text.slice(match[0].length),
    };

    // Parsing is opt-out: only an explicit falsy value other than `undefined` disables it.
    if(parse === undefined || parse)
    {
        doc.headers = parseHeaders(doc.headerBlock, strict);
    } // end if

    return doc;
} // end extractHeaderBlock
/**
 * Parse all headers out of the given header block data.
 *
 * Folded lines are unfolded first; each header value has its leading and trailing whitespace removed. If the same
 * header name occurs more than once, the last occurrence wins.
 *
 * @param {(string|Buffer)} data - the raw header block
 * @param {boolean} strict - `true` for strict RFC822 compliance (don't treat `\n` without `\r` as line breaks)
 * @returns {Object.<string, string>} parsed headers
 */
function parseHeaders(data, strict)
{
    var re = strict ? regexes.strict : regexes.loose;
    var unfolded = unfold(data, strict);
    var headers = {};

    // `re.header` is a shared, exported regex with the `g` flag, so it remembers where the previous `exec()` stopped.
    // Always start scanning from the beginning, regardless of what earlier users of the regex left behind.
    re.header.lastIndex = 0;

    var match;
    while((match = re.header.exec(unfolded)) !== null)
    {
        // match[1] is the field name, match[2] the untrimmed field body.
        headers[match[1]] = match[2].replace(re.trim, '$1');
    } // end while

    return headers;
} // end parseHeaders
/**
 * Unfold all folded lines in the given data. (as defined by [RFC822 Section 3.1.1][])
 *
 * Every line break that is immediately followed by a space or tab is removed; the space or tab itself is kept.
 *
 * [RFC822 Section 3.1.1]: https://tools.ietf.org/html/rfc822#section-3.1.1
 *
 * @param {(string|Buffer)} data - the data to unfold; converted with `toString()` first
 * @param {boolean} strict - `true` for strict RFC822 compliance (don't treat `\n` without `\r` as line breaks)
 * @returns {string} unfolded data
 */
function unfold(data, strict)
{
    var re = strict ? regexes.strict : regexes.loose;

    // '$1' re-inserts the whitespace character that followed the line break.
    return data.toString().replace(re.fold, '$1');
} // end unfold
//---------------------------------------------------------------------------------------------------------------------
module.exports = { | |
extractHeaderBlock: extractHeaderBlock, | |
parseHeaders: parseHeaders, | |
unfold: unfold, | |
regexes: regexes, | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks. Here made an Angular JS module: https://github.com/dbtek/angular-email-parser.