whitelynx/header-parse.js

## header-parse.js
//---------------------------------------------------------------------------------------------------------------------
// Parse (mostly) RFC822-compliant headers.
//
// This will parse header blocks that conform to [RFC822 Section 3][], as well as a few variations of that standard.
// One major deviation is that, unless strict mode is requested, this library treats '\r\n' and '\n' as the same, so
// headers separated by either will be parsed.
//
// [RFC822 Section 3]: https://tools.ietf.org/html/rfc822#section-3
//
// @module header-parse
//---------------------------------------------------------------------------------------------------------------------

/**
 * A parsed document.
 *
 * @typedef {object} HeaderParseDocument
 *
 * @property {?string} headerBlock - the raw, unparsed header block of the document, if one was present
 * @property {?Object.<string, string>} headers - the parsed headers, if headers were present and parsing was performed
 * @property {string} body - the body of the document
 */

//---------------------------------------------------------------------------------------------------------------------

// Patterns used by the parser, in two flavours:
//   - `strict`: only '\r\n' is accepted as a line break, as RFC822 requires.
//   - `loose`: a bare '\n' is accepted as a line break as well.
//
// Field names are matched with `[^\s:]+` rather than `\S+`: RFC822 does not allow ':' inside a field name, so the name
// has to end at the FIRST colon on the line. (With `\S+`, 'Location:http://host/' was split into the name
// 'Location:http' and the value '//host/', and every extra colon on a line gave `headerBlock` one more way to backtrack
// when the input had no terminating blank line.)
//
// NOTE: `header` and `fold` carry the `g` flag, so they keep state in `lastIndex` between `exec()` calls.
var regexes = {
    strict: {
        // Zero or more (possibly folded) header lines followed by an empty line; group 1 is the raw header block.
        headerBlock: /^((?:[^\s:]+:(?:.*\r\n[ \t])*.*\r\n)*)\r\n/,
        // One (unfolded) header line; group 1 is the field name, group 2 is the untrimmed field body.
        header: /^([^\s:]+):(.*)$/gm,
        // A line break followed by a space or tab (a fold); group 1 is the whitespace character to keep.
        fold: /\r\n([ \t])/g,
        // Surrounding whitespace; group 1 is the trimmed text (unset when there is nothing but whitespace).
        trim: /^\s*(.*\S)?\s*$/,
    },
    loose: {
        // Same as `strict.headerBlock`, but the '\r' of each line break is optional.
        headerBlock: /^((?:[^\s:]+:(?:.*\r?\n[ \t])*.*\r?\n)*)\r?\n/,
        // One (unfolded) header line; group 1 is the field name, group 2 is the untrimmed field body.
        header: /^([^\s:]+):(.*)$/gm,
        // Same as `strict.fold`, but the '\r' of the line break is optional.
        fold: /\r?\n([ \t])/g,
        // Surrounding whitespace; group 1 is the trimmed text (unset when there is nothing but whitespace).
        trim: /^\s*(.*\S)?\s*$/,
    },
};

//---------------------------------------------------------------------------------------------------------------------

/**
 * Split a document into its header block and its body, optionally parsing the headers as well.
 *
 * When the data does not start with a header block, the returned document only has a `body` (the whole input).
 *
 * @param {(string|Buffer)} data - the document; it is converted with `toString()` before matching
 * @param {boolean} strict - `true` for strict RFC822 compliance (don't treat `\n` without `\r` as line breaks)
 * @param {boolean} parse - `false` to only separate the header block from the body; headers are parsed when this is
 *          omitted (`undefined`) or truthy
 * @returns {HeaderParseDocument}
 */
function extractHeaderBlock(data, strict, parse)
{
    var text = data.toString();
    var patterns = strict ? regexes.strict : regexes.loose;
    var found = patterns.headerBlock.exec(text);

    // No header block at the start of the input: everything is body.
    if(!found)
    {
        return {body: text};
    } // end if

    var result = {
        headerBlock: found[1],
        // The full match also covers the blank separator line, so the body starts right after it.
        body: text.slice(found[0].length),
    };

    // Parsing is on by default; only an explicit falsy value (other than `undefined`) turns it off.
    var skipParsing = parse !== undefined && !parse;
    if(!skipParsing)
    {
        result.headers = parseHeaders(result.headerBlock, strict);
    } // end if

    return result;
} // end extractHeaderBlock

/**
 * Parse all headers out of the given header block data.
 *
 * Folded lines are unfolded first; then every `name:value` line is collected, with leading and trailing whitespace
 * removed from the value. If the same header name occurs more than once, the last occurrence wins.
 *
 * @param {(string|Buffer)} data
 * @param {boolean} strict - `true` for strict RFC822 compliance (don't treat `\n` without `\r` as line breaks)
 * @returns {Object.<string, string>} parsed headers
 */
function parseHeaders(data, strict)
{
    data = unfold(data, strict);
    var re = strict ? regexes.strict : regexes.loose;

    var headers = {};

    // `re.header` is a shared regex with the `g` flag, so `exec()` resumes from its `lastIndex`. Always start from the
    // beginning of the data, even if something else left the regex in the middle of a previous scan; otherwise leading
    // headers would be silently skipped.
    re.header.lastIndex = 0;

    var match;
    while((match = re.header.exec(data)) !== null)
    {
        headers[match[1]] = match[2].replace(re.trim, '$1');
    } // end while

    // The final failed `exec()` has reset `lastIndex` to 0 again.
    return headers;
} // end parseHeaders

/**
 * Join folded (continued) lines back onto the line they continue, as described in [RFC822 Section 3.1.1][].
 *
 * Every line break that is immediately followed by a space or tab is removed; the space or tab itself is kept.
 *
 * [RFC822 Section 3.1.1]: https://tools.ietf.org/html/rfc822#section-3.1.1
 *
 * @param {(string|Buffer)} data - the text to unfold; it is converted with `toString()` first
 * @param {boolean} strict - `true` for strict RFC822 compliance (don't treat `\n` without `\r` as line breaks)
 * @returns {string} unfolded data
 */
function unfold(data, strict)
{
    var text = data.toString();

    var patterns;
    if(strict)
    {
        patterns = regexes.strict;
    }
    else
    {
        patterns = regexes.loose;
    } // end if

    // '$1' re-inserts the whitespace character that followed the removed line break.
    return text.replace(patterns.fold, '$1');
} // end unfold

//---------------------------------------------------------------------------------------------------------------------

// Public API: the three parsing functions, plus the regexes they are built on.
module.exports = {};
module.exports.extractHeaderBlock = extractHeaderBlock;
module.exports.parseHeaders = parseHeaders;
module.exports.unfold = unfold;
module.exports.regexes = regexes;