@bmcminn
Last active March 3, 2020 21:32
WIP: This script makes some sweeping generalizations about your page content, but it gives you a good rule-of-thumb approach to determining the prevalence of certain keywords in your writing.

Keyword Density Estimator

This is an overly simplistic and highly opinionated take on keyword density; it does not reflect any actual research into the nuances of SEO content optimization. You have been warned.

Getting started

Eventually this will be an NPM module, so things like npm i keyword-checker will work :P
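
Until then, the simplest way to try it is to save the module below as keywords.js next to your script and require it by relative path, exactly as the usage example does:

// assumes you saved the module shown below as keywords.js in the same folder
var keywords = require('./keywords');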

Settings

  • topWordCount: Returns the N most used words in your content. (default: 0, which returns all words)
  • ignore: An array of words to ignore in the calculations; these are appended to the built-in ignore list. (default: [])
  • ignoreDefaults: Flag to skip the built-in ignore word list and use only your own words. (default: false; see the example below)
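
For example (a sketch only; pageContent stands in for whatever string of text you want to analyze), a call that uses all three settings might look like this:

var keywords = require('./keywords');

// pageContent is assumed to be a string containing your page's text
var stats = keywords(pageContent, {
    topWordCount: 10        // only return the 10 most used words
,   ignoreDefaults: true    // drop the built-in ignore list...
,   ignore: [               // ...and ignore only these words instead
        'lorem'
    ,   'ipsum'
    ]
});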

Usage

Here is a basic use case wherein I read a sample file and parse its contents to determine its keyword density; the shape of each logged entry is shown after the code.

// devise our callback for when we read the file
function parseFileKeywords(err, content) {
    if (err) {
        console.error(err);
        return;
    }

    // run the keyword parser on our file contents
    var keywordsList = keywords(content, {
        ignore: [
            'word'
        ,   'pants'
        ,   'html5'
        ,   '2013'
        ,   'after'
        ]
    });

    // echo the keyword stats to the console
    keywordsList.forEach(function(word) {
        console.log(JSON.stringify(word));
    });
}


// require our module dependencies
var fs          = require('fs')
,   keywords    = require('./keywords')
;


fs.readFile('./readme.md', { encoding: 'utf8' }, parseFileKeywords);
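
Each entry in keywordsList is a plain object built by keywords.js below, so every logged line should look roughly like this (the numbers are made up for illustration):

{"word":"keyword","count":12,"density":"1.85%","relativeDensity":"4.32%"}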

TODO

  • Come up with a better name for this module
keywords.js

var _       = require('lodash')
,   chalk   = require('chalk')
;


var keywordAnalysis = function keywordAnalysis(content, options) {

    // configure options
    options = options || {};

    var BASE_OPTIONS = {
        topWordCount: 0         // returns top N words from parsed list, 0 for all words
    ,   ignore: []              // array list of words to ignore (appended to end of default ignore list)
    ,   ignoreDefaults: false   // forces module to only use custom ignore word list
    };

    // compose new options object with overrides
    options = Object.assign({}, BASE_OPTIONS, options);

    // configure ignored words list
    var BASE_IGNORE_WORDS = [
        'a'
    ,   'an'
    ,   'and'
    ,   'are'
    ,   'as'
    ,   'at'
    ,   'be'
    ,   'buy'
    ,   'by'
    ,   'can'
    ,   'cant'
    ,   'didnt'
    ,   'do'
    ,   'does'
    ,   'dont'
    ,   'each'
    ,   'else'
    ,   'etc'
    ,   'for'
    ,   'did'
    ,   'from'
    ,   'gave'
    ,   'get'
    ,   'h1'
    ,   'h2'
    ,   'h3'
    ,   'h4'
    ,   'h5'
    ,   'h6'
    ,   'h7'
    ,   'has'
    ,   'have'
    ,   'he'
    ,   'how'
    ,   'if'
    ,   'in'
    ,   'is'
    ,   'isnt'
    ,   'it'
    ,   'its'
    ,   'me'
    ,   'most'
    ,   'much'
    ,   'my'
    ,   'not'
    ,   'of'
    ,   'on'
    ,   'or'
    ,   'other'
    ,   'our'
    ,   'ours'
    ,   'out'
    ,   'own'
    ,   'p'
    ,   'put'
    ,   'puts'
    ,   'set'
    ,   'she'
    ,   'should'
    ,   'so'
    ,   'some'
    ,   'span'
    ,   't'
    ,   'than'
    ,   'that'
    ,   'the'
    ,   'their'
    ,   'theirs'
    ,   'then'
    ,   'they'
    ,   'theyre'
    ,   'to'
    ,   'us'
    ,   'use'
    ,   'via'
    ,   'was'
    ,   'we'
    ,   'were'
    ,   'what'
    ,   'who'
    ,   'whose'
    ,   'wont'
    ,   'you'
    ,   'your'
    ,   'youre'
    ];

    // if we're overriding the default ignore word list
    if (options.ignore.length > 0 && options.ignoreDefaults) {
        BASE_IGNORE_WORDS = [];
    }

    // merge our ignore lists
    options.ignore = BASE_IGNORE_WORDS.concat(options.ignore);

    /**
     * Removes duplicates, ensuring unique values.
     * @sauce  http://stackoverflow.com/a/1961068
     * @return {Array} the list with duplicate entries removed
     */
    function uniq(list) {
        var u = {}, a = [];
        for (var i = 0, l = list.length; i < l; ++i) {
            if (u.hasOwnProperty(list[i])) {
                continue;
            }
            a.push(list[i]);
            u[list[i]] = 1;
        }
        return a;
    }

    // strip duplicate words
    options.ignore = uniq(options.ignore);

    // we always need to ignore empty "words" upfront
    options.ignore.unshift('', '');
    options.ignore.push('');

    // convert our ignored word list to a string with each word wrapped by a pipe (|)
    // the pipe (|) makes string partial checks much more accurate to avoid greedy matches
    // (ex: |use|user| === |use| vs |user|)
    options.ignore = options.ignore.join('|');

    // parse content into a flat list of words
    var contentWords = content
        .replace(/https?:\/\/\S+/gi, ' ')                           // strip URLs before slashes get normalized away
        .replace(/[\s\t\\\/]{1,}/gi, ' ')                           // normalize white space, slashes, and backslashes
        .replace(/(\S)-(\S)/gi, '$1 $2')                            // break up hyphenated words
        .replace(/[(){}[\]+\—\-_!@#$%^&*.,?'":“”‘’><;`~]/gi, '')    // strip punctuation
        .replace(/\s{2,}/gi, ' ')                                   // collapse repeated spaces
        .split(' ')
    ;

    var wordCounts = {};

    // for each word in our content
    for (var i = contentWords.length - 1; i >= 0; i--) {
        var word = contentWords[i].toLowerCase();

        // ignore it if it's in our ignore list
        if (options.ignore.indexOf('|' + word + '|') > -1) {
            continue;
        }

        // add the word to our index if it doesn't exist already
        if (!wordCounts[word]) {
            wordCounts[word] = {
                word: word
            ,   count: 0
            };
        }

        // increment word count
        wordCounts[word].count++;
    }

    // determine each word's density (share of all parsed words) and its density
    // relative to the number of distinct, non-ignored words
    _.map(wordCounts, function(word) {
        var count = word.count;
        word.density         = (count / contentWords.length * 100).toFixed(2) + '%';
        word.relativeDensity = (count / _.size(wordCounts) * 100).toFixed(2) + '%';
        return word;
    });

    // sort our data by descending count
    wordCounts = _.reverse(_.sortBy(wordCounts, ['count']));

    if (options.topWordCount > 0) {
        wordCounts = _.slice(wordCounts, 0, options.topWordCount);
    }

    return wordCounts;
};

module.exports = exports = keywordAnalysis;

Readability estimators

/**
 * Estimates the total syllable count of a string of text using regex heuristics.
 * @sauce  https://github.com/EndaHallahan/syllabificate/blob/master/index.js
 * @param  {string} inString  text to analyze
 * @return {number}           total syllable count
 */
function countSyllables(inString) {
    let syllablesTotal = 0;
    let wordList = inString.match(/(?:(?:\w-\w)|[\wÀ-ÿ'’])+/g);
    if (wordList) {wordList.forEach((word) => {
        if (word === "'"||word==="’") {return;} //bandaid solution.
        if (word.length <= 2) {syllablesTotal += 1; return;} //quick return on short words
        let syllables = 0;
        if (word.endsWith("s'")||word.endsWith("s’")) {word = word.slice(0, -1);} //ending with s'
        if (word.endsWith("s's")||word.endsWith("s’s")) {word = word.slice(0, -2);} //ending with s's
        const cEndings = word.match(/(?<=\w{3})(side|\wess|(?<!ed)ly|ment|ship|board|ground|(?<![^u]de)ville|port|ful(ly)?|berry|box|nesse?|such|m[ae]n|wom[ae]n|anne)s?$/mi);
        if (cEndings) {word = word.replace(cEndings[0],"\n" + cEndings[0]);} //Splits into two words and evaluates them as such
        const cBeginnings = word.match(/^(ware|side(?![sd]$)|p?re(?!ach|agan|al|au)|[rf]ace(?!([sd]|tte)$)|place[^nsd])/mi);
        if (cBeginnings) {word = word.replace(cBeginnings[0],""); syllables++;}
        const esylp = word.match(/ie($|l|t|rg)|([cb]|tt|pp)le$|phe$|kle(s|$)|[^n]scien|sue|aybe$|[^aeiou]shed|[^lsoai]les$|([^e]r|g)ge$|(gg|ck|yw|etch)ed$|(sc|o)he$|seer|^re[eiuy]/gmi);
        if (esylp) {syllables += esylp.length;} //E clustered positive
        const esylm = word.match(/every|some([^aeiouyr]|$)|[^trb]ere(?!d|$|o|r|t|a[^v]|n|s|x)|[^g]eous|niet/gmi);
        if (esylm) {syllables -= esylm.length;} //E clustered negative
        const isylp = word.match(/rie[^sndfvtl]|(?<=^|[^tcs]|st)ia|siai|[^ct]ious|quie|[lk]ier|settli|[^cn]ien[^d]|[aeio]ing$|dei[tf]|isms?$/gmi);
        if (isylp) {syllables += isylp.length;} //I clustered positive
        const osylp = word.match(/nyo|osm(s$|$)|oinc|ored(?!$)|(^|[^ts])io|oale|[aeiou]yoe|^m[ia]cro([aiouy]|e)|roe(v|$)|ouel|^proa|oolog/gmi);
        if (osylp) {syllables += osylp.length;} //O clustered positive
        const osylm = word.match(/[^f]ore(?!$|[vcaot]|d$|tte)|fore|llio/gmi);
        if (osylm) {syllables -= osylm.length;} //O clustered negative
        const asylp = word.match(/asm(s$|$)|ausea|oa$|anti[aeiou]|raor|intra[ou]|iae|ahe$|dais|(?<!p)ea(l(?!m)|$)|(?<!j)ean|(?<!il)eage/gmi);
        if (asylp) {syllables += asylp.length;} //A clustered positive
        const asylm = word.match(/aste(?!$|ful|s$|r)|[^r]ared$/gmi);
        if (asylm) {syllables -= asylm.length;} //A clustered negative
        const usylp = word.match(/uo[^y]|[^gq]ua(?!r)|uen|[^g]iu|uis(?![aeiou]|se)|ou(et|ille)|eu(ing|er)|uye[dh]|nuine|ucle[aeiuy]/gmi);
        if (usylp) {syllables += usylp.length;} //U clustered positive
        const usylm = word.match(/geous|busi|logu(?!e|i)/gmi);
        if (usylm) {syllables -= usylm.length;} //U clustered negative
        const ysylp = word.match(/[ibcmrluhp]ya|nyac|[^e]yo|[aiou]y[aiou]|[aoruhm]ye(tt|l|n|v|z)|pye|dy[ae]|oye[exu]|lye[nlrs]|olye|aye(k|r|$|u[xr]|da)|saye\w|iye|wy[ae]|[^aiou]ying/gmi);
        if (ysylp) {syllables += ysylp.length;} //Y clustered positive
        const ysylm = word.match(/arley|key|ney$/gmi);
        if (ysylm) {syllables -= ysylm.length;} //Y clustered negative
        const essuffix = word.match(/((?<!c[hrl]|sh|[iszxgej]|[niauery]c|do)es$)/gmi);
        if (essuffix) {syllables--;} //es suffix
        const edsuffix = word.match(/([aeiouy][^aeiouyrdt]|[^aeiouy][^laeiouyrdtbm]|ll|bb|ield|[ou]rb)ed$|[^cbda]red$/gmi);
        if (edsuffix) {syllables--;} //ed suffix
        const csylp = word.match(/chn[^eai]|mc|thm/gmi);
        if (csylp) {syllables += csylp.length;} //Consonant clustered positive
        const eVowels = word.match(/[aiouy](?![aeiouy])|ee|e(?!$|-|[iua])/gmi);
        if (eVowels) {syllables += eVowels.length;} //Applicable vowel count (all but e at end of word)
        if (syllables <= 0) {syllables = 1;} //catch-all
        if (word.match(/[^aeiou]n['’]t$/i)) {syllables ++;} //ending in n't, but not en't
        if (word.match(/en['’]t$/i)) {syllables --;} //ending in en't
        syllablesTotal += syllables;
    });}
    return syllablesTotal;
}

/**
 * Counts the polysyllabic words (three or more syllables) in a string of text.
 * @sauce  https://github.com/EndaHallahan/syllabificate/blob/master/index.js
 * @param  {string} inString  text to analyze
 * @return {number}           count of polysyllabic words
 */
function countPolys(inString) {
    let polysTotal = 0;
    let wordList = inString.match(/(?:(?:\w-\w)|[\wÀ-ÿ'’])+/g);
    if (wordList) {wordList.forEach((word) => {
        if (word === "'"||word==="’") {return;} //bandaid solution.
        if (word.length <= 3) {return;} //quick return on short words
        let syllables = 0;
        if (word.endsWith("s'")||word.endsWith("s’")) {word = word.slice(0, -1);} //ending with s'
        if (word.endsWith("s's")||word.endsWith("s’s")) {word = word.slice(0, -2);} //ending with s's
        const cEndings = word.match(/(?<=\w{3})(side|\wess|(?<!ed)ly|ment|ship|board|ground|(?<![^u]de)ville|port|ful(ly)?|berry|box|nesse?|such|m[ae]n|wom[ae]n|horse|anne)s?$/mi);
        if (cEndings) {word = word.replace(cEndings[0],"\n" + cEndings[0]);} //Splits into two words and evaluates them as such
        const cBeginnings = word.match(/^(ware|side|p?re(?!ach|agan|al|au))/mi);
        if (cBeginnings) {word = word.replace(cBeginnings[0],""); syllables++;}
        const esylp = word.match(/ie($|l|t|rg)|([cb]|tt|pp)le$|phe$|kle(s|$)|[^n]scien|sue|aybe$|[^aeiou]shed|[^lsoai]les$|([^e]r|g)ge$|(gg|ck|yw|etch)ed$|(sc|o)he$|seer|^re[eiuy]/gmi);
        if (esylp) {syllables += esylp.length;} //E clustered positive
        const esylm = word.match(/every|some([^aeiouyr]|$)|[^trb]ere(?!d|$|o|r|t|a[^v]|n|s|x)|[^g]eous|niet/gmi);
        if (esylm) {syllables -= esylm.length;} //E clustered negative
        const isylp = word.match(/rie[^sndfvtl]|(?<=^|[^tcs]|st)ia|siai|[^ct]ious|quie|[lk]ier|settli|[^cn]ien[^d]|[aeio]ing$|dei[tf]|isms?$/gmi);
        if (isylp) {syllables += isylp.length;} //I clustered positive
        const osylp = word.match(/nyo|osm(s$|$)|oinc|ored(?!$)|(^|[^ts])io|oale|[aeiou]yoe|^m[ia]cro([aiouy]|e)|roe(v|$)|ouel|^proa|oolog/gmi);
        if (osylp) {syllables += osylp.length;} //O clustered positive
        const osylm = word.match(/[^f]ore(?!$|[vcaot]|d$|tte)|fore|llio/gmi);
        if (osylm) {syllables -= osylm.length;} //O clustered negative
        const asylp = word.match(/asm(s$|$)|ausea|oa$|anti[aeiou]|raor|intra[ou]|iae|ahe$|dais|(?<!p)ea(l(?!m)|$)|(?<!j)ean|(?<!il)eage/gmi);
        if (asylp) {syllables += asylp.length;} //A clustered positive
        const asylm = word.match(/aste(?!$|ful|s$|r)|[^r]ared$/gmi);
        if (asylm) {syllables -= asylm.length;} //A clustered negative
        const usylp = word.match(/uo[^y]|[^gq]ua(?!r)|uen|[^g]iu|uis(?![aeiou]|se)|ou(et|ille)|eu(ing|er)|uye[dh]|nuine|ucle[aeiuy]/gmi);
        if (usylp) {syllables += usylp.length;} //U clustered positive
        const usylm = word.match(/geous|busi|logu(?!e|i)/gmi);
        if (usylm) {syllables -= usylm.length;} //U clustered negative
        const ysylp = word.match(/[ibcmrluhp]ya|nyac|[^e]yo|[aiou]y[aiou]|[aoruhm]ye(tt|l|n|v|z)|pye|dy[ae]|oye[exu]|lye[nlrs]|olye|aye(k|r|$|u[xr]|da)|saye\w|iye|wy[ae]|[^aiou]ying/gmi);
        if (ysylp) {syllables += ysylp.length;} //Y clustered positive
        const ysylm = word.match(/arley|key|ney$/gmi);
        if (ysylm) {syllables -= ysylm.length;} //Y clustered negative
        const essuffix = word.match(/((?<!c[hrl]|sh|[iszxgej]|[niauery]c|do)es$)/gmi);
        if (essuffix) {syllables--;} //es suffix
        const edsuffix = word.match(/([aeiouy][^aeiouyrdt]|[^aeiouy][^laeiouyrdtbm]|ll|bb|ield|[ou]rb)ed$|[^cbda]red$/gmi);
        if (edsuffix) {syllables--;} //ed suffix
        const csylp = word.match(/chn[^eai]|mc|thm/gmi);
        if (csylp) {syllables += csylp.length;} //Consonant clustered positive
        const eVowels = word.match(/[aiouy](?![aeiouy])|ee|e(?!$|-|[iua])/gmi);
        if (eVowels) {syllables += eVowels.length;} //Applicable vowel count (all but e at end of word)
        if (syllables <= 0) {syllables = 1;} //catch-all
        if (word.match(/[^aeiou]n['’]t$/i)) {syllables ++;} //ending in n't, but not en't
        if (word.match(/en['’]t$/i)) {syllables --;} //ending in en't
        if (syllables >= 3) {polysTotal++;}
    });}
    return polysTotal;
}

function countLetters(text) {
    return text.replace(/[^\w\d]/gi, '').length
}

function countWords(text) {
    // split on runs of whitespace to count words
    return text.trim().split(/\s+/).length
}

function countSentences(text) {
    // rough heuristic: split on sentence-ending punctuation and skip empty trailing segments
    return text.split(/[.!?]+/).filter(s => s.trim().length > 0).length
}

function countComplexWords(text) {
    let words = text.trim().split(/\s+/)
    // words of 3+ syllables, not including common suffixes (-es, -ed, -ing)
    return words.filter(el => countSyllables(el) > 2).length
}

/**
 * Automated Readability Index: approximates the US grade level needed to comprehend the text.
 * @sauce  https://en.wikipedia.org/wiki/Automated_readability_index
 * @param  {string} text  text to analyze
 * @return {Array}        [ score, grade ]
 */
function ARI(text) {
    let letters = countLetters(text)
    let words = countWords(text)
    let sentences = countSentences(text)
    const A = (letters / words)
    const B = (words / sentences)
    const score = Math.round((4.71 * A) + (0.5 * B) - 21.43)
    const grade = score // ARI reports a grade level directly
    return [ score, grade ]
}

/**
 * Flesch-Kincaid reading ease score, mapped to an approximate US grade level.
 * @sauce  https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests
 * @param  {string} text  text to analyze
 * @return {Array}        [ score, grade ]
 */
function FleschKincaid(text) {
    let words = countWords(text)
    let sentences = countSentences(text)
    let syllables = countSyllables(text)
    const A = (words / sentences)
    const B = (syllables / words)
    const score = Math.round(206.835 - (1.015 * A) - (84.6 * B))
    let grade = 0
    if (score >= 90) { grade = 5 }
    else if ( 90 > score && score >= 80) { grade = 6 }
    else if ( 80 > score && score >= 70) { grade = 7 }
    else if ( 70 > score && score >= 65) { grade = 8 }
    else if ( 65 > score && score >= 60) { grade = 9 }
    else if ( 60 > score && score >= 57) { grade = 10 }
    else if ( 57 > score && score >= 53) { grade = 11 }
    else if ( 53 > score && score >= 50) { grade = 12 }
    else if ( 50 > score && score >= 40) { grade = 13 }
    else if ( 40 > score && score >= 30) { grade = 14 }
    else if ( 30 > score && score >= 20) { grade = 15 }
    else if ( 20 > score && score >= 10) { grade = 16 }
    else if ( 10 > score && score >= 0) { grade = 17 }
    return [ score, grade ]
}

/**
 * Gunning fog index: estimates the years of formal education needed to understand the text.
 * @sauce  https://en.wikipedia.org/wiki/Gunning_fog_index
 * @param  {string} text  text to analyze
 * @return {Array}        [ score, grade ]
 */
function GunningFogIndex(text) {
    let sentences = countSentences(text)
    let words = countWords(text)
    let complexWords = countComplexWords(text)
    // TODO: resolve missing criteria of max 100 words with complete sentences
    const A = (words / sentences)
    const B = (complexWords / words)
    const score = Math.round(0.4 * (A + (100 * B)))
    const grade = score
    return [ score, grade ]
}

/**
 * SMOG grade: estimates the years of education needed to understand the text.
 * @sauce  https://en.wikipedia.org/wiki/SMOG
 * @param  {string} text  text to analyze
 * @return {Array}        [ score, grade ]
 */
function SMOG(text) {
    const polysyllables = countPolys(text)
    const sentences = countSentences(text)
    // TODO: resolve missing criteria of max 100 words with complete sentences
    const A = (30 / sentences)
    const score = Math.round(1.0430 * Math.sqrt(polysyllables * A) + 3.1291)
    const grade = score
    return [ score, grade ]
}

/**
 * Fry readability formula (not yet implemented).
 * @sauce  https://en.wikipedia.org/wiki/Fry_readability_formula
 * @param  {string} text  text to analyze
 * @return {Array}        [ score, grade ]
 */
function FryIndex(text) {
    let letters = countLetters(text)
    let words = countWords(text)
    let sentences = countSentences(text)
    const L = (letters / words * 100)
    const S = (sentences / words * 100)
    // TODO: implement this algorithm; need some calculus to derive the overlaps in the Fry graph
    const score = null
    const grade = score
    return [ score, grade ]
}

/**
 * Coleman-Liau index: approximates the US grade level needed to comprehend the text.
 * @sauce  https://en.wikipedia.org/wiki/Coleman%E2%80%93Liau_index
 * @param  {string} text  text to analyze
 * @return {Array}        [ score, grade ]
 */
function ColemanLiauIndex(text) {
    let letters = countLetters(text)
    let words = countWords(text)
    let sentences = countSentences(text)
    const L = (letters / words * 100)   // average number of letters per 100 words
    const S = (sentences / words * 100) // average number of sentences per 100 words
    const score = Math.round( (0.0588 * L) - (0.296 * S) - 15.8 )
    const grade = score
    return [ score, grade ]
}

var text = "The automated readability index (ARI) is a readability test for English texts, designed to gauge the understandability of a text. Like the Flesch–Kincaid grade level, Gunning fog index, SMOG index, Fry readability formula, and Coleman–Liau index, it produces an approximate representation of the US grade level needed to comprehend the text."
console.log('ARI :', ARI(text))
console.log('FleschKincaid :', FleschKincaid(text))
console.log('GunningFogIndex :', GunningFogIndex(text))
console.log('SMOG :', SMOG(text))
console.log('ColemanLiauIndex :', ColemanLiauIndex(text))
console.log('FryIndex :', FryIndex(text))