Created
July 6, 2016 21:59
-
-
Save apaleslimghost/544237343be39ebe62e0e3082b1c1581 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const countBy = require('lodash.countby'); | |
const words = require('lodash.words'); | |
const includes = require('lodash.includes'); | |
const reject = require('lodash.reject'); | |
const assignWith = require('lodash.assignwith'); | |
const mapValues = require('lodash.mapvalues'); | |
const sumBy = require('lodash.sumby'); | |
const sum = require('lodash.sum'); | |
const values = require('lodash.values'); | |
const pairs = require('lodash.pairs'); | |
const size = require('lodash.size'); | |
const stopwords = require('stopwords').english; | |
/**
 * Builds a two-argument adder that tolerates missing operands.
 *
 * The returned function yields `a + b` when both operands are defined, the
 * defined operand when the other is `undefined`, and `f` only when both are
 * `undefined`. A genuine falsy result (such as a sum of 0) is returned as-is
 * instead of being replaced by `f`.
 *
 * @param {*} f - Value returned when neither operand is defined.
 * @returns {function(*, *): *} Adder with the fallback applied.
 */
const addFallback = f => (a, b) => {
	if (typeof a === 'undefined') {
		return typeof b === 'undefined' ? f : b;
	}
	return typeof b === 'undefined' ? a : a + b;
};
/**
 * Splits text into words, lowercases them, and drops every word that is
 * contained in `stopwords`.
 *
 * @param {string} text - Text to break into terms.
 * @returns {string[]} Lowercased non-stopword terms, in their original order.
 */
export const getTerms = (text) => {
	const lowercased = words(text).map(token => token.toLowerCase());
	return lowercased.filter(candidate => !includes(stopwords, candidate));
};
/**
 * Builds a tag -> term -> count matrix from tagged entries.
 *
 * For each entry, the occurrences of every term in `terms` are counted and
 * added to the running totals of each tag listed in `tags`.
 *
 * @param {Array<{terms: string[], tags: string[]}>} entries - Entries pairing a term list with its tags.
 * @returns {Object} Object keyed by tag; each value maps a term to its accumulated count.
 */
export const collateTerms = entries => {
	const hasOwn = Object.prototype.hasOwnProperty;

	// Only trust a running total that the row itself owns. Reading `row[term]`
	// for a term such as "constructor" would otherwise pick up an inherited
	// Object.prototype member and "add" the count to a function.
	const addToOwnCount = (current, added, term, row) => (hasOwn.call(row, term) ? current : 0) + added;

	return entries.reduce((matrix, {terms, tags}) => {
		// The counts depend only on the entry, so compute them once per entry
		// instead of once per tag.
		const counts = countBy(terms);

		tags.forEach(tag => {
			// Same own-property guard for tags named like Object.prototype members.
			// NOTE(review): a tag literally named "__proto__" is still not handled.
			const row = hasOwn.call(matrix, tag) ? matrix[tag] : {};
			matrix[tag] = assignWith(row, counts, addToOwnCount);
		});

		return matrix;
	}, {});
};
/**
 * Scores every tag in the matrix against a list of terms.
 *
 * A tag's score is the sum, over `terms`, of the count stored for that term in
 * the tag's vector; terms the vector does not own contribute 0.
 *
 * @param {string[]} terms - Terms to score.
 * @param {Object} matrix - Object keyed by tag whose values map terms to counts.
 * @returns {Object} Object with the same keys as `matrix`, each mapped to its score.
 */
export const getTagScores = (terms, matrix) => {
	const hasOwn = Object.prototype.hasOwnProperty;

	return mapValues(matrix, vec => {
		// Guard with an own-property check: `vec[term]` for a term such as
		// "constructor" would otherwise return an inherited function and turn
		// the sum into a string.
		const score = sumBy(terms, term => (hasOwn.call(vec, term) ? vec[term] : 0));
		return score || 0;
	});
};
/**
 * Arithmetic mean of a collection's values: the sum of its values divided by
 * its size.
 *
 * @param {Object|Array} c - Collection of numeric values.
 * @returns {number} Sum of the values divided by the number of entries.
 */
const mean = c => {
	const total = sum(values(c));
	const count = size(c);
	return total / count;
};
/**
 * Picks the tags whose score for the given text is not below the mean score
 * across all tags in the matrix.
 *
 * @param {string} text - Text to classify.
 * @param {Object} matrix - Tag/term count matrix passed through to getTagScores.
 * @returns {string[]} Names of the tags that are not scored below the mean.
 */
export const getLikelyTags = (text, matrix) => {
	const tagScores = getTagScores(getTerms(text), matrix);
	const averageScore = mean(tagScores);
	const scoredTags = pairs(tagScores);
	// Expressed as "not below" rather than ">=" so the comparison behaves the
	// same when either side is not an ordinary number.
	const keepers = scoredTags.filter(entry => !(entry[1] < averageScore));
	return keepers.map(entry => entry[0]);
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment