Skip to content

Instantly share code, notes, and snippets.

@apaleslimghost
Created July 6, 2016 21:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save apaleslimghost/544237343be39ebe62e0e3082b1c1581 to your computer and use it in GitHub Desktop.
Save apaleslimghost/544237343be39ebe62e0e3082b1c1581 to your computer and use it in GitHub Desktop.
const countBy = require('lodash.countby');
const words = require('lodash.words');
const includes = require('lodash.includes');
const reject = require('lodash.reject');
const assignWith = require('lodash.assignwith');
const mapValues = require('lodash.mapvalues');
const sumBy = require('lodash.sumby');
const sum = require('lodash.sum');
const values = require('lodash.values');
const pairs = require('lodash.pairs');
const size = require('lodash.size');
const stopwords = require('stopwords').english;
/**
 * Creates a two-argument adder that tolerates missing operands.
 *
 * The returned function produces `a + b` when both operands are defined and
 * whichever operand is defined when only one is. If that result is falsy
 * (both operands undefined, or a result such as 0, NaN or ''), `f` is
 * returned in its place.
 *
 * @param {*} f - Value returned instead of a falsy result.
 * @returns {function(*, *): *} The adder with the fallback applied.
 */
const addFallback = f => (a, b) => {
  let combined;
  if (typeof a === 'undefined') {
    combined = b;
  } else if (typeof b === 'undefined') {
    combined = a;
  } else {
    combined = a + b;
  }
  return combined || f;
};
/**
 * Extracts the terms of a text: the text is split with lodash `words`, every
 * word is lower-cased, and words found in `stopwords` are dropped.
 *
 * @param {string} text - Text to split into terms.
 * @returns {string[]} Lower-cased words that are not stopwords, in order.
 */
export const getTerms = (text) => {
  const lowered = words(text).map(token => token.toLowerCase());
  return lowered.filter(candidate => !includes(stopwords, candidate));
};
/**
 * Builds a tag -> term -> count matrix from tagged entries.
 *
 * For each entry, the number of occurrences of each of its terms is added to
 * the vector of every tag the entry carries.
 *
 * @param {Array<{terms: Array, tags: Array}>} entries - Entries to collate;
 *   `terms` is counted with lodash `countBy`, `tags` is iterated with `forEach`.
 * @returns {Object<string, Object<string, number>>} Counts keyed by tag, then by term.
 */
export const collateTerms = entries => {
  const hasOwn = (object, key) => Object.prototype.hasOwnProperty.call(object, key);
  const add = addFallback(0);

  // Only own properties hold real counts. Without this guard a term such as
  // `constructor` would read the member inherited from Object.prototype and
  // "add" the new count to a function, storing a string instead of a number.
  const addOwnCount = (current, added, term, vector) =>
    add(hasOwn(vector, term) ? current : undefined, added);

  return entries.reduce((matrix, {terms, tags}) => {
    // The counts depend only on the entry, so compute them once, not per tag.
    const counts = countBy(terms);
    tags.forEach(tag => {
      // Same rule for tags: a tag named like an inherited member (e.g.
      // `constructor`) starts from a fresh vector instead of reusing, and
      // mutating, the inherited value.
      const vector = hasOwn(matrix, tag) ? matrix[tag] : {};
      matrix[tag] = assignWith(vector, counts, addOwnCount);
    });
    return matrix;
  }, {});
};
/**
 * Scores every tag of a matrix against a list of terms.
 *
 * A tag's score is the sum of the counts its vector holds for the given
 * terms; terms the vector has no own entry for contribute 0.
 *
 * @param {Array} terms - Terms to look up in each tag vector.
 * @param {Object<string, Object<string, number>>} matrix - Tag -> term -> count matrix.
 * @returns {Object<string, number>} Score per tag (0 when the sum is falsy).
 */
export const getTagScores = (terms, matrix) => mapValues(matrix, vec => {
  // Look up own properties only: a plain `vec[term]` returns inherited members
  // (e.g. `constructor`) for terms the vector never counted, which would turn
  // the numeric score into a string.
  const countFor = term =>
    (Object.prototype.hasOwnProperty.call(vec, term) ? vec[term] : 0);
  return sumBy(terms, countFor) || 0;
});
/**
 * Averages the values of a collection: the lodash `sum` of its `values`
 * divided by its `size`. There is no guard for an empty collection.
 *
 * @param {Object|Array} c - Collection whose values are averaged.
 * @returns {number} Sum of the values divided by the number of entries.
 */
const mean = c => {
  const total = sum(values(c));
  const count = size(c);
  return total / count;
};
/**
 * Picks the tags that score at least as well as the average tag for a text.
 *
 * The text is turned into terms with `getTerms`, every tag is scored with
 * `getTagScores`, and each tag whose score is not below the mean of all
 * scores is kept.
 *
 * @param {string} text - Text to classify.
 * @param {Object<string, Object<string, number>>} matrix - Tag -> term -> count matrix.
 * @returns {string[]} Names of the tags whose score is not below the mean.
 */
export const getLikelyTags = (text, matrix) => {
  const scores = getTagScores(getTerms(text), matrix);
  const threshold = mean(scores);
  // Keep the "not below" form rather than ">=" so comparisons that evaluate
  // to false (e.g. against NaN) still keep the tag.
  return pairs(scores)
    .filter(([, score]) => !(score < threshold))
    .map(([tag]) => tag);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment