Last active
November 1, 2017 22:42
-
-
Save johndigital/26d8d36960c21169b20b0110f4b479ee to your computer and use it in GitHub Desktop.
"Naive" Solution for finding related posts on a blog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const _ = require('lodash') | |
/**
 * Break a string into normalized word tokens.
 *
 * The text is split on runs of any whitespace (spaces, tabs, newlines), each
 * piece is lower-cased and stripped of every character outside a-z, and
 * pieces that end up empty are discarded.
 *
 * @param {string} string - Text to tokenize. null/undefined yields no tokens.
 * @returns {string[]} Lower-case, letters-only tokens in their original order.
 */
const tokenize = string => {
  // Missing text (e.g. a post without a body) simply has no words
  if (string == null) return []

  return String(string)
    // Split on any whitespace run: splitting on ' ' alone would fuse words
    // separated by a newline or tab once the non-letters are stripped below
    .split(/\s+/)
    // After lower-casing only a-z can remain, so no upper-case range is needed
    .map(w => w.toLowerCase().replace(/[^a-z]/g, ''))
    // Drop pieces that were pure punctuation/digits or leading/trailing gaps
    .filter(w => w)
}
/**
 * Pick the posts whose content shares the most distinct words with the
 * target post's title.
 *
 * Each post is scored by the number of unique title words (as produced by
 * `tokenize`) that also occur in its `content`. Posts are ranked by that
 * score, highest first, and the first `limit` are returned. The input array
 * is left in its original order. `targetPost` is not filtered out, so it can
 * appear in the result if it is also an element of `posts`.
 *
 * @param {Array<{content: string}>} posts - Candidate posts.
 * @param {{title: string}} targetPost - Post whose title supplies the words to match.
 * @param {number} [limit=5] - Maximum number of posts to return.
 * @returns {Array<{content: string}>} Up to `limit` posts, best match first.
 */
const similarPosts = (posts, targetPost, limit = 5) => {
  // A Set gives constant-time membership checks for the title words
  const titleWords = new Set(tokenize(targetPost.title))

  // Score every post once up front rather than re-tokenizing both operands
  // on each comparator call during the sort
  const scored = posts.map(post => {
    const shared = new Set(
      tokenize(post.content).filter(word => titleWords.has(word))
    )
    return { post, score: shared.size }
  })

  // `scored` is a fresh array, so sorting it does not reorder the caller's
  // `posts`. Descending by score.
  scored.sort((a, b) => b.score - a.score)

  return scored.slice(0, limit).map(entry => entry.post)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment