Skip to content

Instantly share code, notes, and snippets.

@seancdavis
Last active March 24, 2017 07:23
Show Gist options
  • Save seancdavis/fa07542fa8dab0310b9c to your computer and use it in GitHub Desktop.
Save seancdavis/fa07542fa8dab0310b9c to your computer and use it in GitHub Desktop.
Related Content (without metadata) in Rails using tf-idf
# Generate a migration adding a text column `words` to posts, then apply it.
$ bundle exec rails g migration add_words_to_posts words:text
$ bundle exec rake db:migrate
# Install the gems declared in the Gemfile.
$ bundle install
# Generate a migration adding a `related_posts` column to posts (no column
# type is given on the command line), then apply it.
$ bundle exec rails g migration add_related_posts_to_posts related_posts
$ bundle exec rake db:migrate
# Gemfile (presumably -- these are Bundler `gem` declarations)
# Both libraries are required by Post#update_words!.
gem 'htmlentities'
gem 'nokogiri'
# config/initializers/array.rb
class Array
  # Returns the multiset (bag) intersection of this array and +arr+.
  #
  # Every element appears in the result as many times as it occurs in BOTH
  # arrays, i.e. the smaller of its two occurrence counts. Equal elements are
  # grouped together, and groups are ordered by the element's first
  # appearance in the receiver.
  #
  #   %w[a a b c].multiset(%w[a a a c d]) # => ["a", "a", "c"]
  #
  # @param arr [Array] the array to intersect with
  # @return [Array] a new array; neither the receiver nor +arr+ is modified
  def multiset(arr)
    own_counts = Hash.new(0)
    other_counts = Hash.new(0)
    each { |item| own_counts[item] += 1 }
    arr.each { |item| other_counts[item] += 1 }

    # flat_map flattens exactly one level, so elements that are themselves
    # arrays come back intact (a deep `flatten` would splice their contents
    # into the result). A count of zero in +arr+ yields an empty group.
    own_counts.flat_map do |item, count|
      [item] * [count, other_counts[item]].min
    end
  end
end
# app/models/post.rb
include ActionView::Helpers::SanitizeHelper
class Post < ActiveRecord::Base
  after_save :update_words!

  # Rebuilds the sorted, comma-separated word list stored in the +words+
  # column from +body+.
  #
  # <pre> and <code> elements are dropped, the remaining markup is stripped,
  # HTML entities are decoded, the text is lower-cased and split on spaces and
  # basic punctuation, and every character other than a-z is removed from
  # each token. The result is written with update_columns, which skips
  # callbacks, so this after_save hook does not re-trigger itself.
  def update_words!
    require 'htmlentities'
    require 'nokogiri'

    doc = Nokogiri::HTML.parse(body.to_s)
    # Remove both kinds of code element with one union query.
    doc.xpath('//pre | //code').remove

    # Newlines become spaces so words on adjacent lines are not fused together.
    text = doc.text.gsub(/\n/, ' ').downcase
    # `sanitize` is provided by ActionView::Helpers::SanitizeHelper, which is
    # included at the top level of this file.
    text = HTMLEntities.new.decode(sanitize(text, :tags => []))

    tokens = text.split(%r{[ .,!?/]}).map { |token| token.gsub(/[^a-z]/i, '') }
    update_columns(:words => tokens.reject(&:blank?).sort.join(','))
  end

  # Scores every other published post against this one and stores the ids of
  # the three best matches, comma-separated, in the +related_posts+ column.
  #
  # A post's score is the sum of the inverse document frequencies of the words
  # it shares with this post, counted with multiplicity via Array#multiset.
  def update_related!
    posts = Post.all.to_a
    idf = inverse_document_frequency(posts)
    own_words = words.to_s.split(',')

    scores = {}
    (posts.select(&:published?) - [self]).each do |post|
      shared = own_words.multiset(post.words.to_s.split(','))
      scores[post.id] = shared.inject(0) { |sum, word| sum + idf.fetch(word, 0) }
    end

    top_ids = scores.sort_by { |_id, score| -score }.first(3).map(&:first)
    update_columns(:related_posts => top_ids.join(','))
  end

  # Published posts whose ids are listed in +related_posts+. Returns an empty
  # result when the column has never been populated.
  #
  # NOTE(review): relies on a `published` scope that is not defined in this
  # snippet -- confirm it exists on the model. The query has no ORDER BY, so
  # the relevance order stored in +related_posts+ is not guaranteed here.
  def related
    Post.published.where(:id => related_posts.to_s.split(','))
  end

  private

  # Builds a { word => log(N / df) } hash, where N is the number of +posts+
  # and df is the number of posts whose word list contains the word.
  #
  # Posts whose word list is still blank get it generated first, so rows saved
  # before the +words+ column was in use still contribute.
  def inverse_document_frequency(posts)
    document_counts = Hash.new(0)
    posts.each do |post|
      post.update_words! if post.words.blank?
      post.words.to_s.split(',').uniq.each { |word| document_counts[word] += 1 }
    end

    # Float division: integer division would truncate the ratio before the log.
    total = posts.size.to_f
    document_counts.each_with_object({}) do |(word, count), idf|
      idf[word] = Math.log(total / count)
    end
  end
end
# app/controllers/posts_controller.rb
class PostsController < ApplicationController
  # Load the record up front for every action that addresses an existing post.
  before_action :set_post, :only => [:show, :edit, :update]

  # Shows a post together with up to three related posts.
  def show
    @related = @post.related.first(3)
  end

  # Renders the form for a new post.
  def new
    @post = Post.new
  end

  # Creates a post; on success its related-post lists are rebuilt.
  def create
    @post = Post.new(post_params)
    if @post.save
      refresh_related_posts
      redirect_to @post, :notice => "Post was created successfully."
    else
      render 'new'
    end
  end

  # Renders the edit form; @post is loaded by set_post.
  def edit
  end

  # Updates a post; on success its related-post lists are rebuilt.
  def update
    if @post.update(post_params)
      refresh_related_posts
      redirect_to @post, :notice => "Post was updated successfully."
    else
      render 'edit'
    end
  end

  private

  # Post.find raises ActiveRecord::RecordNotFound for an unknown id, instead
  # of returning nil and letting the action fail later on a nil receiver.
  def set_post
    @post = Post.find(params[:id])
  end

  # Recomputes the related list of @post and then of each post it now points
  # to, since their rankings may have changed as well.
  def refresh_related_posts
    @post.update_related!
    @post.related.each(&:update_related!)
  end

  # Strong-parameters whitelist for post attributes.
  def post_params
    params.require(:post).permit(:title, :body, :published)
  end
end
@pawneetdev
Copy link

pawneetdev commented Mar 24, 2017

I am getting the following error:

uninitialized constant Post::RelatedPost

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment