Skip to content

Instantly share code, notes, and snippets.

@mumoshu
Created April 13, 2009 13:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mumoshu/94452 to your computer and use it in GitHub Desktop.
Save mumoshu/94452 to your computer and use it in GitHub Desktop.
require "forwardable"
# A tiny frequency-based text classifier in the spirit of naive Bayes.
#
# Documents are represented as hashes mapping a term to the number of times
# the term occurs (see NaiveBayes::Object#freq). A Classifier accumulates
# per-class term counts via #train and picks the best-scoring class via
# #classify.
module NaiveBayes
  # One category known to the classifier.
  #
  # NOTE: inside the NaiveBayes namespace this constant shadows Ruby's core
  # ::Class; the name is kept because callers refer to NaiveBayes::Class.
  class Class
    extend Forwardable

    # @return [Object] the label this class was created with
    attr_reader :name

    # @return [Integer] number of documents trained into this class
    #   (Classifier#train adds 1 per call)
    attr_accessor :frequency

    # cls[term] and cls[term] = n read and write the accumulated term counts.
    # Terms that were never stored read as 0.
    def_delegators :@likelihood, :[], :[]=

    # @param name [Object] label of the class (the demo uses Symbols)
    def initialize(name)
      @name = name
      @frequency = 0
      @likelihood = Hash.new(0)
    end
  end

  # Trains on (class label, document) pairs and classifies new documents.
  class Classifier
    # @return [Hash] label => NaiveBayes::Class. Looking up an unknown label
    #   creates and stores a fresh, empty class for it.
    attr_reader :classes

    def initialize
      @classes = Hash.new { |known, label| known[label] = NaiveBayes::Class.new(label) }
    end

    # Adds one document to the class labelled +c+.
    #
    # @param c [Object] class label; created on first use
    # @param doc [Hash] term => occurrence count
    # @return [NaiveBayes::Class] the updated class
    def train(c, doc)
      cls = @classes[c]
      cls.frequency += 1
      doc.each { |term, freq| cls[term] += freq }
      cls
    end

    # Returns the label of the class with the highest score for +doc+.
    #
    # @param doc [Hash] term => occurrence count
    # @return [Object, nil] the winning label, or nil when nothing has been
    #   trained yet (previously this case raised NoMethodError on nil)
    def classify(doc)
      # sort_by ... last is kept (rather than max_by) so that equal scores are
      # resolved exactly as before.
      best = @classes.values.sort_by { |c| score(c, doc) }.last
      best && best.name
    end

    private

    # Score of +doc+ against class +c+:
    #
    #   c.frequency * sum over (term, freq) in doc of c[term] ** freq
    #
    # This is an additive heuristic, not a normalized probability. A term the
    # class has never seen has count 0 and contributes nothing for freq >= 1.
    def score(c, doc)
      c.frequency * doc.inject(0) { |sum, (term, freq)| sum + c[term]**freq }
    end
  end

  # Wraps a string so it can be turned into a term-frequency document.
  #
  # NOTE: inside the NaiveBayes namespace this constant shadows Ruby's core
  # ::Object.
  class Object
    # @param obj [String] raw text
    def initialize(obj)
      @obj = obj
    end

    # Splits the text on whitespace and counts each token. Punctuation stays
    # attached to its token, so "iPod" and "iPod?" are different terms.
    #
    # The result uses a plain default value of 0, so looking up an unknown
    # term does not insert it into the document.
    #
    # @return [Hash] term => occurrence count, e.g.
    #   _("a b a").freq # => {"a"=>2, "b"=>1}
    def freq
      @obj.split(" ").inject(Hash.new(0)) do |counts, term|
        counts[term] += 1
        counts
      end
    end
  end

  # Shorthand for NaiveBayes::Object.new(obj).
  #
  # @param obj [String] raw text
  # @return [NaiveBayes::Object]
  def self._(obj)
    Object.new(obj)
  end

  # Demo: trains on six labelled sentences, classifies two unseen ones and
  # prints each predicted label on its own line ("mac", then "windows").
  def self.test
    # data set
    data_set = [
      {
        :class => :mac,
        :body => "How do I customize my iPod settings?"
      },
      {
        :class => :mac,
        :body => "How do I set up an AirPort wireless network?"
      },
      {
        :class => :mac,
        :body => "How do I set up Mac OS X Mail?"
      },
      {
        :class => :windows,
        :body => "You must be running Microsoft Internet Explorer 5 or later."
      },
      {
        :class => :windows,
        :body => "You can obtain updates from the Microsoft Download Center."
      },
      {
        :class => :windows,
        :body => "Get Office Live Basics for your business today."
      }
    ]

    # test set
    test_set = [
      "How do I sync audio and video to my iPod?",
      "We empower your business now!"
    ]

    # training
    classifier = NaiveBayes::Classifier.new
    labelled_docs = data_set.map do |data|
      # e.g. [:mac, {"How"=>1, "do"=>1, "I"=>1, "customize"=>1, "my"=>1, "iPod"=>1, "settings?"=>1}]
      [data[:class], _(data[:body]).freq]
    end
    labelled_docs.each do |c, doc|
      # After all six documents both classes have frequency 3.
      classifier.train(c, doc)
    end

    # testing
    test_set.each do |data|
      cls = classifier.classify(_(data).freq) # => :mac, :windows
      puts cls
    end
  end
end
# Run the built-in demo; the expected console output is recorded below.
NaiveBayes.test
# >> mac
# >> windows
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment