Skip to content

Instantly share code, notes, and snippets.

@ianpreston
Last active August 29, 2015 14:02
Show Gist options
  • Save ianpreston/40ab702af079fb04a60f to your computer and use it in GitHub Desktop.
Save ianpreston/40ab702af079fb04a60f to your computer and use it in GitHub Desktop.
Naive Bayes Classifier in Haskell. Does it work? No idea.
-- | One labelled example: a class label plus a list of boolean features.
data FeatureVec = FeatureVec
  { cls  :: Int     -- ^ Class label (the demo in 'main' uses 0 and 1).
  , feat :: [Bool]  -- ^ Feature flags, one entry per feature position.
  }
  deriving Show
-- | Per-class running totals accumulated from 'FeatureVec's.
--
-- TODO: Need a better name for vals
data ValueVec = ValueVec
  { cnt  :: Int    -- ^ Sum of the entries of 'vals', as set by 'appendFeatureVec'.
  , vals :: [Int]  -- ^ Per-position count of feature flags that were 'True'.
  }
  deriving Show
-- | Per-feature weights for one class, as produced by 'buildProbVec':
-- each entry is a feature count divided by a total count.
type ProbVec = [Float]
--
-- buildValueVec
--
-- Turns a list of feature vectors for a given class into one 'value vector'
-- for that class, representing the sum of the features.
--
-- | Increment a counter when the feature flag is set; otherwise return the
-- counter unchanged.
shiznit :: Bool -> Int -> Int
shiznit flag count
  | flag      = count + 1
  | otherwise = count
-- | Fold one feature vector into a value vector.
--
-- Each count is incremented (via 'shiznit') where the corresponding feature
-- flag is 'True'. The resulting 'cnt' is recomputed as the sum of the updated
-- counts; the incoming 'cnt' and the feature vector's class label are both
-- ignored. If the two lists differ in length, the result is truncated to the
-- shorter one.
appendFeatureVec :: FeatureVec -> ValueVec -> ValueVec
appendFeatureVec (FeatureVec _ flags) (ValueVec _ counts) =
  ValueVec { cnt = total, vals = updated }
  where
    updated = zipWith shiznit flags counts
    total   = sum updated
-- | Accumulate a list of feature vectors into a value vector, left to right,
-- starting from the given initial value vector.
--
-- An empty list returns the initial value vector unchanged. (Previously only
-- the non-empty pattern was handled, so an empty list was a runtime
-- pattern-match failure.)
buildValueVec :: [FeatureVec] -> ValueVec -> ValueVec
buildValueVec [] vv = vv
buildValueVec (x:xs) vv = buildValueVec xs (appendFeatureVec x vv)
--
-- train
--
-- Takes a list of value vectors (transformed from feature vectors by
-- buildValueVec), one for each class, and returns the final trained model,
-- as one ProbVec per class.
--
-- | Scale every count in a value vector by a total count.
--
-- The first argument is the denominator; the value vector's own 'cnt' field
-- is not used. No guard is applied for a zero denominator, so the usual
-- floating-point division results (NaN / Infinity) come through in that case.
buildProbVec :: Int -> ValueVec -> ProbVec
buildProbVec total (ValueVec _ counts) =
  [fromIntegral c / denom | c <- counts]
  where
    denom = fromIntegral total
-- | Turn per-class value vectors into per-class probability vectors.
--
-- Every class is scaled by the same denominator: the sum of the 'cnt' fields
-- of all the value vectors passed in. The output list has one 'ProbVec' per
-- input vector, in the same order.
train :: [ValueVec] -> [ProbVec]
train vectors = map (buildProbVec grandTotal) vectors
  where
    grandTotal = sum (map cnt vectors)
--
-- classify
--
-- Uses the trained model (two ProbVec values, one per class) to classify a
-- given feature vector.
--
-- | Keep a weight when its feature flag is set; otherwise contribute zero.
dopeShiznit :: Bool -> Float -> Float
dopeShiznit flag weight = if flag then weight else 0.0
-- | Pick class 0 or class 1 for a feature vector.
--
-- Each class is scored by summing the entries of its 'ProbVec' at the
-- positions where the feature flag is 'True' (positions with 'False'
-- contribute zero via 'dopeShiznit'). Class 0 wins ties. Note this is an
-- additive score, not a product of likelihoods.
classify :: ProbVec -> ProbVec -> [Bool] -> Int
classify pv0 pv1 fv
  | score pv0 >= score pv1 = 0
  | otherwise              = 1
  where
    score pv = sum (zipWith dopeShiznit fv pv)
--
-- Test code: trains on two small hand-written classes and classifies one
-- sample feature vector.
--
-- | Demo driver: builds value vectors for two hand-written classes, trains
-- the model, prints the intermediate and final vectors, and classifies one
-- sample feature vector.
main :: IO ()
main = do
  let labelled c = map (FeatureVec c)
      vectors1 = labelled 1
        [ [True, False, True]
        , [False, False, True]
        , [False, True, True]
        , [True, True, True]
        ]
      vectors0 = labelled 0
        [ [True, True, False]
        , [False, True, False]
        , [False, True, False]
        , [False, True, False]
        ]
      -- Shared starting point: three features, nothing counted yet.
      zeroCounts = ValueVec { cnt = 0, vals = [0, 0, 0] }
      vv1 = buildValueVec vectors1 zeroCounts
      vv0 = buildValueVec vectors0 zeroCounts
  print vv1
  print vv0
  -- 'train' returns one ProbVec per input, in input order, so destructure
  -- with a pattern match instead of the partial 'head' / 'tail'.
  case train [vv1, vv0] of
    [pv1, pv0] -> do
      putStrLn "Probability vector for class 1:"
      print pv1
      putStrLn "Probability vector for class 0:"
      print pv0
      let toClassify = [True, True, False]
      putStrLn "Classified as:"
      print (classify pv0 pv1 toClassify)
    _ -> error "main: train did not return exactly two probability vectors"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment