Skip to content

Instantly share code, notes, and snippets.

@startling
Created January 15, 2014 23:31
Show Gist options
  • Save startling/8446881 to your computer and use it in GitHub Desktop.
Save startling/8446881 to your computer and use it in GitHub Desktop.
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveTraversable #-}
-- base
import Data.List
import Data.Monoid
import Data.Foldable (Foldable)
import Data.Traversable (Traversable)
import Control.Applicative
import Control.Monad
-- QuickCheck
import Test.QuickCheck
-- Hspec
import Test.Hspec
newtype NGram a = NGram { toList :: [Maybe a] }
deriving
( Eq
, Ord
, Show
, Monoid
, Functor
, Foldable
, Traversable
)
tests :: IO ()
tests = hspec $ do
describe "match" $ do
it "matches straightforward wildcardless ngrams" $ do
NGram [Just 'a', Just 'b'] `shouldMatch` "ab"
it "matches when the string has a suffix" $ do
NGram [Just 'a', Just 'b'] `shouldMatch` "abc"
it "matches when the string has a prefix" $ do
NGram [Just 'a', Just 'b'] `shouldMatch` "9ab"
it "doesn't match the empty string the NGram is empty" $ do
NGram [Just 'a'] `shouldn'tMatch` ""
NGram [] `shouldMatch` ""
it "the empty ngram matches everything" . property $
\xs -> (NGram [] :: NGram Char) `matches` xs
it "the wildcard-only ngram matches every nonempty string" . property $
\xs -> null xs || (NGram [Nothing] :: NGram Char) `matches` xs
it "some ngrams with wildcards match things" $ do
NGram [Just "quick", Nothing, Just "fox"]
`shouldMatch` words "the quick red fox"
describe "ngramsFrom" $ do
it "only creates ngrams that match the string given" . property $
\xs -> all (`matches` xs) $ ngramsFrom 5 (xs :: [Char])
where
-- Convenience expectations.
shouldMatch :: Eq a => NGram a -> [a] -> Expectation
a `shouldMatch` b = matches a b `shouldBe` True
shouldn'tMatch :: Eq a => NGram a -> [a] -> Expectation
a `shouldn'tMatch` b = matches a b `shouldBe` False
-- | Test whether an 'NGram' matches a list.
matches :: Eq a => NGram a -> [a] -> Bool
matches (NGram []) = const True
matches ns = any (matchInit ns) . init . tails
where
matchInit :: Eq a => NGram a -> [a] -> Bool
matchInit (NGram ns) = all (uncurry match) . zip ns
match :: Eq a => Maybe a -> a -> Bool
match Nothing _ = True
match (Just a) b = a == b
-- | Find all the ngrams no longer than a particular length that
-- match a given list.
ngramsFrom :: Int -> [a] -> [NGram a]
ngramsFrom n = concatMap (map NGram . replacingWith Nothing . toList)
. wildcardless n
where
-- Given a list, generate all possible lists with a given
-- item inserted at arbitrary places an arbitrary number of times.
replacingWith :: a -> [a] -> [[a]]
replacingWith a as = forM as $ (: a : [])
-- Generate all the wildcard-free ngrams that match a string
-- no longer than a given length.
wildcardless :: Int -> [a] -> [NGram a]
wildcardless n = map (NGram . map Just) . sublistsLengthN n
-- Generate all the sublists of a list with a length no longer
-- than the given 'Int'.
sublistsLengthN :: Int -> [a] -> [[a]]
sublistsLengthN 0 _ = [[]]
sublistsLengthN n ns = (sublistsLengthN (n - 1) ns ++)
. filter (\x -> length x == n) . map (take n)
. takeWhile (not . null) . iterate tail $ ns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment