Created
October 7, 2016 20:40
-
-
Save jeremysinger/667b9c39b9d565f0bdabc03a8c44d60b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char
import Data.List (foldl')
-- | Sample sentences for experimenting with the bigram functions below.
x :: String
x = "the quick brown fox jumps over the lazy dog"

y :: String
y = "it was the best of times, it was the worst of times"

z :: String
z = "to be or not to be"
-- | Generate the list of letter bigrams for a string.
--
-- Every pair of adjacent characters is formed first; pairs in which either
-- character is not a letter (spaces, punctuation, etc.) are then discarded
-- by 'removeNonAlphaChars'.  Inputs shorter than two characters yield an
-- empty list.
bigrams :: [Char] -> [(Char, Char)]
-- 'drop 1' rather than 'tail': identical result here, but total on "".
bigrams sentence = removeNonAlphaChars $ zip sentence (drop 1 sentence)
-- | Keep only the bigrams whose two characters are both letters, dropping
-- any pair that contains a space, punctuation mark, etc.
removeNonAlphaChars :: [(Char, Char)] -> [(Char, Char)]
removeNonAlphaChars pairs =
  [ pair | pair@(left, right) <- pairs, isLetter left, isLetter right ]
-- | Record one more occurrence of an element in an association list of
-- (element, count) pairs.
--
-- If the element is already present its count is incremented in place;
-- otherwise a new entry with count 1 is appended at the end of the list.
--
-- NOTE: would be better to do this with a Data.Map.
insert :: Eq a => a -> [(a, Int)] -> [(a, Int)]
insert item counts = case counts of
  [] -> [(item, 1)]
  (key, n) : others
    | item == key -> (key, n + 1) : others
    | otherwise -> (key, n) : insert item others
-- | Take a list of elements and produce a list of (element, count) pairs,
-- ordered by each element's first appearance in the input.
--
-- Uses the strict left fold 'foldl'' so the accumulator is evaluated at
-- every step instead of piling up as a chain of unevaluated 'insert' calls.
--
-- NOTE: only 'Eq' is required, so every insertion is a linear scan of the
-- counts seen so far; it would be better to do this with a Data.Map.
freqlist :: Eq a => [a] -> [(a, Int)]
freqlist elts = foldl' (\counts elt -> insert elt counts) [] elts
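-- Now try something like the following in GHCi:
--   freqlist $ bigrams z
-- which evaluates to
--   [(('t','o'),2),(('b','e'),2),(('o','r'),1),(('n','o'),1),(('o','t'),1)]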
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
and it would be nicer to define a newtype for Bigram too