Created
August 28, 2015 14:17
-
-
Save unhammer/137e62e988ab82cc6fb4 to your computer and use it in GitHub Desktop.
find possible pangrams
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
{-# OPTIONS_GHC -fwarn-incomplete-patterns #-} | |
import Data.List | |
import Data.List.Split | |
import System.Environment | |
import qualified Data.Text.Lazy as T | |
import qualified Data.Text.Lazy.IO as TIO | |
-- assumes sorted input | |
difference [] b = ([],b) | |
difference a [] = (a, []) | |
difference (a:as) (b:bs) | a == b = | |
difference as bs | |
difference (a:as) (b:bs) | a < b = | |
let (a1,b1) = difference as (b:bs) in (a:a1,b1) | |
difference (a:as) (b:bs) | otherwise = | |
let (a1,b1) = difference (a:as) bs in (a1,b:b1) | |
-- assumes letters is sorted | |
grab letters [] = Nothing | |
grab letters ((w,sw):ws) = | |
let (lettersleft, wordleft) = difference letters sw in | |
case wordleft of | |
"" -> Just (lettersleft, w, ws) | |
_ -> grab letters ws | |
-- language-specific filtering ahoy: | |
vowels = "aeiouyæøå" | |
goodwords letters words = filter good words | |
where good (w,sw) = | |
case (w, difference letters sw) of | |
("a", (_,"")) -> True | |
("i", (_,"")) -> True | |
("å", (_,"")) -> True | |
(_, (_,"")) | T.length w > 1 -> T.any (`elem` vowels) w | |
otherwise -> False | |
pangram' :: String -> [(T.Text, String)] -> Maybe [[T.Text]] | |
pangram' [] _ = Just [] | |
pangram' (_:_) [] = Nothing | |
pangram' letters words = do | |
(lettersleft, w, wordsleft) <- grab letters words | |
case (pangram' lettersleft wordsleft, pangram' letters wordsleft) of | |
(Just wrest, Just withoutw) -> return $ (prependw w wrest)++withoutw | |
(Just wrest, Nothing) -> return $ prependw w wrest | |
(Nothing, Just withoutw) -> return withoutw | |
(Nothing, Nothing) -> Nothing | |
where prependw w [] = [[w]] | |
prependw w ws = map (w:) ws | |
pangrams :: String -> [T.Text] -> Maybe [[T.Text]] | |
pangrams letters words = | |
let letters_su = nub $ sort letters | |
words_s = map (\w -> (w, sort $ T.unpack w)) words | |
words_su = nub $ sort $ goodwords letters_su words_s | |
in | |
pangram' letters_su words_su | |
readwords path = do | |
text <- TIO.readFile path | |
let lines = T.splitOn "\n" text | |
cols = map (T.splitOn "\t") lines | |
forms = map (!!2) $ filter (\l -> length l == 6) cols | |
in | |
return forms | |
pangramsToString Nothing = return () | |
pangramsToString (Just ps) = | |
mapM_ TIO.putStrLn $ map (T.intercalate "\t") ps | |
main :: IO () | |
main = do | |
[letters,path] <- System.Environment.getArgs | |
w <- readwords path | |
let ps = pangrams letters w | |
in pangramsToString ps | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
svn export --force "svn://svn.savannah.nongnu.org/ordbanken/trunk/"fullform_nn.txt | |
ghc -O2 pangrams.hs | |
./pangrams abcdefghijkl fullform_nn.txt |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment