Skip to content

Instantly share code, notes, and snippets.

@zarkzork
Last active December 17, 2015 15:59
Show Gist options
  • Save zarkzork/5635284 to your computer and use it in GitHub Desktop.
Save zarkzork/5635284 to your computer and use it in GitHub Desktop.
simple haskell indexed search for files.
import Data.List
import Data.Map
import System.Environment
import Control.Monad
import System.IO
-- returns triagrams for given string
splitIntoTrigrams :: String -> [String]
splitIntoTrigrams (a : b : c : ls) = (a : b : c : []) : splitIntoTrigrams (b : c : ls)
splitIntoTrigrams (b : c : []) = []
splitIntoTrigrams (c : []) = []
splitIntoTrigrams [] = []
-- Valid symbols for searching
validSymbol :: Char -> Bool
validSymbol = (`elem` (['0'..'9'] ++ ['A' .. 'Z'] ++ ['a' .. 'z']))
-- Extract valid tokens (words) from string
alphanumWordsFromString :: String -> [String]
alphanumWordsFromString [] = []
alphanumWordsFromString s = let (word, rest) = breakValid s in word : (alphanumWordsFromString rest)
where breakValid = (break notValid) . (dropWhile notValid)
notValid = not . validSymbol
-- All valid triagrams for given line of input
trigramsFromLine :: String -> [String]
trigramsFromLine s = concat $ splitIntoTrigrams `fmap` (alphanumWordsFromString s)
-- Create Map containing all triagrams from sting to passed value
buildTriagramsMapFromString :: String -> v -> Map String v
buildTriagramsMapFromString s v = Data.Map.fromList $ zip (trigramsFromLine s) (repeat v)
-- Main index for searching for files
type Index = Map String [(String, Int, String)]
-- Empty index, literally
emptyIndex :: Index
emptyIndex = empty
-- Add file contents to given index
addToIndex :: Index -> String -> String -> Index
addToIndex index filename fileData = Data.List.foldl' accF index $ zip (lines fileData) [1..]
where accF acc (s, num) = unionWith (++) acc $ buildTriagramsMapFromString s [(filename, num, s)]
-- Seach index for passed line
searchIndex :: Index -> String -> Maybe [(String, Int, String)]
searchIndex i "" = Nothing
searchIndex i s = purify $ (Data.List.foldr1 intersect) `fmap` sequence ((`Data.Map.lookup` i) `fmap` trigramsFromLine s)
where purify (Just []) = Nothing
purify i = i
showResult :: Maybe [(String, Int, String)] -> String
showResult Nothing = "Nothing found, sorry."
showResult (Just a) = Data.List.foldl' f "" a
where f acc (filename, linenum, string) = acc ++ filename ++ ":" ++ (show linenum) ++ " " ++ string ++ "\n"
main = do
filenames <- System.Environment.getArgs
contents <- sequence $ readFile `fmap` filenames
let index = Data.List.foldr (\ (filename, fileData) i -> addToIndex i filename fileData) emptyIndex $ zip filenames contents
forever $ do
putStr "search term: "
hFlush stdout
line <- getLine
putStrLn $ showResult $ searchIndex index line
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment