Last active
February 25, 2024 15:41
-
-
Save paolino/36d6cbc0a01ff4782dd66b36cda45e92 to your computer and use it in GitHub Desktop.
a replace that streams
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BlockArguments #-} | |
{-# LANGUAGE NumericUnderscores #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
import Prelude | |
import Data.Functor (Identity (runIdentity)) | |
import Control.Monad.Fix (fix) | |
import Data.Bifunctor (Bifunctor (..), second) | |
import Data.List (tails) | |
import Data.List.NonEmpty (NonEmpty (..), toList) | |
import Data.Maybe (fromMaybe) | |
import System.IO (IOMode (..), hClose, openFile) | |
import qualified Data.List.NonEmpty as NE | |
import qualified Data.Text as T | |
import qualified Data.Text.IO as T | |
type NonEmptyString = NonEmpty Char | |
type Tries = [NonEmptyString] | |
tries :: Maybe String -> String -> Tries | |
tries Nothing [] = [] | |
tries mfront xs = fmap NE.fromList $ init $ tails $ fromMaybe "" mfront <> xs | |
-- when there is enough input to decide | |
-- we return the boloean to riport success or failure | |
-- when there is not enough input to dedide | |
-- we return the part of the pattern that matched | |
match | |
:: String -- pattern | |
-> String -- input | |
-> Either Bool String -- success or failure or partial match | |
match [] _ = Left True | |
match _ [] = Right [] | |
match (x : xs) (y : ys) | |
| x == y = error "TODO" | |
| otherwise = Left False | |
-- matching function will cut the matching on the first partial match | |
-- and return the leftovers | |
-- this is ok because the leftovers will be fed back to the next chunk | |
matching | |
:: String -- pattern | |
-> String -- replacement | |
-> Tries -- inputs | |
-> (String, Maybe String) -- result and leftovers | |
matching _pattern _replace [] = ([], Nothing) | |
matching pattern' replacement (x : xs) = error "TODO" | |
where | |
lp = length pattern' - 1 | |
replace | |
:: Maybe String -- old leftovers | |
-> String -- pattern | |
-> String -- replacement | |
-> String -- input | |
-> (String, Maybe String) -- result and leftovers | |
replace old pattern' replace' = matching pattern' replace' . tries old | |
-- a state machine to track the state of a replace operation | |
data ReplaceState = ReplaceState | |
{ _feed :: String -> (String, ReplaceState) -- feed a chunk of input | |
, _dump :: Maybe String -- dump the matched but not replaced input | |
} | |
-- a smart constructor to create a ReplaceState from a pattern and a replacement | |
mkReplaceState | |
:: String -- pattern | |
-> String -- replacement | |
-> ReplaceState -- initial state | |
mkReplaceState pattern' replace' = error "TODO" | |
-- a stream of chunks of data where each chunk is a string and a continuation to | |
-- get the next chunk | |
data Stream m = Stream (m (String, Stream m)) | Done | |
-- stream producer from a file | |
inputFromFile :: FilePath -> Stream IO | |
inputFromFile path = Stream $ do | |
handle <- openFile path ReadMode | |
let get = error "TODO" | |
get | |
-- stream transformer that replace over a stream of chunks | |
replaceInStream :: Monad m => ReplaceState -> Stream m -> Stream m | |
replaceInStream s (Done) = Stream $ pure (fromMaybe "" (_dump s), Done) | |
replaceInStream s (Stream input) = Stream $ do | |
error "TODO" | |
-- stream consumer that dump the chunks to a file and counts the chunks | |
outputToFile :: FilePath -> Stream IO -> IO Int | |
outputToFile path input = do | |
handle <- openFile path WriteMode | |
let go = error "TODO" | |
go 0 input | |
--- Tests --- | |
spec :: (Show a) => (a -> a -> Bool) -> a -> a -> IO () | |
spec t x y | |
| t x y = pure () | |
| otherwise = | |
error | |
$ "Assertion failed: " <> show x <> " == " <> show y | |
specEq :: (Show a, Eq a) => a -> a -> IO () | |
specEq = spec (==) | |
specIO :: Show a => (a -> a -> Bool) -> IO a -> a -> IO () | |
specIO f x y = do | |
x' <- x | |
spec f x' y | |
specEqIO :: (Eq a, Show a) => IO a -> a -> IO () | |
specEqIO = specIO (==) | |
inputFromList :: Applicative m => [String] -> Stream m | |
inputFromList = go | |
where | |
go [] = Done | |
go (x : xs) = Stream $ pure (x, go xs) | |
outputToString :: Monad m => Stream m -> m String | |
outputToString Done = pure "" | |
outputToString (Stream s) = do | |
(x, next) <- s | |
(x <>) <$> outputToString next | |
streamTest :: ReplaceState -> [String] -> String | |
streamTest r = runIdentity . outputToString . replaceInStream r . inputFromList | |
streamTestFile :: ReplaceState -> FilePath -> FilePath -> IO Int | |
streamTestFile r input output = | |
outputToFile output $ replaceInStream r $ inputFromFile input | |
test :: IO () | |
test = do | |
let replacer = mkReplaceState "foo" "bar" | |
specEq | |
do streamTest replacer ["foo"] | |
do "bar" | |
specEq | |
do streamTest replacer $ replicate 100 "foo" | |
do concat $ replicate 100 "bar" | |
specEq | |
do streamTest replacer ["f", "o", "o"] | |
do "bar" | |
specEq | |
do streamTest replacer ["f", "oof", "oo"] | |
do "barbar" | |
specIO | |
do \(f, n) (f', n') -> f == f' && n > n' | |
do | |
let replacerAbba = mkReplaceState "abbababbaba" "bar" | |
T.writeFile "foo.txt" $ T.replicate 1_000_000 "abbababbaba" | |
n <- streamTestFile replacerAbba "foo.txt" "bar.txt" | |
f <- T.readFile "bar.txt" | |
pure (f, n) | |
do | |
(T.replicate 1_000_000 "bar", 1000) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment