Skip to content

Instantly share code, notes, and snippets.

@IronGremlin
Created June 16, 2017 17:25
Show Gist options
  • Save IronGremlin/f037823daa1a1febbb2e2382cd64eff7 to your computer and use it in GitHub Desktop.
Save IronGremlin/f037823daa1a1febbb2e2382cd64eff7 to your computer and use it in GitHub Desktop.
conduitMMapParser
passesPredicate :: (POExpinDoc -> Bool) -> (Either ParseError POExpinDoc -> Bool)
passesPredicate p = either (\_ -> True) (p)
both :: (a->c) -> (b->c) -> Either a b -> Either c c
both fl fr inp =
case inp of
Left l -> Left (fl $! l)
Right r -> Right (fr $! r)
loadFromFile :: (ACustomDoc -> Bool)-> [FilePath] -> Format -> IO (V.Vector (CString,Int,Int))
{-# INLINE loadFromFile #-}
loadFromFile fPred iPath format = do
let pred = passesPredicate $! fPred
let xmap = case format of
Wrapped -> DC.lines .| D.filter isDoctype .| D.concatMapAccum unWrap False
Unwrapped -> D.map id
res <- runConduitRes $
(sequence_ $ map D.sourceFile $ iPath)
.| D.filterE (/= (C.toWord8 '\r'))
.| xmap
.| conduitParserEither (p_UWrapped)
.| D.map (fmap snd)
.| D.filter pred
.| D.map (both (B.toStrict . B.pack . show) (showRaw))
.| forkBS
act <- mapFile res
return act
projectPositions :: MonadIO m => (Either ByteString ByteString) -> Int -> m (Int, [(Int,Int)])
projectPositions inp eofM = do
case inp of
Right pex -> do
let propLen = C.length pex
let eofM' = eofM + propLen
return $! (eofM', [(eofM,propLen)])
Left _ -> do
return $! (eofM, [])
mapFile :: (V.Vector (Int,Int)) -> IO (V.Vector (CString,Int,Int))
mapFile inVec = do
let totalSize = V.sum . V.map snd $ inVec
(ptr, _ , _ , _) <- mmapFilePtr "./outputTempFile" ReadOnly (Just (0,totalSize))
return (V.map (\(x,y) -> (ptr,x,y)) inVec)
left :: Either l r -> Maybe l
left = either Just (const Nothing)
right :: Either l r -> Maybe r
right = either (const Nothing) Just
forkBS :: MonadResource m => Sink (Either ByteString ByteString) m (V.Vector (Int,Int))
forkBS =
getZipSink $
( ZipSink (D.concatMap left .| D.sinkFileBS "./Errors.log")
*> ZipSink (D.concatMap right .| D.sinkFileBS "./outputTempFile")
*> ZipSink ( D.concatMapAccumM projectPositions 0
.| D.foldMapM (return . V.singleton) ) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment