Skip to content

Instantly share code, notes, and snippets.

@mildsunrise
Last active February 26, 2024 14:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mildsunrise/78e414d4c64f4536ec6e2498c49cf32a to your computer and use it in GitHub Desktop.
Save mildsunrise/78e414d4c64f4536ec6e2498c49cf32a to your computer and use it in GitHub Desktop.
Haskell implementation of SA-IS
{-# LANGUAGE TupleSections #-}
import Data.Array (accumArray, listArray, elems)
import Data.List.Extra (findIndices, groupSortOn, chunksOf)
import Data.Vector ((!), toList)
import Control.Monad (forM_, when, zipWithM)
import qualified Data.Vector as Vec
import qualified Data.Vector.Mutable as MVec
fill v = zipWithM (MVec.write v) [0..MVec.length v - 1]
indices = (\l -> [0..l-1]) . Vec.length
revIndices = (\l -> [l-1,l-2..0]) . Vec.length
toReverseList v = map (v !) (revIndices v)
fromReverseListN n = Vec.reverse . Vec.fromListN n
pairsWith f v = zipWith f v (tail v)
-- SUFFIX CLASSIFICATION
withKinds v =
fromReverseListN (Vec.length v) $
zip (toReverseList v) $
map (> EQ) $
scanl (flip (<>)) GT $
pairsWith compare $
toReverseList v
uwuPoints t =
Vec.fromList $
(++ [Vec.length t - 1]) $
map succ $
findIndices id $
pairsWith (<) $
map snd $
toList t
-- DIVIDE/CONQUER PHASE
buildText uwu =
(uwu Vec.//) .
concat .
zipWith (\i -> map (,i)) [0..]
uwuArgSort uwu gs =
if alen == Vec.length uwu
then map head gs
else toList $ saIs alen $ buildText uwu gs
where alen = length gs
sortedUwu t =
map (uwu' !) $
uwuArgSort uwu' $
groupSortOn key (indices uwu')
where
uwu = uwuPoints t
uwu' = Vec.init uwu
key i = map (t !) [uwu ! i .. uwu ! i+1]
-- COMBINE PHASE
bucketBoundaries alen =
scanl (+) 0 .
elems .
accumArray (+) 0 range .
map (,1) .
toList
where
range = ((0, False), (alen-1, True))
getPtrs b = map head . chunksOf 2 . drop b
combine alen t uwu = Vec.create $ do
sa <- MVec.replicate (Vec.length t) (-1)
ptr <- MVec.new alen
let
putSuffix o d e a =
when (o == o') $ MVec.modifyM ptr f c
where
(c, o') = t ! a
f p = p+e <$ MVec.write sa (p+d) a
step o d e r = forM_ r $ \i -> do
a <- MVec.read sa i
when (a > 0) $ putSuffix o d e (a - 1)
let bbs = bucketBoundaries alen t
let initPtrs b = fill ptr $ getPtrs b bbs
initPtrs 1 >> forM_ uwu (putSuffix True 0 1)
initPtrs 0 >> step False 0 1 (indices t)
initPtrs 2 >> step True (-1) (-1) (revIndices t)
return sa
-- ENTRY POINT
saIs alen v = combine alen t $ sortedUwu t
where t = withKinds v
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment