Last active
February 26, 2024 14:25
-
-
Save mildsunrise/78e414d4c64f4536ec6e2498c49cf32a to your computer and use it in GitHub Desktop.
Haskell implementation of SA-IS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE TupleSections #-} | |
import Data.Array (accumArray, listArray, elems) | |
import Data.List.Extra (findIndices, groupSortOn, chunksOf) | |
import Data.Vector ((!), toList) | |
import Control.Monad (forM_, when, zipWithM) | |
import qualified Data.Vector as Vec | |
import qualified Data.Vector.Mutable as MVec | |
fill v = zipWithM (MVec.write v) [0..MVec.length v - 1] | |
indices = (\l -> [0..l-1]) . Vec.length | |
revIndices = (\l -> [l-1,l-2..0]) . Vec.length | |
toReverseList v = map (v !) (revIndices v) | |
fromReverseListN n = Vec.reverse . Vec.fromListN n | |
pairsWith f v = zipWith f v (tail v) | |
-- SUFFIX CLASSIFICATION | |
withKinds v = | |
fromReverseListN (Vec.length v) $ | |
zip (toReverseList v) $ | |
map (> EQ) $ | |
scanl (flip (<>)) GT $ | |
pairsWith compare $ | |
toReverseList v | |
uwuPoints t = | |
Vec.fromList $ | |
(++ [Vec.length t - 1]) $ | |
map succ $ | |
findIndices id $ | |
pairsWith (<) $ | |
map snd $ | |
toList t | |
-- DIVIDE/CONQUER PHASE | |
buildText uwu = | |
(uwu Vec.//) . | |
concat . | |
zipWith (\i -> map (,i)) [0..] | |
uwuArgSort uwu gs = | |
if alen == Vec.length uwu | |
then map head gs | |
else toList $ saIs alen $ buildText uwu gs | |
where alen = length gs | |
sortedUwu t = | |
map (uwu' !) $ | |
uwuArgSort uwu' $ | |
groupSortOn key (indices uwu') | |
where | |
uwu = uwuPoints t | |
uwu' = Vec.init uwu | |
key i = map (t !) [uwu ! i .. uwu ! i+1] | |
-- COMBINE PHASE | |
bucketBoundaries alen = | |
scanl (+) 0 . | |
elems . | |
accumArray (+) 0 range . | |
map (,1) . | |
toList | |
where | |
range = ((0, False), (alen-1, True)) | |
getPtrs b = map head . chunksOf 2 . drop b | |
combine alen t uwu = Vec.create $ do | |
sa <- MVec.replicate (Vec.length t) (-1) | |
ptr <- MVec.new alen | |
let | |
putSuffix o d e a = | |
when (o == o') $ MVec.modifyM ptr f c | |
where | |
(c, o') = t ! a | |
f p = p+e <$ MVec.write sa (p+d) a | |
step o d e r = forM_ r $ \i -> do | |
a <- MVec.read sa i | |
when (a > 0) $ putSuffix o d e (a - 1) | |
let bbs = bucketBoundaries alen t | |
let initPtrs b = fill ptr $ getPtrs b bbs | |
initPtrs 1 >> forM_ uwu (putSuffix True 0 1) | |
initPtrs 0 >> step False 0 1 (indices t) | |
initPtrs 2 >> step True (-1) (-1) (revIndices t) | |
return sa | |
-- ENTRY POINT | |
saIs alen v = combine alen t $ sortedUwu t | |
where t = withKinds v |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment