Skip to content

Instantly share code, notes, and snippets.

@rubenmoor
Created January 19, 2022 15:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rubenmoor/27f4e09766d9bafe9186fc67adae8156 to your computer and use it in GitHub Desktop.
Save rubenmoor/27f4e09766d9bafe9186fc67adae8156 to your computer and use it in GitHub Desktop.
parseWord, with bad speed-up
parseWord :: Text -> IO (Text, [(RawSteno, (PatternGroup key, Greediness))])
parseWord hyph =
case parseSeries triePrimitives hyph of
Right stenos -> pure (hyph, stenos)
Left pe -> (hyph, []) <$ case pe of
PEExceptionTable orig -> Text.putStrLn $
"Error in exception table for: " <> orig
PEParsec _ _ -> pure ()
PEImpossible str -> do
Text.putStrLn $ "Seemingly impossible: " <> str
appendLine fileDictNoParse
$ Text.unwords [word, hyph]
where
word = Text.replace "|" "" hyph
-- | Find steno chords for a given, hyphenated word, e.g. "Ge|sund|heit"
-- or return a parser error.
-- In case of success, provide the most efficient steno chords along with a
-- list of alternatives. E.g. steno that uses more chords or more letters,
-- or steno that uses the same number of chords or letters, but requires
-- higher greediness
parseSeries
:: forall key
. Palantype key
=> Trie [(Greediness, RawSteno, PatternGroup key)]
-> Text
-> Either ParseError [(RawSteno, (PatternGroup key, Greediness))]
parseSeries trie hyphenated = case Map.lookup unhyphenated (mapExceptions @key) of
Just raws -> sequence
( checkException . second (, 0) <$> raws
)
Nothing ->
let
-- calculate result for lower case word
eAcronym = runParser ((,) <$> acronym <*> getInput) () "" hyphenated
(hyphenated', isAcronym) = case eAcronym of
Right (syls, rem) -> (Text.intercalate "|" syls <> rem, True)
Left _ -> (hyphenated, False)
str = Text.encodeUtf8 $ toLower hyphenated'
st = State
{ stStrRawSteno = ""
, stNLetters = 0
, stNChords = 1
, stMFinger = Nothing
, stMLastKey = Nothing
, stMPatternGroup = Nothing
}
levels =
catMaybes
$ lsPatterns @key
<&> \(g, patterns) -> if any (`isInfixOf` str) patterns
then Just g
else Nothing
lsResultLc =
levels <&> \maxG ->
((maxG, False), ) $ optimizeStenoSeries trie maxG st str
lsResult
| isAcronym = second (mapSuccess $ addAcronymChord @key) <$> lsResultLc
| isCapitalized hyphenated =
let
lsNoCOpt =
second (mapSuccess $ addCapChord @key)
<$> lsResultLc
lsYesCOpt = first (second $ const True) <$> lsResultLc
in
lsNoCOpt ++ lsYesCOpt
| otherwise = lsResultLc
in
case sortOn (Down . uncurry scoreWithG) lsResult of
(_, Failure raw err) : _ -> Left $ PEParsec raw err
[] -> Left $ PEImpossible $ "Empty list for: " <> hyphenated
ls -> Right $ filterAlts $ snd <$> ls
where
checkException (raw, patG) = case parseWord @key raw of
Right chords -> Right (unparts (fromChord <$> chords), patG)
Left err -> Left $ PEExceptionTable
$ unhyphenated
<> ": "
<> unRawSteno raw
<> "; "
<> Text.pack (show err)
unhyphenated = replace "|" "" hyphenated
filterAlts
:: [Result (State key)]
-> [(RawSteno, (PatternGroup key, Greediness))]
filterAlts [] = []
filterAlts (Failure _ _ : as) = filterAlts as
filterAlts (Success state : as) =
let rawSteno = stStrRawSteno state
pg = case stMPatternGroup state of
Just patG -> patG
Nothing -> error "impossible: pattern group Nothing"
distinct (Failure _ _) = False
distinct (Success st2) = rawSteno /= stStrRawSteno st2
as'= filter distinct as
in (RawSteno rawSteno, pg) : filterAlts as'
-- | Try to fit as many letters as possible into a steno
-- chord (a chord contains keys that can be typed all at once).
-- The score of a chord is the number of letters it successfully
-- encoded, w/o the hyphenation symbol.
--
-- look at next character:
-- '|' -> do
-- consume character
-- (steno1, score1) = append '/', recursion with increased chord count and
-- (steno2, score2) = recursion
-- return steno with highest score
-- otherwise -> get matches from primtive trie
-- for every match:
-- consume and recursion with remaining string ...
-- ... increase letter count by match length
-- return steno with highest score
optimizeStenoSeries
:: forall key
. Palantype key
=> Trie [(Greediness, RawSteno, PatternGroup key)]
-> Greediness
-> State key
-> ByteString
-> Result (State key)
optimizeStenoSeries _ _ st "" = Success st
optimizeStenoSeries trie g st str | BS.head str == bsPipe =
let newState =
st { stStrRawSteno = stStrRawSteno st <> "/"
, stNChords = stNChords st + 1
, stMFinger = Nothing
, stMLastKey = Nothing
, stMPatternGroup =
max (Just (patSimpleMulti, 0)) $ stMPatternGroup st
}
str' = BS.tail str
r1 = optimizeStenoSeries trie g newState str'
r2 = optimizeStenoSeries trie g st str'
in maximumBy (comparing score) [r1, r2]
optimizeStenoSeries trie g st str =
let
matches = filterGreediness $ flatten $ Trie.matches trie str
matchToResult (consumed, (greediness, raw, pg), rem) =
case parseKey (greediness, raw)
(stMFinger st)
(stMLastKey st) of
Left err -> Failure raw err
Right (strRaw, (mFinger, mLK)) ->
let newState = State
{ stStrRawSteno = stStrRawSteno st <> strRaw
, stNLetters = stNLetters st + countLetters consumed
, stNChords = stNChords st + countChords strRaw
, stMFinger = mFinger
, stMLastKey = mLK
, stMPatternGroup =
max (Just (pg, greediness)) $ stMPatternGroup st
}
in optimizeStenoSeries trie g newState rem
results = case matches of
[] -> [Failure "" $ Parsec.newErrorUnknown (initialPos "")]
ms -> matchToResult <$> ms
in maximumBy (comparing score) results
where
filterGreediness
:: [(ByteString, (Greediness, RawSteno, PatternGroup key), ByteString)]
-> [(ByteString, (Greediness, RawSteno, PatternGroup key), ByteString)]
filterGreediness = filter (\(_, (g', _, _), _) -> g' <= g)
flatten
:: [(ByteString, [(Greediness, RawSteno, PatternGroup key)], ByteString)]
-> [(ByteString, (Greediness, RawSteno, PatternGroup key), ByteString)]
flatten = mconcat . fmap expand
where
expand
:: (ByteString, [(Greediness, RawSteno, PatternGroup key)], ByteString)
-> [(ByteString, (Greediness, RawSteno, PatternGroup key), ByteString)]
expand (c, rs, rem) = (c, , rem) <$> rs
parseKey
:: (Greediness, RawSteno)
-> Maybe Finger
-> Maybe key
-> Either Parsec.ParseError
(Text, (Maybe Finger, Maybe key))
parseKey (_, RawSteno s) mFinger' mLastKey' =
let
mFinger = mFinger'
mLastKey = mLastKey'
ePair = runParser ((,) <$> keysWithSlash <*> getState)
(mFinger, mLastKey) "" s
makeRawStr =
mconcat <<< intersperse "/" <<< fmap (mconcat <<< fmap showt)
in
first makeRawStr <$> ePair
keysWithSlash :: Parsec Text (Maybe Finger, Maybe key) [[key]]
keysWithSlash =
sepBy1 keys (char '/' *> setState (Nothing, Nothing)) <* eof
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment