Created
January 19, 2022 15:11
-
-
Save rubenmoor/27f4e09766d9bafe9186fc67adae8156 to your computer and use it in GitHub Desktop.
parseWord, with bad speed-up
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
parseWord :: Text -> IO (Text, [(RawSteno, (PatternGroup key, Greediness))]) | |
parseWord hyph = | |
case parseSeries triePrimitives hyph of | |
Right stenos -> pure (hyph, stenos) | |
Left pe -> (hyph, []) <$ case pe of | |
PEExceptionTable orig -> Text.putStrLn $ | |
"Error in exception table for: " <> orig | |
PEParsec _ _ -> pure () | |
PEImpossible str -> do | |
Text.putStrLn $ "Seemingly impossible: " <> str | |
appendLine fileDictNoParse | |
$ Text.unwords [word, hyph] | |
where | |
word = Text.replace "|" "" hyph | |
-- | Find steno chords for a given, hyphenated word, e.g. "Ge|sund|heit" | |
-- or return a parser error. | |
-- In case of success, provide the most efficient steno chords along with a | |
-- list of alternatives. E.g. steno that uses more chords or more letters, | |
-- or steno that uses the same number of chords or letters, but requires | |
-- higher greediness | |
parseSeries | |
:: forall key | |
. Palantype key | |
=> Trie [(Greediness, RawSteno, PatternGroup key)] | |
-> Text | |
-> Either ParseError [(RawSteno, (PatternGroup key, Greediness))] | |
parseSeries trie hyphenated = case Map.lookup unhyphenated (mapExceptions @key) of | |
Just raws -> sequence | |
( checkException . second (, 0) <$> raws | |
) | |
Nothing -> | |
let | |
-- calculate result for lower case word | |
eAcronym = runParser ((,) <$> acronym <*> getInput) () "" hyphenated | |
(hyphenated', isAcronym) = case eAcronym of | |
Right (syls, rem) -> (Text.intercalate "|" syls <> rem, True) | |
Left _ -> (hyphenated, False) | |
str = Text.encodeUtf8 $ toLower hyphenated' | |
st = State | |
{ stStrRawSteno = "" | |
, stNLetters = 0 | |
, stNChords = 1 | |
, stMFinger = Nothing | |
, stMLastKey = Nothing | |
, stMPatternGroup = Nothing | |
} | |
levels = | |
catMaybes | |
$ lsPatterns @key | |
<&> \(g, patterns) -> if any (`isInfixOf` str) patterns | |
then Just g | |
else Nothing | |
lsResultLc = | |
levels <&> \maxG -> | |
((maxG, False), ) $ optimizeStenoSeries trie maxG st str | |
lsResult | |
| isAcronym = second (mapSuccess $ addAcronymChord @key) <$> lsResultLc | |
| isCapitalized hyphenated = | |
let | |
lsNoCOpt = | |
second (mapSuccess $ addCapChord @key) | |
<$> lsResultLc | |
lsYesCOpt = first (second $ const True) <$> lsResultLc | |
in | |
lsNoCOpt ++ lsYesCOpt | |
| otherwise = lsResultLc | |
in | |
case sortOn (Down . uncurry scoreWithG) lsResult of | |
(_, Failure raw err) : _ -> Left $ PEParsec raw err | |
[] -> Left $ PEImpossible $ "Empty list for: " <> hyphenated | |
ls -> Right $ filterAlts $ snd <$> ls | |
where | |
checkException (raw, patG) = case parseWord @key raw of | |
Right chords -> Right (unparts (fromChord <$> chords), patG) | |
Left err -> Left $ PEExceptionTable | |
$ unhyphenated | |
<> ": " | |
<> unRawSteno raw | |
<> "; " | |
<> Text.pack (show err) | |
unhyphenated = replace "|" "" hyphenated | |
filterAlts | |
:: [Result (State key)] | |
-> [(RawSteno, (PatternGroup key, Greediness))] | |
filterAlts [] = [] | |
filterAlts (Failure _ _ : as) = filterAlts as | |
filterAlts (Success state : as) = | |
let rawSteno = stStrRawSteno state | |
pg = case stMPatternGroup state of | |
Just patG -> patG | |
Nothing -> error "impossible: pattern group Nothing" | |
distinct (Failure _ _) = False | |
distinct (Success st2) = rawSteno /= stStrRawSteno st2 | |
as'= filter distinct as | |
in (RawSteno rawSteno, pg) : filterAlts as' | |
-- | Try to fit as many letters as possible into a steno | |
-- chord (a chord contains keys that can be typed all at once). | |
-- The score of a chord is the number of letters it successfully | |
-- encoded, w/o the hyphenation symbol. | |
-- | |
-- look at next character: | |
-- '|' -> do | |
-- consume character | |
-- (steno1, score1) = append '/', recursion with increased chord count and | |
-- (steno2, score2) = recursion | |
-- return steno with highest score | |
-- otherwise -> get matches from primtive trie | |
-- for every match: | |
-- consume and recursion with remaining string ... | |
-- ... increase letter count by match length | |
-- return steno with highest score | |
optimizeStenoSeries | |
:: forall key | |
. Palantype key | |
=> Trie [(Greediness, RawSteno, PatternGroup key)] | |
-> Greediness | |
-> State key | |
-> ByteString | |
-> Result (State key) | |
optimizeStenoSeries _ _ st "" = Success st | |
optimizeStenoSeries trie g st str | BS.head str == bsPipe = | |
let newState = | |
st { stStrRawSteno = stStrRawSteno st <> "/" | |
, stNChords = stNChords st + 1 | |
, stMFinger = Nothing | |
, stMLastKey = Nothing | |
, stMPatternGroup = | |
max (Just (patSimpleMulti, 0)) $ stMPatternGroup st | |
} | |
str' = BS.tail str | |
r1 = optimizeStenoSeries trie g newState str' | |
r2 = optimizeStenoSeries trie g st str' | |
in maximumBy (comparing score) [r1, r2] | |
optimizeStenoSeries trie g st str = | |
let | |
matches = filterGreediness $ flatten $ Trie.matches trie str | |
matchToResult (consumed, (greediness, raw, pg), rem) = | |
case parseKey (greediness, raw) | |
(stMFinger st) | |
(stMLastKey st) of | |
Left err -> Failure raw err | |
Right (strRaw, (mFinger, mLK)) -> | |
let newState = State | |
{ stStrRawSteno = stStrRawSteno st <> strRaw | |
, stNLetters = stNLetters st + countLetters consumed | |
, stNChords = stNChords st + countChords strRaw | |
, stMFinger = mFinger | |
, stMLastKey = mLK | |
, stMPatternGroup = | |
max (Just (pg, greediness)) $ stMPatternGroup st | |
} | |
in optimizeStenoSeries trie g newState rem | |
results = case matches of | |
[] -> [Failure "" $ Parsec.newErrorUnknown (initialPos "")] | |
ms -> matchToResult <$> ms | |
in maximumBy (comparing score) results | |
where | |
filterGreediness | |
:: [(ByteString, (Greediness, RawSteno, PatternGroup key), ByteString)] | |
-> [(ByteString, (Greediness, RawSteno, PatternGroup key), ByteString)] | |
filterGreediness = filter (\(_, (g', _, _), _) -> g' <= g) | |
flatten | |
:: [(ByteString, [(Greediness, RawSteno, PatternGroup key)], ByteString)] | |
-> [(ByteString, (Greediness, RawSteno, PatternGroup key), ByteString)] | |
flatten = mconcat . fmap expand | |
where | |
expand | |
:: (ByteString, [(Greediness, RawSteno, PatternGroup key)], ByteString) | |
-> [(ByteString, (Greediness, RawSteno, PatternGroup key), ByteString)] | |
expand (c, rs, rem) = (c, , rem) <$> rs | |
parseKey | |
:: (Greediness, RawSteno) | |
-> Maybe Finger | |
-> Maybe key | |
-> Either Parsec.ParseError | |
(Text, (Maybe Finger, Maybe key)) | |
parseKey (_, RawSteno s) mFinger' mLastKey' = | |
let | |
mFinger = mFinger' | |
mLastKey = mLastKey' | |
ePair = runParser ((,) <$> keysWithSlash <*> getState) | |
(mFinger, mLastKey) "" s | |
makeRawStr = | |
mconcat <<< intersperse "/" <<< fmap (mconcat <<< fmap showt) | |
in | |
first makeRawStr <$> ePair | |
keysWithSlash :: Parsec Text (Maybe Finger, Maybe key) [[key]] | |
keysWithSlash = | |
sepBy1 keys (char '/' *> setState (Nothing, Nothing)) <* eof |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment