Skip to content

Instantly share code, notes, and snippets.

@DKurilo
Last active February 11, 2020 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DKurilo/611d736cfb76947a5684a8c8470967b9 to your computer and use it in GitHub Desktop.
Save DKurilo/611d736cfb76947a5684a8c8470967b9 to your computer and use it in GitHub Desktop.
-- to add colors like dark-green or dark\ngreen in this algorithm you need to add each different phrase
-- as sparate phrase or to modify AhoCorassick to use wildcards. It's possible and even easy to do it.
-- But you need to change package.
module Main where
import Data.Char (isAlphaNum)
import Data.List.Split (splitOn)
import qualified Data.Map as M
import System.Environment
import System.IO
import Text.AhoCorasick
processStringWithMachine :: StateMachine Char String -> M.Map String String -> String -> String
processStringWithMachine sm m cs = (lastStep . foldl go ("", 0, Position 0 0 "", cs)) ps
where ps = findAll sm cs
go :: (String, Int, Position String, String) -> Position String -> (String, Int, Position String, String)
go (cs', cursor, lastPos, rest) pos
| pIndex pos > cursor' && wholeWord = (cs' ++ take (pIndex lastPos - cursor) rest
++ (replace . pVal) lastPos
, cursor'
, pos
, drop (cursor' - cursor) rest
) -- next word, applying previous
| pIndex pos == pIndex lastPos && pLength pos > pLength lastPos && wholeWord =
(cs', cursor, pos, rest) -- new match is longer, use it
| otherwise = (cs', cursor, lastPos, rest) -- just skip this match
where cursor' = pIndex lastPos + pLength lastPos
wholeWord = isWordBoundary rest (pIndex pos - cursor - 1) -- before phrase
&& isWordBoundary rest (pIndex pos + pLength pos - cursor) -- after phrase
lastStep :: (String, Int, Position String, String) -> String
lastStep (cs', cursor, pos, rest) = cs'
++ take (pIndex pos - cursor) rest
++ (replace . pVal) pos
++ drop (pIndex pos - cursor + pLength pos) rest
replace :: String -> String
replace "" = ""
replace cs' = case cs' `M.lookup` m of
Just color -> "<span class=\"color\" style=\"color: " ++ color ++ "\">" ++ cs' ++ "</span>"
_ -> cs'
isWordBoundary :: String -> Int -> Bool
isWordBoundary cs n
| n < 0 || n >= length cs = True
| otherwise = (not . isAlphaNum) c
where c = cs !! n
main :: IO ()
main = do
colorMap <- M.fromList
. map (\(w1: w2: _) -> (w1, w2)) . filter (\ws -> length ws > 1)
. map (splitOn "\t") . filter (\cs -> (not . null) cs && head cs /= '#') . lines
<$> readFile "./rgb.txt"
let acMachine = (makeSimpleStateMachine . M.keys) colorMap
filename: _ <- getArgs
updatedString <- processStringWithMachine acMachine colorMap <$> readFile filename
writeFile (filename ++ ".out") updatedString
{ nixpkgs ? import <nixpkgs> {} }:
let
inherit (nixpkgs) pkgs;
myAhoCorasickSrc = pkgs.fetchFromGitHub {
owner = "stackbuilders";
repo = "AhoCorasick";
rev = "9a825aef5d19c707d2306befca688a1a72d50bb0";
sha256 = "1hgpbiqslqskrbgkv60vdzbak7sg4kxhi8qvghfw6fnngxd8sdb1";
};
myHaskellPackages = pkgs.haskellPackages.override {
overrides = self: super: with pkgs.haskell.lib; {
AhoCorasick = super.callCabal2nix "AhoCorasick" myAhoCorasickSrc {};
};
};
ghc = myHaskellPackages.ghcWithPackages (ps: with ps; [
containers
split
AhoCorasick
]);
in
pkgs.mkShell {
name = "color-highlighter-annotator-env";
buildInputs = [ ghc ];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment