Heimdell/Lexer.hs

## Lexer.hs
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Redundant multi-way if" #-}

{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DeriveTraversable #-}

{- | Implementation of Thompson scanner (regexp).

     Mostly taken verbatim from
     https://www.cs.kent.ac.uk/people/staff/sjt/craft2e/regExp.pdf.

     Higly unefficient in generation of DFA, slightly inefficient in matching
     (we're calling `<=` on `Set Int` here multiple times per character).

-}

module Lexer where

import Control.Monad.Except (throwError)
import Control.Monad.State ( runState, evalState, get, put, State )
import Data.Map (Map)
import Data.Map qualified as Map
import Data.Maybe (listToMaybe)
import Data.Set (Set)
import Data.Set qualified as Set
import Data.String (IsString)
import GHC.Exts (IsString(fromString))

import Debug.Trace (traceShowM)

-- | Regular expression.
--
data Reg a
  = Eps                   -- ^ Empty string
  | Lit  a                -- ^ One char
  | And  (Reg a) (Reg a)  -- ^ Sequence
  | Or   (Reg a) (Reg a)  -- ^ Selection
  | Star (Reg a)          -- ^ Kleene star (0 or more repeats)
  deriving stock (Eq, Ord, Functor, Foldable)

-- | Alphabet - set of all used characters ("literals").
--
literals :: Ord a => Reg a -> Set a
literals = foldMap Set.singleton

-- | ToString().
--
instance Show (Reg Char) where
  show :: Reg Char -> String
  show = \case
    Eps           -> ""
    Lit  c        -> [c]
    And  reg reg' -> show reg <> show reg'
    Or   reg reg' -> "(" <> show reg <> "|" <> show reg' <> ")"
    Star reg      -> "(" <> show reg <> ")*"

-- | FromString().
--
instance IsString (Reg Char) where
  fromString :: String -> Reg Char
  fromString = foldr (And . Lit) Eps

-- | Indetermistic finite automaton.
--
data NFA c s = NFA
  { nfaStates :: Set s           -- ^ All automaton states
  , nfaMoves  :: Set (Move c s)  -- ^ All automaton moves
  , nfaStart  :: s               -- ^ Starting (current) state
  , nfaEnds   :: Set s           -- ^ All ending states
  }
  deriving stock (Eq, Ord, Show, Foldable)

-- | Automaton moves.
--
data Move c s
  = Move  s c s  -- ^ Move on given literal (character)
  | EMove s   s  -- ^ Non-labeled transition (thus nondeterministic)
  deriving stock (Eq, Ord, Show, Functor, Foldable, Traversable)

-- | Closure of the first state for given alphabet.
--
trans :: (Ord s, Eq c) => NFA c s -> Set c -> Set s
trans nfa = foldr (oneTrans nfa) startset
  where
    startset = closure nfa (Set.singleton (nfaStart nfa))

-- | Closure on the state that happens if we accept this char in this state set.
--
oneTrans :: (Ord s, Eq c) => NFA c s -> c -> Set s -> Set s
oneTrans nfa c s = closure nfa (oneMove nfa c s)

-- | Closure.
--
--   NFA closure is a set of states, that is built from smaller /kernel/ set of
--   states. It is a set of states that is reachable with `EMove` transitions.
--
--   Mechanically, we contract all the states, `EMove`-connected to the /kernel/
--   into one set.
--
closure :: Ord s => NFA c s -> Set s -> Set s
closure NFA {nfaStart, nfaMoves, nfaStates, nfaEnds} =
  limit \set -> Set.union set $ Set.fromList
    [ s
    | x         <- Set.toList set       -- for each kernel state,
    , EMove y s <- Set.toList nfaMoves  -- select `EMove`-s,
    , y == x                            -- connected to that kernel state
    ]

-- | Continiously apply function to the argument until it stops changing it.
--
limit :: Eq a => (a -> a) -> a -> a
limit f a =
  let b = f a
  in
    if a == b
    then a
    else limit f b

-- | Get a set of states that we reach from closure if we accept given literal.
--
oneMove :: (Ord s, Eq c) => NFA c s -> c -> Set s -> Set s
oneMove NFA {nfaStart, nfaMoves, nfaStates, nfaEnds} c x =
  Set.fromList
    [ s
    | t          <- Set.toList x         -- each closure state
    , Move z d s <- Set.toList nfaMoves  -- find all moves
    , z == t                             -- originating from that state
    , d == c                             -- and accepting given symbol
    ]

-- | Convert regular expresion into `NFA`. We use `Int`-s as state names.
--
build :: Ord c => Reg c -> NFA c Int
build = \case
  Eps -> NFA
    { nfaStates = Set.fromList [0, 1]        -- begin, end
    , nfaMoves  = Set.singleton (EMove 0 1)
    , nfaStart  = 0
    , nfaEnds   = Set.singleton 1
    }

  Lit c -> NFA
    { nfaStates = Set.fromList [0, 1]
    , nfaStart  = 0
    , nfaMoves  = Set.singleton (Move 0 c 1)  -- one move on given literal
    , nfaEnds   = Set.singleton 1
    }

  And  reg reg' -> nfaAnd  (build reg) (build reg')  -- join graphs sequentally
  Or   reg reg' -> nfaOr   (build reg) (build reg')  -- join graphs alongside
  Star reg      -> nfaStar (build reg)               -- make a loop
  where
    -- We add 2 states: starting and finishing, and connect both subgraphs to
    -- them.
    --
    -- States are renamed such that 0 would not belong to any of them, and their
    -- nodes do not intersect. The number line looks like that:
    --
    --    0 [nfa1] [nfa2] (m1 + m2 + 1)
    --
    -- where m1, m2 are the sizes of nfa1 and nfa2.
    --
    nfaOr :: Ord c => NFA c Int -> NFA c Int -> NFA c Int
    nfaOr nfa1 nfa2 = NFA
      { nfaStart = 0
      , nfaStates = Set.unions [states1, states2, newStates]
      , nfaMoves  = Set.unions [moves1, moves2, newMoves]
      , nfaEnds   = Set.singleton (m1 + m2 + 1)
      }
      where
        m1 = Set.size (nfaStates nfa1)
        m2 = Set.size (nfaStates nfa2)

        states1 = Set.map (+       1)  (nfaStates nfa1)  -- shift right 1 point
        states2 = Set.map (+ (m1 + 1)) (nfaStates nfa2)  -- shr (1 + nfa1) pts

        newStates = Set.fromList [0, m1 + m2 + 1]

        moves1 = Set.map (fmap (+       1))  (nfaMoves nfa1)
        moves2 = Set.map (fmap (+ (m1 + 1))) (nfaMoves nfa2)

        newMoves = Set.fromList
          [
            EMove   0         1            -- start -> nfa1
          , EMove   0       (m1      + 1)  -- start -> nfa2
          , EMove  m1       (m1 + m2 + 1)  -- nfa1  -> finish
          , EMove (m1 + m2) (m1 + m2 + 1)  -- nfa2  -> finish
          ]

    -- No states are added, we connect graphs in sequence.
    --
    -- States are renamed such that the starting node of @nfa2@ is the finishing
    -- node of @nfa1@.
    --
    -- Number line:
    --
    -- [nfa1... joint]
    --         [joint ...nfa2]
    --
    -- We also add no new moves, because graphs become correctly connected
    -- naturally.
    --
    nfaAnd :: Ord c => NFA c Int -> NFA c Int -> NFA c Int
    nfaAnd nfa1 nfa2 = NFA
      { nfaStart  = 0
      , nfaStates = states1 `Set.union` states2
      , nfaMoves  = moves1  `Set.union` moves2
      , nfaEnds   = Set.singleton (m1 + m2 - 2)
      }
      where
        m1 = Set.size (nfaStates nfa1)
        m2 = Set.size (nfaStates nfa2)

        states1 = nfaStates nfa1
        states2 = Set.map (+ (m1 - 1)) (nfaStates nfa2)  -- shr (1 - nfa1) pts
                                                         -- ... so they can join

        moves1 = nfaMoves nfa1
        moves2 = Set.map (fmap (+ (m1 - 1))) (nfaMoves nfa2)

    -- We add startin anf final point, and wire moves such that graph becomes
    -- a loop around nfa1.
    --
    -- Number line
    --
    --    0 [nfa1] (m1 + 1)
    --
    nfaStar :: Ord c => NFA c Int -> NFA c Int
    nfaStar nfa1  = NFA
      { nfaStart  = 0
      , nfaStates = states1 `Set.union` newStates
      , nfaMoves  = moves1  `Set.union` newMoves
      , nfaEnds   = Set.singleton (m1 + 1)
      }
      where
        m1 = Set.size (nfaStates nfa1)
        newStates = Set.fromList [0, m1 + 1]
        states1 = Set.map       (+ 1)  (nfaStates nfa1)
        moves1  = Set.map (fmap (+ 1)) (nfaMoves  nfa1)

        newMoves = Set.fromList
          [ EMove   0       1   -- start -> nfa1
          , EMove   0 (m1 + 1)  -- start -> finish (remove to get Kleene plus)
          , EMove  m1       1   -- nfa1  -> more nfa1
          , EMove  m1 (m1 + 1)  -- nfa1  -> finish
          ]

-- | Remove (retract) all empty moves (EMoves).
--
--   Naturally, we use state sets instead of states, because all EMove-connected
--   states are indistinguishable, as nothing prevents you from using them
--   at any time.
--
--   So we pull all the states, connected to the kernels into one set, which
--   counts as state for DFA.
--
--   NFA (Set State) == DFA, basically.
--
--   We can view DFA states as EMove-connected groups of NFA states.
--
--   Is done by running `addStep` until no new info is gained.
--
determine
  :: Ord c
  => NFA c Int  -- ^ Automata
  -> Set c      -- ^ Alphabet
  -> NFA c (Set Int)
determine nfa@NFA {nfaStart, nfaMoves, nfaStates, nfaEnds} alph =
    limit (addStep nfa alph) start
  where
    start = NFA
      { nfaStart  = startState
      , nfaStates = Set.singleton startState
      , nfaMoves  = Set.empty
      , nfaEnds   = if
        | nfaEnds `Set.disjoint` startState -> Set.empty
        | otherwise                         -> Set.singleton startState
      }

    startState = closure nfa (Set.singleton nfaStart)

-- | Add all possible transitions of NFA into DFA.
--
addStep
  :: forall c
  .  Ord c
  => NFA c Int        -- ^ NFA
  -> Set c            -- ^ Alphabet
  -> NFA c (Set Int)  -- ^ DFA
  -> NFA c (Set Int)
addStep nfa alph dfa = foldr add dfa (Set.toList (nfaStates dfa))
  where
    add :: Set Int -> NFA c (Set Int) -> NFA c (Set Int)
    add s dfa = Set.foldr (addMove nfa s) dfa alph

-- | Add all NFA transitions on given literal (character) to the DFA.
--
addMove
  :: Ord c
  => NFA c Int        -- ^ NFA
  -> Set Int          -- ^ Current set of states == Current DFA state
  -> c                -- ^ Literal (character)
  -> NFA c (Set Int)  -- ^ DFA
  -> NFA c (Set Int)
addMove
  nfa@NFA { nfaEnds = term }
  x
  c
  dfa@NFA { nfaStart, nfaMoves, nfaEnds, nfaStates}
    =
  NFA
    { nfaStart
    , nfaMoves = moves'
    , nfaStates = states'
    , nfaEnds = ends'
    }
  where
    states' = nfaStates `Set.union` Set.singleton new
    moves'   = nfaMoves `Set.union` Set.singleton (Move x c new)

    ends' -- andd `new` to the end states, if intersects with NFA's end states
      | term `Set.disjoint` new = nfaEnds
      | otherwise               = nfaEnds `Set.union` Set.singleton new

    new = oneTrans nfa c x  -- calculate state on `Move ... c ...` transition

-- | For some arcane reasons, my automatas are polluted by additional DFA state.
--
--   This DFA state is an empty set of NFA states and has all possible
--   connections with all other graph nodes.
--
--   For time being, instead of wrapping `closure`/`oneTrans` with `Maybe`
--   to cut off creation of null-state, I decided to filter it out afterwards.
--
--   TODO: prevent that from happening instead.
--
--   We remove null-set from states, and purge all moves from and to it.
--
filterNFA :: Ord s => NFA c (Set s) -> NFA c (Set s)
filterNFA NFA { nfaStart, nfaEnds, nfaMoves, nfaStates } =
  NFA
    { nfaStart
    , nfaEnds
    , nfaMoves  = Set.filter (not . any Set.null) nfaMoves
    , nfaStates = Set.filter (not .     Set.null) nfaStates
    }

rename :: Ord c => NFA c (Set Int) -> NFA c Int
rename nfa = evalState (traverseNFA renumber nfa) (Map.empty, 0)
  where
    renumber :: Set Int -> State (Map (Set Int) Int, Int) Int
    renumber set = do
      (m, ptr) <- get
      case Map.lookup set m of
        Nothing -> do
          put (Map.insert set ptr m, ptr + 1)
          return ptr

        Just any -> do
          return any


traverseNFA :: (Applicative f, Ord t, Ord c) => (s -> f t) -> NFA c s -> f (NFA c t)
traverseNFA f NFA {nfaStart, nfaStates, nfaMoves, nfaEnds} = do
  nfaStart  <- f nfaStart
  nfaEnds   <- Set.fromList <$>  traverse           f (Set.toList nfaEnds)
  nfaMoves  <- Set.fromList <$> (traverse.traverse) f (Set.toList nfaMoves)
  nfaStates <- Set.fromList <$>  traverse           f (Set.toList nfaStates)
  return NFA {nfaStart, nfaStates, nfaMoves, nfaEnds}

-- | Run DFA on a string to check if string fits the rules.
--
match
  :: (Ord s, Ord c) => NFA c s  -- ^ DFA
  -> [c]              -- ^ Input
  -> Either (Set c, [c]) ()
match dfa = \case
  []  -- end of string should councide with automata bein in final state
    | inEndState dfa -> return ()
    | otherwise      -> throwError (allCharsExpectedHere dfa, [])

  (c : s) -> do
    dfa' <- move dfa c s
    match dfa' s

-- | Consume character, move to new state.
--
move
  :: (Eq s, Ord c)
  => NFA c s  -- ^ DFA
  -> c        -- ^ Next character
  -> [c]      -- ^ Rest of the string (for error reports)
  -> Either (Set c, [c]) (NFA c s)  -- ^ Error or new automata
move dfa c s = do
  case
      [ dfa {nfaStart = d}  -- I reuse starting state as current
      | Move s c' d <- Set.toList (nfaMoves dfa)  -- all moves
      , s == nfaStart dfa                         -- from current one
      , c == c'                                   -- on that char
      ]
   of
    []      -> throwError (allCharsExpectedHere dfa, c : s)  -- no moves, die
    nfa : _ -> return nfa  -- no more than one move should exist

-- | Find all characters that are exprected in current state.
--
allCharsExpectedHere :: (Eq s, Ord c) => NFA c s -> Set c
allCharsExpectedHere dfa = Set.fromList
  [ c
  | Move s c _ <- Set.toList (nfaMoves dfa)
  , s == nfaStart dfa
  ]

-- | Check if DFA is in its final state.
--
inEndState :: Ord s => NFA c s -> Bool
inEndState dfa = Set.member (nfaStart dfa) (nfaEnds dfa)

-- | Convert regiular expression to DFA.
--
dfa :: Reg Char -> NFA Char (Set Int)
dfa reg = filterNFA $ determine (build reg) (literals reg)

--------------------------------------------------------------------------------

auto :: NFA Char (Set Int)
auto = dfa $ Star (fromString "hello " `Or` fromString "hi ")

nfa :: NFA Char Int
nfa = build $ Star (fromString "hello " `Or` fromString "hi ")

testOk :: Either (Set Char, String) ()
testOk = match auto "hi hi hello hello hi "

testErr :: Either (Set Char, String) ()
testErr = match auto "hi hi hell hi hello hi"
	{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
	{-# HLINT ignore "Redundant multi-way if" #-}

	{-# LANGUAGE ApplicativeDo #-}
	{-# LANGUAGE BlockArguments #-}
	{-# LANGUAGE DerivingStrategies #-}
	{-# LANGUAGE FlexibleInstances #-}
	{-# LANGUAGE ImportQualifiedPost #-}
	{-# LANGUAGE InstanceSigs #-}
	{-# LANGUAGE LambdaCase #-}
	{-# LANGUAGE MultiWayIf #-}
	{-# LANGUAGE NamedFieldPuns #-}
	{-# LANGUAGE ScopedTypeVariables #-}
	{-# LANGUAGE DeriveTraversable #-}

	{- \| Implementation of Thompson scanner (regexp).

	Mostly taken verbatim from
	https://www.cs.kent.ac.uk/people/staff/sjt/craft2e/regExp.pdf.

	Higly unefficient in generation of DFA, slightly inefficient in matching
	(we're calling `<=` on `Set Int` here multiple times per character).

	-}

	module Lexer where

	import Control.Monad.Except (throwError)
	import Control.Monad.State ( runState, evalState, get, put, State )
	import Data.Map (Map)
	import Data.Map qualified as Map
	import Data.Maybe (listToMaybe)
	import Data.Set (Set)
	import Data.Set qualified as Set
	import Data.String (IsString)
	import GHC.Exts (IsString(fromString))

	import Debug.Trace (traceShowM)

	-- \| Regular expression.
	--
	data Reg a
	= Eps -- ^ Empty string
	\| Lit a -- ^ One char
	\| And (Reg a) (Reg a) -- ^ Sequence
	\| Or (Reg a) (Reg a) -- ^ Selection
	\| Star (Reg a) -- ^ Kleene star (0 or more repeats)
	deriving stock (Eq, Ord, Functor, Foldable)

	-- \| Alphabet - set of all used characters ("literals").
	--
	literals :: Ord a => Reg a -> Set a
	literals = foldMap Set.singleton

	-- \| ToString().
	--
	instance Show (Reg Char) where
	show :: Reg Char -> String
	show = \case
	Eps -> ""
	Lit c -> [c]
	And reg reg' -> show reg <> show reg'
	Or reg reg' -> "(" <> show reg <> "\|" <> show reg' <> ")"
	Star reg -> "(" <> show reg <> ")*"

	-- \| FromString().
	--
	instance IsString (Reg Char) where
	fromString :: String -> Reg Char
	fromString = foldr (And . Lit) Eps

	-- \| Indetermistic finite automaton.
	--
	data NFA c s = NFA
	{ nfaStates :: Set s -- ^ All automaton states
	, nfaMoves :: Set (Move c s) -- ^ All automaton moves
	, nfaStart :: s -- ^ Starting (current) state
	, nfaEnds :: Set s -- ^ All ending states
	}
	deriving stock (Eq, Ord, Show, Foldable)

	-- \| Automaton moves.
	--
	data Move c s
	= Move s c s -- ^ Move on given literal (character)
	\| EMove s s -- ^ Non-labeled transition (thus nondeterministic)
	deriving stock (Eq, Ord, Show, Functor, Foldable, Traversable)

	-- \| Closure of the first state for given alphabet.
	--
	trans :: (Ord s, Eq c) => NFA c s -> Set c -> Set s
	trans nfa = foldr (oneTrans nfa) startset
	where
	startset = closure nfa (Set.singleton (nfaStart nfa))

	-- \| Closure on the state that happens if we accept this char in this state set.
	--
	oneTrans :: (Ord s, Eq c) => NFA c s -> c -> Set s -> Set s
	oneTrans nfa c s = closure nfa (oneMove nfa c s)

	-- \| Closure.
	--
	-- NFA closure is a set of states, that is built from smaller /kernel/ set of
	-- states. It is a set of states that is reachable with `EMove` transitions.
	--
	-- Mechanically, we contract all the states, `EMove`-connected to the /kernel/
	-- into one set.
	--
	closure :: Ord s => NFA c s -> Set s -> Set s
	closure NFA {nfaStart, nfaMoves, nfaStates, nfaEnds} =
	limit \set -> Set.union set $ Set.fromList
	[ s
	\| x <- Set.toList set -- for each kernel state,
	, EMove y s <- Set.toList nfaMoves -- select `EMove`-s,
	, y == x -- connected to that kernel state
	]

	-- \| Continiously apply function to the argument until it stops changing it.
	--
	limit :: Eq a => (a -> a) -> a -> a
	limit f a =
	let b = f a
	in
	if a == b
	then a
	else limit f b

	-- \| Get a set of states that we reach from closure if we accept given literal.
	--
	oneMove :: (Ord s, Eq c) => NFA c s -> c -> Set s -> Set s
	oneMove NFA {nfaStart, nfaMoves, nfaStates, nfaEnds} c x =
	Set.fromList
	[ s
	\| t <- Set.toList x -- each closure state
	, Move z d s <- Set.toList nfaMoves -- find all moves
	, z == t -- originating from that state
	, d == c -- and accepting given symbol
	]

	-- \| Convert regular expresion into `NFA`. We use `Int`-s as state names.
	--
	build :: Ord c => Reg c -> NFA c Int
	build = \case
	Eps -> NFA
	{ nfaStates = Set.fromList [0, 1] -- begin, end
	, nfaMoves = Set.singleton (EMove 0 1)
	, nfaStart = 0
	, nfaEnds = Set.singleton 1
	}

	Lit c -> NFA
	{ nfaStates = Set.fromList [0, 1]
	, nfaStart = 0
	, nfaMoves = Set.singleton (Move 0 c 1) -- one move on given literal
	, nfaEnds = Set.singleton 1
	}

	And reg reg' -> nfaAnd (build reg) (build reg') -- join graphs sequentally
	Or reg reg' -> nfaOr (build reg) (build reg') -- join graphs alongside
	Star reg -> nfaStar (build reg) -- make a loop
	where
	-- We add 2 states: starting and finishing, and connect both subgraphs to
	-- them.
	--
	-- States are renamed such that 0 would not belong to any of them, and their
	-- nodes do not intersect. The number line looks like that:
	--
	-- 0 [nfa1] [nfa2] (m1 + m2 + 1)
	--
	-- where m1, m2 are the sizes of nfa1 and nfa2.
	--
	nfaOr :: Ord c => NFA c Int -> NFA c Int -> NFA c Int
	nfaOr nfa1 nfa2 = NFA
	{ nfaStart = 0
	, nfaStates = Set.unions [states1, states2, newStates]
	, nfaMoves = Set.unions [moves1, moves2, newMoves]
	, nfaEnds = Set.singleton (m1 + m2 + 1)
	}
	where
	m1 = Set.size (nfaStates nfa1)
	m2 = Set.size (nfaStates nfa2)

	states1 = Set.map (+ 1) (nfaStates nfa1) -- shift right 1 point
	states2 = Set.map (+ (m1 + 1)) (nfaStates nfa2) -- shr (1 + nfa1) pts

	newStates = Set.fromList [0, m1 + m2 + 1]

	moves1 = Set.map (fmap (+ 1)) (nfaMoves nfa1)
	moves2 = Set.map (fmap (+ (m1 + 1))) (nfaMoves nfa2)

	newMoves = Set.fromList
	[
	EMove 0 1 -- start -> nfa1
	, EMove 0 (m1 + 1) -- start -> nfa2
	, EMove m1 (m1 + m2 + 1) -- nfa1 -> finish
	, EMove (m1 + m2) (m1 + m2 + 1) -- nfa2 -> finish
	]

	-- No states are added, we connect graphs in sequence.
	--
	-- States are renamed such that the starting node of @nfa2@ is the finishing
	-- node of @nfa1@.
	--
	-- Number line:
	--
	-- [nfa1... joint]
	-- [joint ...nfa2]
	--
	-- We also add no new moves, because graphs become correctly connected
	-- naturally.
	--
	nfaAnd :: Ord c => NFA c Int -> NFA c Int -> NFA c Int
	nfaAnd nfa1 nfa2 = NFA
	{ nfaStart = 0
	, nfaStates = states1 `Set.union` states2
	, nfaMoves = moves1 `Set.union` moves2
	, nfaEnds = Set.singleton (m1 + m2 - 2)
	}
	where
	m1 = Set.size (nfaStates nfa1)
	m2 = Set.size (nfaStates nfa2)

	states1 = nfaStates nfa1
	states2 = Set.map (+ (m1 - 1)) (nfaStates nfa2) -- shr (1 - nfa1) pts
	-- ... so they can join

	moves1 = nfaMoves nfa1
	moves2 = Set.map (fmap (+ (m1 - 1))) (nfaMoves nfa2)

	-- We add startin anf final point, and wire moves such that graph becomes
	-- a loop around nfa1.
	--
	-- Number line
	--
	-- 0 [nfa1] (m1 + 1)
	--
	nfaStar :: Ord c => NFA c Int -> NFA c Int
	nfaStar nfa1 = NFA
	{ nfaStart = 0
	, nfaStates = states1 `Set.union` newStates
	, nfaMoves = moves1 `Set.union` newMoves
	, nfaEnds = Set.singleton (m1 + 1)
	}
	where
	m1 = Set.size (nfaStates nfa1)
	newStates = Set.fromList [0, m1 + 1]
	states1 = Set.map (+ 1) (nfaStates nfa1)
	moves1 = Set.map (fmap (+ 1)) (nfaMoves nfa1)

	newMoves = Set.fromList
	[ EMove 0 1 -- start -> nfa1
	, EMove 0 (m1 + 1) -- start -> finish (remove to get Kleene plus)
	, EMove m1 1 -- nfa1 -> more nfa1
	, EMove m1 (m1 + 1) -- nfa1 -> finish
	]

	-- \| Remove (retract) all empty moves (EMoves).
	--
	-- Naturally, we use state sets instead of states, because all EMove-connected
	-- states are indistinguishable, as nothing prevents you from using them
	-- at any time.
	--
	-- So we pull all the states, connected to the kernels into one set, which
	-- counts as state for DFA.
	--
	-- NFA (Set State) == DFA, basically.
	--
	-- We can view DFA states as EMove-connected groups of NFA states.
	--
	-- Is done by running `addStep` until no new info is gained.
	--
	determine
	:: Ord c
	=> NFA c Int -- ^ Automata
	-> Set c -- ^ Alphabet
	-> NFA c (Set Int)
	determine nfa@NFA {nfaStart, nfaMoves, nfaStates, nfaEnds} alph =
	limit (addStep nfa alph) start
	where
	start = NFA
	{ nfaStart = startState
	, nfaStates = Set.singleton startState
	, nfaMoves = Set.empty
	, nfaEnds = if
	\| nfaEnds `Set.disjoint` startState -> Set.empty
	\| otherwise -> Set.singleton startState
	}

	startState = closure nfa (Set.singleton nfaStart)

	-- \| Add all possible transitions of NFA into DFA.
	--
	addStep
	:: forall c
	. Ord c
	=> NFA c Int -- ^ NFA
	-> Set c -- ^ Alphabet
	-> NFA c (Set Int) -- ^ DFA
	-> NFA c (Set Int)
	addStep nfa alph dfa = foldr add dfa (Set.toList (nfaStates dfa))
	where
	add :: Set Int -> NFA c (Set Int) -> NFA c (Set Int)
	add s dfa = Set.foldr (addMove nfa s) dfa alph

	-- \| Add all NFA transitions on given literal (character) to the DFA.
	--
	addMove
	:: Ord c
	=> NFA c Int -- ^ NFA
	-> Set Int -- ^ Current set of states == Current DFA state
	-> c -- ^ Literal (character)
	-> NFA c (Set Int) -- ^ DFA
	-> NFA c (Set Int)
	addMove
	nfa@NFA { nfaEnds = term }
	x
	c
	dfa@NFA { nfaStart, nfaMoves, nfaEnds, nfaStates}
	=
	NFA
	{ nfaStart
	, nfaMoves = moves'
	, nfaStates = states'
	, nfaEnds = ends'
	}
	where
	states' = nfaStates `Set.union` Set.singleton new
	moves' = nfaMoves `Set.union` Set.singleton (Move x c new)

	ends' -- andd `new` to the end states, if intersects with NFA's end states
	\| term `Set.disjoint` new = nfaEnds
	\| otherwise = nfaEnds `Set.union` Set.singleton new

	new = oneTrans nfa c x -- calculate state on `Move ... c ...` transition

	-- \| For some arcane reasons, my automatas are polluted by additional DFA state.
	--
	-- This DFA state is an empty set of NFA states and has all possible
	-- connections with all other graph nodes.
	--
	-- For time being, instead of wrapping `closure`/`oneTrans` with `Maybe`
	-- to cut off creation of null-state, I decided to filter it out afterwards.
	--
	-- TODO: prevent that from happening instead.
	--
	-- We remove null-set from states, and purge all moves from and to it.
	--
	filterNFA :: Ord s => NFA c (Set s) -> NFA c (Set s)
	filterNFA NFA { nfaStart, nfaEnds, nfaMoves, nfaStates } =
	NFA
	{ nfaStart
	, nfaEnds
	, nfaMoves = Set.filter (not . any Set.null) nfaMoves
	, nfaStates = Set.filter (not . Set.null) nfaStates
	}

	rename :: Ord c => NFA c (Set Int) -> NFA c Int
	rename nfa = evalState (traverseNFA renumber nfa) (Map.empty, 0)
	where
	renumber :: Set Int -> State (Map (Set Int) Int, Int) Int
	renumber set = do
	(m, ptr) <- get
	case Map.lookup set m of
	Nothing -> do
	put (Map.insert set ptr m, ptr + 1)
	return ptr

	Just any -> do
	return any


	traverseNFA :: (Applicative f, Ord t, Ord c) => (s -> f t) -> NFA c s -> f (NFA c t)
	traverseNFA f NFA {nfaStart, nfaStates, nfaMoves, nfaEnds} = do
	nfaStart <- f nfaStart
	nfaEnds <- Set.fromList <$> traverse f (Set.toList nfaEnds)
	nfaMoves <- Set.fromList <$> (traverse.traverse) f (Set.toList nfaMoves)
	nfaStates <- Set.fromList <$> traverse f (Set.toList nfaStates)
	return NFA {nfaStart, nfaStates, nfaMoves, nfaEnds}

	-- \| Run DFA on a string to check if string fits the rules.
	--
	match
	:: (Ord s, Ord c) => NFA c s -- ^ DFA
	-> [c] -- ^ Input
	-> Either (Set c, [c]) ()
	match dfa = \case
	[] -- end of string should councide with automata bein in final state
	\| inEndState dfa -> return ()
	\| otherwise -> throwError (allCharsExpectedHere dfa, [])

	(c : s) -> do
	dfa' <- move dfa c s
	match dfa' s

	-- \| Consume character, move to new state.
	--
	move
	:: (Eq s, Ord c)
	=> NFA c s -- ^ DFA
	-> c -- ^ Next character
	-> [c] -- ^ Rest of the string (for error reports)
	-> Either (Set c, [c]) (NFA c s) -- ^ Error or new automata
	move dfa c s = do
	case
	[ dfa {nfaStart = d} -- I reuse starting state as current
	\| Move s c' d <- Set.toList (nfaMoves dfa) -- all moves
	, s == nfaStart dfa -- from current one
	, c == c' -- on that char
	]
	of
	[] -> throwError (allCharsExpectedHere dfa, c : s) -- no moves, die
	nfa : _ -> return nfa -- no more than one move should exist

	-- \| Find all characters that are exprected in current state.
	--
	allCharsExpectedHere :: (Eq s, Ord c) => NFA c s -> Set c
	allCharsExpectedHere dfa = Set.fromList
	[ c
	\| Move s c _ <- Set.toList (nfaMoves dfa)
	, s == nfaStart dfa
	]

	-- \| Check if DFA is in its final state.
	--
	inEndState :: Ord s => NFA c s -> Bool
	inEndState dfa = Set.member (nfaStart dfa) (nfaEnds dfa)

	-- \| Convert regiular expression to DFA.
	--
	dfa :: Reg Char -> NFA Char (Set Int)
	dfa reg = filterNFA $ determine (build reg) (literals reg)

	--------------------------------------------------------------------------------

	auto :: NFA Char (Set Int)
	auto = dfa $ Star (fromString "hello " `Or` fromString "hi ")

	nfa :: NFA Char Int
	nfa = build $ Star (fromString "hello " `Or` fromString "hi ")

	testOk :: Either (Set Char, String) ()
	testOk = match auto "hi hi hello hello hi "

	testErr :: Either (Set Char, String) ()
	testErr = match auto "hi hi hell hi hello hi"