Skip to content

Instantly share code, notes, and snippets.

@sadache
Created April 8, 2010 16:17
Show Gist options
  • Save sadache/360229 to your computer and use it in GitHub Desktop.
Save sadache/360229 to your computer and use it in GitHub Desktop.
-----------------------------------------------------------------------------
--
-- Module : RegexLang
-- Copyright :
-- License : AllRightsReserved
--
-- Maintainer :
-- Stability :
-- Portability :
--
-- |
--
-----------------------------------------------------------------------------
module Main (
) where
{- Enumerate the strings for a regular expression
Order by length, and lexicographically within length.
enum "a(c|b)|d" = ["d","ab","ac"]
enum "(b|ab*a)*" = ["","b","aa","bb","aab","aba","aab","baa","bbb"...
-}
import Data.Char (isAlpha)
type Lang = [String]
alt :: Lang → Lang → Lang -- alternation (merge languages)
cat :: Lang → Lang → Lang -- catenation (product of languages)
clo :: Lang → Lang -- Kleene closure
enum :: String → Lang
alt xs@(x:xt) ys@(y:yt) = case compare (length x, x) (length y, y) of
LT → x : alt xt ys
EQ → x : alt xt yt
GT → y : alt xs yt
alt xs ys = xs ⊕ ys
cat (x:xt) ys@(y:yt) = (x⊕y) : alt (cat [x] yt) (cat xt ys)
cat _ _ = []
clo [] = [""]
clo ("":xs) = clo xs
clo xs = "" : cat xs (clo xs)
enum s = parse [] s
{- Shift-reduce parser of regular expressions
parse k s: k is current stack; s is tail of reg exp.
Each stack symbol carries its language.
Stack initally empty, finally holds one alternation.
-}
data StkSym = P Lang -- Primary ::= letter | "()" | "(" A ")"
| C Lang -- Catenation ::= P | P "*" | C C
| A Lang -- Alternation ::= C | A "|" A
| L -- "("
type Stack = [StkSym] -- head of list is top of stack
parse :: Stack → String → Lang
parse (P(x):z) ('*':s) = parse (C(clo x):z) s
parse (P(x):z) s = parse (C(x):z) s
parse (C(y):C(x):z) s = parse (C(cat x y):z) s
parse (C(x):z) ('|':s) = parse (A(x):z) s
parse (C(x):z) s@(')':_) = parse (A(x):z) s
parse (C(x):z) s@"" = parse (A(x):z) s
parse (A(y):A(x):z) s = parse (A(alt x y):z) s
parse (A(x):L:z) (')':s) = parse (P(x):z) s
parse (L:z) (')':s) = parse (P[""]:z) s
parse z ('(':s) = parse (L:z) s
parse z (c:s) | isAlpha c = parse (P[[c]]:z) s
parse [A(x)] "" = x
parse _ s = error ("suffix where parse failed: \""++s++"\"")
main = putStrLn $ show $ take 100 $ enum "mari(a|o)*na"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment