pbgc/ShowParser.hs

## ShowParser.hs
{-|

the test example we are parsing (show [rec1,rec2]):

[
  MkPersonRecord {
    name = "Wim Vanderbauwhede",
    address = MkAddress {
        line1 = "School of Computing Science",
        number = 17,
        street = "Lilybank Gdns",
        town = "Glasgow",
        postcode = "G12 8QQ"
    },
    id = 557188,
    labels = [Green,Red]
  },
  MkPersonRecord {
    name = "Jeremy Singer",
    address = MkAddress {
        line1 = "School of Computing Science",
        number = 17,
        street = "Lilybank Gdns",
        town = "Glasgow",
        postcode = "G12 8QQ"
    },
    id = 42,
    labels = [Blue,Yellow]
  }
]

parsed output:

<?xml version="1.0" encoding="UTF-8"?>
<list>
    <list-elt>
        <record name="MkPersonRecord">
           <elt key="name">"Wim Vanderbauwhede"</elt>
           <elt key="address">
               <record name="MkAddress">
                   <elt key="line1">"School of Computing Science"</elt>
                   <elt key="number">17</elt>
                   <elt key="street">"Lilybank Gdns"</elt>
                   <elt key="town">"Glasgow"</elt>
                   <elt key="postcode">"G12 8QQ"</elt>
               </record>
           </elt>
           <elt key="id">557188</elt>
           <elt key="labels">
               <list>
                   <list-elt>
                       <adt>Green</adt>
                   </list-elt>
                   <list-elt>
                       <adt>Red</adt>
                   </list-elt>
               </list>
           </elt>
        </record>
    </list-elt>
    <list-elt>
        <record name="MkPersonRecord">
            <elt key="name">"Jeremy Singer"</elt>
            <elt key="address">
                <record name="MkAddress">
                    <elt key="line1">"School of Computing Science"</elt>
                    <elt key="number">17</elt>
                    <elt key="street">"Lilybank Gdns"</elt>
                    <elt key="town">"Glasgow"</elt>
                    <elt key="postcode">"G12 8QQ"</elt>
                </record>
            </elt>
            <elt key="id">42</elt>
            <elt key="labels">
                <list>
                    <list-elt>
                        <adt>Blue</adt>
                    </list-elt>
                    <list-elt>
                        <adt>Yellow</adt>
                    </list-elt>
                </list>
            </elt>
        </record>
    </list-elt>
</list>

-}

module ShowParser ( parseShow ) where
-- renamed original run_parser to runParser (hlint allways advice to use camelCase)
-- had do hide Text.ParserCombinators.Parsec.runParser because it was clashing and GHC refused to compile
-- could just use another name .. but .. learned about hiding :)
import Text.ParserCombinators.Parsec hiding (runParser)
import qualified Text.ParserCombinators.Parsec.Token as P
import Text.ParserCombinators.Parsec.Language
import Data.List ( intercalate )


xmlHeader =  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"

parseShow :: String -> String
parseShow str = xmlHeader ++ runParser showParser str

-- used :type parse on GHCI and got:
-- parse :: Text.Parsec.Prim.Stream s Data.Functor.Identity.Identity t => Text.Parsec.Prim.Parsec s () a -> SourceName -> s -> Either ParseError a
-- The Either type represents values with two possibilities: a value of type Either a b is either Left a or Right b.
-- The Either type is sometimes used to represent a value which is either correct or an error;
-- by convention, the Left constructor is used to hold an error value
-- and the Right constructor is used to hold a correct value (mnemonic: "right" also means "correct").
--
-- We are applying parser p to the string str
-- If we get an error (Left err) we throw an exception message (with error)
-- otherwise we return the parsed value
runParser :: Parser a -> String -> a
runParser p str =  case parse p "" str of
    Left err -> error $ "parse error at " ++ show err
    Right val  -> val

-- ---------------------------------------------------------------
-- helper functions to create XML tags with and without attributes
-- ---------------------------------------------------------------

openTag tag = "<" ++ tag ++ ">"
closeTag tag = "</" ++ tag ++ ">"
-- XML tags: <tag>val</tag>
-- example: tag "list" 1 -> <list>1</list>
-- concat :: Foldable t => t [a] -> [a] (The concatenation of all the elements of a container of lists)
tag tag val = concat [openTag tag, val, closeTag tag]

-- unwords :: [String] -> String
-- unwords is an inverse operation to words. It joins words with separating spaces.
-- given a tag, a list of tuples of attributes and a value: example: tagAttrs "test" [("at1", 1), ("at2", 2)] "yupi"
-- we obtain: <test at1="1" at2="2">yupi</test>
-- we are concateneting:
--     openTag (unwords $ [tag] ++ (map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs))    ->   <test at1="1" at2="2">
--     val                                                                                 ->   yupi
--     closeTag tag                                                                        ->   </test>
tagAttrs tag attrs val = concat [
  openTag $ unwords $ tag : map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs,  -- changed .. think it's more readable like this
  val,
  closeTag tag
  ]

-- he intercalate function takes a ByteString and a list of ByteStrings and concatenates the list
-- after interspersing the first argument between each element of the list.
-- joinNL could be substituted with unlines from the prelude
-- this was just to illustrate the use of intercalate and the Data.List module
joinNL ls = intercalate "\n" ls


-- -------------------
-- Parsers Definitions
-- -------------------


-- ----------------------------------------------------
-- Complete parser
-- Combine all parsers using the choice combinator <|>
-- ----------------------------------------------------

-- Parsing alternatives
-- Often we want to try one parser; if that fails, then try another one instead.
-- The choice combinator <|> provides this functionality.
showParser :: Parser String
showParser =
    listParser <|> -- [ ... ]
    tupleParser <|> -- ( ... )
    try recordParser <|> -- MkRec { ... }
    adtParser <|> -- MkADT ...
    number <|>    -- signed integer
    quotedString <?> "Parse error"


-- -----------------------------------
-- Parsers for the derived Show format
-- -----------------------------------


-- "\"" means we are 'escaping' the " using \" so we output the string ". """ would be an error
-- return :: a -> m a Source
--    Inject a value into the monadic type.
quotedString = do
    s <- stringLiteral
    return $ "\"" ++ s ++ "\""


number = do
    n <- integer
    return $ show n


listParser = do
    -- parse brackets and comma separated values
    ls <- brackets $ commaSep showParser
    -- a list [1, 2, 3] will be outputed as:
    --  <list>
    --      <list-elt>1</list-elt>
    --      <list-elt>2</list-elt>
    --      <list-elt>3</list-elt>
    --  </list>
    return $ tag "list" $ joinNL $ map (tag "list-elt") ls


-- tupleParser is not used in the test example
tupleParser = do
    -- parse parens and comma separated values
    ls <- parens $ commaSep showParser
    -- a tuple (1, 2) will be outputed as:
    --  <tuple>
    --      <tuple-elt>1</tuple-elt>
    --      <tuple-elt>2</tuple-elt>
    --  </tuple>
    return $ tag "tuple" $ unwords $ map (tag "tuple-elt") ls


{-|
  MkPersonRecord {
    name = "Jeremy Singer",
    address = MkAddress {
        line1 = "School of Computing Science",
        number = 17,
        street = "Lilybank Gdns",
        town = "Glasgow",
        postcode = "G12 8QQ"
    },
    id = 42,
    labels = [Blue,Yellow]
  }

turns into:

<record name="MkPersonRecord">
    <elt key="name">"Jeremy Singer"</elt>
    <elt key="address">
        <record name="MkAddress">
           <elt key="line1">"School of Computing Science"</elt>
           <elt key="number">17</elt>
           <elt key="street">"Lilybank Gdns"</elt>
           <elt key="town">"Glasgow"</elt>
           <elt key="postcode">"G12 8QQ"</elt>
        </record>
    </elt>
    <elt key="id">42</elt>
    <elt key="labels">
        <list>
           <list-elt><adt>Blue</adt></list-elt>
           <list-elt><adt>Yellow</adt></list-elt>
        </list>
    </elt>
</record>

using:
  - recordParser for: MkPersonRecord { .... } ; inside using kvParser, stringLiteral and number
  - kvParser for: address =
  - recordParser for: MkAddress { .... }
  - kvParser for: labels =
  - adtParser for: Blue and Yellow
-}
recordParser = do
    -- a Record has a Type Identifier, ex: MkPersonRecord
    ti <- typeIdentifier
    -- braces and comma separated values parsed by kvParser
    ls <- braces $ commaSep kvParser
    return $ tagAttrs "record" [("name", ti)] (joinNL ls)


{-|
labels = [Blue,Yellow]

turns into

<elt key="labels">
    <list>
        <list-elt><adt>Blue</adt></list-elt>
        <list-elt><adt>Yellow</adt></list-elt>
    </list>
</elt>

using:
  - kvParser for: labels =
  - listParser for: [ .., ..]
  - adtParser for: Blue and Yellow
-}
adtParser = do
    ti <- typeIdentifier
    return $ tag "adt" ti


{-|
name = "Jeremy Singer",

turns into:

<elt key="name">"Jeremy Singer"</elt>
-}
kvParser = do
    k <- identifier
    symbol "="
    t <- showParser
    return $ tagAttrs "elt" [("key", k)] t


typeIdentifier = do
    -- a Type identifier begins with a Capital Letter
    fst <- oneOf ['A' .. 'Z']
    -- read the rest ... (many alphanumerics)
    rest <- many alphaNum
    whiteSpace
    return $ fst:rest


-- The Parsec.Token module provides a number of basic parsers.
-- Each of these takes as argument a lexer, generated by makeTokenParser using a language definition.
-- Here we use emptyDef from the Language module.
lexer           = P.makeTokenParser emptyDef

-- shorter name for the predefined parsers

parens          = P.parens lexer
brackets        = P.brackets lexer
braces          = P.braces lexer
commaSep        = P.commaSep lexer
whiteSpace      = P.whiteSpace lexer
symbol          = P.symbol lexer
identifier      = P.identifier lexer
integer         = P.integer lexer
stringLiteral   = P.stringLiteral lexer
	{-\|

	the test example we are parsing (show [rec1,rec2]):

	[
	MkPersonRecord {
	name = "Wim Vanderbauwhede",
	address = MkAddress {
	line1 = "School of Computing Science",
	number = 17,
	street = "Lilybank Gdns",
	town = "Glasgow",
	postcode = "G12 8QQ"
	},
	id = 557188,
	labels = [Green,Red]
	},
	MkPersonRecord {
	name = "Jeremy Singer",
	address = MkAddress {
	line1 = "School of Computing Science",
	number = 17,
	street = "Lilybank Gdns",
	town = "Glasgow",
	postcode = "G12 8QQ"
	},
	id = 42,
	labels = [Blue,Yellow]
	}
	]

	parsed output:

	<?xml version="1.0" encoding="UTF-8"?>
	<list>
	<list-elt>
	<record name="MkPersonRecord">
	<elt key="name">"Wim Vanderbauwhede"</elt>
	<elt key="address">
	<record name="MkAddress">
	<elt key="line1">"School of Computing Science"</elt>
	<elt key="number">17</elt>
	<elt key="street">"Lilybank Gdns"</elt>
	<elt key="town">"Glasgow"</elt>
	<elt key="postcode">"G12 8QQ"</elt>
	</record>
	</elt>
	<elt key="id">557188</elt>
	<elt key="labels">
	<list>
	<list-elt>
	<adt>Green</adt>
	</list-elt>
	<list-elt>
	<adt>Red</adt>
	</list-elt>
	</list>
	</elt>
	</record>
	</list-elt>
	<list-elt>
	<record name="MkPersonRecord">
	<elt key="name">"Jeremy Singer"</elt>
	<elt key="address">
	<record name="MkAddress">
	<elt key="line1">"School of Computing Science"</elt>
	<elt key="number">17</elt>
	<elt key="street">"Lilybank Gdns"</elt>
	<elt key="town">"Glasgow"</elt>
	<elt key="postcode">"G12 8QQ"</elt>
	</record>
	</elt>
	<elt key="id">42</elt>
	<elt key="labels">
	<list>
	<list-elt>
	<adt>Blue</adt>
	</list-elt>
	<list-elt>
	<adt>Yellow</adt>
	</list-elt>
	</list>
	</elt>
	</record>
	</list-elt>
	</list>

	-}

	module ShowParser ( parseShow ) where
	-- renamed original run_parser to runParser (hlint allways advice to use camelCase)
	-- had do hide Text.ParserCombinators.Parsec.runParser because it was clashing and GHC refused to compile
	-- could just use another name .. but .. learned about hiding :)
	import Text.ParserCombinators.Parsec hiding (runParser)
	import qualified Text.ParserCombinators.Parsec.Token as P
	import Text.ParserCombinators.Parsec.Language
	import Data.List ( intercalate )


	xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"

	parseShow :: String -> String
	parseShow str = xmlHeader ++ runParser showParser str

	-- used :type parse on GHCI and got:
	-- parse :: Text.Parsec.Prim.Stream s Data.Functor.Identity.Identity t => Text.Parsec.Prim.Parsec s () a -> SourceName -> s -> Either ParseError a
	-- The Either type represents values with two possibilities: a value of type Either a b is either Left a or Right b.
	-- The Either type is sometimes used to represent a value which is either correct or an error;
	-- by convention, the Left constructor is used to hold an error value
	-- and the Right constructor is used to hold a correct value (mnemonic: "right" also means "correct").
	--
	-- We are applying parser p to the string str
	-- If we get an error (Left err) we throw an exception message (with error)
	-- otherwise we return the parsed value
	runParser :: Parser a -> String -> a
	runParser p str = case parse p "" str of
	Left err -> error $ "parse error at " ++ show err
	Right val -> val

	-- ---------------------------------------------------------------
	-- helper functions to create XML tags with and without attributes
	-- ---------------------------------------------------------------

	openTag tag = "<" ++ tag ++ ">"
	closeTag tag = "</" ++ tag ++ ">"
	-- XML tags: <tag>val</tag>
	-- example: tag "list" 1 -> <list>1</list>
	-- concat :: Foldable t => t [a] -> [a] (The concatenation of all the elements of a container of lists)
	tag tag val = concat [openTag tag, val, closeTag tag]

	-- unwords :: [String] -> String
	-- unwords is an inverse operation to words. It joins words with separating spaces.
	-- given a tag, a list of tuples of attributes and a value: example: tagAttrs "test" [("at1", 1), ("at2", 2)] "yupi"
	-- we obtain: <test at1="1" at2="2">yupi</test>
	-- we are concateneting:
	-- openTag (unwords $ [tag] ++ (map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs)) -> <test at1="1" at2="2">
	-- val -> yupi
	-- closeTag tag -> </test>
	tagAttrs tag attrs val = concat [
	openTag $ unwords $ tag : map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs, -- changed .. think it's more readable like this
	val,
	closeTag tag
	]

	-- he intercalate function takes a ByteString and a list of ByteStrings and concatenates the list
	-- after interspersing the first argument between each element of the list.
	-- joinNL could be substituted with unlines from the prelude
	-- this was just to illustrate the use of intercalate and the Data.List module
	joinNL ls = intercalate "\n" ls



	-- -------------------
	-- Parsers Definitions
	-- -------------------


	-- ----------------------------------------------------
	-- Complete parser
	-- Combine all parsers using the choice combinator <\|>
	-- ----------------------------------------------------

	-- Parsing alternatives
	-- Often we want to try one parser; if that fails, then try another one instead.
	-- The choice combinator <\|> provides this functionality.
	showParser :: Parser String
	showParser =
	listParser <\|> -- [ ... ]
	tupleParser <\|> -- ( ... )
	try recordParser <\|> -- MkRec { ... }
	adtParser <\|> -- MkADT ...
	number <\|> -- signed integer
	quotedString <?> "Parse error"


	-- -----------------------------------
	-- Parsers for the derived Show format
	-- -----------------------------------


	-- "\"" means we are 'escaping' the " using \" so we output the string ". """ would be an error
	-- return :: a -> m a Source
	-- Inject a value into the monadic type.
	quotedString = do
	s <- stringLiteral
	return $ "\"" ++ s ++ "\""


	number = do
	n <- integer
	return $ show n


	listParser = do
	-- parse brackets and comma separated values
	ls <- brackets $ commaSep showParser
	-- a list [1, 2, 3] will be outputed as:
	-- <list>
	-- <list-elt>1</list-elt>
	-- <list-elt>2</list-elt>
	-- <list-elt>3</list-elt>
	-- </list>
	return $ tag "list" $ joinNL $ map (tag "list-elt") ls


	-- tupleParser is not used in the test example
	tupleParser = do
	-- parse parens and comma separated values
	ls <- parens $ commaSep showParser
	-- a tuple (1, 2) will be outputed as:
	-- <tuple>
	-- <tuple-elt>1</tuple-elt>
	-- <tuple-elt>2</tuple-elt>
	-- </tuple>
	return $ tag "tuple" $ unwords $ map (tag "tuple-elt") ls


	{-\|
	MkPersonRecord {
	name = "Jeremy Singer",
	address = MkAddress {
	line1 = "School of Computing Science",
	number = 17,
	street = "Lilybank Gdns",
	town = "Glasgow",
	postcode = "G12 8QQ"
	},
	id = 42,
	labels = [Blue,Yellow]
	}

	turns into:

	<record name="MkPersonRecord">
	<elt key="name">"Jeremy Singer"</elt>
	<elt key="address">
	<record name="MkAddress">
	<elt key="line1">"School of Computing Science"</elt>
	<elt key="number">17</elt>
	<elt key="street">"Lilybank Gdns"</elt>
	<elt key="town">"Glasgow"</elt>
	<elt key="postcode">"G12 8QQ"</elt>
	</record>
	</elt>
	<elt key="id">42</elt>
	<elt key="labels">
	<list>
	<list-elt><adt>Blue</adt></list-elt>
	<list-elt><adt>Yellow</adt></list-elt>
	</list>
	</elt>
	</record>

	using:
	- recordParser for: MkPersonRecord { .... } ; inside using kvParser, stringLiteral and number
	- kvParser for: address =
	- recordParser for: MkAddress { .... }
	- kvParser for: labels =
	- adtParser for: Blue and Yellow
	-}
	recordParser = do
	-- a Record has a Type Identifier, ex: MkPersonRecord
	ti <- typeIdentifier
	-- braces and comma separated values parsed by kvParser
	ls <- braces $ commaSep kvParser
	return $ tagAttrs "record" [("name", ti)] (joinNL ls)


	{-\|
	labels = [Blue,Yellow]

	turns into

	<elt key="labels">
	<list>
	<list-elt><adt>Blue</adt></list-elt>
	<list-elt><adt>Yellow</adt></list-elt>
	</list>
	</elt>

	using:
	- kvParser for: labels =
	- listParser for: [ .., ..]
	- adtParser for: Blue and Yellow
	-}
	adtParser = do
	ti <- typeIdentifier
	return $ tag "adt" ti


	{-\|
	name = "Jeremy Singer",

	turns into:

	<elt key="name">"Jeremy Singer"</elt>
	-}
	kvParser = do
	k <- identifier
	symbol "="
	t <- showParser
	return $ tagAttrs "elt" [("key", k)] t


	typeIdentifier = do
	-- a Type identifier begins with a Capital Letter
	fst <- oneOf ['A' .. 'Z']
	-- read the rest ... (many alphanumerics)
	rest <- many alphaNum
	whiteSpace
	return $ fst:rest



	-- The Parsec.Token module provides a number of basic parsers.
	-- Each of these takes as argument a lexer, generated by makeTokenParser using a language definition.
	-- Here we use emptyDef from the Language module.
	lexer = P.makeTokenParser emptyDef

	-- shorter name for the predefined parsers

	parens = P.parens lexer
	brackets = P.brackets lexer
	braces = P.braces lexer
	commaSep = P.commaSep lexer
	whiteSpace = P.whiteSpace lexer
	symbol = P.symbol lexer
	identifier = P.identifier lexer
	integer = P.integer lexer
	stringLiteral = P.stringLiteral lexer