Skip to content

Instantly share code, notes, and snippets.

@pbgc
Created April 11, 2020 21:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pbgc/83b1902dfecd2a0b993216e22247973e to your computer and use it in GitHub Desktop.
Save pbgc/83b1902dfecd2a0b993216e22247973e to your computer and use it in GitHub Desktop.
{-|
the test example we are parsing (show [rec1,rec2]):
[
MkPersonRecord {
name = "Wim Vanderbauwhede",
address = MkAddress {
line1 = "School of Computing Science",
number = 17,
street = "Lilybank Gdns",
town = "Glasgow",
postcode = "G12 8QQ"
},
id = 557188,
labels = [Green,Red]
},
MkPersonRecord {
name = "Jeremy Singer",
address = MkAddress {
line1 = "School of Computing Science",
number = 17,
street = "Lilybank Gdns",
town = "Glasgow",
postcode = "G12 8QQ"
},
id = 42,
labels = [Blue,Yellow]
}
]
parsed output:
<?xml version="1.0" encoding="UTF-8"?>
<list>
<list-elt>
<record name="MkPersonRecord">
<elt key="name">"Wim Vanderbauwhede"</elt>
<elt key="address">
<record name="MkAddress">
<elt key="line1">"School of Computing Science"</elt>
<elt key="number">17</elt>
<elt key="street">"Lilybank Gdns"</elt>
<elt key="town">"Glasgow"</elt>
<elt key="postcode">"G12 8QQ"</elt>
</record>
</elt>
<elt key="id">557188</elt>
<elt key="labels">
<list>
<list-elt>
<adt>Green</adt>
</list-elt>
<list-elt>
<adt>Red</adt>
</list-elt>
</list>
</elt>
</record>
</list-elt>
<list-elt>
<record name="MkPersonRecord">
<elt key="name">"Jeremy Singer"</elt>
<elt key="address">
<record name="MkAddress">
<elt key="line1">"School of Computing Science"</elt>
<elt key="number">17</elt>
<elt key="street">"Lilybank Gdns"</elt>
<elt key="town">"Glasgow"</elt>
<elt key="postcode">"G12 8QQ"</elt>
</record>
</elt>
<elt key="id">42</elt>
<elt key="labels">
<list>
<list-elt>
<adt>Blue</adt>
</list-elt>
<list-elt>
<adt>Yellow</adt>
</list-elt>
</list>
</elt>
</record>
</list-elt>
</list>
-}
module ShowParser ( parseShow ) where
-- renamed original run_parser to runParser (hlint allways advice to use camelCase)
-- had do hide Text.ParserCombinators.Parsec.runParser because it was clashing and GHC refused to compile
-- could just use another name .. but .. learned about hiding :)
import Text.ParserCombinators.Parsec hiding (runParser)
import qualified Text.ParserCombinators.Parsec.Token as P
import Text.ParserCombinators.Parsec.Language
import Data.List ( intercalate )
xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
parseShow :: String -> String
parseShow str = xmlHeader ++ runParser showParser str
-- used :type parse on GHCI and got:
-- parse :: Text.Parsec.Prim.Stream s Data.Functor.Identity.Identity t => Text.Parsec.Prim.Parsec s () a -> SourceName -> s -> Either ParseError a
-- The Either type represents values with two possibilities: a value of type Either a b is either Left a or Right b.
-- The Either type is sometimes used to represent a value which is either correct or an error;
-- by convention, the Left constructor is used to hold an error value
-- and the Right constructor is used to hold a correct value (mnemonic: "right" also means "correct").
--
-- We are applying parser p to the string str
-- If we get an error (Left err) we throw an exception message (with error)
-- otherwise we return the parsed value
runParser :: Parser a -> String -> a
runParser p str = case parse p "" str of
Left err -> error $ "parse error at " ++ show err
Right val -> val
-- ---------------------------------------------------------------
-- helper functions to create XML tags with and without attributes
-- ---------------------------------------------------------------
openTag tag = "<" ++ tag ++ ">"
closeTag tag = "</" ++ tag ++ ">"
-- XML tags: <tag>val</tag>
-- example: tag "list" 1 -> <list>1</list>
-- concat :: Foldable t => t [a] -> [a] (The concatenation of all the elements of a container of lists)
tag tag val = concat [openTag tag, val, closeTag tag]
-- unwords :: [String] -> String
-- unwords is an inverse operation to words. It joins words with separating spaces.
-- given a tag, a list of tuples of attributes and a value: example: tagAttrs "test" [("at1", 1), ("at2", 2)] "yupi"
-- we obtain: <test at1="1" at2="2">yupi</test>
-- we are concateneting:
-- openTag (unwords $ [tag] ++ (map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs)) -> <test at1="1" at2="2">
-- val -> yupi
-- closeTag tag -> </test>
tagAttrs tag attrs val = concat [
openTag $ unwords $ tag : map (\(k, v) -> concat [k, "=\"", v, "\""]) attrs, -- changed .. think it's more readable like this
val,
closeTag tag
]
-- he intercalate function takes a ByteString and a list of ByteStrings and concatenates the list
-- after interspersing the first argument between each element of the list.
-- joinNL could be substituted with unlines from the prelude
-- this was just to illustrate the use of intercalate and the Data.List module
joinNL ls = intercalate "\n" ls
-- -------------------
-- Parsers Definitions
-- -------------------
-- ----------------------------------------------------
-- Complete parser
-- Combine all parsers using the choice combinator <|>
-- ----------------------------------------------------
-- Parsing alternatives
-- Often we want to try one parser; if that fails, then try another one instead.
-- The choice combinator <|> provides this functionality.
showParser :: Parser String
showParser =
listParser <|> -- [ ... ]
tupleParser <|> -- ( ... )
try recordParser <|> -- MkRec { ... }
adtParser <|> -- MkADT ...
number <|> -- signed integer
quotedString <?> "Parse error"
-- -----------------------------------
-- Parsers for the derived Show format
-- -----------------------------------
-- "\"" means we are 'escaping' the " using \" so we output the string ". """ would be an error
-- return :: a -> m a Source
-- Inject a value into the monadic type.
quotedString = do
s <- stringLiteral
return $ "\"" ++ s ++ "\""
number = do
n <- integer
return $ show n
listParser = do
-- parse brackets and comma separated values
ls <- brackets $ commaSep showParser
-- a list [1, 2, 3] will be outputed as:
-- <list>
-- <list-elt>1</list-elt>
-- <list-elt>2</list-elt>
-- <list-elt>3</list-elt>
-- </list>
return $ tag "list" $ joinNL $ map (tag "list-elt") ls
-- tupleParser is not used in the test example
tupleParser = do
-- parse parens and comma separated values
ls <- parens $ commaSep showParser
-- a tuple (1, 2) will be outputed as:
-- <tuple>
-- <tuple-elt>1</tuple-elt>
-- <tuple-elt>2</tuple-elt>
-- </tuple>
return $ tag "tuple" $ unwords $ map (tag "tuple-elt") ls
{-|
MkPersonRecord {
name = "Jeremy Singer",
address = MkAddress {
line1 = "School of Computing Science",
number = 17,
street = "Lilybank Gdns",
town = "Glasgow",
postcode = "G12 8QQ"
},
id = 42,
labels = [Blue,Yellow]
}
turns into:
<record name="MkPersonRecord">
<elt key="name">"Jeremy Singer"</elt>
<elt key="address">
<record name="MkAddress">
<elt key="line1">"School of Computing Science"</elt>
<elt key="number">17</elt>
<elt key="street">"Lilybank Gdns"</elt>
<elt key="town">"Glasgow"</elt>
<elt key="postcode">"G12 8QQ"</elt>
</record>
</elt>
<elt key="id">42</elt>
<elt key="labels">
<list>
<list-elt><adt>Blue</adt></list-elt>
<list-elt><adt>Yellow</adt></list-elt>
</list>
</elt>
</record>
using:
- recordParser for: MkPersonRecord { .... } ; inside using kvParser, stringLiteral and number
- kvParser for: address =
- recordParser for: MkAddress { .... }
- kvParser for: labels =
- adtParser for: Blue and Yellow
-}
recordParser = do
-- a Record has a Type Identifier, ex: MkPersonRecord
ti <- typeIdentifier
-- braces and comma separated values parsed by kvParser
ls <- braces $ commaSep kvParser
return $ tagAttrs "record" [("name", ti)] (joinNL ls)
{-|
labels = [Blue,Yellow]
turns into
<elt key="labels">
<list>
<list-elt><adt>Blue</adt></list-elt>
<list-elt><adt>Yellow</adt></list-elt>
</list>
</elt>
using:
- kvParser for: labels =
- listParser for: [ .., ..]
- adtParser for: Blue and Yellow
-}
adtParser = do
ti <- typeIdentifier
return $ tag "adt" ti
{-|
name = "Jeremy Singer",
turns into:
<elt key="name">"Jeremy Singer"</elt>
-}
kvParser = do
k <- identifier
symbol "="
t <- showParser
return $ tagAttrs "elt" [("key", k)] t
typeIdentifier = do
-- a Type identifier begins with a Capital Letter
fst <- oneOf ['A' .. 'Z']
-- read the rest ... (many alphanumerics)
rest <- many alphaNum
whiteSpace
return $ fst:rest
-- The Parsec.Token module provides a number of basic parsers.
-- Each of these takes as argument a lexer, generated by makeTokenParser using a language definition.
-- Here we use emptyDef from the Language module.
lexer = P.makeTokenParser emptyDef
-- shorter name for the predefined parsers
parens = P.parens lexer
brackets = P.brackets lexer
braces = P.braces lexer
commaSep = P.commaSep lexer
whiteSpace = P.whiteSpace lexer
symbol = P.symbol lexer
identifier = P.identifier lexer
integer = P.integer lexer
stringLiteral = P.stringLiteral lexer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment