jkramer (owner)

Revisions

gist: 129625 Download_button fork
public
Public Clone URL: git://gist.github.com/129625.git
Haskell
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
module Mud.Thing.Parse where
 
import Text.ParserCombinators.Parsec
 
import Mud.Thing.Data
import Mud.Thing.Eval
 
 
-- Parse comment (starting with '#' until end of line).
comment = do
position <- getPosition
char '#'
comment <- manyTill (noneOf "\n") (char '\n')
return $ Comment position comment
 
 
-- Parse a reserved word (aka keyword).
reserved = do
position <- getPosition
word <- many1 letter
if word `elem` reservedWords
then return $ Reserved position word
else pzero
 
 
-- Check for a statement separator (semikolon or linebreak).
separator = do
position <- getPosition
character <- try $ oneOf ";\n"
return $ Separator position character
 
 
-- Parse an operator character.
operator = do
position <- getPosition
character <- oneOf "~="
return $ Operator position character
 
 
-- Parse an identifier, consisting of a sigil and a word of just letters.
identifier = do
position <- getPosition
sigil <- try $ oneOf "%&"
name <- many1 letter
return $ Identifier position (sigil : name)
 
 
property = do
position <- getPosition
try $ char ':'
name <- many1 letter
return $ Property position name
 
 
call = do
position <- getPosition
try $ char '!'
name <- many1 letter
return $ Call position name
 
 
-- Parse a normal string enclosed by double quotes.
-- TODO: Escapes!
doubleQuotedString = char '"' >> manyTill (noneOf "\"") (char '"')
 
 
-- Parse a text block enclosed by "***".
blockString = string "***" >> manyTill anyChar (string "***")
 
 
-- Just a number.
number = many1 digit
 
 
-- Check for end of file.
fileEnd = do
position <- getPosition
try $ eof
return $ EOF position
 
 
-- Parse a scalar (normal string, text block or number).
scalar = do
position <- getPosition
content <- (try doubleQuotedString <|> try blockString <|> try number) <?> "scalar"
return $ Scalar position content
 
 
-- Parse a list (zero or more scalars or even more lists).
list = do
position <- getPosition
 
char '['
content <- sepBy (scalar <|> list <|> identifier) (many1 space)
char ']'
 
return $ List position content
 
 
-- Parse a single token.
parseToken = (comment <?> "comment")
<|> (property <?> "property")
<|> (call <?> "function call")
<|> (reserved <?> "keyword")
<|> (scalar <?> "scalar")
<|> (identifier <?> "identifier")
<|> (operator <?> "operator")
<|> (separator <?> "separator")
<|> (fileEnd <?> "end of input")
<|> (list <?> "list")
 
 
-- Parse a single token, or, if the token is the beginning of a block,
-- return a block token that contains even more tokens.
block = do
position <- getPosition
token <- parseToken
 
case token of
(Reserved _ "do") -> do { blockContent position }
_ -> return token
 
where
manyTillDone = do
skipMany (oneOf " \t\r" <?> "")
token <- parseToken
 
case token of
(Reserved _ "done") -> return []
_ -> do { rest <- manyTillDone; return $ token : rest }
 
blockContent position = do
rest <- manyTillDone
return $ Block position rest
 
 
-- Main tokenizing function. Loops until EOF and returns all tokens.
tokenize = do
skipMany (oneOf " \t\r" <?> "")
token <- block
 
case token of
(EOF _) -> return [token]
_ -> tokenizeRest token <|> return [token]
 
where
tokenizeRest token = do
moreTokens <- tokenize
return $ token : moreTokens
 
 
-- Load a file and tokenize its content.
tokenizeFile source = do
result <- parseFromFile tokenize source
case (result) of
Left error -> do { putStrLn $ "error at " ++ show error; return [] }
Right tokens -> return $ splitExpression tokens
 
 
-- List of reserved words.
reservedWords = [ "inherit", "from", "when", "do", "done", "function", "is" ]