Created
March 23, 2012 16:38
-
-
Save roelvandijk/2172570 to your computer and use it in GitHub Desktop.
Language of Physical Units
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE FlexibleContexts | |
, NoImplicitPrelude | |
, PackageImports | |
, UnicodeSyntax | |
#-} | |
module Numeric.Units.Dimensional.TF.Parser.Unit | |
( UnitExp(..) | |
, parseUnitExp | |
) where | |
-------------------------------------------------------------------------------- | |
-- Imports | |
-------------------------------------------------------------------------------- | |
import "base" Control.Applicative ( (<*>) ) | |
import "base" Control.Monad ( return ) | |
import "base" Data.Either ( Either ) | |
import "base" Data.Function ( ($), id ) | |
import "base" Data.Functor ( (<$>) ) | |
import "base" Data.Int ( Int ) | |
import "base" Data.String ( String ) | |
import "base" Prelude ( negate ) | |
import "base" Text.Show ( Show ) | |
import "base-unicode-symbols" Data.Function.Unicode ( (∘) ) | |
import "base-unicode-symbols" Prelude.Unicode ( (⋅) ) | |
import "parsec" Text.Parsec.Char ( char, letter, oneOf, string ) | |
import "parsec" Text.Parsec.Combinator ( between, many1, eof ) | |
import "parsec" Text.Parsec.Error ( ParseError, Message(Message), newErrorMessage ) | |
import "parsec" Text.Parsec.Expr | |
import "parsec" Text.Parsec.Language ( emptyDef ) | |
import "parsec" Text.Parsec.Pos ( initialPos ) | |
import "parsec" Text.Parsec.Prim | |
( Parsec, Stream, parse, (<|>), (<?>), try, getParserState ) | |
import qualified "parsec" Text.Parsec.Token as P | |
import "transformers" Data.Functor.Identity ( Identity ) | |
-- Debug | |
import Prelude | |
-------------------------------------------------------------------------------- | |
-- Language of Physical Units | |
-------------------------------------------------------------------------------- | |
data UnitExp = | |
UEName String -- "metre" | |
| UEMul UnitExp UnitExp -- "newton * metre" | |
| UEDiv UnitExp UnitExp -- "metre / second" | |
| UEPow UnitExp Integer -- "metre ^ 3" | |
deriving Show | |
infixr 8 `UEPow` | |
infixl 7 `UEMul` | |
infixl 7 `UEDiv` | |
-- | Less strict version of the language that is easier to | |
-- parse. Allows nonsensical things with the power operator and with | |
-- integer literals. | |
data UnitExpRaw = | |
UERName String -- "metre" | |
| UERInt Integer -- "3" | |
| UERMul UnitExpRaw UnitExpRaw -- "newton * metre" | |
| UERDiv UnitExpRaw UnitExpRaw -- "metre / second" | |
| UERPow UnitExpRaw UnitExpRaw -- "metre ^ 3" | |
deriving Show | |
infixr 8 `UERPow` | |
infixl 7 `UERMul` | |
infixl 7 `UERDiv` | |
convertRawExp ∷ UnitExpRaw → Maybe UnitExp | |
convertRawExp (UERName n) = Just $ UEName n | |
convertRawExp (UERInt i) = Nothing | |
convertRawExp (UERMul x y) = UEMul <$> convertRawExp x <*> convertRawExp y | |
convertRawExp (UERDiv x y) = UEDiv <$> convertRawExp x <*> convertRawExp y | |
convertRawExp (UERPow x (UERInt i)) = (`UEPow` i) <$> convertRawExp x | |
convertRawExp (UERPow x _) = Nothing | |
-------------------------------------------------------------------------------- | |
-- Lexer | |
-------------------------------------------------------------------------------- | |
unitDef ∷ P.LanguageDef st | |
unitDef = emptyDef | |
{ P.commentStart = "" | |
, P.commentEnd = "" | |
, P.commentLine = "" | |
, P.nestedComments = False | |
, P.identStart = P.identLetter unitDef | |
, P.identLetter = letter | |
, P.opStart = P.opLetter unitDef | |
, P.opLetter = oneOf ['*', '·', '/', '^'] | |
, P.reservedNames = [] | |
, P.reservedOpNames = [] | |
, P.caseSensitive = True | |
} | |
lexer ∷ P.GenTokenParser String u Identity | |
lexer = P.makeTokenParser unitDef | |
-------------------------------------------------------------------------------- | |
-- Unit parser | |
-------------------------------------------------------------------------------- | |
parseUnitExp ∷ String → Either ParseError UnitExp | |
parseUnitExp str = | |
case parse unitExp "" str of | |
Left err → Left err | |
Right uer → | |
case convertRawExp uer of | |
Just ue → Right ue | |
Nothing → Left $ newErrorMessage (Message "Illegal expression") | |
(initialPos "") | |
unitExp ∷ Parsec String () UnitExpRaw | |
unitExp = do ue ← buildExpressionParser table term | |
eof | |
return ue | |
where | |
table = [ [ binOp "^" UERPow AssocRight ] | |
, [ binOp "*" UERMul AssocLeft | |
, binOp "·" UERMul AssocLeft | |
, binOp "/" UERDiv AssocLeft | |
] | |
] | |
term = (P.parens lexer unitExp <?> "group") | |
<|> try (UERPow <$> unitName <*> unitSuperExp) | |
<|> unitName | |
<|> UERInt <$> P.integer lexer | |
unitName = UERName <$> (P.identifier lexer <?> "unit name") | |
unitSuperExp = UERInt <$> (superDecimal <?> "superscript decimal") | |
binOp name fun assoc = Infix (P.reservedOp lexer name >> return fun) assoc | |
superDecimal ∷ (Num α) ⇒ Parsec String () α | |
superDecimal = do | |
digits ← many1 superDigit | |
return $ foldl (\x d → 10⋅x + d) 0 digits | |
superDigit ∷ (Num α) ⇒ Parsec String () α | |
superDigit = (char '⁰' >> return 0) | |
<|> (char '¹' >> return 1) | |
<|> (char '²' >> return 2) | |
<|> (char '³' >> return 3) | |
<|> (char '⁴' >> return 4) | |
<|> (char '⁵' >> return 5) | |
<|> (char '⁶' >> return 6) | |
<|> (char '⁷' >> return 7) | |
<|> (char '⁸' >> return 8) | |
<|> (char '⁹' >> return 9) | |
superSign ∷ (Num α) ⇒ Parsec String () (α → α) | |
superSign = (char '⁻' >> return negate) | |
<|> (char '⁺' >> return id) | |
<|> return id | |
tests ∷ [String] | |
tests = map (\s → s ++ "\n" ++ show (parseUnitExp s) ++ "\n") | |
[ "a" | |
, "(a" | |
, "a)" | |
, "(a)" | |
-- , "((c))" | |
-- , "d/e" | |
-- , "((f)/(g))" | |
-- , "h^1 * i^-1" | |
-- , "j·k⁻¹" | |
-- , "l/(m·m)" | |
-- , "n/o²" | |
-- , "x^3" | |
-- , "x³" | |
-- , "m²·kg·s⁻³·A⁻²" -- Ω | |
-- , "m/s·s" | |
-- , "m/(s·s)" | |
-- , "m/s^2" | |
-- , "m/s²" | |
-- , "x / y / z " | |
-- , "m * s * m" | |
-- -- These should fail: | |
-- , "(m/s)²" | |
-- , "a @@@@" | |
-- , "2" | |
-- , "2/3" | |
-- , "m^s" | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment