Skip to content

Instantly share code, notes, and snippets.

@michaelficarra
Last active December 8, 2016 18:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save michaelficarra/8ad559526c1602bd02dde021105a5da1 to your computer and use it in GitHub Desktop.
Save michaelficarra/8ad559526c1602bd02dde021105a5da1 to your computer and use it in GitHub Desktop.
diff --git c/purescript.cabal i/purescript.cabal
index 4f4fcabd..699e8440 100644
--- c/purescript.cabal
+++ i/purescript.cabal
@@ -231,6 +231,7 @@ library
Language.PureScript.Sugar.TypeClasses
Language.PureScript.Sugar.TypeClasses.Deriving
Language.PureScript.Sugar.TypeDeclarations
+ Language.PureScript.Terms
Language.PureScript.Traversals
Language.PureScript.TypeChecker
Language.PureScript.TypeChecker.Entailment
diff --git c/src/Language/PureScript/AST/Literals.hs i/src/Language/PureScript/AST/Literals.hs
index 3a456237..d0a6de27 100644
--- c/src/Language/PureScript/AST/Literals.hs
+++ i/src/Language/PureScript/AST/Literals.hs
@@ -5,6 +5,7 @@ module Language.PureScript.AST.Literals where
import Prelude.Compat
import Data.Text (Text)
+import Language.PureScript.Terms (PSString)
-- |
-- Data type for literal values. Parameterised so it can be used for Exprs and
@@ -18,7 +19,7 @@ data Literal a
-- |
-- A string literal
--
- | StringLiteral Text
+ | StringLiteral PSString
-- |
-- A character literal
--
diff --git c/src/Language/PureScript/CodeGen/JS/AST.hs i/src/Language/PureScript/CodeGen/JS/AST.hs
index 5f124dd2..3970e8f5 100644
--- c/src/Language/PureScript/CodeGen/JS/AST.hs
+++ i/src/Language/PureScript/CodeGen/JS/AST.hs
@@ -11,6 +11,7 @@ import Data.Text (Text)
import Language.PureScript.AST (SourceSpan(..))
import Language.PureScript.Comments
+import Language.PureScript.Terms (PSString)
import Language.PureScript.Traversals
-- |
@@ -132,7 +133,7 @@ data JS
-- |
-- A string literal
--
- | JSStringLiteral (Maybe SourceSpan) Text
+ | JSStringLiteral (Maybe SourceSpan) PSString
-- |
-- A boolean literal
--
@@ -160,7 +161,7 @@ data JS
-- |
-- An object property accessor expression
--
- | JSAccessor (Maybe SourceSpan) Text JS
+ | JSAccessor (Maybe SourceSpan) PSString JS
-- |
-- A function introduction (optional name, arguments, body)
--
diff --git c/src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs i/src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs
index 01a41cae..c3405542 100644
--- c/src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs
+++ i/src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs
@@ -11,6 +11,7 @@ import Data.Maybe (fromMaybe)
import Language.PureScript.Crash
import Language.PureScript.CodeGen.JS.AST
+import Language.PureScript.Terms (mkString)
applyAll :: [a -> a] -> a -> a
applyAll = foldl' (.) id
@@ -71,13 +72,13 @@ removeFromBlock _ js = js
isFn :: (Text, Text) -> JS -> Bool
isFn (moduleName, fnName) (JSAccessor _ x (JSVar _ y)) =
- x == fnName && y == moduleName
+ x == mkString fnName && y == moduleName
isFn (moduleName, fnName) (JSIndexer _ (JSStringLiteral _ x) (JSVar _ y)) =
- x == fnName && y == moduleName
+ x == mkString fnName && y == moduleName
isFn _ _ = False
isDict :: (Text, Text) -> JS -> Bool
-isDict (moduleName, dictName) (JSAccessor _ x (JSVar _ y)) = x == dictName && y == moduleName
+isDict (moduleName, dictName) (JSAccessor _ x (JSVar _ y)) = x == mkString dictName && y == moduleName
isDict _ _ = False
isDict' :: [(Text, Text)] -> JS -> Bool
diff --git c/src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs i/src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs
index fdc482a3..7f144b75 100644
--- c/src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs
+++ i/src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs
@@ -23,6 +23,7 @@ import qualified Data.Text as T
import Language.PureScript.CodeGen.JS.AST
import Language.PureScript.CodeGen.JS.Optimizer.Common
import qualified Language.PureScript.Constants as C
+import Language.PureScript.Terms (mkString)
-- TODO: Potential bug:
-- Shouldn't just inline this case: { var x = 0; x.toFixed(10); }
@@ -213,7 +214,7 @@ inlineCommonOperators = applyAll $
isNFn :: Text -> Int -> JS -> Bool
isNFn prefix n (JSVar _ name) = name == (prefix <> T.pack (show n))
- isNFn prefix n (JSAccessor _ name (JSVar _ dataFunctionUncurried)) | dataFunctionUncurried == C.dataFunctionUncurried = name == (prefix <> T.pack (show n))
+ isNFn prefix n (JSAccessor _ name (JSVar _ dataFunctionUncurried)) | dataFunctionUncurried == C.dataFunctionUncurried = name == mkString (prefix <> T.pack (show n))
isNFn _ _ _ = False
runFn :: Int -> JS -> JS
@@ -235,11 +236,11 @@ inlineCommonOperators = applyAll $
convert other = other
isModFn :: (Text, Text) -> JS -> Bool
- isModFn (m, op) (JSAccessor _ op' (JSVar _ m')) = m == m' && op == op'
+ isModFn (m, op) (JSAccessor _ op' (JSVar _ m')) = m == m' && mkString op == op'
isModFn _ _ = False
isModFnWithDict :: (Text, Text) -> JS -> Bool
- isModFnWithDict (m, op) (JSApp _ (JSAccessor _ op' (JSVar _ m')) [(JSVar _ _)]) = m == m' && op == op'
+ isModFnWithDict (m, op) (JSApp _ (JSAccessor _ op' (JSVar _ m')) [JSVar _ _]) = m == m' && mkString op == op'
isModFnWithDict _ _ = False
-- (f <<< g $ x) = f (g x)
diff --git c/src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs i/src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs
index 8fb82abb..96f2a215 100644
--- c/src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs
+++ i/src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs
@@ -12,6 +12,7 @@ import Data.Maybe (fromJust, isJust)
import Language.PureScript.CodeGen.JS.AST
import Language.PureScript.CodeGen.JS.Optimizer.Common
import Language.PureScript.Options
+import Language.PureScript.Terms (mkString)
import qualified Language.PureScript.Constants as C
magicDo :: Options -> JS -> JS
@@ -67,7 +68,7 @@ magicDo' = everywhereOnJS undo . everywhereOnJSTopDown convert
-- Check if an expression represents the polymorphic pure or return function
isPurePoly = isFn (C.controlApplicative, C.pure')
-- Check if an expression represents a function in the Eff module
- isEffFunc name (JSAccessor _ name' (JSVar _ eff)) = eff == C.eff && name == name'
+ isEffFunc name (JSAccessor _ name' (JSVar _ eff)) = eff == C.eff && mkString name == name'
isEffFunc _ _ = False
-- Remove __do function applications which remain after desugaring
@@ -106,14 +107,14 @@ inlineST = everywhereOnJS convertBlock
convert agg (JSApp s1 f [arg]) | isSTFunc C.newSTRef f =
JSFunction s1 Nothing [] (JSBlock s1 [JSReturn s1 $ if agg then arg else JSObjectLiteral s1 [(C.stRefValue, arg)]])
convert agg (JSApp _ (JSApp s1 f [ref]) []) | isSTFunc C.readSTRef f =
- if agg then ref else JSAccessor s1 C.stRefValue ref
+ if agg then ref else JSAccessor s1 (mkString C.stRefValue) ref
convert agg (JSApp _ (JSApp _ (JSApp s1 f [ref]) [arg]) []) | isSTFunc C.writeSTRef f =
- if agg then JSAssignment s1 ref arg else JSAssignment s1 (JSAccessor s1 C.stRefValue ref) arg
+ if agg then JSAssignment s1 ref arg else JSAssignment s1 (JSAccessor s1 (mkString C.stRefValue) ref) arg
convert agg (JSApp _ (JSApp _ (JSApp s1 f [ref]) [func]) []) | isSTFunc C.modifySTRef f =
- if agg then JSAssignment s1 ref (JSApp s1 func [ref]) else JSAssignment s1 (JSAccessor s1 C.stRefValue ref) (JSApp s1 func [JSAccessor s1 C.stRefValue ref])
+ if agg then JSAssignment s1 ref (JSApp s1 func [ref]) else JSAssignment s1 (JSAccessor s1 (mkString C.stRefValue) ref) (JSApp s1 func [JSAccessor s1 (mkString C.stRefValue) ref])
convert _ other = other
-- Check if an expression represents a function in the ST module
- isSTFunc name (JSAccessor _ name' (JSVar _ st)) = st == C.st && name == name'
+ isSTFunc name (JSAccessor _ name' (JSVar _ st)) = st == C.st && mkString name == name'
isSTFunc _ _ = False
-- Find all ST Refs initialized in this block
findSTRefsIn = everythingOnJS (++) isSTRef
diff --git c/src/Language/PureScript/Parser/Lexer.hs i/src/Language/PureScript/Parser/Lexer.hs
index cbe90f5f..0bea4073 100644
--- c/src/Language/PureScript/Parser/Lexer.hs
+++ i/src/Language/PureScript/Parser/Lexer.hs
@@ -248,18 +248,6 @@ parseToken = P.choice
symbolChar :: Lexer u Char
symbolChar = P.satisfy isSymbolChar
- surrogates :: Char -> (Char, Char)
- surrogates c = (high, low)
- where
- (h, l) = divMod (fromEnum c - 0x10000) 0x400
- high = toEnum (h + 0xD800)
- low = toEnum (l + 0xDC00)
-
- expandAstralCodePointToUTF16Surrogates :: Char -> [Char]
- expandAstralCodePointToUTF16Surrogates c | fromEnum c > 0xFFFF = [high, low]
- where (high, low) = surrogates c
- expandAstralCodePointToUTF16Surrogates c = [c]
-
parseCharLiteral :: Lexer u Char
parseCharLiteral = P.try $ do {
c <- PT.charLiteral tokenParser;
@@ -269,10 +257,10 @@ parseToken = P.choice
}
parseStringLiteral :: Lexer u Text
- parseStringLiteral = blockString <|> T.pack <$> concatMap expandAstralCodePointToUTF16Surrogates <$> PT.stringLiteral tokenParser
+ parseStringLiteral = T.pack <$> (blockString <|> PT.stringLiteral tokenParser)
where
delimiter = P.try (P.string "\"\"\"")
- blockString = delimiter *> (T.pack <$> P.manyTill P.anyChar delimiter)
+ blockString = delimiter *> P.manyTill P.anyChar delimiter
parseNumber :: Lexer u (Either Integer Double)
parseNumber = (consumeLeadingZero *> P.parserZero) <|>
diff --git c/src/Language/PureScript/Pretty/JS.hs i/src/Language/PureScript/Pretty/JS.hs
index d142873f..7fdc5463 100644
--- c/src/Language/PureScript/Pretty/JS.hs
+++ i/src/Language/PureScript/Pretty/JS.hs
@@ -25,8 +25,7 @@ import Language.PureScript.CodeGen.JS.Common
import Language.PureScript.Comments
import Language.PureScript.Crash
import Language.PureScript.Pretty.Common
-
-import Numeric
+import Language.PureScript.Terms
-- TODO (Christoph): Get rid of T.unpack / pack
@@ -59,7 +58,7 @@ literals = mkPattern' match'
]
where
objectPropertyToString :: (Emit gen) => Text -> gen
- objectPropertyToString s | identNeedsEscaping s = string s
+ objectPropertyToString s | identNeedsEscaping s = string (mkString s)
| otherwise = emit s
match (JSBlock _ sts) = mconcat <$> sequence
[ return $ emit "{\n"
@@ -150,28 +149,8 @@ literals = mkPattern' match'
match (JSRaw _ js) = return $ emit js
match _ = mzero
-string :: (Emit gen) => Text -> gen
-string s = emit $ "\"" <> T.concatMap encodeChar s <> "\""
- where
- encodeChar :: Char -> Text
- encodeChar '\b' = "\\b"
- encodeChar '\t' = "\\t"
- encodeChar '\n' = "\\n"
- encodeChar '\v' = "\\v"
- encodeChar '\f' = "\\f"
- encodeChar '\r' = "\\r"
- encodeChar '"' = "\\\""
- encodeChar '\\' = "\\\\"
- -- PureScript strings are sequences of UTF-16 code units, so this case should never be hit.
- -- If it is somehow hit, though, output the designated Unicode replacement character U+FFFD.
- encodeChar c | fromEnum c > 0xFFFF = "\\uFFFD"
- encodeChar c | fromEnum c > 0xFFF = "\\u" <> showHex' (fromEnum c) ""
- encodeChar c | fromEnum c > 0xFF = "\\u0" <> showHex' (fromEnum c) ""
- encodeChar c | fromEnum c < 0x10 = "\\x0" <> showHex' (fromEnum c) ""
- encodeChar c | fromEnum c > 0x7E || fromEnum c < 0x20 = "\\x" <> showHex' (fromEnum c) ""
- encodeChar c = T.singleton c
-
- showHex' a b = T.pack (showHex a b)
+string :: (Emit gen) => PSString -> gen
+string = emit . renderJSON
conditional :: Pattern PrinterState JS ((Maybe SourceSpan, JS, JS), JS)
conditional = mkPattern match
diff --git c/src/Language/PureScript/Terms.hs i/src/Language/PureScript/Terms.hs
new file mode 100644
index 00000000..a29d7342
--- /dev/null
+++ i/src/Language/PureScript/Terms.hs
@@ -0,0 +1,77 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
+-- |
+-- Data types for PureScript terms
+--
+module Language.PureScript.Terms (PSString, mkString, renderJSON, toUTF16CodeUnits) where
+
+import qualified Data.Aeson as A
+import Prelude.Compat
+import Data.Monoid ((<>))
+import Data.Word
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Vector.Unboxed (Vector)
+import qualified Data.Vector.Unboxed as V
+
+import Numeric (showHex)
+
+-- |
+-- Strings in PureScript are sequences of UTF-16 code units, which do not
+-- necessarily represent UTF-16 encoded text. For example, it is permissible
+-- for a string to contain *lone surrogates,* i.e. characters in the range
+-- 0xD800 - 0xDFFF which do not appear as a part of a surrogate pair.
+newtype PSString = PSString (Vector Word16)
+ deriving (Eq, Ord)
+
+instance Show PSString where
+ show = T.unpack . renderJSON
+
+instance A.ToJSON PSString where
+ toJSON = A.toJSON . toString
+ where
+ toString :: PSString -> String
+ toString (PSString s) = toChar <$> V.toList s
+
+renderJSON :: PSString -> Text
+renderJSON s = T.pack $ "\"" <> concatMap encodeChar (toUTF16CodeUnits s) <> "\""
+ where
+ encodeChar :: Word16 -> String
+ encodeChar c | c > 0xFFF = "\\u" <> showHex c ""
+ encodeChar c | c > 0xFF = "\\u0" <> showHex c ""
+ encodeChar c | c < 0x10 = "\\x0" <> showHex c ""
+ encodeChar c | c > 0x7E || c < 0x20 = "\\x" <> showHex c ""
+ encodeChar c | toChar c == '\b' = "\\b"
+ encodeChar c | toChar c == '\t' = "\\t"
+ encodeChar c | toChar c == '\n' = "\\n"
+ encodeChar c | toChar c == '\v' = "\\v"
+ encodeChar c | toChar c == '\f' = "\\f"
+ encodeChar c | toChar c == '\r' = "\\r"
+ encodeChar c | toChar c == '"' = "\\\""
+ encodeChar c | toChar c == '\\' = "\\\\"
+ encodeChar c = pure $ toChar c
+
+instance A.FromJSON PSString where
+ parseJSON o = mkString <$> A.parseJSON o
+
+toChar :: Word16 -> Char
+toChar = toEnum . fromIntegral
+
+toWord :: Int -> Word16
+toWord = fromIntegral
+
+surrogates :: Char -> (Word16, Word16)
+surrogates c = (toWord (h + 0xD800), toWord (l + 0xDC00))
+ where
+ (h, l) = divMod (fromEnum c - 0x10000) 0x400
+
+encodeUTF16 :: Char -> [Word16]
+encodeUTF16 c | fromEnum c > 0xFFFF = [high, low]
+ where (high, low) = surrogates c
+encodeUTF16 c = [toWord $ fromEnum c]
+
+mkString :: Text -> PSString
+mkString = PSString . V.fromList . concatMap encodeUTF16 . T.unpack
+
+toUTF16CodeUnits :: PSString -> [Word16]
+toUTF16CodeUnits (PSString str) = V.toList str
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment