Last active
October 5, 2023 18:37
-
-
Save thelissimus/b126669586997c1dcbbc3b3b8f32793a to your computer and use it in GitHub Desktop.
Toy URI parser with megaparsec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE DerivingStrategies #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE RecordWildCards #-} | |
module Parser (module Parser) where | |
import Control.Monad (void) | |
import Data.Kind (Type) | |
import Data.List.Extra (groupSort) | |
import Data.Map (Map) | |
import Data.Map qualified as M | |
import Data.Text (Text) | |
import Data.Text qualified as T | |
import Data.Void (Void) | |
import Text.Megaparsec | |
import Text.Megaparsec.Char (alphaNumChar, char, hexDigitChar, string) | |
import Text.Megaparsec.Char.Lexer qualified as L | |
type Parser :: Type -> Type | |
type Parser = Parsec Void Text | |
type Scheme :: Type | |
data Scheme | |
= SchemeData | |
| SchemeFile | |
| SchemeFtp | |
| SchemeHttp | |
| SchemeHttps | |
| SchemeIrc | |
| SchemeMailto | |
deriving stock (Eq, Show) | |
pScheme :: Parser Scheme | |
pScheme = | |
choice | |
[ SchemeData <$ string "data" | |
, SchemeFile <$ string "file" | |
, SchemeFtp <$ string "ftp" | |
, SchemeHttps <$ string "https" | |
, SchemeHttp <$ string "http" | |
, SchemeIrc <$ string "irc" | |
, SchemeMailto <$ string "mailto" | |
] | |
type Authority :: Type | |
data Authority = MkAuthority | |
{ user :: Maybe (Text, Text) | |
, host :: Text | |
, port :: Maybe Int | |
} | |
deriving stock (Eq, Show) | |
pAuthority :: Parser Authority | |
pAuthority = do | |
void (string "//") | |
user <- optional . try $ do | |
username <- T.pack <$> some alphaNumChar <?> "username" | |
void (char ':') | |
password <- T.pack <$> some alphaNumChar <?> "password" | |
void (char '@') | |
pure (username, password) | |
host <- T.pack <$> some (alphaNumChar <|> char '.') <?> "hostname" | |
port <- optional (char ':' *> label "port number" L.decimal) | |
pure MkAuthority{..} | |
type Path :: Type | |
newtype Path = MkPath Text | |
deriving stock (Eq, Show) | |
pPath :: Parser Path | |
pPath = MkPath . T.pack <$> many ((char '%' *> hexDigitChar *> hexDigitChar) <|> alphaNumChar <|> oneOf pchar) | |
where | |
pchar = "-._~!$&'()*+,;=:@" :: [Char] | |
pQuery :: Parser (Map Text [Text]) | |
pQuery = do | |
void (char '?') | |
kvs <- many $ do | |
void (optional . char $ '&') | |
k <- T.pack <$> many alphaNumChar | |
void (char '=') | |
v <- T.pack <$> many alphaNumChar | |
pure (k, v) | |
pure . M.fromList . groupSort $ kvs | |
pFragment :: Parser Text | |
pFragment = char '#' *> (T.pack <$> many alphaNumChar) | |
type Uri :: Type | |
data Uri = MkUri | |
{ scheme :: Scheme | |
, authority :: Maybe Authority | |
, path :: [Path] | |
, query :: Map Text [Text] | |
, fragment :: Maybe Text | |
} | |
deriving stock (Eq, Show) | |
pUri :: Parser Uri | |
pUri = do | |
scheme <- pScheme <?> "valid scheme" | |
void (char ':') | |
authority <- optional pAuthority | |
path <- many (char '/' *> pPath) <?> "valid path" | |
query <- pQuery | |
fragment <- optional pFragment | |
pure MkUri{..} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment