Last active
August 7, 2016 21:10
-
-
Save rhwlo/aea8d3e255ccc0740ca062a3046f2edd to your computer and use it in GitHub Desktop.
the pandoc part is as yet unnecessary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE FlexibleContexts #-} | |
{- I really like “literate” code as a writing style. Especially when I’m explaining or exploring | |
complicated ideas, writing comments in a longer form and explaining as I go along keeps me moored | |
to reality and attached to what I’m trying to do. Since I wanted a tool to convert into this | |
format, I decided to write one; since I like this format, I decided to write the tool using it. | |
I decided to use Haskell because it has [some history][0] with “literate code”, and because I | |
really like the Pandoc library for converting from Markdown to HTML. -} | |
import Control.Monad (forM_) | |
import Data.Char (isSpace) | |
import Data.Either (lefts, rights) | |
import Data.Functor.Identity (Identity(..)) | |
import Text.Pandoc (Pandoc (..), def, readMarkdown, | |
writeHtmlString) | |
import Text.Pandoc.Error (handleError) | |
import Text.Parsec | |
{- Because I want the text to run alongside the code, I want to make a data type representing the | |
flow of text. In an abstract sense, we have two columns of different types, and this gives us | |
three possible setups, disregarding the `Nothing` case. -} | |
data TwoColumn a b = RightCol a | LeftCol b | BothCols a b | |
{- In this case, our `TwoColumn` layout will consist of `Code` blocks on the right and `String` | |
blocks on the left. -} | |
data Code = Stanza Language String | |
type LiterateCode = TwoColumn String Code | |
{- We’ll define a data type for the programming languages that we support so that we can keep track | |
of the relevant bits of information (for example, comment characters). -} | |
data Language = Haskell | JavaScript | Python | Ruby | Unsupported | |
{- In the languages that I’m supporting here, we have two common types of comments. Haskell, | |
JavaScript, and Python all support multiline comments in various forms, but Ruby’s multi-line | |
syntax is very uncommon, so we’re more likely to see many occurrences of a single-line comment. | |
This can also be represented by a data type! | |
We’ll also define a function to get delimiters for whatever languages we choose to support. -} | |
data Delimiters = SingleLine String | MultiLine String String | |
delimsFor :: Language -> Delimiters | |
delimsFor Haskell = MultiLine "{-" "-}" | |
delimsFor JavaScript = MultiLine "/*" "*/" | |
delimsFor Python = MultiLine "\"\"\"" "\"\"\"" | |
delimsFor Ruby = SingleLine "#" | |
{- I’m going to declare the function `manyWhile` because it’s a common operation in this program to | |
take a string of characters that matches a particular boolean function (e.g., `isSpace`, or | |
`(/= '\n')`). I’ll also immediately use that to define a line parser that matches up until a | |
newline. -} | |
manyWhile :: Stream s Identity Char | |
=> (Char -> Bool) | |
-> Parsec s u String | |
manyWhile = many . satisfy | |
lineP :: Stream s Identity Char => Parsec s u String | |
lineP = manyWhile (/= '\n') | |
{- We’ll also start the declaration of a comment parser here, to be fleshed out for single- and | |
multi-line comments. -} | |
commentP :: Stream s Identity Char | |
=> Delimiters | |
-> Parsec s u String | |
{- For languages with single-line comments like Ruby, the comment writing style is fairly easy to | |
parse, matching a style like so: | |
> # This is a Ruby comment. It starts on one line, continues to the end of that line, and, | |
> # should it need to be multi-line, it just consists of a block of lines with an initial | |
> # indent plus the comment character. | |
We’ll use this as the first case for a comment parser. -} | |
commentP (SingleLine delim) = let | |
indentedDelimiterP = (++) <$> manyWhile isSpace <*> string delim | |
in unlines <$> many (try (indentedDelimiterP *> lineP)) | |
{- For languages with multi-line comments like Haskell, Python, or JavaScript, the comment writing | |
style that I’m used to seeing (and writing) looks like this: | |
> """ This comment starts on a first line, but it doesn’t end on one. Instead, it continues | |
> onto the next line, matching the indentation of the first character of the comment. """ | |
Because of this, our multi-line comment parser has two parts: first, it needs to use the comment | |
delimiters as a sort of bracket; and second, it needs to pay attention to the indent of the first | |
line, whether it be tabs or spaces. This requires some fun monadic binding! -} | |
commentP (MultiLine startDelim endDelim) = let | |
indentedStartDelimiterP = (++) <$> manyWhile isSpace <*> (flip replicate ' ' . length <$> string startDelim) | |
endDelimP = string endDelim | |
in indentedStartDelimiterP >>= \indent -> let | |
separatorP = string "\n" <* string indent | |
multiLineP = manyTill anyChar $ try $ lookAhead endDelimP <|> separatorP | |
in unlines <$> multiLineP `manyTill` try endDelimP | |
{- To test the parser out, we’ll use examples of comment blocks from different languages: -} | |
rubyComment :: String | |
rubyComment = unlines [ | |
" # I have written", | |
" # a comment", | |
" # that is in", | |
" # a single-line comment syntax" ] | |
haskellComment :: String | |
haskellComment = unlines [ | |
" {- and which", | |
" is probably unnecessary", | |
" writing", | |
" in this language -}" ] | |
javaScriptComment :: String | |
javaScriptComment = unlines [ | |
" /* Forgive me", | |
" this trope is trite", | |
" so cliché", | |
" and so hackneyed. */" ] | |
main :: IO () | |
main = [(Ruby, rubyComment), (Haskell, haskellComment), (JavaScript, javaScriptComment)] `forM_` | |
\(language, comment) -> parseTest (commentP (delimsFor language)) comment |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment