rhwlo/Literate.hs

## Literate.hs
{-# LANGUAGE FlexibleContexts #-}

{- I really like “literate” code as a writing style. Especially when I’m explaining or exploring
   complicated ideas, writing comments in a longer form and explaining as I go along keeps me moored
   to reality and attached to what I’m trying to do. Since I wanted a tool to convert into this
   format, I decided to write one; since I like this format, I decided to write the tool using it.

   I decided to use Haskell because it has [some history][0] with “literate code”, and because I
   really like the Pandoc library for converting from Markdown to HTML. -}

import           Control.Monad          (forM_)
import           Data.Char              (isSpace)
import           Data.Either            (lefts, rights)
import           Data.Functor.Identity  (Identity(..))
import           Text.Pandoc            (Pandoc (..), def, readMarkdown,
                                         writeHtmlString)
import           Text.Pandoc.Error      (handleError)
import           Text.Parsec

{- Because I want the text to run alongside the code, I want to make a data type representing the
   flow of text. In an abstract sense, we have two columns of different types, and this gives us
   three possible setups, disregarding the `Nothing` case. -}

data TwoColumn a b = RightCol a | LeftCol b | BothCols a b

{- In this case, our `TwoColumn` layout will consist of `Code` blocks on the right and `String`
   blocks on the left. -}

data Code = Stanza Language String
type LiterateCode = TwoColumn String Code

{- We’ll define a data type for the programming languages that we support so that we can keep track
   of the relevant bits of information (for example, comment characters). -}

data Language = Haskell | JavaScript | Python | Ruby | Unsupported

{- In the languages that I’m supporting here, we have two common types of comments. Haskell,
   JavaScript, and Python all support multiline comments in various forms, but Ruby’s multi-line
   syntax is very uncommon, so we’re more likely to see many occurrences of a single-line comment.
   This can also be represented by a data type!

   We’ll also define a function to get delimiters for whatever languages we choose to support. -}

data Delimiters = SingleLine String | MultiLine String String

delimsFor :: Language -> Delimiters
delimsFor Haskell = MultiLine "{-" "-}"
delimsFor JavaScript = MultiLine "/*" "*/"
delimsFor Python = MultiLine "\"\"\"" "\"\"\""
delimsFor Ruby = SingleLine "#"

{- I’m going to declare the function `manyWhile` because it’s a common operation in this program to
   take a string of characters that matches a particular boolean function (e.g., `isSpace`, or
   `(/= '\n')`). I’ll also immediately use that to define a line parser that matches up until a
   newline. -}

manyWhile :: Stream s Identity Char
          => (Char -> Bool)
          -> Parsec s u String
manyWhile = many . satisfy

lineP :: Stream s Identity Char => Parsec s u String
lineP = manyWhile (/= '\n')

{- We’ll also start the declaration of a comment parser here, to be fleshed out for single- and
   multi-line comments. -}

commentP :: Stream s Identity Char
         => Delimiters
         -> Parsec s u String

{- For languages with single-line comments like Ruby, the comment writing style is fairly easy to
   parse, matching a style like so:

   >        # This is a Ruby comment. It starts on one line, continues to the end of that line, and,
   >        # should it need to be multi-line, it just consists of a block of lines with an initial
   >        # indent plus the comment character.

   We’ll use this as the first case for a comment parser. -}

commentP (SingleLine delim) = let
        indentedDelimiterP = (++) <$> manyWhile isSpace <*> string delim
    in unlines <$> many (try (indentedDelimiterP *> lineP))
{- For languages with multi-line comments like Haskell, Python, or JavaScript, the comment writing
   style that I’m used to seeing (and writing) looks like this:

   >     """ This comment starts on a first line, but it doesn’t end on one. Instead, it continues
   >         onto the next line, matching the indentation of the first character of the comment. """

   Because of this, our multi-line comment parser has two parts: first, it needs to use the comment
   delimiters as a sort of bracket; and second, it needs to pay attention to the indent of the first
   line, whether it be tabs or spaces. This requires some fun monadic binding! -}
commentP (MultiLine startDelim endDelim) = let
        indentedStartDelimiterP = (++) <$> manyWhile isSpace <*> (flip replicate ' ' . length <$> string startDelim)
        endDelimP = string endDelim
    in indentedStartDelimiterP >>= \indent -> let
            separatorP = string "\n" <* string indent
            multiLineP = manyTill anyChar $ try $ lookAhead endDelimP <|> separatorP
        in unlines <$> multiLineP `manyTill` try endDelimP

{- To test the parser out, we’ll use examples of comment blocks from different languages: -}
rubyComment :: String
rubyComment = unlines [
    "  # I have written",
    "  # a comment",
    "  # that is in",
    "  # a single-line comment syntax" ]

haskellComment :: String
haskellComment = unlines [
    "  {- and which",
    "     is probably unnecessary",
    "     writing",
    "     in this language -}" ]

javaScriptComment :: String
javaScriptComment = unlines [
    "      /* Forgive me",
    "         this trope is trite",
    "         so cliché",
    "         and so hackneyed. */" ]

main :: IO ()
main = [(Ruby, rubyComment), (Haskell, haskellComment), (JavaScript, javaScriptComment)] `forM_`
        \(language, comment) -> parseTest (commentP (delimsFor language)) comment
	{-# LANGUAGE FlexibleContexts #-}

	{- I really like “literate” code as a writing style. Especially when I’m explaining or exploring
	complicated ideas, writing comments in a longer form and explaining as I go along keeps me moored
	to reality and attached to what I’m trying to do. Since I wanted a tool to convert into this
	format, I decided to write one; since I like this format, I decided to write the tool using it.

	I decided to use Haskell because it has [some history][0] with “literate code”, and because I
	really like the Pandoc library for converting from Markdown to HTML. -}

	import Control.Monad (forM_)
	import Data.Char (isSpace)
	import Data.Either (lefts, rights)
	import Data.Functor.Identity (Identity(..))
	import Text.Pandoc (Pandoc (..), def, readMarkdown,
	writeHtmlString)
	import Text.Pandoc.Error (handleError)
	import Text.Parsec

	{- Because I want the text to run alongside the code, I want to make a data type representing the
	flow of text. In an abstract sense, we have two columns of different types, and this gives us
	three possible setups, disregarding the `Nothing` case. -}

	data TwoColumn a b = RightCol a \| LeftCol b \| BothCols a b

	{- In this case, our `TwoColumn` layout will consist of `Code` blocks on the right and `String`
	blocks on the left. -}

	data Code = Stanza Language String
	type LiterateCode = TwoColumn String Code

	{- We’ll define a data type for the programming languages that we support so that we can keep track
	of the relevant bits of information (for example, comment characters). -}

	data Language = Haskell \| JavaScript \| Python \| Ruby \| Unsupported

	{- In the languages that I’m supporting here, we have two common types of comments. Haskell,
	JavaScript, and Python all support multiline comments in various forms, but Ruby’s multi-line
	syntax is very uncommon, so we’re more likely to see many occurrences of a single-line comment.
	This can also be represented by a data type!

	We’ll also define a function to get delimiters for whatever languages we choose to support. -}

	data Delimiters = SingleLine String \| MultiLine String String

	delimsFor :: Language -> Delimiters
	delimsFor Haskell = MultiLine "{-" "-}"
	delimsFor JavaScript = MultiLine "/" "/"
	delimsFor Python = MultiLine "\"\"\"" "\"\"\""
	delimsFor Ruby = SingleLine "#"

	{- I’m going to declare the function `manyWhile` because it’s a common operation in this program to
	take a string of characters that matches a particular boolean function (e.g., `isSpace`, or
	`(/= '\n')`). I’ll also immediately use that to define a line parser that matches up until a
	newline. -}

	manyWhile :: Stream s Identity Char
	=> (Char -> Bool)
	-> Parsec s u String
	manyWhile = many . satisfy

	lineP :: Stream s Identity Char => Parsec s u String
	lineP = manyWhile (/= '\n')

	{- We’ll also start the declaration of a comment parser here, to be fleshed out for single- and
	multi-line comments. -}

	commentP :: Stream s Identity Char
	=> Delimiters
	-> Parsec s u String

	{- For languages with single-line comments like Ruby, the comment writing style is fairly easy to
	parse, matching a style like so:

	> # This is a Ruby comment. It starts on one line, continues to the end of that line, and,
	> # should it need to be multi-line, it just consists of a block of lines with an initial
	> # indent plus the comment character.

	We’ll use this as the first case for a comment parser. -}

	commentP (SingleLine delim) = let
	indentedDelimiterP = (++) <$> manyWhile isSpace <*> string delim
	in unlines <$> many (try (indentedDelimiterP *> lineP))
	{- For languages with multi-line comments like Haskell, Python, or JavaScript, the comment writing
	style that I’m used to seeing (and writing) looks like this:

	> """ This comment starts on a first line, but it doesn’t end on one. Instead, it continues
	> onto the next line, matching the indentation of the first character of the comment. """

	Because of this, our multi-line comment parser has two parts: first, it needs to use the comment
	delimiters as a sort of bracket; and second, it needs to pay attention to the indent of the first
	line, whether it be tabs or spaces. This requires some fun monadic binding! -}
	commentP (MultiLine startDelim endDelim) = let
	indentedStartDelimiterP = (++) <$> manyWhile isSpace <*> (flip replicate ' ' . length <$> string startDelim)
	endDelimP = string endDelim
	in indentedStartDelimiterP >>= \indent -> let
	separatorP = string "\n" <* string indent
	multiLineP = manyTill anyChar $ try $ lookAhead endDelimP <\|> separatorP
	in unlines <$> multiLineP `manyTill` try endDelimP

	{- To test the parser out, we’ll use examples of comment blocks from different languages: -}
	rubyComment :: String
	rubyComment = unlines [
	" # I have written",
	" # a comment",
	" # that is in",
	" # a single-line comment syntax" ]

	haskellComment :: String
	haskellComment = unlines [
	" {- and which",
	" is probably unnecessary",
	" writing",
	" in this language -}" ]

	javaScriptComment :: String
	javaScriptComment = unlines [
	" /* Forgive me",
	" this trope is trite",
	" so cliché",
	" and so hackneyed. */" ]

	main :: IO ()
	main = [(Ruby, rubyComment), (Haskell, haskellComment), (JavaScript, javaScriptComment)] `forM_`
	\(language, comment) -> parseTest (commentP (delimsFor language)) comment