Skip to content

Instantly share code, notes, and snippets.

@aavogt
Created February 1, 2014 04:12
Show Gist options
  • Save aavogt/8747831 to your computer and use it in GitHub Desktop.
Save aavogt/8747831 to your computer and use it in GitHub Desktop.
{-# LANGUAGE DeriveDataTypeable, ViewPatterns, QuasiQuotes OverloadedStrings, ScopedTypeVariables #-}
import Data.Aeson
import Control.Applicative
import qualified Data.ByteString.Lazy as L
import qualified Data.HashMap.Strict as M
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.IO as T
import qualified Data.Text as TS
import HMQQ (q)
import qualified Data.Vector as V
import Data.Vector (Vector)
import Data.Monoid
import Data.Maybe
import Control.Monad
import System.Console.CmdArgs
import System.FilePath
import Data.Char
data LhsStyle = LhsStyle { codePrefix, outputPrefix :: T.Text,
codeDelims, outputDelims :: (T.Text,T.Text) }
lhsStyles =
[("bird", LhsStyle "> " "\n<< " ("","") ("","")),
("latex", LhsStyle "" "\n" ("\\begin{code}","\\end{code}")
("\\begin{verbatim}","\\end{verbatim}"))]
data Format = LHS_Markdown | IPYNB
deriving (Data,Typeable, Show, Eq)
data Convert = Convert { input :: String, output :: Maybe String,
from, to :: Maybe Format,
style :: String }
deriving (Data,Typeable,Show)
args0 = Convert { input = "Ihaskell.ipynb" &= typFile &= args,
output = Nothing &= typFile,
style = "bird" &= help "bird (default) or latex (not fully supported yet)",
from = Nothing &= help "guessed from file extension",
to = Nothing &= help "guessed from file extension" }
&= help "convert between ipynb and lhs"
&= details ["Examples all do the same thing, with increasing verbosity:", "",
"convertNotebook IHaskell.ipynb",
"convertNotebook -i IHaskell.ipynb",
"convertNotebook -i IHaskell.ipynb -o IHaskell.lhs",
"convertNotebook --input IHaskell.ipynb --output IHaskell.lhs",
"convertNotebook IHaskell.ipynb --from=IPYNB --to=LHS_Markdown"]
fromExt s = case map toLower (takeExtension s) of
".lhs" -> Just LHS_Markdown
".ipynb" -> Just IPYNB
_ -> Nothing
formatToExtension x = case x of
LHS_Markdown -> ".lhs"
IPYNB -> ".ipynb"
otherFmt x = case x of
LHS_Markdown -> IPYNB
IPYNB -> LHS_Markdown
main = do
args1 <- cmdArgs args0
case args1 of
Convert i o
((`mplus` fromExt i) -> Just f)
(fromMaybe (otherFmt f) -> t)
style
| f /= t,
o <- replaceExtension i (formatToExtension t) `fromMaybe` o,
sty <- fromMaybe (snd (head lhsStyles)) (lookup style lhsStyles) ->
case (f,t) of
(IPYNB, LHS_Markdown) -> ipynbTolhs sty i o
(LHS_Markdown, IPYNB) -> lhsToIpynb sty i o
_ -> error $ "unimplemented: " ++ show f ++ " -> " ++ show t
lhsToIpynb :: LhsStyle -> FilePath -> FilePath -> IO ()
lhsToIpynb sty from to = do
classed <- classifyLines sty . T.lines <$> T.readFile from
L.writeFile to . encode . encodeCells $ groupClassified classed
data CellLine a = CodeLine a | OutputLine a | MarkdownLine a
deriving Show
isCode (CodeLine _) = True
isCode _ = False
isOutput (OutputLine _) = True
isOutput _ = False
isMD (MarkdownLine _) = True
isMD _ = False
isEmptyMD (MarkdownLine a) = a == mempty
isEmptyMD _ = False
untag (CodeLine a) = a
untag (OutputLine a) = a
untag (MarkdownLine a) = a
data Cell a = Code a a | Markdown a
deriving (Show)
encodeCells :: [Cell [T.Text]] -> Value
encodeCells xs = object $
[ "worksheets" .= Array (V.singleton (object
[ "cells" .= Array (V.fromList (map cellToVal xs)) ] ))
] ++ boilerplate
cellToVal :: Cell [T.Text] -> Value
cellToVal (Code i o) = object $
[ "cell_type" .= String "code",
"collapsed" .= Bool False,
"language" .= String "python", -- is what it IPython gives us
"metadata" .= object [],
"input" .= arrayFromTxt i,
"outputs" .= Array
(V.fromList (
[ object ["text" .= arrayFromTxt o,
"metadata" .= object [],
"output_type" .= String "display_data" ]
| _ <- take 1 o])) ]
cellToVal (Markdown txt) = object $
[ "cell_type" .= String "markdown",
"metadata" .= object [],
"source" .= arrayFromTxt txt ]
arrayFromTxt i = Array (V.fromList (map (String . T.toStrict) i))
boilerplate =
[ "metadata" .= object [ "language" .= String "haskell", "name" .= String ""],
"nbformat" .= Number 3,
"nbformat_minor" .= Number 0 ]
groupClassified :: [CellLine T.Text] -> [Cell [T.Text]]
groupClassified (CodeLine a : x)
| (c,x) <- span isCode x,
(_,x) <- span isEmptyMD x,
(o,x) <- span isOutput x = Code (a : map untag c) (map untag o) : groupClassified x
groupClassified (MarkdownLine a : x)
| (m,x) <- span isMD x = Markdown (a: map untag m) : groupClassified x
groupClassified (OutputLine a : x ) = Markdown [a] : groupClassified x
groupClassified [] = []
classifyLines sty@(LhsStyle c o _ _) (l:ls) = case (sp c, sp o) of
(Just a, Nothing) -> CodeLine a : classifyLines sty ls
(Nothing, Just a) -> OutputLine a : classifyLines sty ls
(Nothing,Nothing) -> MarkdownLine l : classifyLines sty ls
where sp c = T.stripPrefix (T.dropWhile isSpace c) (T.dropWhile isSpace l)
classifyLines _ [] = []
ipynbTolhs :: LhsStyle -> FilePath -> FilePath -> IO ()
ipynbTolhs sty from to = do
Just (js :: Object) <- decode <$> L.readFile from
case js of
[q| worksheets : Array x |]
| [ Object [q| cells : Array x |] ] <- V.toList x ->
T.writeFile to $ T.unlines $ V.toList
$ V.map (\(Object y) -> convCell sty y) x
unString :: T.Text -> Vector Value -> Maybe T.Text
unString p arr = T.concat . map (p <>) . V.toList <$> V.mapM toStr arr
toStr (String x) = Just (T.fromStrict x)
toStr _ = Nothing
convOutputs sty arr =
(fst (outputDelims sty) <>) .
(<> snd (outputDelims sty)) . T.concat . V.toList
<$> V.mapM (getTexts (outputPrefix sty)) arr
getTexts p (Object [q| text : Array x |]) = unString p x
getTexts _ _ = Nothing
convCell :: LhsStyle -> Object -> T.Text
convCell sty
[q| cell_type : String "markdown",
source : Array xs |]
| ~ (Just s) <- unString "" xs = s
convCell sty
[q| cell_type : String "code",
input : Array i,
outputs : Array o
|]
| ~ (Just i) <- wrapDelims (codeDelims sty) <$>
unString (codePrefix sty) i,
o <- fromMaybe mempty (convOutputs sty o) = "\n" <> i <> "\n" <> o <> "\n"
convCell _ x = "unknown"
wrapDelims (a,b) x = a <> x <> b
{-# LANGUAGE ViewPatterns, TemplateHaskell #-}
module HMQQ where
import Language.Haskell.TH.Quote
import Language.Haskell.TH
import Language.Haskell.TH.Syntax
import Language.Haskell.Meta
import Data.Aeson
import Control.Monad
import qualified Data.HashMap.Strict as M
import qualified Data.Text as T
-- | so you can write
-- > f [p| "abc de" : v1, abc : v2 |] = g v1 v2
fromK (VarP n) = lift (show n)
fromK (LitP x) = litE x
fromK x = error ("invalid key:"++show x)
-- | incomplete conversion of haskell literals into literals that stand for
-- aeson's Value
fixP s@(LitP (StringL {})) = conP 'String [return s]
fixP x = return x
extractJSONs :: Pat -> Maybe (ExpQ, PatQ)
extractJSONs (ListP ps) = Just $
let (e,p) = unzip [ (\m -> [| M.lookup $(fromK k `sigE` [t| T.Text |]) $m |],
conP 'Just [fixP p])
| ~ (UInfixP k cons p) <- ps,
~ True <- [cons == '(:)] ]
in ([| \m -> $(tupE (map ($ [| m |]) e)) |], tupP p)
extractJSONs _ = Nothing
q = QuasiQuoter
{ quotePat = \s -> case parsePat ("[" ++ s ++ "]") of
Right (extractJSONs -> Just (e,p)) -> viewP e p
x -> fail (show x),
quoteExp = error "q",
quoteType = error "q",
quoteDec = error "q"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment