Skip to content

Instantly share code, notes, and snippets.

@nhirschey
Last active November 6, 2023 11:06
Show Gist options
  • Save nhirschey/900a4b1c6604166cf57e32094d596a54 to your computer and use it in GitHub Desktop.
Save nhirschey/900a4b1c6604166cf57e32094d596a54 to your computer and use it in GitHub Desktop.
IPYNB converter
open System.IO
open System.Text.Json
/// Path of the notebook to convert. It is a relative path, so it is resolved
/// against the process's current working directory when the file is read.
let ipynbFile = "post.ipynb"

/// The notebook file parsed into a JSON document.
let json =
    let notebookText = File.ReadAllText(ipynbFile)
    JsonDocument.Parse(notebookText)
/// A notebook cell reduced to the pieces needed to render it as Markdown.
type ParsedCell =
    /// A code cell: the fence language, the source text, and the already
    /// rendered outputs (None when the cell has no outputs).
    | Code of {| lang: string; source: string; outputs: string [] option |}
    /// A markdown cell, carrying its source text verbatim.
    | Markdown of source: string

    /// Renders the cell as Markdown text.
    /// A markdown cell is returned unchanged. A code cell becomes a fenced code
    /// block tagged with its language; when outputs are present they are joined
    /// with blank lines and appended after the fence, followed by a blank line.
    member this.ToMarkdown() =
        match this with
        | Markdown text -> text
        | Code cell ->
            let fenced = $"```{cell.lang}\n{cell.source}\n```"

            cell.outputs
            |> Option.map (fun rendered ->
                let joined = String.concat "\n\n" rendered
                $"{fenced}\n\n{joined}\n\n")
            |> Option.defaultValue fenced
/// Active pattern over an output `data` bundle: matches when the element has a
/// "text/html" property and yields that property's content as a single string.
/// Does not match when the property is absent.
let (|TextHtml|_|) (x: JsonElement) =
    match x.TryGetProperty("text/html") with
    // The notebook format allows multi-line text to be stored either as a list
    // of lines or as one JSON string; EnumerateArray throws on the string form,
    // so handle it explicitly.
    | true, html when html.ValueKind = JsonValueKind.String -> Some(html.GetString())
    | true, html ->
        html.EnumerateArray()
        |> Seq.map (fun line -> line.GetString())
        |> String.concat ""
        |> Some
    | _ -> None
/// Active pattern over an output `data` bundle: matches when the element has a
/// "text/plain" property and yields its content wrapped in the
/// `<table class="pre">…<pre><code>` HTML used for plain-text output.
/// Does not match when the property is absent.
let (|TextPlain|_|) (x: JsonElement) =
    match x.TryGetProperty("text/plain") with
    | true, plain ->
        let raw =
            match plain.ValueKind with
            // The notebook format allows a single JSON string as well as a
            // list of lines; EnumerateArray would throw on the string form.
            | JsonValueKind.String -> plain.GetString()
            | _ ->
                plain.EnumerateArray()
                |> Seq.map (fun line -> line.GetString())
                |> String.concat ""

        // The text is embedded in raw HTML, so characters such as '<' and '&'
        // must be escaped or they would be interpreted as markup.
        let escaped = System.Net.WebUtility.HtmlEncode(raw)

        Some("""<table class="pre"><tr><td><pre><code>""" + escaped + """</code></pre></td></tr></table>""")
    | _ -> None
/// Active pattern over a single cell output: matches outputs that carry a
/// `data` bundle and yields the rendered content, preferring "text/html" over
/// "text/plain".
/// Raises (via failwith) when the bundle has neither representation, and
/// propagates the JsonElement exception when "output_type" or "data" is missing.
let (|OutputDisplayData|_|) (x: JsonElement) =
    match x.GetProperty("output_type").GetString() with
    // "execute_result" outputs carry the same `data` bundle as "display_data"
    // in the notebook format, so they are rendered the same way instead of
    // being rejected as unknown by the caller.
    | "display_data"
    | "execute_result" ->
        let rendered =
            match x.GetProperty("data") with
            | TextHtml html -> html
            | TextPlain text -> text
            | s -> failwith $"unknown output {s}"

        Some rendered
    | _ -> None
/// Active pattern over a single cell output: matches outputs whose
/// "output_type" is "stream" and yields their "text" wrapped in the
/// `<table class="pre">…<pre><code>` HTML used for plain-text output.
/// Propagates the JsonElement exception when "output_type" or "text" is missing.
let (|OutputStream|_|) (x: JsonElement) =
    if x.GetProperty("output_type").GetString() = "stream" then
        let textElement = x.GetProperty("text")

        let raw =
            match textElement.ValueKind with
            // The notebook format allows a single JSON string as well as a
            // list of lines; EnumerateArray would throw on the string form.
            | JsonValueKind.String -> textElement.GetString()
            | _ ->
                textElement.EnumerateArray()
                |> Seq.map (fun line -> line.GetString())
                |> String.concat ""

        // Stream text is embedded in raw HTML, so '<', '>' and '&' must be
        // escaped or they would be interpreted as markup.
        let escaped = System.Net.WebUtility.HtmlEncode(raw)

        Some("""<table class="pre"><tr><td><pre><code>""" + escaped + """</code></pre></td></tr></table>""")
    else
        None
/// Returns the "source" of a notebook cell as one string.
/// Raises (via failwith "no source") when the cell has no "source" property.
let getSource (cell: JsonElement) =
    match cell.TryGetProperty("source") with
    // The notebook format allows the source to be stored as a single JSON
    // string instead of a list of lines; EnumerateArray would throw on it.
    | true, source when source.ValueKind = JsonValueKind.String -> source.GetString()
    | true, source ->
        source.EnumerateArray()
        |> Seq.map (fun line -> line.GetString())
        |> String.concat ""
    | _ -> failwith "no source"
/// Renders every entry of a cell's "outputs" array to a string.
/// Returns None when the cell has no "outputs" property or the array is empty,
/// otherwise Some array with one rendered string per output, in order.
/// Raises (via failwith) on an output that is neither a stream nor a
/// display-data output.
let collectOutputs (cell: JsonElement) =
    match cell.TryGetProperty("outputs") with
    // GetArrayLength answers the emptiness question directly, without creating
    // a throwaway enumerator just to test for a first element.
    | true, outputs when outputs.GetArrayLength() > 0 ->
        outputs.EnumerateArray()
        |> Seq.map (fun output ->
            match output with
            | OutputStream stream -> stream
            | OutputDisplayData displayData -> displayData
            | other -> failwith $"""unknown output {other.GetProperty("output_type").GetString()}""")
        |> Seq.toArray
        |> Some
    | _ -> None
/// Builds a `Code` cell from a notebook code cell.
/// The fence language is read from metadata.polyglot_notebook.kernelName; the
/// source and outputs come from `getSource` and `collectOutputs`.
/// Raises (via failwith) when any level of that metadata path is missing.
let getCode (cell: JsonElement) =
    // Fetch a required child property, failing with `missing` when absent.
    let require (name: string) (missing: string) (parent: JsonElement) =
        match parent.TryGetProperty(name) with
        | true, child -> child
        | _ -> failwith missing

    let lang =
        let kernelName =
            cell
            |> require "metadata" "Code cell does not have metadata"
            |> require "polyglot_notebook" "code cell does not have metadata.polyglot_notebook"
            |> require "kernelName" "code cell does not have metadata.polyglot_notebook.kernelName"

        kernelName.GetString()

    // Evaluated in this order so that a failure surfaces from the same step
    // as before: language first, then source, then outputs.
    let source = getSource cell
    let outputs = collectOutputs cell

    Code {| lang = lang; source = source; outputs = outputs |}
/// Converts one notebook cell into a `ParsedCell`, dispatching on "cell_type".
/// "markdown" cells become `Markdown` (and must not have outputs); "code" cells
/// are handled by `getCode`.
/// Raises (via failwith) when "cell_type" is missing or not one of those two.
let parseCell (cell: JsonElement) =
    let kind =
        match cell.TryGetProperty("cell_type") with
        | true, value -> value.GetString()
        | _ -> failwith "no cell type"

    if kind = "markdown" then
        // Read the source before looking at outputs, so a missing source is
        // reported ahead of any complaint about outputs.
        let source = getSource cell

        match collectOutputs cell with
        | Some _ -> failwith "Markdown should not have outputs"
        | None -> Markdown source
    elif kind = "code" then
        getCode cell
    else
        failwith $"unknown cell type {kind}"
// Convert every cell of the notebook to Markdown, separate the cells with a
// blank line, and write the result to test.md next to this script.
let markdownText =
    json.RootElement.GetProperty("cells").EnumerateArray()
    |> Seq.map (fun cell -> (parseCell cell).ToMarkdown())
    |> String.concat "\n\n"

System.IO.File.WriteAllText(__SOURCE_DIRECTORY__ + "/test.md", markdownText)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment