-
-
Save nhirschey/900a4b1c6604166cf57e32094d596a54 to your computer and use it in GitHub Desktop.
IPYNB converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System.IO | |
open System.Text.Json | |
// Notebook file to convert.
let ipynbFile = "post.ipynb"

// The notebook contents parsed into a JSON document; the conversion below
// reads the "cells" array from its root element.
let json =
    let notebookText = File.ReadAllText(ipynbFile)
    JsonDocument.Parse(notebookText)
/// A notebook cell after parsing: either a code cell (language tag, source
/// text and optional already-rendered outputs) or a markdown cell.
type ParsedCell =
    | Code of {| lang: string; source: string; outputs: string [] option |}
    | Markdown of source: string

    /// Renders the cell as markdown text.
    /// Markdown cells are returned verbatim. Code cells become a fenced code
    /// block tagged with the cell language; when outputs are present they are
    /// appended after the fence, separated (and followed) by blank lines.
    member cell.ToMarkdown() =
        match cell with
        | Markdown text -> text
        | Code block ->
            let fenced = $"```{block.lang}\n{block.source}\n```"

            match block.outputs with
            | Some rendered ->
                let joined = String.concat "\n\n" rendered
                $"{fenced}\n\n{joined}\n\n"
            | None -> fenced
/// Partial active pattern: matches a JSON object that has a "text/html"
/// property and yields the string items of that array concatenated with no
/// separator. Does not match when the property is absent.
let (|TextHtml|_|) (x: JsonElement) =
    let found, fragments = x.TryGetProperty("text/html")

    if found then
        let pieces = [ for fragment in fragments.EnumerateArray() -> fragment.GetString() ]
        Some(String.concat "" pieces)
    else
        None
/// Partial active pattern: matches a JSON object that has a "text/plain"
/// property. The string items of that array are concatenated, HTML-encoded
/// and wrapped in a `<table class="pre">…<pre><code>` block.
/// Does not match when the property is absent.
let (|TextPlain|_|) (x: JsonElement) =
    match x.TryGetProperty("text/plain") with
    | true, text ->
        let raw =
            text.EnumerateArray()
            |> Seq.map (fun line -> line.GetString())
            |> String.concat ""
        // The text is embedded in HTML markup, so characters such as '<', '>'
        // and '&' must be encoded or output like `seq<int>` would be parsed
        // as a tag instead of being displayed.
        let encoded = System.Net.WebUtility.HtmlEncode(raw)
        Some(
            """<table class="pre"><tr><td><pre><code>""" + encoded + """</code></pre></td></tr></table>"""
        )
    | _ -> None
/// Partial active pattern: matches an output element whose "output_type" is
/// "display_data" and yields its rendered content, preferring the "text/html"
/// representation of "data" over "text/plain".
/// Raises (via failwith) when "data" carries neither representation.
let (|OutputDisplayData|_|) (x: JsonElement) =
    if x.GetProperty("output_type").GetString() = "display_data" then
        // Bind the match result first so it is unambiguous that `Some`
        // wraps the outcome of the whole match.
        let rendered =
            match x.GetProperty("data") with
            | TextHtml html -> html
            | TextPlain text -> text
            | s -> failwith $"unknown output {s}"
        Some rendered
    else
        None
/// Partial active pattern: matches an output element whose "output_type" is
/// "stream". The string items of its "text" array are concatenated,
/// HTML-encoded and wrapped in a `<table class="pre">…<pre><code>` block.
let (|OutputStream|_|) (x: JsonElement) =
    if x.GetProperty("output_type").GetString() = "stream" then
        let raw =
            x.GetProperty("text").EnumerateArray()
            |> Seq.map (fun line -> line.GetString())
            |> String.concat ""
        // Stream text is embedded in HTML markup; encode it so '<', '>' and
        // '&' in the captured output are displayed rather than parsed.
        let encoded = System.Net.WebUtility.HtmlEncode(raw)
        Some(
            """<table class="pre"><tr><td><pre><code>""" + encoded + """</code></pre></td></tr></table>"""
        )
    else
        None
/// Returns the source text of a notebook cell.
/// The "source" property may be either a single string or an array of
/// strings (the notebook format permits both); array items are concatenated
/// with no separator.
/// Raises (via failwith "no source") when the cell has no "source" property.
let getSource (cell: JsonElement) =
    match cell.TryGetProperty("source") with
    | true, source when source.ValueKind = JsonValueKind.String ->
        // Single-string form: already the complete text.
        source.GetString()
    | true, source ->
        source.EnumerateArray()
        |> Seq.map (fun line -> line.GetString())
        |> String.concat ""
    | _ -> failwith "no source"
/// Collects the rendered outputs of a cell.
/// Returns None when the cell has no "outputs" property or the array is
/// empty; otherwise Some array with one rendered string per output, in order.
/// Raises (via failwith) for an output that is neither a stream nor
/// display data.
let collectOutputs (cell: JsonElement) =
    match cell.TryGetProperty("outputs") with
    // GetArrayLength answers the emptiness question directly instead of
    // starting an enumeration just to test for a first element.
    | true, outputs when outputs.GetArrayLength() > 0 ->
        outputs.EnumerateArray()
        |> Seq.map (fun output ->
            match output with
            | OutputStream stream -> stream
            | OutputDisplayData displayData -> displayData
            | other -> failwith $"""unknown output {other.GetProperty("output_type").GetString()}""")
        |> Seq.toArray
        |> Some
    | _ -> None
/// Builds a Code cell from a notebook code cell: the language is read from
/// metadata.polyglot_notebook.kernelName, the text from the cell source and
/// the outputs from the cell outputs.
/// Raises (via failwith) when any level of that metadata path is missing.
let getCode (cell: JsonElement) =
    // Fetch a required child property, failing with the given message when absent.
    let require (name: string) (missing: string) (parent: JsonElement) =
        match parent.TryGetProperty(name) with
        | true, child -> child
        | false, _ -> failwith missing

    let kernelName =
        cell
        |> require "metadata" "Code cell does not have metadata"
        |> require "polyglot_notebook" "code cell does not have metadata.polyglot_notebook"
        |> require "kernelName" "code cell does not have metadata.polyglot_notebook.kernelName"

    let lang = kernelName.GetString()
    let text = getSource cell
    let rendered = collectOutputs cell
    Code {| lang = lang; source = text; outputs = rendered |}
/// Parses one notebook cell according to its "cell_type".
/// "markdown" cells become Markdown (and must not carry outputs), "code"
/// cells are delegated to getCode; a missing or any other cell type raises
/// via failwith.
let parseCell (cell: JsonElement) =
    let hasType, typeElement = cell.TryGetProperty("cell_type")

    if not hasType then
        failwith "no cell type"

    let kind = typeElement.GetString()

    if kind = "markdown" then
        // Read the source before inspecting outputs, as a missing source is
        // reported first.
        let text = getSource cell

        if Option.isSome (collectOutputs cell) then
            failwith $"Markdown should not have outputs"

        Markdown text
    elif kind = "code" then
        getCode cell
    else
        failwith $"unknown cell type {kind}"
// Convert every cell of the notebook to markdown, join the pieces with blank
// lines and write the result to test.md in this script's directory.
do
    let cells = json.RootElement.GetProperty("cells").EnumerateArray()

    let markdown =
        cells
        |> Seq.map (fun cell -> (parseCell cell).ToMarkdown())
        |> String.concat "\n\n"

    System.IO.File.WriteAllText(__SOURCE_DIRECTORY__ + "/test.md", markdown)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment