Last active
November 20, 2017 03:11
-
-
Save ul/c9d9a0f9996a9a2339d20f7a04ab6e8f to your computer and use it in GitHub Desktop.
Restricted Org mode tangler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open Base | |
open Stdio | |
open Parser | |
let append_or_id a = function | |
| None -> a | |
| Some b -> List.append b a | |
type doc = { | |
path_to_name: String.t Map.M(String).t; | |
name_to_body: String.t list Map.M(String).t; | |
} | |
let empty_doc={ | |
path_to_name=Map.empty (module String); | |
name_to_body=Map.empty (module String); | |
} | |
let process_block (({path_to_name; name_to_body} as doc, index) as m) = function | |
| None -> m | |
| Some block -> match block with | |
| {name=None; path=None; _} -> m | |
| {name=None; path=Some path; body} -> | |
(let name = "__auto__" ^ (Caml.string_of_int index) in | |
({path_to_name=Map.add path_to_name path name; | |
name_to_body=Map.update name_to_body name ~f:(append_or_id body)}, | |
index + 1)) | |
| {name=Some name; path=None; body} -> | |
({doc with name_to_body=Map.update name_to_body name ~f:(append_or_id body)}, | |
index) | |
| {name=Some name; path=Some path; body} -> | |
({path_to_name=Map.add path_to_name path name; | |
name_to_body=Map.update name_to_body name ~f:(append_or_id body)}, | |
index) | |
let make_doc blocks = List.fold blocks ~init:(empty_doc, 0) ~f:process_block | |
let strip_paddle paddle s = let n = String.length s in | |
if n > paddle then String.slice s paddle n else s | |
let output_line chan line = | |
Out_channel.output_string chan line; | |
Out_channel.newline chan | |
let rec tangle ~chan ~name ~prefix ~name_to_body = | |
let body = Map.find_exn name_to_body name in | |
let unpaddle = List.hd_exn body |> get_paddle |> strip_paddle in | |
List.iter body ~f:(fun line -> | |
let line = unpaddle line in | |
match parse_macro line with | |
| Some (macro_prefix, name, suffix) -> | |
tangle ~chan ~name ~prefix:(prefix ^ macro_prefix) ~name_to_body; | |
output_line chan suffix; | |
| None -> prefix ^ line |> output_line chan) | |
let mkdir_p ~perm dir = | |
let mkdir_if_missing perm dir = | |
try | |
Unix.mkdir dir perm | |
with | |
(* [mkdir] on MacOSX returns [EISDIR] instead of [EEXIST] if the directory already | |
exists. *) | |
| Unix.Unix_error ((EEXIST | EISDIR), _, _) -> () | |
| e -> raise e | |
in | |
let (init, dirs) = match String.split ~on:'/' dir with | |
| [] -> assert false | |
| init :: dirs -> (init, dirs) | |
in | |
mkdir_if_missing perm init; | |
List.fold_left dirs ~init:init ~f:(fun acc dir -> | |
let dir = acc ^ "/" ^ dir in | |
mkdir_if_missing perm dir; | |
dir | |
) |> ignore | |
let main = | |
let doc = In_channel.read_all Caml.Sys.argv.(1) in | |
let blocks = parse_doc doc in | |
let ({path_to_name; name_to_body}, _) = make_doc blocks in | |
Map.iteri path_to_name ~f:( | |
fun ~key:path ~data:name -> | |
let dir = Caml.Filename.dirname path in | |
(* if Caml.Sys.file_exists dir then () else Unix.mkdir_p ~perm:0o755 dir; *) | |
if Caml.Sys.file_exists dir then () else mkdir_p ~perm:0o755 dir; | |
tangle ~chan:(Out_channel.create path) ~name ~prefix:"" ~name_to_body | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open Base | |
open Angstrom | |
let (>>) f g = Fn.compose g f | |
(* I don't understand why type constructors are not curried *) | |
(* UPD found it https://groups.google.com/d/msg/fa.caml/IOlkCcVBg5Q/vCx5VdZNxOMJ *) | |
let some x = Some x | |
(* Orgmode source block: | |
* | |
* 1) could be optionally named via #+NAME keyword | |
* 2) could have path to tangle into via #+BEGIN header argument :tangle | |
* 3) have source itself, which could contain macros to expand | |
* | |
* ref. http://orgmode.org/manual/Structure-of-code-blocks.html | |
* | |
* I'm going to implement restricted parser which assumes many things specific | |
* to my usage of Orgmode: | |
* | |
* 1) always :noweb yes :mkdirp yes :paddle no | |
* 2) never use multiple macros on one line | |
* 3) indent using spaces | |
*) | |
type source_block = { | |
name: string option; | |
path: string option; | |
body: string list; | |
} | |
let space = char ' ' | |
(* Why does | |
* let any_till = many_till any_char | |
* have different type? | |
*) | |
let any_till p = many_till any_char p | |
let to_eol = any_till end_of_line | |
let skip_char = any_char *> return None | |
let skip_line = to_eol *> return None | |
let src_name = string "#+NAME:" *> to_eol >>| | |
(String.of_char_list >> String.strip >> some) | |
let src_path = any_till (string ":tangle") *> many1 space *> any_till space >>| | |
String.of_char_list | |
let parse_path s = parse_string src_path s |> Result.ok | |
(* I failed to find properly working combination, but I'm sure that it could be | |
* expressed without calling inner parse_string | |
*) | |
let src_begin = string "#+BEGIN_SRC" *> to_eol >>| (String.of_char_list >> parse_path) | |
let src_end = string "#+END_SRC" | |
let src_body = many_till to_eol src_end >>| List.map ~f:String.of_char_list | |
let some_block name path body = Some {name; path; body} | |
let src_block = lift3 some_block (option None src_name) src_begin src_body | |
(* It generates a lot of None, one for each line outside src block. | |
* How to avoid it? | |
*) | |
let doc = many (src_block <|> skip_line) | |
let parse_doc = parse_string doc >> Result.ok_or_failwith | |
let triple a b c = (String.of_char_list a, String.of_char_list b, String.of_char_list c) | |
let macro = lift3 triple | |
(any_till (string "<<")) | |
(any_till (string ">>")) | |
(any_till end_of_input) | |
let parse_macro = parse_string macro >> Result.ok | |
let paddle = many space >>| List.length | |
let get_paddle s = match parse_string paddle s with | |
| Result.Ok v -> v | |
| Result.Error _ -> 0 |
Thanks! Replaced with Result.ok_or_failwith
which seems to fit better.
I have a implemented a simple noweb-like literate programming tool that solves scanning and parsing by using the OCaml's scanner and parser generator. Maybe it is useful for just seeing a different approach: https://github.com/lindig/lipsum.
See https://realworldocaml.org/v1/en/html/imperative-programming-1.html#the-value-restriction for why many_till any_char
has a different type
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Matching on
Result.t
could be replaced byResult.ok_exn