Skip to content

Instantly share code, notes, and snippets.

@ul
Last active November 20, 2017 03:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ul/c9d9a0f9996a9a2339d20f7a04ab6e8f to your computer and use it in GitHub Desktop.
Save ul/c9d9a0f9996a9a2339d20f7a04ab6e8f to your computer and use it in GitHub Desktop.
Restricted Org mode tangler
open Base
open Stdio
open Parser
let append_or_id a = function
| None -> a
| Some b -> List.append b a
type doc = {
path_to_name: String.t Map.M(String).t;
name_to_body: String.t list Map.M(String).t;
}
let empty_doc={
path_to_name=Map.empty (module String);
name_to_body=Map.empty (module String);
}
let process_block (({path_to_name; name_to_body} as doc, index) as m) = function
| None -> m
| Some block -> match block with
| {name=None; path=None; _} -> m
| {name=None; path=Some path; body} ->
(let name = "__auto__" ^ (Caml.string_of_int index) in
({path_to_name=Map.add path_to_name path name;
name_to_body=Map.update name_to_body name ~f:(append_or_id body)},
index + 1))
| {name=Some name; path=None; body} ->
({doc with name_to_body=Map.update name_to_body name ~f:(append_or_id body)},
index)
| {name=Some name; path=Some path; body} ->
({path_to_name=Map.add path_to_name path name;
name_to_body=Map.update name_to_body name ~f:(append_or_id body)},
index)
let make_doc blocks = List.fold blocks ~init:(empty_doc, 0) ~f:process_block
let strip_paddle paddle s = let n = String.length s in
if n > paddle then String.slice s paddle n else s
let output_line chan line =
Out_channel.output_string chan line;
Out_channel.newline chan
let rec tangle ~chan ~name ~prefix ~name_to_body =
let body = Map.find_exn name_to_body name in
let unpaddle = List.hd_exn body |> get_paddle |> strip_paddle in
List.iter body ~f:(fun line ->
let line = unpaddle line in
match parse_macro line with
| Some (macro_prefix, name, suffix) ->
tangle ~chan ~name ~prefix:(prefix ^ macro_prefix) ~name_to_body;
output_line chan suffix;
| None -> prefix ^ line |> output_line chan)
let mkdir_p ~perm dir =
let mkdir_if_missing perm dir =
try
Unix.mkdir dir perm
with
(* [mkdir] on MacOSX returns [EISDIR] instead of [EEXIST] if the directory already
exists. *)
| Unix.Unix_error ((EEXIST | EISDIR), _, _) -> ()
| e -> raise e
in
let (init, dirs) = match String.split ~on:'/' dir with
| [] -> assert false
| init :: dirs -> (init, dirs)
in
mkdir_if_missing perm init;
List.fold_left dirs ~init:init ~f:(fun acc dir ->
let dir = acc ^ "/" ^ dir in
mkdir_if_missing perm dir;
dir
) |> ignore
let main =
let doc = In_channel.read_all Caml.Sys.argv.(1) in
let blocks = parse_doc doc in
let ({path_to_name; name_to_body}, _) = make_doc blocks in
Map.iteri path_to_name ~f:(
fun ~key:path ~data:name ->
let dir = Caml.Filename.dirname path in
(* if Caml.Sys.file_exists dir then () else Unix.mkdir_p ~perm:0o755 dir; *)
if Caml.Sys.file_exists dir then () else mkdir_p ~perm:0o755 dir;
tangle ~chan:(Out_channel.create path) ~name ~prefix:"" ~name_to_body
)
open Base
open Angstrom
let (>>) f g = Fn.compose g f
(* I don't understand why type constructors are not curried *)
(* UPD found it https://groups.google.com/d/msg/fa.caml/IOlkCcVBg5Q/vCx5VdZNxOMJ *)
let some x = Some x
(* Orgmode source block:
*
* 1) could be optionally named via #+NAME keyword
* 2) could have path to tangle into via #+BEGIN header argument :tangle
* 3) have source itself, which could contain macros to expand
*
* ref. http://orgmode.org/manual/Structure-of-code-blocks.html
*
* I'm going to implement restricted parser which assumes many things specific
* to my usage of Orgmode:
*
* 1) always :noweb yes :mkdirp yes :paddle no
* 2) never use multiple macros on one line
* 3) indent using spaces
*)
type source_block = {
name: string option;
path: string option;
body: string list;
}
let space = char ' '
(* Why does
* let any_till = many_till any_char
* have different type?
*)
let any_till p = many_till any_char p
let to_eol = any_till end_of_line
let skip_char = any_char *> return None
let skip_line = to_eol *> return None
let src_name = string "#+NAME:" *> to_eol >>|
(String.of_char_list >> String.strip >> some)
let src_path = any_till (string ":tangle") *> many1 space *> any_till space >>|
String.of_char_list
let parse_path s = parse_string src_path s |> Result.ok
(* I failed to find properly working combination, but I'm sure that it could be
* expressed without calling inner parse_string
*)
let src_begin = string "#+BEGIN_SRC" *> to_eol >>| (String.of_char_list >> parse_path)
let src_end = string "#+END_SRC"
let src_body = many_till to_eol src_end >>| List.map ~f:String.of_char_list
let some_block name path body = Some {name; path; body}
let src_block = lift3 some_block (option None src_name) src_begin src_body
(* It generates a lot of None, one for each line outside src block.
* How to avoid it?
*)
let doc = many (src_block <|> skip_line)
let parse_doc = parse_string doc >> Result.ok_or_failwith
let triple a b c = (String.of_char_list a, String.of_char_list b, String.of_char_list c)
let macro = lift3 triple
(any_till (string "<<"))
(any_till (string ">>"))
(any_till end_of_input)
let parse_macro = parse_string macro >> Result.ok
let paddle = many space >>| List.length
let get_paddle s = match parse_string paddle s with
| Result.Ok v -> v
| Result.Error _ -> 0
@Leonidas-from-XIV
Copy link

Matching on Result.t could be replaced by Result.ok_exn

@ul
Copy link
Author

ul commented Nov 18, 2017

Thanks! Replaced with Result.ok_or_failwith which seems to fit better.

@lindig
Copy link

lindig commented Nov 19, 2017

I have a implemented a simple noweb-like literate programming tool that solves scanning and parsing by using the OCaml's scanner and parser generator. Maybe it is useful for just seeing a different approach: https://github.com/lindig/lipsum.

@edwintorok
Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment