Skip to content

Instantly share code, notes, and snippets.

@aantron
Last active March 15, 2016 22:24
Show Gist options
  • Save aantron/34271c1a4d051d6c966f to your computer and use it in GitHub Desktop.
Save aantron/34271c1a4d051d6c966f to your computer and use it in GitHub Desktop.
let take limit html =
let spaces = Str.regexp "[ \t\r\n]+" in
let rec repeat acc n v = if n > 0 then repeat (v::acc) (n - 1) v else acc in
let filter_signals =
(0, 0)
|> Markup.transform (fun ((seen, unclosed_elements) as state) signal ->
match signal with
| `Text ss ->
let tokens = ss |> String.concat "" |> Str.split spaces in
(* Count tokens in the current `Text signal, accumulating into seen, up
to limit. If limit is reached, create a new signal containing only
the tokens up to limit. Otherwise, keep the original signal. *)
let rec text_to_emit acc seen = function
| [] -> signal, seen
| token::rest when seen < limit ->
text_to_emit (token::acc) (seen + 1) rest
| _::_ -> `Text [List.rev acc |> String.concat " "], seen
in
let signal, seen = text_to_emit [] seen tokens in
(* If limit was not reached, emit the current text signal, and
continue. Otherwise, if limit is reached, emit the current
(potentially truncated) signal, followed by an `End_element signal
for each unclosed element, and stop. *)
if seen < limit then
[signal], Some (seen, unclosed_elements)
else
signal::(repeat [] unclosed_elements `End_element), None
| `Start_element _ -> [signal], Some (seen, unclosed_elements + 1)
| `End_element -> [signal], Some (seen, unclosed_elements - 1)
| _ -> [signal], Some state)
in
Markup.(string html
|> parse_html |> signals |> filter_signals |> write_html |> to_string)
let () = "
<p>foo</p>
<p>
bar <a href='/'>click here</a>
</p>"
|> take 3
|> print_endline
(* Output:
<p>foo</p>
<p>
bar <a href="/">click</a></p>
*)
(* Compile and run:
ocamlfind opt -linkpkg -package markup -package str take.ml && ./a.out
*)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment