Skip to content

Instantly share code, notes, and snippets.

@andreburgaud
Created July 2, 2017 06:22
Show Gist options
  • Save andreburgaud/295b4423090f829fa1147c01b46f48bd to your computer and use it in GitHub Desktop.
Save andreburgaud/295b4423090f829fa1147c01b46f48bd to your computer and use it in GitHub Desktop.
FutureLearn - Functional Programming in Erlang 2.25 - Text Processing
-module(text).
-export([get_file_contents/1,print_text/2,process/2]).
-include_lib("eunit/include/eunit.hrl").
%% -----------------------------------------------------------------------------
%% Text Processing exercise from Functional Programming in Erlang 2.25
%% -----------------------------------------------------------------------------
%% Compiled and tested with Erlang/OTP 20.0
%% -----------------------------------------------------------------------------
%% To execute tests in the Erlang shell:
%% > eunit:test(text, [verbose]).
%% -----------------------------------------------------------------------------
%% To test with an example, create a file with some text in the same directory
%% as this module, then in the Erlang shell, execute:
%% > c(text).
%% > text:process(50, file_name).
%% or
%% > text:print_text(50, file_name).
%% -----------------------------------------------------------------------------
%% Read the file called Name and return its lines in file order.
%% Crashes with a badmatch if the file cannot be opened. The opened device is
%% handed to get_all_lines/2, which closes it once eof is reached and returns
%% the lines in reverse order; they are put back in file order here.
get_file_contents(Name) ->
    {ok, Device} = file:open(Name, [read]),
    lists:reverse(get_all_lines(Device, [])).
%% Auxiliary function for get_file_contents/1. Not exported.
%% Reads the remaining lines from File and pushes each one onto Partial, so
%% the result is in reverse file order. The trailing newline is removed from
%% every line; a final line that has no terminating newline is kept intact.
%% File is closed when eof is reached.
get_all_lines(File, Partial) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Partial;
        Line ->
            get_all_lines(File, [strip_newline(Line) | Partial])
    end.

%% Drop a single trailing newline, if there is one. Dropping the last
%% character unconditionally would truncate the last line of a file that does
%% not end with a newline.
strip_newline(Line) ->
    case lists:suffix("\n", Line) of
        true -> lists:droplast(Line);
        false -> Line
    end.
%% Split one line into words, using runs of whitespace as the only separator.
%% Auxiliary function for split_lines.
%% Leading and trailing whitespace is ignored. An empty or whitespace-only
%% line yields no words.
-spec split_line(string()) -> [string()].
split_line([]) -> [];
split_line(Line) ->
    Parts = re:split(string:trim(Line), "\\s+", [{return, list}]),
    %% Splitting an empty subject (a line that held only whitespace before
    %% trimming) produces an empty part; drop it so that a blank line does not
    %% contribute an empty word.
    [W || W <- Parts, W =/= []].
%% Split every non-empty line into its list of words.
%% Returns one word list per retained line, in the same order as Lines;
%% empty lines are skipped.
-spec split_lines([string()]) -> [[string()]].
split_lines(Lines) ->
    [split_line(Line) || Line <- Lines, Line =/= []].
%% Flatten a two-dimensional list into a one-dimensional list: a list of
%% lines, each line being a list of words, becomes one long list of words in
%% the original order.
-spec flatten([[string()]]) -> [string()].
flatten(Lines) ->
    lists:append(Lines).
%% Total number of characters over all tokens of a line. Tokens are counted
%% exactly as given; nothing is added for separators, so whitespace only
%% counts when it is present as a token of its own.
-spec raw_length_line([string()]) -> integer().
raw_length_line(Tokens) ->
    lists:sum([length(Token) || Token <- Tokens]).
%% Width a line would have if the given words (which carry no spaces
%% themselves) were written out with one space between neighbours: the sum of
%% the word lengths plus one per gap. An empty word list has width 0.
-spec length_line([string()]) -> integer().
length_line([]) -> 0;
length_line([First | Rest]) ->
    %% Start from the first word, then every further word costs its own
    %% length plus the single space that precedes it.
    lists:foldl(fun(Word, Width) -> Width + 1 + length(Word) end, length(First), Rest).
%% Walk the tokens of Line from left to right with a budget of N extra
%% spaces. Every " " separator token met while the budget is still positive
%% consumes one unit of it; all other tokens pass through untouched.
%% Returns {Tokens, RemainingBudget}.
%%
%% NOTE(review): the replacement token below is the same single space that
%% was matched, so the returned token list equals the input and only the
%% budget changes. If the intent is to widen the gap, the replacement
%% presumably needs to be two spaces -- confirm, and adjust the unit tests
%% that pin the current output at the same time.
-spec add_ws(integer(), [string()]) -> {[string()], integer()}.
add_ws(N, Line) ->
    lists:mapfoldl(
        fun
            (" ", Budget) when Budget > 0 -> {" ", Budget - 1};
            (Token, Budget) -> {Token, Budget}
        end,
        N,
        Line
    ).
%% Render a tokenised line (words interleaved with " " separator tokens) as a
%% flat string. The difference between the requested width N and the current
%% raw length of the tokens is passed to add_ws/2 as the padding budget; the
%% tokens it returns are concatenated and the leftover budget is ignored.
-spec align(integer(), [string()]) -> string().
align(N, Line) ->
    Missing = N - raw_length_line(Line),
    {Padded, _Leftover} = add_ws(Missing, Line),
    lists:append(Padded).
%% Attempt to strictly justify the text by padding whitespace.
%% Covers basic cases only.
%% The words Ws are first grouped into lines by justify/3, which returns each
%% line with its words in reverse order. Every line is then put back in
%% reading order, interleaved with " " separator tokens and rendered to a
%% string by align/2.
-spec justify(integer(), [string()]) -> [string()].
justify(N, Ws) ->
    Render = fun(ReversedWords) ->
        Tokens = lists:join(" ", lists:reverse(ReversedWords)),
        align(N, Tokens)
    end,
    lists:map(Render, justify(N, Ws, [])).
%% Loosely justify the text by splitting it into lines that stay within the
%% requested length N.
%% Words are taken greedily: a word joins the line under construction while
%% length_line/1 of that line stays =< N, otherwise the line is emitted and
%% the word starts a new one. The first word of a line is accepted without a
%% width check, so a word longer than N ends up on a line of its own.
%% The third argument holds the line under construction at its head; callers
%% start with []. Lines are returned in reading order, but the words inside
%% each line are in reverse order.
-spec justify(integer(), [string()], [[string()]]) -> [[string()]].
justify(_N, [], Lines) ->
    Lines;
justify(N, [W | Ws], []) ->
    justify(N, Ws, [[W]]);
justify(N, [W | Ws], [Line | Lines]) ->
    Candidate = [W | Line],
    case length_line(Candidate) =< N of
        true -> justify(N, Ws, [Candidate | Lines]);
        false -> [Line | justify(N, Ws, [[W] | Lines])]
    end.
%% Main function. Takes the requested line length N and the name of the file
%% to read from, and returns the justified text as a list of lines.
%% Name is passed unchanged to file:open/2 (via get_file_contents/1), so any
%% file name form accepted there may be used -- including an atom, as in the
%% usage example in the module header.
-spec process(integer(), file:name_all()) -> [string()].
process(N, Name) ->
    Lines = get_file_contents(Name),
    Words = flatten(split_lines(Lines)),
    justify(N, Words).
%% Explicitely print the output.
-spec print_text(integer(), string()) -> atom().
print_text(N, Name) ->
lists:foreach(fun(Line) -> io:format("~p~n", [Line]) end, process(N, Name)).
%% -----------------------------------------------------------------------------
%% TESTS
%% -----------------------------------------------------------------------------
%% Table-driven generator: each entry pairs the expected word list with the
%% input line handed to split_line/1.
split_line_test_() ->
    Cases = [
        {["The", "heat", "bloomed", "in", "December"],
         "The heat bloomed in December"},
        {["Nearly", "helpless", "with", "sun", "and", "glare,", "I", "avoided", "Rio's", "brilliant."],
         "Nearly helpless with sun and glare, I avoided Rio's brilliant."}
    ],
    [?_assertEqual(Expected, split_line(Input)) || {Expected, Input} <- Cases].
%% Four words totalling 17 characters plus three single-space gaps.
length_line_test_() ->
    Words = ["un", "deux", "trois", "quatre"],
    ?_assertEqual(20, length_line(Words)).
%% Two lines of two words each collapse into one list of four words.
flatten_test_() ->
    Nested = [["un", "deux"], ["trois", "quatre"]],
    Expected = ["un", "deux", "trois", "quatre"],
    ?_assertEqual(Expected, flatten(Nested)).
%% A budget of 2 is used up by the two separator tokens of the line.
add_ws_test_() ->
    Tokens = ["un", " ", "deux", " ", "trois"],
    Expected = {["un", " ", "deux", " ", "trois"], 0},
    ?_assertEqual(Expected, add_ws(2, Tokens)).
%% Separator tokens are counted like any other token: 2 + 1 + 4 + 1 + 5.
raw_length_line_test_() ->
    Tokens = ["un", " ", "deux", " ", "trois"],
    ?_assertEqual(13, raw_length_line(Tokens)).
%% Plain (non-generator) test, so it uses ?assertEqual. The underscore form
%% ?_assertEqual only builds a test object; returned from a *_test/0 function
%% it would be discarded and the comparison would never run.
length_line_test() ->
    ?assertEqual(13, length_line(["un", "deux", "trois"])).
%% Renders a tokenised three-word line for a requested width of 15.
align_test_() ->
    Tokens = ["un", " ", "deux", " ", "trois"],
    ?_assertEqual("un deux trois", align(15, Tokens)).
%% Six words at width 15: a fourth word would not fit on the first line, so
%% the text is broken into two lines of three words each.
justify_test_() ->
    Words = ["un", "deux", "trois", "un", "deux", "trois"],
    Expected = ["un deux trois", "un deux trois"],
    ?_assertEqual(Expected, justify(15, Words)).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment