Created
July 2, 2017 06:22
-
-
Save andreburgaud/295b4423090f829fa1147c01b46f48bd to your computer and use it in GitHub Desktop.
FutureLearn - Functional Programming in Erlang 2.25 - Text Processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(text). | |
-export([get_file_contents/1,print_text/2,process/2]). | |
-include_lib("eunit/include/eunit.hrl"). | |
%% -----------------------------------------------------------------------------
%% Text Processing exercise from Functional Programming in Erlang 2.25
%% -----------------------------------------------------------------------------
%% Compiled and tested with Erlang/OTP 20.0
%% -----------------------------------------------------------------------------
%% To execute tests in the Erlang shell:
%% > eunit:test(text, [verbose]).
%% -----------------------------------------------------------------------------
%% To test with an example, create a file with some text in the same directory
%% as this module, then in the Erlang shell, execute:
%% > c(text).
%% > text:process(50, file_name).
%% or
%% > text:print_text(50, file_name).
%% -----------------------------------------------------------------------------
%% Read the file called Name and return its lines as a list of strings, first
%% line first. The lines come back from get_all_lines/2 last-read-first, hence
%% the final reverse. Crashes with a badmatch if the file cannot be opened.
-spec get_file_contents(file:name_all()) -> [string()].
get_file_contents(Name) ->
    {ok, Device} = file:open(Name, [read]),
    lists:reverse(get_all_lines(Device, [])).
%% Auxiliary function for get_file_contents. Not exported.
%% Reads File line by line until eof, then closes it and returns the lines
%% pushed in front of Partial, i.e. the most recently read line comes first.
%% Only an actual trailing newline is removed from each line, so a final line
%% that is not newline-terminated keeps its last character.
get_all_lines(File, Partial) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Partial;
        Line ->
            get_all_lines(File, [strip_newline(Line) | Partial])
    end.

%% Drop a single trailing "\n" from Line if there is one; otherwise return Line
%% unchanged.
strip_newline(Line) ->
    case lists:suffix("\n", Line) of
        true -> lists:droplast(Line);
        false -> Line
    end.
%% Split one line into words, using only runs of whitespace as separators.
%% Auxiliary function for split_lines.
%% Returns [] for an empty or whitespace-only line: such a line trims down to
%% "", which re:split/3 reports as one empty string, and that empty string must
%% not be passed on as a word.
-spec split_line(string()) -> [string()].
split_line(Line) ->
    Pieces = re:split(string:trim(Line), "\\s+", [{return, list}]),
    [Word || Word <- Pieces, Word =/= []].
%% Split every non-empty line into its list of words.
%% The result holds one word list per retained line; empty lines ("") are
%% dropped before splitting.
-spec split_lines([string()]) -> [[string()]].
split_lines(Lines) ->
    %% Compare against [] instead of calling length/1, which walks the whole
    %% line just to learn whether it is empty.
    [split_line(Line) || Line <- Lines, Line =/= []].
%% Collapse one level of nesting: a list of lines, each line being a list of
%% words, becomes a single list of words in the original order.
-spec flatten([[string()]]) -> [string()].
flatten(Lines) ->
    lists:append(Lines).
%% Total number of characters in a line given as a list of strings. Nothing is
%% added between the elements, so separators only count if they are themselves
%% elements of the list.
-spec raw_length_line([string()]) -> integer().
raw_length_line(Ws) ->
    lists:sum([length(W) || W <- Ws]).
%% Length a list of words would have once written out as a line: the characters
%% of all words plus one separating space between each pair of neighbours.
%% An empty list has length 0.
-spec length_line([string()]) -> integer().
length_line([]) ->
    0;
length_line(Ws) ->
    Chars = lists:sum(lists:map(fun erlang:length/1, Ws)),
    Gaps = length(Ws) - 1,
    Chars + Gaps.
%% Widen up to N of the single-space separators in Line by one extra space.
%% Line is a list of strings in which separators appear as " " elements.
%% Walking left to right, each " " element becomes "  " (two spaces) for as
%% long as the budget N is positive; every other element is kept as is.
%% Returns {NewLine, Remaining}, where Remaining is the unspent part of N.
-spec add_ws(integer(), [string()]) -> {[string()], integer()}.
add_ws(N, Line) ->
    lists:mapfoldl(fun widen_gap/2, N, Line).

%% mapfoldl step for add_ws/2: spend one unit of Budget on a " " separator.
widen_gap(" ", Budget) when Budget > 0 -> {"  ", Budget - 1};
widen_gap(Elem, Budget) -> {Elem, Budget}.
%% Pad a line towards width N and return it as one flat string.
%% Line is a list of words with the separators already present as elements.
%% The shortfall between N and the current character count is handed to
%% add_ws/2 as its budget; the leftover budget it reports is ignored.
-spec align(integer(), [string()]) -> string().
align(N, Line) ->
    Missing = N - raw_length_line(Line),
    {Padded, _Unused} = add_ws(Missing, Line),
    lists:append(Padded).
%% Attempt to strictly justify the text by padding whitespace.
%% Covers basic cases only.
%% The words Ws are first grouped into lines by justify/3, which hands back each
%% line with its words in reverse order; every line is then put back in reading
%% order, interleaved with " " separators and passed through align/2.
-spec justify(integer(), [string()]) -> [string()].
justify(N, Ws) ->
    Render = fun(RevWords) ->
        Words = lists:reverse(RevWords),
        align(N, lists:join(" ", Words))
    end,
    lists:map(Render, justify(N, Ws, [])).
%% Loosely justify the text by splitting the words into lines that fit within
%% the requested length N.
%% The third argument is a list of lines, each line a list of words; its head is
%% the line currently being filled, with the most recently added word first.
%% While the next word still fits (according to length_line/1) it is pushed onto
%% that line. Otherwise the filled line is emitted in front of the result for
%% the remaining words and a new line is started with the word that did not fit.
%% Returns the lines in reading order, each with its words in reverse order.
-spec justify(integer(), [string()], [[string()]]) -> [[string()]].
justify(_, [], Lines) ->
    Lines;
justify(N, [W | Ws], []) ->
    %% First word of the text: it always opens the first line.
    justify(N, Ws, [[W]]);
justify(N, [W | Ws], [Line | Lines]) ->
    case length_line([W | Line]) =< N of
        true -> justify(N, Ws, [[W | Line] | Lines]);
        false -> [Line | justify(N, Ws, [[W] | Lines])]
    end.
%% Main function. Takes the line length N and the name of the file to read.
%% The file's lines are split into words, the words of all lines are merged into
%% one list, and that list is re-flowed by justify/2.
-spec process(integer(), string()) -> [string()].
process(N, Name) ->
    Lines = get_file_contents(Name),
    Words = flatten(split_lines(Lines)),
    justify(N, Words).
%% Explicitly print the output of process(N, Name), one result line per output
%% line, formatted with the ~p control sequence. Returns ok.
-spec print_text(integer(), string()) -> atom().
print_text(N, Name) ->
    Show = fun(Line) -> io:format("~p~n", [Line]) end,
    lists:foreach(Show, process(N, Name)).
%% -----------------------------------------------------------------------------
%% TESTS
%% -----------------------------------------------------------------------------
%% split_line/1 breaks a sentence into its words; punctuation stays attached to
%% the word it follows.
split_line_test_() ->
    Short = "The heat bloomed in December",
    Long = "Nearly helpless with sun and glare, I avoided Rio's brilliant.",
    [
        ?_assertEqual(["The", "heat", "bloomed", "in", "December"], split_line(Short)),
        ?_assertEqual(
            ["Nearly", "helpless", "with", "sun", "and", "glare,", "I", "avoided", "Rio's",
             "brilliant."],
            split_line(Long)
        )
    ].
%% Four words: 17 characters plus the 3 single spaces between them.
length_line_test_() ->
    Words = ["un", "deux", "trois", "quatre"],
    ?_assertEqual(20, length_line(Words)).
%% Two lines of two words each become one list of four words, order preserved.
flatten_test_() ->
    Nested = [["un", "deux"], ["trois", "quatre"]],
    ?_assertEqual(["un", "deux", "trois", "quatre"], flatten(Nested)).
%% With a budget of 2, both single-space separators must be widened to two
%% spaces, leaving nothing of the budget unspent.
add_ws_test_() ->
    Line = ["un", " ", "deux", " ", "trois"],
    ?_assertEqual({["un", "  ", "deux", "  ", "trois"], 0}, add_ws(2, Line)).
%% 2 + 1 + 4 + 1 + 5 characters: the " " separator elements are counted too.
raw_length_line_test_() ->
    Line = ["un", " ", "deux", " ", "trois"],
    ?_assertEqual(13, raw_length_line(Line)).
%% Three words: 11 characters plus the 2 single spaces between them.
%% This is a plain test function (no trailing underscore), so it must use the
%% immediate ?assertEqual; the lazy ?_assertEqual would only build a test object
%% that is returned and never run.
length_line_test() ->
    ?assertEqual(13, length_line(["un", "deux", "trois"])).
%% A 13-character line aligned to width 15 must gain one extra space in each of
%% its two gaps, giving exactly 15 characters.
align_test_() ->
    Line = ["un", " ", "deux", " ", "trois"],
    ?_assertEqual("un  deux  trois", align(15, Line)).
%% Six words at width 15 form two lines of three words. Each line is 13
%% characters with single spaces, so each must be padded to 15 characters by
%% doubling both of its gaps.
justify_test_() ->
    Words = ["un", "deux", "trois", "un", "deux", "trois"],
    ?_assertEqual(["un  deux  trois", "un  deux  trois"], justify(15, Words)).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment