Skip to content

Instantly share code, notes, and snippets.

@mrc
Created March 16, 2017 07:00
Show Gist options
  • Save mrc/8b1eaa27112196fe297ea8e2867575e0 to your computer and use it in GitHub Desktop.
Save mrc/8b1eaa27112196fe297ea8e2867575e0 to your computer and use it in GitHub Desktop.
-module(index).
-export([get_file_contents/1,show_file_contents/1, index/1, index_map/1, index_helper/3, add_line_number/2]).
% Reads a text file into a list of lines and builds a word index from them.
% Example input files:
% gettysburg-address.txt (short)
% dickens-christmas.txt (long)
% Read the text file Name and return its contents as a list of lines,
% in file order. Reading the lines is delegated to get_all_lines/2,
% which also closes the file once end of file is reached.
% Crashes with a badmatch if the file cannot be opened.
get_file_contents(Name) ->
    {ok, Device} = file:open(Name, [read]),
    % get_all_lines/2 accumulates lines last-first, so reverse them.
    lists:reverse(get_all_lines(Device, [])).
% Auxiliary function for get_file_contents.
% Not exported.
% Reads lines from File until end of file, pushing each one onto Partial,
% so the result is in reverse file order. The file is closed on eof.
% Only a trailing newline is removed from a line: the last line of a file
% that does not end in a newline is kept intact.
get_all_lines(File, Partial) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Partial;
        Line ->
            get_all_lines(File, [strip_newline(Line) | Partial])
    end.

% Drop a single trailing newline character from Line, if present.
strip_newline(Line) ->
    case lists:suffix("\n", Line) of
        true -> lists:droplast(Line);
        false -> Line
    end.
% Print each string in the list on its own line.
% Handy for inspecting the result of get_file_contents/1.
% Returns ok.
show_file_contents(Lines) ->
    lists:foreach(fun(Line) -> io:format("~s~n", [Line]) end, Lines).
% Break a line of text into a list of lowercase words. After downcasing,
% every character other than a-z or a space is replaced by a space, so
% punctuation and digits act as word separators and never appear in the
% result.
words(L) ->
    Spaced = re:replace(string:to_lower(L), "[^a-z ]", " ", [global, {return, list}]),
    string:tokens(Spaced, " ").
% An index is a map from each word to a list of line ranges, {First,Last},
% on which the word occurs.
% Record line number N in a list of {First,Last} ranges whose head is the
% most recent range.
%   - empty list:                    start the first range {N,N}
%   - N equals the head's Last:      already recorded, list unchanged
%   - N is one past the head's Last: grow the head range to end at N
%   - anything else:                 push a new range {N,N} on the front
add_line_number(N, []) ->
    [{N, N}];
add_line_number(N, [{_First, Last} | _] = Ranges) when N == Last ->
    Ranges;
add_line_number(N, [{First, Last} | Older]) when N == Last + 1 ->
    [{First, N} | Older];
add_line_number(N, [{_, _} | _] = Ranges) ->
    [{N, N} | Ranges].
% Record that every word in Words occurs on line LineNumber and return
% the updated map of words to ranges. A word not yet in the map starts
% from an empty range list.
update_ranges_with_words(LineNumber, Words, Acc) ->
    lists:foldl(
        fun(Word, Index) ->
            Ranges = maps:get(Word, Index, []),
            maps:put(Word, add_line_number(LineNumber, Ranges), Index)
        end,
        Acc,
        Words
    ).
% Walk a list of lines, numbering them consecutively from LineNumber, and
% fold the words of each line into the index map Acc. Returns the
% resulting map.
index_helper(LineNumber, Lines, Acc) ->
    % The fold state carries the number of the line about to be processed
    % together with the index built so far.
    {_Next, Index} = lists:foldl(
        fun(Line, {N, Idx}) ->
            {N + 1, update_ranges_with_words(N, words(Line), Idx)}
        end,
        {LineNumber, Acc},
        Lines
    ),
    Index.
% Build an index map from a list of lines, numbering the lines from 1.
% The range list stored for each word is reversed before being returned.
index_map(L) ->
    Raw = index_helper(1, L, #{}),
    maps:from_list([{Word, lists:reverse(Ranges)} || {Word, Ranges} <- maps:to_list(Raw)]).
% Convert to the representation required for the exercise: a list of
% {Word, Ranges} tuples.
% maps:to_list/1 does not guarantee any particular order, so the pairs
% are sorted by word to give a deterministic, alphabetical index.
index(L) ->
    lists:keysort(1, maps:to_list(index_map(L))).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment