Skip to content

Instantly share code, notes, and snippets.

@abhishekc-sharma
Created March 5, 2017 03:58
Show Gist options
  • Save abhishekc-sharma/9a3a13d5e4a30e48c53767055f1822bc to your computer and use it in GitHub Desktop.
Save abhishekc-sharma/9a3a13d5e4a30e48c53767055f1822bc to your computer and use it in GitHub Desktop.
Functional Programming in Erlang MOOC: Programming Challenge - Indexing a File
-module(index).
-export([make_index/1, show_file_contents/1]).
% Build a word index for the text file called Name.
% The file is read into lines, every word is tagged with the number of the
% line it appears on, and the tagged words are folded into an index of the
% form [{Word, [{FirstLine, LastLine}, ...]}].
% Example files available in:
% gettysburg-address.txt (short)
% dickens-christmas.txt (long)
make_index(Name) ->
    Lines = get_file_contents(Name),
    AnnotatedWords = get_all_annotated_words(Lines),
    get_word_index(AnnotatedWords).
% Read the text file Name and return its lines, in file order, as a list of
% strings. Line-ending removal is delegated to get_all_lines/2.
% Crashes with a badmatch if the file cannot be opened.
get_file_contents(Name) ->
    {ok, File} = file:open(Name, [read]),
    % get_all_lines/2 accumulates newest-first, so flip the result once here.
    lists:reverse(get_all_lines(File, [])).
% Auxiliary function for get_file_contents.
% Not exported.
% Reads lines from the open io device File until eof, pushing each one onto
% Partial, so the result is in reverse file order. The file is closed when
% eof is reached.
get_all_lines(File, Partial) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Partial;
        Line ->
            get_all_lines(File, [strip_newline(Line) | Partial])
    end.

% Remove a single trailing newline from Line, if there is one.
% The last line of a file may not end in "\n"; dropping the final character
% unconditionally would cut off a real character in that case.
strip_newline(Line) ->
    case lists:reverse(Line) of
        [$\n | RevRest] -> lists:reverse(RevRest);
        _ -> Line
    end.
% Print every string in the list on its own line and return ok.
% Handy for eyeballing the result of get_file_contents.
show_file_contents(Lines) ->
    lists:foreach(fun(Line) -> io:format("~s~n", [Line]) end, Lines).
% Takes a list of lines and returns one flat list of {LineNumber, Word}
% pairs, in the order the words appear in the text.
get_all_annotated_words(Ls) ->
    [AnnotatedWord
     || AnnotatedLine <- annotate_with_line_numbers(Ls),
        AnnotatedWord <- get_annotated_line_words(AnnotatedLine)].
% Split an annotated line {LineNumber, Line} into its words and tag each
% word with the line number: the result is [{LineNumber, Word}].
% Words are separated by the punctuation and space characters listed below.
get_annotated_line_words({I, L}) ->
    Words = string:tokens(L, "()[]{}/\\,.?!''\"\"- "),
    [{I, Word} || Word <- Words].
% Pair every element of the list with its 1-based position,
% giving [{1, First}, {2, Second}, ...].
annotate_with_line_numbers(Ls) ->
    Number = fun(Line, N) -> {{N, Line}, N + 1} end,
    {Numbered, _Next} = lists:mapfoldl(Number, 1, Ls), % thread the counter through the list
    Numbered.
% Build the index from a list of {LineNumber, Word} pairs.
% Words are fed to add_word_index/2 from the last pair to the first,
% starting from an empty index.
get_word_index(AWs) ->
    Insert = fun(AnnotatedWord, Index) -> add_word_index(Index, AnnotatedWord) end,
    lists:foldl(Insert, [], lists:reverse(AWs)).
% Add one annotated word {Line, Word} to the index.
% Each index entry is {Word, Ranges}, where Ranges is a list of
% {StartLine, EndLine} tuples. Only the first range of an existing entry is
% inspected, so this relies on words being added in descending line order
% (which is how get_word_index/1 feeds them).
add_word_index(Index, {Line, Word}) ->
    case lists:keyfind(Word, 1, Index) of
        {Word, [{First, _Last} | _Rest]} when First == Line ->
            % This line is already recorded for the word.
            Index;
        {Word, [{First, Last} | Rest]} when First == Line + 1 ->
            % Line sits directly before the first range: grow that range.
            lists:keyreplace(Word, 1, Index, {Word, [{Line, Last} | Rest]});
        {Word, [{_First, _Last} | _Rest] = Ranges} ->
            % Gap before the first range: start a new single-line range.
            lists:keyreplace(Word, 1, Index, {Word, [{Line, Line} | Ranges]});
        false ->
            % First time this word is seen.
            [{Word, [{Line, Line}]} | Index]
    end.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment