Skip to content

Instantly share code, notes, and snippets.

@michaeldperez
Created March 6, 2017 03:13
Show Gist options
  • Save michaeldperez/38ab2be361bbecb0605b52aea04d2159 to your computer and use it in GitHub Desktop.
Save michaeldperez/38ab2be361bbecb0605b52aea04d2159 to your computer and use it in GitHub Desktop.
Accepts a file path and creates an index.
% Does not properly format the index
% Does not remove duplicates
-module(index).
-export([get_file_contents/1,
show_file_contents/1,
map_to_words/1,
index/1]).
% Used to read a file into a list of lines.
% Example files available in:
% gettysburg-address.txt (short)
% dickens-christmas.txt (long)
% Read the text file at path Name and return its lines, first line first.
% Crashes with a badmatch if the file cannot be opened for reading.
% Per-line reading and trimming is delegated to get_all_lines/2, which
% hands the lines back newest-first and closes the file at end-of-file.
get_file_contents(Name) ->
    {ok, Device} = file:open(Name, [read]),
    lists:reverse(get_all_lines(Device, [])).
% Auxiliary function for get_file_contents/1. Not exported.
% Reads File line by line until end-of-file, pushing each line (without its
% trailing newline) onto Partial, so the result is in reverse file order.
% Closes File once eof is reached.
get_all_lines(File, Partial) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Partial;
        Line ->
            get_all_lines(File, [strip_newline(Line) | Partial])
    end.

% Drop a single trailing "\n" if there is one. A final line that is not
% newline-terminated is returned untouched, so it no longer loses its last
% character the way an unconditional "drop the last character" would.
strip_newline(Line) ->
    case lists:reverse(Line) of
        [$\n | Rest] -> lists:reverse(Rest);
        _ -> Line
    end.
% Print every string in the list on its own line and return ok.
% Handy for eyeballing the result of get_file_contents/1.
show_file_contents(Lines) ->
    lists:foreach(fun(Line) -> io:format("~s~n", [Line]) end, Lines).
% Split one line into {Word, LineNumber} tuples.
% A word is a maximal run of characters that are not separators; the
% separators are the characters in the string given to lists:member/2 below.
% Empty words (produced at every separator that does not end a word) are
% filtered out by remove_empty/1.
get_all_words(Line, LineNumber) ->
    get_all_words(Line, [], [], LineNumber).

% Worker for get_all_words/2.
% Current holds the characters of the word in progress, most recent first.
% Found holds the completed {Word, LineNumber} tuples, most recent first.
% At the end of the line Found is passed through remove_empty/1 and then
% lists:reverse/1 before being returned.
get_all_words([], [], Found, _LineNumber) ->
    lists:reverse(remove_empty(Found));
get_all_words([], Current, Found, LineNumber) ->
    % Flush the word that was still being collected when the line ended.
    lists:reverse(remove_empty([{lists:reverse(Current), LineNumber} | Found]));
get_all_words([Char | Rest], Current, Found, LineNumber) ->
    IsSeparator = lists:member(Char, ".,\ ;:\t\n\""),
    if
        IsSeparator ->
            % Close the current word (possibly empty) and start a new one.
            get_all_words(Rest, [], [{lists:reverse(Current), LineNumber} | Found], LineNumber);
        true ->
            get_all_words(Rest, [Char | Current], Found, LineNumber)
    end.
% Remove the entries whose word is the empty string from a list of
% {Word, LineNumber} tuples, keeping the remaining entries in their original
% relative order.
% An accumulator-based version that returns its accumulator without
% reversing it hands the survivors back in reverse order; since the caller
% reverses the result again, each line's words then come out backwards.
% Filtering in place avoids that.
% Crashes with function_clause if an element is not a 2-tuple.
remove_empty(ListOfWords) ->
    lists:filter(fun({Word, _LineNumber}) -> Word =/= [] end, ListOfWords).
% Convert a list of lines into a list of word lists, one per input line.
% Lines are numbered from 1 in list order, and each line is tokenised by
% get_all_words/2 together with its own line number.
map_to_words(List) ->
    {WordLists, _NextLineNumber} =
        lists:mapfoldl(
            fun(Line, LineNumber) ->
                {get_all_words(Line, LineNumber), LineNumber + 1}
            end,
            1,
            List
        ),
    WordLists.
% Build a word index for the text file at path Name.
% Input to the worker: [[{Word, LineNumber}, ...], ...], one inner list per
% line, as produced by map_to_words/1.
% Result: [{Word, [{LineNumber}, ...]}, ...] with exactly one entry per
% distinct word, in the order the words are first met in that input.
index(Name) ->
    index(map_to_words(get_file_contents(Name)), []).

% Worker for index/1. Index accumulates the finished entries, newest first.
index([], Index) ->
    lists:reverse(Index);
index([[] | RemainingLines], Index) ->
    % Current line exhausted; move on to the next one.
    index(RemainingLines, Index);
index([[{Word, Line} | RestOfLine] | RemainingLines], Index) ->
    case lists:keymember(Word, 1, Index) of
        true ->
            % The entry built at the word's first occurrence already gathered
            % every later line, so a second entry would only be a duplicate.
            index([RestOfLine | RemainingLines], Index);
        false ->
            Entry = traverse_list_of_tuples({Word, Line}, RemainingLines),
            index([RestOfLine | RemainingLines], [Entry | Index])
    end.
% From one list of {Word, Line} tuples, collect the line of every tuple whose
% word equals Word. Each hit is returned wrapped as {Line}, in the order it
% appears in ListOfTuples. The line carried by the first argument is ignored.
% Crashes if an element of ListOfTuples is not a 2-tuple.
traverse_and_transform({Word, _Line}, ListOfTuples) ->
    lists:filtermap(
        fun
            ({Candidate, OtherLine}) when Candidate =:= Word -> {true, {OtherLine}};
            ({_OtherWord, _OtherLine}) -> false
        end,
        ListOfTuples
    ).
% Build the index entry for Word: run traverse_and_transform/2 over every
% list in ListOfListsOfTuples (in order), then return
% {Word, [{Line} | Hits]} where Hits is the flattened concatenation of the
% per-list results.
traverse_list_of_tuples({Word, Line}, ListOfListsOfTuples) ->
    HitsPerList = lists:map(
        fun(ListOfTuples) -> traverse_and_transform({Word, Line}, ListOfTuples) end,
        ListOfListsOfTuples
    ),
    {Word, lists:flatten([{Line} | HitsPerList])}.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment