Created
March 6, 2017 03:13
-
-
Save michaeldperez/38ab2be361bbecb0605b52aea04d2159 to your computer and use it in GitHub Desktop.
Accepts a file path and creates an index.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Does not properly format the index
% Does not remove duplicates | |
-module(index). | |
-export([get_file_contents/1, | |
show_file_contents/1, | |
map_to_words/1, | |
index/1]). | |
% Used to read a file into a list of lines. | |
% Example files available in: | |
% gettysburg-address.txt (short) | |
% dickens-christmas.txt (long) | |
% Read the text file called Name and return its lines, first line first.
% Crashes with a badmatch if the file cannot be opened.
% Reading the individual lines and closing the file are delegated to
% get_all_lines/2, which hands the lines back last-first.
get_file_contents(Name) ->
    {ok, Device} = file:open(Name, [read]),
    lists:reverse(get_all_lines(Device, [])).
% Auxiliary function for get_file_contents.
% Not exported.
% Reads File one line at a time and pushes each line, minus its trailing
% newline, onto Partial. The result is therefore in reverse file order.
% The file is closed as soon as eof is reached.
get_all_lines(File, Partial) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Partial;
        Line ->
            get_all_lines(File, [strip_newline(Line) | Partial])
    end.

% Drop a single trailing newline if there is one.
% The last line of a file may not end in a newline; in that case the line
% is returned untouched instead of losing its final character.
strip_newline(Line) ->
    case lists:reverse(Line) of
        [$\n | Reversed] -> lists:reverse(Reversed);
        _NoNewline -> Line
    end.
% Print every string of the given list on a line of its own, in order.
% Handy for checking what get_file_contents returned. Returns ok.
show_file_contents(Lines) ->
    lists:foreach(fun(Line) -> io:format("~s~n", [Line]) end, Lines).
% Split one line of text into its words.
% Returns a list of {Word, LineNumber} tuples, every word tagged with the
% line number that was passed in. A word ends at any of the separator
% characters: full stop, comma, space, semicolon, colon, tab, newline or
% double quote. Empty words (produced by adjacent separators) are filtered
% out by remove_empty/1.
get_all_words([], _LineNumber) ->
    [];
get_all_words(Line, LineNumber) ->
    get_all_words(Line, [], [], LineNumber).

% Worker: Current holds the characters of the word being read (reversed),
% Collected holds the finished {Word, LineNumber} tuples (most recent first).
get_all_words([Char | Rest], Current, Collected, LineNumber) ->
    case lists:member(Char, ".,\ ;:\t\n\"") of
        false ->
            get_all_words(Rest, [Char | Current], Collected, LineNumber);
        true ->
            Finished = {lists:reverse(Current), LineNumber},
            get_all_words(Rest, [], [Finished | Collected], LineNumber)
    end;
get_all_words([], [], Collected, _LineNumber) ->
    lists:reverse(remove_empty(Collected));
get_all_words([], Current, Collected, LineNumber) ->
    % The line ended in the middle of a word: flush it before finishing.
    Finished = {lists:reverse(Current), LineNumber},
    lists:reverse(remove_empty([Finished | Collected])).
% Remove the entries whose word is the empty string from a list of
% {Word, LineNumber} tuples.
% The surviving entries keep their original relative order. The previous
% accumulator-based version returned them reversed, which made
% get_all_words hand back the words of a line back to front.
% Crashes with function_clause if an element is not a 2-tuple.
remove_empty(ListOfWords) ->
    lists:filter(fun({Word, _LineNumber}) -> Word =/= [] end, ListOfWords).
% Turn a list of lines into a list of per-line word lists.
% Lines are numbered starting from 1; element N of the result is what
% get_all_words/2 returns for line N when given that number.
map_to_words(Lines) ->
    {_NextLineNumber, Collected} =
        lists:foldl(
            fun(Line, {LineNumber, Acc}) ->
                {LineNumber + 1, [get_all_words(Line, LineNumber) | Acc]}
            end,
            {1, []},
            Lines
        ),
    lists:reverse(Collected).
% Build a word index for the text file called Name.
% Returns a list of {Word, Lines} entries, one entry per distinct word, in
% order of first appearance. Lines lists each line on which the word
% occurs exactly once, every line number wrapped in a one-element tuple,
% e.g. {"peace", [{3}, {17}]}.
index(Name) ->
    index(map_to_words(get_file_contents(Name)), []).

% Worker over the per-line word lists.
% Data structure: [[{word, line-number}, ..., {word, line-number},...], ..., [...]]
index([], Index) ->
    lists:reverse(Index);
index([[] | Lines], Index) ->
    index(Lines, Index);
index([[{Word, Line} | Words] | Lines], Index) ->
    case lists:keymember(Word, 1, Index) of
        true ->
            % Already indexed at its first occurrence, whose entry also
            % collected every later line, so skip this repeat.
            index([Words | Lines], Index);
        false ->
            {Word, Occurrences} = traverse_list_of_tuples({Word, Line}, Lines),
            % A word repeated on one later line is reported once per
            % repetition; usort collapses those and keeps ascending order.
            index([Words | Lines], [{Word, lists:usort(Occurrences)} | Index])
    end.
% Look for Word in one list of {Word, LineNumber} tuples.
% Returns, in list order, a {LineNumber} tuple for every entry whose word
% is exactly Word; entries for other words are dropped. The line number of
% the tuple passed as first argument is not used.
% Crashes with function_clause if an element is not a 2-tuple.
traverse_and_transform({Word, _Line}, ListOfTuples) ->
    lists:filtermap(
        fun({Candidate, OtherLine}) when Candidate =:= Word ->
                {true, {OtherLine}};
           ({_OtherWord, _OtherLine}) ->
                false
        end,
        ListOfTuples
    ).
% Apply traverse_and_transform/2 to every list of tuples in a list of
% lists and gather the matches.
% Returns {Word, Lines}, where Lines starts with {Line} (the occurrence
% passed in) followed by the matches from each inner list, in order.
traverse_list_of_tuples({Word, Line} = Entry, ListOfListsOfTuples) ->
    Matches = lists:flatmap(
        fun(ListOfTuples) -> traverse_and_transform(Entry, ListOfTuples) end,
        ListOfListsOfTuples
    ),
    {Word, [{Line} | Matches]}.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment