Skip to content

Instantly share code, notes, and snippets.

Created May 25, 2020 08:53
Show Gist options
  • Save Joakineee/8912ab3b495c47021e6bf6f3286c302b to your computer and use it in GitHub Desktop.
Save Joakineee/8912ab3b495c47021e6bf6f3286c302b to your computer and use it in GitHub Desktop.
index file exercise
% Used to read a file into a list of lines.
% Example files available in:
% gettysburg-address.txt (short)
% dickens-christmas.txt (long)
% Read the text file Name and collect its contents as a list of lines.
% Each line has its trailing newline removed.
%
% Name is passed straight to file:open/2 in read mode. The {ok,File} match
% means a failed open crashes with badmatch instead of returning an error
% tuple.
% NOTE(review): the definition is cut off after the Rev binding in this copy
% (no final expression or terminating period is visible). Presumably Rev is
% reversed before being returned -- confirm against the original file.
get_file_contents(Name) ->
{ok,File} = file:open(Name,[read]),
% Lines are gathered by get_all_lines/2, starting from an empty accumulator.
Rev = get_all_lines(File,[]),
% Auxiliary function for get_file_contents/1.
% Not exported (per the original author's note).
%
% Reads from the open device File with io:get_line/2, using an empty prompt.
% Partial is the accumulator argument; get_file_contents/1 seeds it with [].
% NOTE(review): both case branches are cut off in this copy (each ends in a
% comma and the closing `end.` is missing), so what is done with Partial and
% Strip is not visible here -- restore the missing lines from the original.
get_all_lines(File,Partial) ->
case io:get_line(File,"") of
% End of file: the device is closed.
eof -> file:close(File),
% Otherwise Strip is the line without its last character, which is expected
% to be the newline.
% NOTE(review): the last character is dropped unconditionally, so a final
% line that has no trailing newline would lose a real character -- confirm.
Line -> {Strip,_} = lists:split(length(Line)-1,Line),
% Show the contents of a list of strings.
% Can be used to check the results of calling get_file_contents/1.
%
% There is one clause for a non-empty list (head L, tail Ls) and one for [].
% NOTE(review): both clause bodies are missing from this copy, so how each
% string is displayed cannot be confirmed here.
show_file_contents([L|Ls]) ->
show_file_contents([]) ->
% Receives a list of sentences and builds a list of lists of words.
% Each sentence is split with string:tokens/2 on the separator characters
% '.', ',', '-' and space, and the resulting word list is prepended to Acc.
% Example input:
% ["Four score and seven years ago our fathers brought",
%  "forth on this continent, a new nation, conceived in Liberty,"]
% NOTE(review): the example output and the body of the [] clause are missing
% from this copy. Because word lists are prepended, Acc holds them in reverse
% input order once the input is exhausted; whether the base clause reverses
% Acc before returning cannot be seen here.
-spec get_tokens([list()],[T]) -> [T].
get_tokens([],Acc) ->
get_tokens([H|T],Acc) ->
get_tokens(T,[string:tokens(H, ".,- ")|Acc]).
% Receives a list of sentences and generates a list of {Index, Sentence}
% tuples, accumulated in Acc.
% The third argument is the index counter; index_words/1 starts it at 1.
% NOTE(review): the return type of the -spec and the bodies of both clauses
% are missing from this copy, so how Index advances and in which order the
% tuples are returned cannot be confirmed here.
-spec add_index([T],[{integer(),[T]}],integer()) ->
add_index([],Acc,_) ->
add_index([H|T],Acc,Index) ->
% Main function; takes the file name as its parameter.
% It builds M, the per-line word lists tagged with an index counter that
% starts at 1, and N, a single list merged from all the per-line word lists,
% so that every word of N can be searched for in M.
% NOTE(review): the final expression is missing from this copy; presumably it
% calls search_words/3 with N and M -- confirm against the original file.
-spec index_words(string()) -> list().
index_words(File) ->
% F is the list of lines read from File.
F = get_file_contents(File),
% L is the list of word lists, one per line.
L = get_tokens(F,[]),
M = add_index(L,[],1),
% NOTE(review): lists:umerge/1 is documented to expect every sublist to be
% sorted and free of duplicates, but the token lists are in text order --
% confirm N really is a sorted, de-duplicated word list. get_tokens/2 is also
% called a second time here although L already holds the same value.
N = lists:umerge(get_tokens(F,[])),
% Looks up every word of the first argument in the indexed line list M.
% For each word, text_coincidences/3 is called with a fresh [] accumulator and
% the pair {Word, Result} is pushed onto the front of Acc. Words are handled
% left to right, so the entry for the last word ends up at the head of the
% returned list (the result is in reverse order of the input words).
-spec search_words(list(),list(),[T]) -> [T].
search_words(Words,M,Acc) ->
    lists:foldl(
        fun(Word,Found) -> [{Word,text_coincidences(Word,M,[])}|Found] end,
        Acc,
        Words
    ).
% Generates the list of line indexes on which a word occurs, for example
% {foo, [1,2,3,6,7]}
% and then calls tuple_list/4, which transforms this into:
% {foo, [{1,3},{6,7}]}
%
% H is the word, the second argument is the list of {Index, Words} tuples and
% Acc collects the matching indexes, most recent match first.
% NOTE(review): the closing `end.` of the case is missing from this copy.
% No visible clause handles an exhausted line list with an empty Acc (a word
% that occurs on no line); unless that clause was lost in this copy, such a
% call would fail with function_clause.
-spec text_coincidences(list(),list(),[T]) -> [T].
% All lines checked: V is the head of Acc, i.e. the last index recorded. The
% indexes are put back into ascending order before being grouped.
text_coincidences(_,[],[V|_] = Acc) -> tuple_list(lists:reverse(Acc),V,V,[]);
text_coincidences(H,[{X,L}|T],Acc) ->
% Record index X only when the word is one of this line's words.
case lists:member(H,L) of
true -> text_coincidences(H,T,[X|Acc]);
false -> text_coincidences(H,T,Acc)
% Function that, from a list of line numbers, for example [1,2,3,6,7],
% returns a list of range tuples, for example [{1,3},{6,7}].
%
% NOTE(review): X and Y presumably hold the start and end of the range being
% built; the bodies of the two non-empty clauses are missing from this copy,
% so this cannot be confirmed here.
-spec tuple_list(list(),integer(),integer(),[T]) -> [T].
% Input exhausted: return Acc sorted on the first element of each tuple.
tuple_list([],_,_,Acc) -> lists:keysort(1,Acc);
% The next line number H directly follows Y.
tuple_list([H|T],X,Y,Acc) when H == Y + 1 ->
% Any other line number.
tuple_list([H|T],X,Y,Acc) ->
% Test function; make sure the "getisburg.txt" file is in the working
% directory, since the name is passed to index:index_words/1 as a relative
% path.
% The result of index:index_words/1 is matched against a literal list, so a
% different result crashes with badmatch.
% NOTE(review): only the tail of the expected pattern (its last element) is
% visible in this copy, and the function has no terminating period here.
test() ->
{"But",[{13,13}]}] = index:index_words("getisburg.txt"),
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment