Skip to content

Instantly share code, notes, and snippets.

@Joakineee
Created May 25, 2020 08:53
Show Gist options
  • Save Joakineee/8912ab3b495c47021e6bf6f3286c302b to your computer and use it in GitHub Desktop.
Save Joakineee/8912ab3b495c47021e6bf6f3286c302b to your computer and use it in GitHub Desktop.
index file exercice
-module(index).
-export([get_file_contents/1,show_file_contents/1,index_words/1,test/0]).
% Used to read a file into a list of lines.
% Example files available in:
% gettysburg-address.txt (short)
% dickens-christmas.txt (long)
%% Reads the text file `Name' and returns its lines, in file order, as a
%% list of strings. Line-ending removal is done by get_all_lines/2.
%% Crashes with a badmatch if the file cannot be opened.
get_file_contents(Name) ->
    {ok, Device} = file:open(Name, [read]),
    %% get_all_lines/2 hands the lines back last-read-first, so flip them.
    lists:reverse(get_all_lines(Device, [])).
%% Auxiliary function for get_file_contents/1. Not exported.
%%
%% Reads lines from the open I/O device `File' until end of file, closes the
%% device, and returns the lines prepended onto `Partial', i.e. in reverse
%% reading order.
%%
%% Only a real line terminator ("\n" or "\r\n") is removed from each line.
%% A final line that has no trailing newline therefore keeps its last
%% character instead of being truncated.
%%
%% Crashes with a case_clause error if io:get_line/2 reports an error.
get_all_lines(File, Partial) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Partial;
        Line when is_list(Line) ->
            %% "\r\n" is a single grapheme cluster, so it has to be listed
            %% explicitly next to the bare newline.
            Stripped = string:trim(Line, trailing, ["\r\n", $\n]),
            get_all_lines(File, [Stripped | Partial])
    end.
%% Prints every string of the given list on its own line and returns `ok'.
%% Handy for eyeballing the result of get_file_contents/1.
show_file_contents(Lines) ->
    lists:foreach(fun(Line) -> io:format("~s~n", [Line]) end, Lines).
%
%% Splits every sentence of a list into its words and returns one word list
%% per sentence, in the same order as the sentences.
%% The characters '.', ',', '-' and space act as separators and never appear
%% in the resulting words.
%%
%% `Acc' holds already-tokenised sentences in reverse order; callers normally
%% pass [].
%%
%% Example input:
%%   ["Four score and seven years ago our fathers brought",
%%    "forth on this continent, a new nation, conceived in Liberty,"]
%% Example output:
%%   [["Four","score","and","seven","years","ago","our","fathers","brought"],
%%    ["forth","on","this","continent","a","new","nation","conceived","in","Liberty"]]
-spec get_tokens([list()],[T]) -> [T].
get_tokens(Sentences, Acc) ->
    Collected = lists:foldl(
        fun(Sentence, Out) -> [string:tokens(Sentence, ".,- ") | Out] end,
        Acc,
        Sentences
    ),
    lists:reverse(Collected).
%
%% Numbers the elements of a list: each element becomes `{Index, Element}',
%% counting upwards from the `Index' passed in.
%%
%% `Acc' holds already-numbered tuples in reverse order; callers normally
%% pass [].
%%
%% Example input (with Acc = [] and Index = 1):
%%   [["Four","score","and","seven","years","ago","our","fathers","brought"],
%%    ["forth","on","this","continent","a","new","nation","conceived","in","Liberty"]]
%% Example output:
%%   [{1,["Four","score","and","seven","years","ago","our","fathers","brought"]},
%%    {2,["forth","on","this","continent","a","new","nation","conceived","in","Liberty"]}]
-spec add_index([T],[{integer(),[T]}],integer()) ->
[{integer(),[T]}].
add_index(Items, Acc, Index) ->
    {Numbered, _Next} = lists:foldl(
        fun(Item, {Out, N}) -> {[{N, Item} | Out], N + 1} end,
        {Acc, Index},
        Items
    ),
    lists:reverse(Numbered).
%
%% Main function: builds a word index for the text file named `File'.
%%
%% The result holds one `{Word, Ranges}' entry per distinct word, sorted by
%% word. `Ranges' lists, in ascending order, the `{First, Last}' spans of
%% consecutive line numbers on which the word occurs. For example, a word
%% found on lines 1, 2, 3, 6 and 7 is reported as `{Word, [{1,3},{6,7}]}'.
-spec index_words(string()) -> list().
index_words(File) ->
    index_lines(get_file_contents(File)).

%% Indexes lines that are already in memory. Kept separate from
%% index_words/1 so that test/0 can exercise the indexing logic without
%% needing a text file on disk.
-spec index_lines([string()]) -> [{string(), [{integer(), integer()}]}].
index_lines(Lines) ->
    %% Tokens:  [["Four","score",...], ["forth","on",...], ...]
    Tokens = get_tokens(Lines, []),
    %% Indexed: [{1,["Four","score",...]}, {2,["forth","on",...]}, ...]
    Indexed = add_index(Tokens, [], 1),
    %% lists:umerge/1 only removes duplicates when every sub-list is itself
    %% sorted and duplicate-free, which raw token lists are not. Appending
    %% and usort-ing yields each distinct word exactly once.
    Words = lists:usort(lists:append(Tokens)),
    search_words(Words, Indexed, []).

%% Looks up each word of the first list in the indexed lines `M'
%% (a list of `{LineNumber, WordsOnThatLine}') and returns the
%% `{Word, Ranges}' entries in the order the words were given.
%% `Acc' holds already-built entries in reverse order; callers pass [].
-spec search_words(list(),list(),[T]) -> [T].
search_words([], _, Acc) ->
    lists:reverse(Acc);
search_words([H | T], M, Acc) ->
    search_words(T, M, [{H, text_coincidences(H, M, [])} | Acc]).

%% Collects the numbers of the lines that contain the word `H' and collapses
%% them into ranges with tuple_list/4.
%% For example, a word found on lines [1,2,3,6,7] yields [{1,3},{6,7}].
%% A word that occurs on no line yields [].
%% `Acc' holds the matching line numbers found so far, newest first;
%% callers pass [].
-spec text_coincidences(list(), [{integer(), list()}], [integer()]) ->
    [{integer(), integer()}].
text_coincidences(_, [], []) ->
    [];
text_coincidences(_, [], Acc) ->
    %% Seed the open range with the FIRST matching line; the remaining line
    %% numbers then either extend it or start a new range.
    [First | Rest] = lists:reverse(Acc),
    tuple_list(Rest, First, First, []);
text_coincidences(H, [{X, L} | T], Acc) ->
    case lists:member(H, L) of
        true -> text_coincidences(H, T, [X | Acc]);
        false -> text_coincidences(H, T, Acc)
    end.

%% Collapses an ascending list of line numbers into `{First, Last}' ranges.
%% `X' and `Y' are the bounds of the range currently being extended and
%% `Acc' holds the ranges already closed.
%% For example, tuple_list([2,3,6,7], 1, 1, []) returns [{1,3},{6,7}].
-spec tuple_list([integer()], integer(), integer(), [{integer(), integer()}]) ->
    [{integer(), integer()}].
tuple_list([], X, Y, Acc) ->
    %% The range still open at the end has to be emitted as well.
    lists:keysort(1, [{X, Y} | Acc]);
tuple_list([H | T], X, Y, Acc) when H =:= Y + 1 ->
    tuple_list(T, X, H, Acc);
tuple_list([H | T], X, Y, Acc) ->
    tuple_list(T, H, H, [{X, Y} | Acc]).

%% Self-check of the indexing logic. Uses a small in-memory fixture, so it
%% runs without any text file on disk. Returns `ok' or crashes with a
%% badmatch.
%% The fixture covers: repeated words within and across lines (one entry per
%% word), every separator character, and a word whose last occurrences are on
%% consecutive lines ("the" on lines 4 and 5).
test() ->
    Lines = ["the cat sat",
             "the dog, the cat",
             "a bird",
             "the end.",
             "the bird-cat"],
    [{"a", [{3,3}]},
     {"bird", [{3,3},{5,5}]},
     {"cat", [{1,2},{5,5}]},
     {"dog", [{2,2}]},
     {"end", [{4,4}]},
     {"sat", [{1,1}]},
     {"the", [{1,2},{4,5}]}] = index_lines(Lines),
    ok.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment