Last active
March 17, 2017 17:35
-
-
Save 7-fl/148fea5d1a27990afa477d1a22b838d9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(aw2). | |
-compile(export_all). | |
-include_lib("eunit/include/eunit.hrl"). | |
% I put the code for converting the line numbers into runs in another module | |
% called runs, and I call the function runs:runs() from this module. | |
% Variable names: | |
% Is => Indexes, I => Index | |
% I => {"word", [1, 3, 7, 9]} | |
% Ws => Words | |
% W => Word | |
% Builds the word index for the file named FileName and prints it.
%
% The lines are read with index:get_file_contents/1 (from the provided
% index.erl), normalized, indexed by line number, prettified, and then
% written to standard output with the "~p~n" format.
% Returns the result of io:format/2.
index_file(FileName) ->
    RawLines = index:get_file_contents(FileName),
    Pretty = prettify(index_lines(norm_lines(RawLines))),
    io:format("~p~n", [Pretty]).
%------------ | |
% Checks that prettify/1 replaces each word's raw line numbers with the
% result of runs:runs/1 and hands the entries back in reverse input order.
prettify_test() ->
    Input = [{"hi", [3,5,1,2]}, {"bye", [3,4,3,1,2, 8,7, 10]}],
    Expected = [{"bye", [{1,4}, {7,8}, {10,10}]},
                {"hi", [{1,3}, {5,5}]}],
    Expected = prettify(Input),    %Expected is already bound, so this is an assertion.
    all_tests_passed.
% prettify(Is) -> PrettyIs
%   Is => Indexes, each of the form {Word, LineNums}.
% Replaces the line-number list of every index with runs:runs(LineNums).
% runs:runs/1 lives in the separate runs module (not shown here).
% The entries come back in the reverse of their input order; the index has
% no meaningful order, so nothing is reversed back.
prettify(Is) ->
    prettify(Is, []).

% prettify(Is, Acc): same as prettify/1, but the converted entries are
% pushed onto the front of Acc.
prettify(Is, Acc) ->
    lists:foldl(
        fun({Word, LineNums}, Pretty) ->
            [{Word, runs:runs(LineNums)} | Pretty]
        end,
        Acc,
        Is
    ).
%---------- | |
% Table-driven check of index_lines/1. Each case is {Expected, Lines}.
% The order of the entries in Expected is simply what the algorithm yields:
% index_words/3 moves the most recently touched word to the front.
index_lines_test() ->
    Cases = [
        {[], []},
        {[{"hi", [1]}, {"bye", [1]}], ["hi bye"]},
        {[{"hi", [2, 1]}, {"bye", [2, 1]}], ["hi bye", "hi bye"]},
        {[{"abc", [2]}, {"hi", [1]}, {"bye", [1]}, {"def", [2]}],
         ["hi bye", "abc def"]}
    ],
    lists:foreach(
        fun({Expected, Lines}) -> Expected = index_lines(Lines) end,
        Cases
    ),
    all_tests_passed.
% index_lines(Ls) -> Is
%   Ls => normalized lines (strings); Is => Indexes, I => {"word", [2, 1]}
% Indexes every word of every line, numbering the lines from 1.
index_lines(Ls) ->
    index_lines(Ls, [], 1).

% index_lines(Ls, Is, LineNum): indexes the lines in Ls on top of the
% existing indexes Is, treating the first line of Ls as line LineNum.
index_lines(Ls, Is, LineNum) ->
    Step =
        fun("", {Acc, N}) ->
               %A blank line has no words but still consumes a line number.
               {Acc, N + 1};
           (Line, {Acc, N}) ->
               {index_words(split(Line), Acc, N), N + 1}
        end,
    {Indexes, _NextLineNum} = lists:foldl(Step, {Is, LineNum}, Ls),
    Indexes.
%------------ | |
% Table-driven check of index_words/3.
% Each case is {Expected, Ws, Is, LineNum}:
%   Ws => Word list, e.g. ["abc", "def"]
%   Is => Indexes already collected; I => {"word", [2, 1]}
index_words_test() ->
    Cases = [
        {[], [], [], 1},
        {[{"abc", [1]}], ["abc"], [], 1},
        {[{"def", [2]}, {"abc", [2]}], ["abc", "def"], [], 2},
        {[{"abc", [2]}, {"def", [1,2]}], ["abc"], [{"def", [1,2]}], 2}
    ],
    lists:foreach(
        fun({Expected, Ws, Is, LineNum}) ->
            Expected = index_words(Ws, Is, LineNum)
        end,
        Cases
    ),
    all_tests_passed.
% index_words(Ws, Is, LineNum) -> NewIs
%   Ws => Words found on one line; Is => Indexes; I => {"word", [3, 2]}
% Records LineNum against every word in Ws. For each word its index is
% pulled out of the accumulated indexes with find_index/2 (which supplies
% {W, []} when the word is new), LineNum is pushed onto the front of its
% line numbers, and the updated index is put back at the head of the list.
index_words(Ws, Is, LineNum) ->
    lists:foldl(
        fun(W, Acc) ->
            {{_, LineNums}, OtherIs} = find_index(W, Acc),
            [{W, [LineNum | LineNums]} | OtherIs]
        end,
        Is,
        Ws
    ).
%--------- | |
% Table-driven check of find_index/2. Each case is {Expected, W, Is}, where
% Expected is {FoundIndex, OtherIndexes}. Note that OtherIndexes comes back
% in the reverse of the input order.
find_index_test() ->
    Cases = [
        {{{"pear", []}, []}, "pear", [{"pear", []}]},
        {{{"pear", [1, 2, 3]}, []}, "pear", [{"pear", [1, 2, 3]}]},
        {{{"pear", [4, 2]}, [{"beet", [4, 1]}, {"apple", [3, 1]}]},
         "pear",
         [{"apple", [3, 1]}, {"pear", [4, 2]}, {"beet", [4, 1]}]}
    ],
    lists:foreach(
        fun({Expected, W, Is}) -> Expected = find_index(W, Is) end,
        Cases
    ),
    all_tests_passed.
% find_index(W, Is) -> {FoundI, OtherIs}
%   W => Word; Is => Indexes; I => {"word", [1, 2]}
% Separates the index for W from the rest of Is. FoundI is the index whose
% word is exactly W, or the default {W, []} when there is none. OtherIs
% holds every other index, in the reverse of the order they had in Is.
find_index(W, Is) ->
    find_index(W, Is, [], {W, []}).

% find_index(W, Is, OtherIs, FoundI): worker with explicit accumulators.
% OtherIs collects the non-matching indexes; FoundI is the value returned if
% no index for W turns up in Is.
find_index(W, Is, OtherIs, FoundI) ->
    Step =
        fun({Key, _} = I, {_, Others}) when Key =:= W ->
               %Match: remember it and leave it out of the others.
               {I, Others};
           (I, {Found, Others}) ->
               {Found, [I | Others]}
        end,
    lists:foldl(Step, {FoundI, OtherIs}, Is).
%------------ | |
% Checks split/1. The words come back in reverse order of appearance.
% The last three cases used to be three identical "abc def" assertions, so
% the leading-space and double-space inputs handled by bug fix 1 were never
% exercised; each case now covers a distinct input shape.
split_test() ->
    [] = split(""),                        %No words at all.
    ["ab"] = split("ab"),
    ["cd", "ab"] = split("ab cd"),
    ["aa", "aa"] = split("aa aa "),        %Trailing space: tests bug fix 2.
    ["def", "abc"] = split("abc def"),
    ["def", "abc"] = split(" abc def"),    %Leading space: tests bug fix 1.
    ["def", "abc"] = split("abc  def"),    %Two spaces, as created by normalizing "abc -- def".
    all_tests_passed.
% split(Line) -> Words
% Splits a normalized line into its words, using the space character as the
% only separator. The words are returned in the reverse of the order in
% which they appear in Line.
split(Line) ->
    split(Line, [], []).

% split(Chars, Pending, Words)
%   Pending => the characters of the word being read, most recent first
%   Words   => the completed words, most recent first
%
% End of input with nothing pending: do not add an empty string as a word
% (bug fix 2; this happens for "" and for lines ending in a space).
split([], [], Words) ->
    Words;
% End of input: whatever is pending is the last word.
split([], Pending, Words) ->
    [reverse(Pending) | Words];
% A space while no word is in progress (bug fix 1). Normalizing
% "consecrate -- we" removes the dashes but keeps both surrounding spaces;
% without this clause the second space would produce a bogus word, so it is
% simply skipped.
split([$\s | Rest], [], Words) ->
    split(Rest, [], Words);
% A space ends the word in progress; store it and start a new one.
split([$\s | Rest], Pending, Words) ->
    split(Rest, [], [reverse(Pending) | Words]);
% Any other character belongs to the word in progress.
split([Char | Rest], Pending, Words) ->
    split(Rest, [Char | Pending], Words).
%------------ | |
% Checks norm_lines/1: every line is normalized independently and the line
% order (including blank lines) is preserved.
norm_lines_test() ->
    ["ab"] = norm_lines(["Ab!"]),
    %normalize/1 drops the dashes of "us -- that" but keeps both spaces around
    %them, so the expected string has TWO spaces. The assertion previously
    %expected a single space and could not pass.
    ["ab", "", "us  that"] = norm_lines(
        ["Ab", "", "us -- that"]),
    ["hello world", "goodbye mars", ""] = norm_lines(
        ["Hello, world!", "Goodbye Mars.", ""]),
    all_tests_passed.
% norm_lines(Lines) -> NormLines
% Applies normalize/1 to every line, keeping the lines in their original
% order.
norm_lines(Lines) ->
    norm_lines(Lines, []).

% norm_lines(Ls, Acc): Acc holds already-normalized lines, most recent
% first. The normalized lines of Ls are pushed on top of it and the whole
% accumulator is reversed once at the end.
norm_lines(Ls, Acc) ->
    Collected = lists:foldl(
        fun(L, Done) -> [normalize(L) | Done] end,
        Acc,
        Ls
    ),
    reverse(Collected).
%------------- | |
% Table-driven check of reverse/1. Each case is {Expected, Input}.
reverse_test() ->
    Cases = [
        {[], []},
        {[2, 1], [1, 2]},
        {[1, 2, 3], [3, 2, 1]}
    ],
    lists:foreach(
        fun({Expected, Input}) -> Expected = reverse(Input) end,
        Cases
    ),
    all_tests_passed.
% reverse(L) -> the elements of the list L in reverse order.
reverse(L) ->
    reverse(L, []).

% reverse(L, Acc) -> the elements of L in reverse order, followed by Acc.
% Each element of L is pushed onto the front of the accumulator in turn.
reverse(L, Acc) ->
    lists:foldl(fun(X, Reversed) -> [X | Reversed] end, Acc, L).
%----------------- | |
% Table-driven check of normalize/1. Each case is {Expected, Input}:
% upper-case letters are lowered, lower-case letters and spaces are kept,
% and everything else is dropped.
normalize_test() ->
    Cases = [
        {"", ""},
        {"a", "a"},
        {"ab", "ab"},
        {"ab", "aB"},
        {"ab", "AB"},
        {"ab", "1A+B&2"},
        {"ab", "{%A|B.@}"},
        {"ab", "?!*a';(<}B{>)-+"},
        {"a b", "A b!"}
    ],
    lists:foreach(
        fun({Expected, Input}) -> Expected = normalize(Input) end,
        Cases
    ),
    all_tests_passed.
% normalize(L) -> NormalizedL
% Reduces a line to lower-case letters and spaces:
%   $A..$Z  are converted to lower case,
%   $a..$z and the space character are kept unchanged,
%   every other character (digits, punctuation, ...) is dropped.
normalize(L) ->
    normalize(L, []).

% normalize(Chars, Acc): Acc holds the characters kept so far, most recent
% first; the result is the reversed accumulator.
normalize(Chars, Acc) ->
    Keep =
        fun(C, Kept) when C >= $A, C =< $Z ->
               [C + ($a - $A) | Kept];     %Upper case: shift to lower case.
           (C, Kept) when C >= $a, C =< $z; C =:= $\s ->
               [C | Kept];                 %Lower case and spaces: no changes.
           (_, Kept) ->
               Kept                        %Anything else: skip it.
        end,
    reverse(lists:foldl(Keep, Acc, Chars)).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sorry, I left out two supporting files:
That latter file contains mysort(), which I call to sort the line numbers. You can find both files in my other gists.