Skip to content

Instantly share code, notes, and snippets.

@7-fl
Last active March 17, 2017 17:35
Show Gist options
  • Save 7-fl/148fea5d1a27990afa477d1a22b838d9 to your computer and use it in GitHub Desktop.
Save 7-fl/148fea5d1a27990afa477d1a22b838d9 to your computer and use it in GitHub Desktop.
-module(aw2).
-compile(export_all).
-include_lib("eunit/include/eunit.hrl").
% I put the code for converting the line numbers into runs in another module
% called runs, and I call the function runs:runs() from this module.
% Variable names:
% Is => Indexes, I => Index
% I => {"word", [1, 3, 7, 9]}
% Ws => Words
% W => Word
%% Builds and prints the word index of the file FileName.
%% The lines come from index:get_file_contents/1 (index.erl, supplied with the
%% exercise); they are normalized, indexed, prettified and finally printed
%% with io:format/2, whose result is returned.
index_file(FileName) ->
    Normalized = norm_lines(index:get_file_contents(FileName)),
    Pretty = prettify(index_lines(Normalized)),
    io:format("~p~n", [Pretty]).
%------------
%% Checks that prettify/1 turns each word's raw line numbers into
%% {From, To} runs and returns the entries in reverse input order.
prettify_test() ->
    Raw = [{"hi", [3,5,1,2]}, {"bye", [3,4,3,1,2, 8,7, 10]}],
    Expected = [{"bye", [{1,4}, {7,8}, {10,10}]},
                {"hi", [{1,3}, {5,5}]}],
    Expected = prettify(Raw),   % badmatch here means the test failed
    all_tests_passed.
%% Replaces the line-number list of every {Word, LineNums} entry in Is with
%% runs:runs(LineNums).
%% The entries come back in the reverse of their input order; no reversal is
%% done because the index is not ordered anyway.
prettify(Is) ->
    lists:foldl(fun({Word, LineNums}, Acc) ->
                        [{Word, runs:runs(LineNums)} | Acc]
                end, [], Is).
%----------
%% Table-driven test for index_lines/1. Each case is {Expected, Lines}.
index_lines_test() ->
    Cases =
        [{[], []},
         {[{"hi", [1]}, {"bye", [1]}], ["hi bye"]},
         {[{"hi", [2, 1]}, {"bye", [2, 1]}], ["hi bye", "hi bye"]},
         %% The entry order looks shuffled, but it is what the algorithm
         %% produces: the most recently touched word is always at the head.
         {[{"abc", [2]}, {"hi", [1]}, {"bye", [1]}, {"def", [2]}],
          ["hi bye", "abc def"]}],
    lists:foreach(fun({Expected, Lines}) ->
                          Expected = index_lines(Lines)
                  end, Cases),
    all_tests_passed.
%% Builds the index for the list of (already normalized) lines Ls.
%% Lines are numbered from 1. Every line, including a blank one, advances the
%% line number; non-blank lines are split into words which are then added to
%% the index under the current line number.
%% Returns a list of {Word, LineNums} entries.
index_lines(Ls) ->
    IndexLine =
        fun("", {Is, LineNum}) ->
                %% Blank line: nothing to index, but it still counts.
                {Is, LineNum + 1};
           (L, {Is, LineNum}) ->
                {index_words(split(L), Is, LineNum), LineNum + 1}
        end,
    {Indexes, _NextLineNum} = lists:foldl(IndexLine, {[], 1}, Ls),
    Indexes.
%------------
%% Table-driven test for index_words/3.
%% Each case is {Expected, Ws, Is, LineNum} where Ws is the word list,
%% Is the index built so far and LineNum the line the words came from.
index_words_test() ->
    Cases =
        [{[], [], [], 1},
         {[{"abc", [1]}], ["abc"], [], 1},
         {[{"def", [2]}, {"abc", [2]}], ["abc", "def"], [], 2},
         {[{"abc", [2]}, {"def", [1,2]}], ["abc"], [{"def", [1,2]}], 2}],
    lists:foreach(fun({Expected, Ws, Is, LineNum}) ->
                          Expected = index_words(Ws, Is, LineNum)
                  end, Cases),
    all_tests_passed.
%% Records LineNum for every word in Ws and returns the updated index.
%% For each word its entry is taken out of the index with find_index/2
%% (a word not seen before starts from an empty line list), LineNum is
%% prepended to its line numbers, and the entry is put back at the head.
%% A word occurring several times in Ws gets LineNum once per occurrence.
index_words(Ws, Is, LineNum) ->
    lists:foldl(fun(W, Acc) ->
                        {{_, LineNums}, OtherIs} = find_index(W, Acc),
                        [{W, [LineNum | LineNums]} | OtherIs]
                end, Is, Ws).
%---------
%% Table-driven test for find_index/2. Each case is {Expected, W, Is}.
find_index_test() ->
    Cases =
        [{{{"pear", []}, []},
          "pear", [{"pear", []}]},
         {{{"pear", [1, 2, 3]}, []},
          "pear", [{"pear", [1, 2, 3]}]},
         %% The entries that do not match come back in reverse order.
         {{{"pear", [4, 2]}, [{"beet", [4, 1]}, {"apple", [3, 1]}]},
          "pear",
          [{"apple", [3, 1]}, {"pear", [4, 2]}, {"beet", [4, 1]}]}],
    lists:foreach(fun({Expected, W, Is}) ->
                          Expected = find_index(W, Is)
                  end, Cases),
    all_tests_passed.
%% Looks up the entry for word W in the index Is.
%% Returns {Found, Others}:
%%   Found  - the {W, LineNums} entry whose key is exactly W (the last one if
%%            there are several), or the default {W, []} when there is none;
%%   Others - every other element of Is, in reverse order.
find_index(W, Is) ->
    lists:foldl(fun({Key, _} = I, {_, OtherIs}) when Key =:= W ->
                        %% Matching entry: remember it, keep it out of Others.
                        {I, OtherIs};
                   (I, {FoundI, OtherIs}) ->
                        {FoundI, [I | OtherIs]}
                end, {{W, []}, []}, Is).
%------------
%% Unit test for split/1. Note that split/1 returns the words in the reverse
%% of their order of appearance.
split_test() ->
    ?assertEqual([], split("")),
    ?assertEqual(["ab"], split("ab")),
    ?assertEqual(["cd", "ab"], split("ab cd")),
    %% A trailing space must not add an empty word (bug fix 2).
    ?assertEqual(["aa", "aa"], split("aa aa ")),
    %% One, two and three separating spaces must all give the same words.
    %% Two adjacent spaces are what normalizing "ab -- cd" produces (bug fix 1).
    ?assertEqual(["def", "abc"], split("abc def")),
    ?assertEqual(["def", "abc"], split("abc  def")),
    ?assertEqual(["def", "abc"], split("abc   def")),
    %% Leading spaces are skipped as well.
    ?assertEqual(["def", "abc"], split("  abc def")),
    all_tests_passed.
%% Splits Line into words at space characters (ASCII 32).
%% The words are returned in the REVERSE of their order of appearance,
%% e.g. "ab cd" gives ["cd", "ab"].
%% Leading, trailing and repeated spaces never produce an empty word. That
%% matters because normalizing a line such as "consecrate -- we" removes the
%% dashes but keeps the space on either side of them, leaving two adjacent
%% spaces between the words; without skipping the extra space, " we" and "we"
%% would end up as two different index entries.
split(Line) ->
    split(Line, []).

%% End of line: everything collected so far is the result.
split([], Words) ->
    Words;
%% A space where a word should start: skip it.
split([$\s | Rest], Words) ->
    split(Rest, Words);
%% Start of a word: take everything up to the next space (or end of line).
split(Line, Words) ->
    {Word, Rest} = lists:splitwith(fun(C) -> C =/= $\s end, Line),
    split(Rest, [Word | Words]).
%------------
%% Unit test for norm_lines/1.
norm_lines_test() ->
    ?assertEqual(["ab"], norm_lines(["Ab!"])),
    %% normalize/1 only deletes the two dashes from "us -- that"; the space on
    %% each side of them is kept, so the expected string has TWO spaces
    %% between "us" and "that".
    ?assertEqual(["ab", "", "us  that"],
                 norm_lines(["Ab", "", "us -- that"])),
    ?assertEqual(["hello world", "goodbye mars", ""],
                 norm_lines(["Hello, world!", "Goodbye Mars.", ""])),
    all_tests_passed.
%% Applies normalize/1 to every line in Lines, keeping the lines in order.
norm_lines(Lines) ->
    [normalize(Line) || Line <- Lines].
%-------------
%% Table-driven test for reverse/1. Each case is {Expected, Input}.
reverse_test() ->
    Cases = [{[], []},
             {[2, 1], [1, 2]},
             {[1, 2, 3], [3, 2, 1]}],
    lists:foreach(fun({Expected, Input}) ->
                          Expected = reverse(Input)
                  end, Cases),
    all_tests_passed.
%% Returns the elements of the list L in reverse order.
%% Each element is pushed onto the front of an initially empty accumulator,
%% so the first element of L ends up last.
reverse(L) ->
    lists:foldl(fun(X, Acc) -> [X | Acc] end, [], L).
%-----------------
%% Table-driven test for normalize/1. Each case is {Expected, Input}.
normalize_test() ->
    Cases =
        [{"", ""},
         {"a", "a"},
         {"ab", "ab"},
         {"ab", "aB"},
         {"ab", "AB"},
         {"ab", "1A+B&2"},
         {"ab", "{%A|B.@}"},
         {"ab", "?!*a';(<}B{>)-+"},
         {"a b", "A b!"}],
    lists:foreach(fun({Expected, Input}) ->
                          Expected = normalize(Input)
                  end, Cases),
    all_tests_passed.
%% Normalizes one line of text (a character list) for indexing:
%%   * upper-case ASCII letters are converted to lower case;
%%   * lower-case ASCII letters and spaces are kept unchanged;
%%   * a tab becomes a space, so that tab-separated words are still separated
%%     when the line is later split on spaces (dropping the tab would glue
%%     the two words together);
%%   * every other character (digits, punctuation, ...) is dropped.
%% Returns the normalized character list.
normalize(L) ->
    normalize(L, []).

normalize([], Acc) ->
    %% Characters were accumulated back to front.
    lists:reverse(Acc);
normalize([X|Xs], Acc) when X >= $A, X =< $Z ->        % Caps...
    normalize(Xs, [X + ($a - $A) | Acc]);              % ...convert to lower case.
normalize([X|Xs], Acc) when X >= $a, X =< $z; X =:= $\s -> % Lower case and spaces...
    normalize(Xs, [X | Acc]);                          % ...no changes.
normalize([$\t|Xs], Acc) ->                            % Tab...
    normalize(Xs, [$\s | Acc]);                        % ...acts as a word separator.
normalize([_|Xs], Acc) ->                              % Anything else...
    normalize(Xs, Acc).                                % ...is skipped.
@7-fl
Copy link
Author

7-fl commented Mar 6, 2017

Sorry, I left out two supporting files:

  1. index.erl (provided for us)
  2. w2.erl

That latter file contains mysort(), which I call to sort the line numbers. You can find both files in my other gists.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment