Skip to content

Instantly share code, notes, and snippets.

@ggb
Created March 1, 2017 20:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ggb/3999970908a4e78462f2482baf324b2b to your computer and use it in GitHub Desktop.
Save ggb/3999970908a4e78462f2482baf324b2b to your computer and use it in GitHub Desktop.
Functional Programming in Erlang - Second Assignment
-module(assignment2).
-export([index/1]).
% test the code by calling
% assignment2:index("gettysburg-address.txt").
% assignment2:index("dickens-christmas.txt").
%% clean_line(Line) -> string()
%% Normalises one line of text for indexing. The line is lower-cased and
%% reduced to ASCII letters and spaces:
%%   * letters and the space character are kept;
%%   * tab, CR, LF, VT and FF are replaced by a space, so words separated by
%%     them stay separated (previously every code point =< 32 was kept
%%     verbatim, which glued e.g. a trailing "\r" onto the last word);
%%   * everything else (digits, punctuation, other control characters and
%%     code points above $z) is dropped.
clean_line(Line) ->
    clean_line(string:to_lower(Line), []).

%% clean_line(Chars, Acc): worker for clean_line/1. Acc holds the characters
%% kept so far, newest first. Upper-case letters are kept unchanged here;
%% lower-casing is done once, up front, by clean_line/1.
clean_line([], Acc) ->
    lists:reverse(Acc);
clean_line([C | Rest], Acc) when C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $\s ->
    clean_line(Rest, [C | Acc]);
clean_line([C | Rest], Acc) when C =:= $\t; C =:= $\r; C =:= $\n; C =:= $\v; C =:= $\f ->
    %% Whitespace other than a plain space still has to separate words.
    clean_line(Rest, [$\s | Acc]);
clean_line([_ | Rest], Acc) ->
    clean_line(Rest, Acc).
%% word_and_line(Words, LineNo) -> [{Word, LineNo}]
%% Pairs every word in Words with the line number LineNo, keeping the
%% original word order. An empty word list yields an empty result.
word_and_line(Words, LineNo) ->
    [{Word, LineNo} || Word <- Words].
%% prepare_lines(Content) -> [[{Word, LineNo}]]
%% Converts a list of text lines into one list of {Word, LineNo} tuples per
%% line. Lines are numbered starting at 1.
prepare_lines(Content) ->
    prepare_lines(Content, [], 1).

%% prepare_lines(Lines, Done, FirstNo)
%%   Lines   - lines still to be processed
%%   Done    - results of already processed lines, newest first
%%   FirstNo - line number assigned to the first element of Lines
%% Each line is cleaned with clean_line/1, split on spaces and tagged with
%% its line number by word_and_line/2.
prepare_lines(Lines, Done, FirstNo) ->
    Tokenise = fun(Line, LineNo) ->
        Words = string:tokens(clean_line(Line), " "),
        {word_and_line(Words, LineNo), LineNo + 1}
    end,
    {PerLine, _NextNo} = lists:mapfoldl(Tokenise, FirstNo, Lines),
    %% Done is stored newest-first; put it back in order in front of the
    %% freshly processed lines.
    lists:reverse(Done, PerLine).
%% create_token_map(Token) -> #{Word => [LineNo]}
%% Groups a flat list of {Word, LineNo} tuples by word. Each value lists the
%% line numbers of that word with the most recently seen occurrence first;
%% repeated line numbers are kept.
create_token_map(Token) ->
    create_token_map(Token, #{}).

%% create_token_map(Pairs, Map0): same as create_token_map/1, but adds the
%% occurrences to the existing map Map0.
create_token_map(Pairs, Map0) ->
    AddOccurrence = fun({Word, LineNo}, Map) ->
        maps:update_with(Word, fun(Lines) -> [LineNo | Lines] end, [LineNo], Map)
    end,
    lists:foldl(AddOccurrence, Map0, Pairs).
%% shrink_list(Lines, Run, Acc) -> [{First, Last}]
%% Collapses a list of line numbers into ranges of consecutive numbers.
%%   Lines - numbers still to be processed
%%   Run   - the run currently being built, newest number first ([] = none)
%%   Acc   - finished ranges, newest first
%% A number extends the current run when it equals the previous number plus
%% one; otherwise the run is closed as {First, Last} and a new run starts.
shrink_list([Line | Rest], [], Acc) ->
    shrink_list(Rest, [Line], Acc);
shrink_list([Line | Rest], [Prev | _] = Run, Acc) ->
    Continues = Line == Prev + 1,
    if
        Continues ->
            shrink_list(Rest, [Line | Run], Acc);
        true ->
            %% The oldest element of the run is its first line number.
            Closed = {lists:last(Run), Prev},
            shrink_list(Rest, [Line], [Closed | Acc])
    end;
shrink_list([], [], Acc) ->
    lists:reverse(Acc);
shrink_list([], [Last | _] = Run, Acc) ->
    %% Close the run that was still open when the input ended.
    lists:reverse(Acc, [{lists:last(Run), Last}]).
%% unique(L) -> list()
%% Removes duplicates from L, keeping the first occurrence of every element
%% and the original order. Elements are compared exactly (1 and 1.0 are
%% different elements).
unique(L) ->
    unique(L, []).

%% unique(L, R): R holds elements that are already part of the result,
%% newest first. Elements of L that occur in R are skipped.
unique(L, R) ->
    Seen = maps:from_list([{X, true} || X <- R]),
    unique(L, Seen, R).

%% unique(L, Seen, Acc): Seen is a map used as a set of the elements in Acc.
%% Looking elements up in the map instead of scanning Acc with
%% lists:member/2 avoids quadratic run time on long lists.
unique([], _Seen, Acc) ->
    lists:reverse(Acc);
unique([H | T], Seen, Acc) ->
    case maps:is_key(H, Seen) of
        true ->
            unique(T, Seen, Acc);
        false ->
            unique(T, Seen#{H => true}, [H | Acc])
    end.
%% shrink_list(L) -> [{First, Last}]
%% Turns the line-number list of one word into a list of ranges.
%% L is reversed, stripped of duplicates with unique/1 and then collapsed
%% into {First, Last} ranges by shrink_list/3. The lists built by
%% create_token_map/1 hold the newest line first, so the reversal puts them
%% in ascending order.
shrink_list(L) ->
    Reversed = lists:reverse(L),
    Distinct = unique(Reversed),
    shrink_list(Distinct, [], []).
%% shrink_map(Map) -> [{Word, Ranges}]
%% Converts the word map into the final index: one {Word, Ranges} entry per
%% key, where Ranges is the result of shrink_list/1 on that key's value.
%% The entries are returned in ascending key order. maps:keys/1 does not
%% guarantee any order, so the keys are sorted explicitly; without this the
%% index order would depend on the map's internal layout.
shrink_map(Map) ->
    %% shrink_map/3 prepends every entry it produces, so the keys are fed in
    %% descending order to obtain an ascending result.
    KeysDescending = lists:reverse(lists:sort(maps:keys(Map))),
    shrink_map(KeysDescending, Map, []).

%% shrink_map(Keys, Map, Result): processes Keys from left to right and
%% prepends each {Key, Ranges} entry to Result.
shrink_map([], _Map, Result) ->
    Result;
shrink_map([Key | Rest], Map, Result) ->
    Ranges = shrink_list(maps:get(Key, Map)),
    shrink_map(Rest, Map, [{Key, Ranges} | Result]).
%% index(File) -> [{Word, [{FirstLine, LastLine}]}]
%% Builds a word index for the text file File: every word is mapped to the
%% ranges of line numbers on which it occurs.
%% The file is read through index:get_file_contents/1, which lives in a
%% separate module not shown here - presumably it returns the file as a list
%% of lines; confirm against that module.
index(File) ->
    Lines = index:get_file_contents(File),
    %% prepare_lines/1 yields one list of tuples per line; join them into a
    %% single flat list of occurrences.
    Occurrences = lists:append(prepare_lines(Lines)),
    shrink_map(create_token_map(Occurrences)).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment