Last active
July 10, 2017 06:18
-
-
Save kylethebaker/85b2f10f210b3760ab9e5be7d249f8dd to your computer and use it in GitHub Desktop.
Functional Erlang Week 2 Assignment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(assignment2). | |
-export([print_indexes/1]). | |
-export([get_indexes/1, get_top_n_words/2, get_alpha_indexes/1]). | |
-define(SPLIT_TOKENS, " .,'\"()[]{}!?-\\"). | |
% Builds the word index for Filename and returns it as a list of
% {Word, Ranges} pairs, where Ranges is the list of {First, Last} line-number
% ranges computed by get_indexes_ranges/1.
get_indexes(Filename) ->
    RangeMap = get_indexes_ranges(Filename),
    maps:to_list(RangeMap).
% Returns up to N words from Filename ranked by the number of lines they
% appear on, most frequent first. Each result is {Word, LineCount}.
% When the file contains fewer than N distinct words, all of them are
% returned instead of failing.
get_top_n_words(Filename, N) ->
    Indexes = get_indexes_list(Filename),
    Sorted = sort_indexes_count(Indexes),
    % lists:sublist/2 accepts N larger than the list length, whereas
    % lists:split/2 raises badarg in that case.
    TopN = lists:sublist(Sorted, N),
    [{Word, length(Lines)} || {Word, Lines} <- TopN].
% Returns the word index of Filename as a list of {Word, LineNumbers} pairs
% ordered by Word.
get_alpha_indexes(Filename) ->
    sort_indexes_alpha(get_indexes_list(Filename)).
% Builds the word index of Filename as a map from Word to a list of
% {First, Last} tuples, each covering a run of consecutive line numbers,
% e.g. [{1, 3}, {6, 6}, {8, 9}].
get_indexes_ranges(Filename) ->
    convert_to_ranges(get_indexes_list(Filename)).
% Builds the word index of Filename as a map from Word to the ordered list
% of line numbers it occurs on, e.g. [1, 2, 3, 6, 8, 9]. Line numbering
% starts at 1.
get_indexes_list(Filename) ->
    Start = {1, maps:new()},
    Step = fun(Line, Acc) -> get_line_indexes(Line, Acc) end,
    {_NextLineNo, WordMap} = lists:foldl(Step, Start, get_file_contents(Filename)),
    WordMap.
% Turns the Indexes map into a list of {Word, Lines} pairs ordered by Word.
sort_indexes_alpha(Indexes) ->
    lists:keysort(1, maps:to_list(Indexes)).
% Turns the Indexes map into a list of {Word, Lines} pairs ordered by
% descending length of Lines. Lines must be a plain list of line numbers
% (not range tuples) for the length to mean "number of lines".
sort_indexes_count(Indexes) ->
    % Decorate each entry with its line count so the sort key is computed once.
    Decorated = [{length(Lines), Word, Lines} || {Word, Lines} <- maps:to_list(Indexes)],
    Ascending = lists:keysort(1, Decorated),
    % Prepending while folding both strips the count and reverses the order.
    lists:foldl(fun({_Count, Word, Lines}, Acc) -> [{Word, Lines} | Acc] end, [], Ascending).
% Fold step used while indexing a file: records every word of Line as
% occurring on line number N. The accumulator is {N, Map}, where Map has the
% word as key and an ordered set of line numbers as value. Returns the
% accumulator for the following line, {N + 1, UpdatedMap}.
get_line_indexes(Line, {N, Map}) ->
    Updated = update_word_indexes(get_words(Line), N, Map),
    {N + 1, Updated}.
% Splits Line into words at the separator characters listed in
% ?SPLIT_TOKENS and normalizes every word to lowercase. Runs of separators
% yield no empty words. Characters that are not in ?SPLIT_TOKENS stay part
% of the word they occur in.
get_words(Line) ->
    % string:lexemes/2 is the replacement for the obsolete string:tokens/2
    % and belongs to the same OTP 20 string API as string:lowercase/1.
    [string:lowercase(Word) || Word <- string:lexemes(Line, ?SPLIT_TOKENS)].
% Records in Map that every word in Words occurs on line N and returns the
% updated map. Line numbers are kept in an ordered set per word, so they stay
% sorted and a word repeated on the same line is stored once.
update_word_indexes(Words, N, Map) ->
    AddLine = fun(Word, Acc) ->
        Seen = maps:get(Word, Acc, ordsets:new()),
        Acc#{Word => ordsets:add_element(N, Seen)}
    end,
    lists:foldl(AddLine, Map, Words).
% Returns a copy of the Indexes map in which every word's list of line
% numbers has been replaced by the equivalent list of {First, Last} ranges.
convert_to_ranges(Indexes) ->
    maps:from_list([{Word, list_to_ranges(Lines)} || {Word, Lines} <- maps:to_list(Indexes)]).
% Collapses a sorted list of numbers into a list of {First, Last} tuples, one
% per run of consecutive values.
% Example: [1, 2, 3, 6, 9, 10, 12] gives [{1, 3}, {6, 6}, {9, 10}, {12, 12}]
list_to_ranges(List) ->
    list_to_ranges(List, []).

% Worker: Acc holds the ranges found so far, most recent first.
list_to_ranges(Remaining, Acc) ->
    case Remaining of
        [] ->
            lists:reverse(Acc);
        _ ->
            {Range, Rest} = next_range_sequence(Remaining),
            list_to_ranges(Rest, [Range | Acc])
    end.
% Takes the leading run of consecutive numbers off a non-empty list and
% returns {{First, Last}, Remaining}. The list must be sorted and free of
% duplicates.
% Example: [1, 2, 3, 5, 6, 8] gives {{1, 3}, [5, 6, 8]}
next_range_sequence([First | Rest]) ->
    next_range_sequence(Rest, {First, First}).

% Worker: extends Range while the head of List is exactly one above its
% upper bound; otherwise stops and hands back the untouched remainder.
next_range_sequence(List, Range) ->
    case {List, Range} of
        {[Next | Rest], {Lo, Hi}} when Next == Hi + 1 ->
            next_range_sequence(Rest, {Lo, Next});
        _ ->
            {Range, List}
    end.
% Pretty prints every word index, one word per line. Accepts either a list
% of {Word, Indexes} pairs or a map, which is converted to such a list first.
% Always returns ok.
print_indexes(Indexes) when is_list(Indexes) ->
    lists:foreach(fun(Entry) -> print_word_indexes(Entry) end, Indexes),
    ok;
print_indexes(Indexes) ->
    print_indexes(maps:to_list(Indexes)).
% Prints one index entry in the form "Word -> Indexes" followed by a newline
% and returns ok.
print_word_indexes({Word, Indexes}) ->
    io:format("~s -> ~w~n", [Word, ordsets:to_list(Indexes)]),
    ok.
% Opens the file Name for reading and returns its lines in file order.
% Fails with a badmatch if the file cannot be opened.
get_file_contents(Name) ->
    {ok, Handle} = file:open(Name, [read]),
    % get_all_lines/1 yields the lines last-first, so flip them back.
    ReversedLines = get_all_lines(Handle),
    lists:reverse(ReversedLines).
% Reads every remaining line from the open file File and closes it when done.
% Lines are returned in reverse order (last line first) with their trailing
% newline removed.
% Raises {read_failed, Reason} if io:get_line/2 reports an error.
get_all_lines(File) -> get_all_lines(File, []).

% Worker: Lines accumulates the lines read so far, most recent first.
get_all_lines(File, Lines) ->
    case io:get_line(File, "") of
        eof ->
            file:close(File),
            Lines;
        {error, Reason} ->
            % Release the handle before failing so a read error does not
            % leak the open file.
            file:close(File),
            error({read_failed, Reason});
        Line ->
            get_all_lines(File, [strip_newline(Line) | Lines])
    end.

% Removes a single trailing "\n" from Line if there is one. The last line of
% a file may have no newline, in which case it is returned unchanged rather
% than losing its final character.
strip_newline(Line) ->
    case lists:suffix("\n", Line) of
        true -> lists:droplast(Line);
        false -> Line
    end.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment