Skip to content

Instantly share code, notes, and snippets.

@TattdCodeMonkey
Created October 19, 2016 15:00
Show Gist options
  • Save TattdCodeMonkey/7e834c269f0dcaa0dbb9b79e8f5538b3 to your computer and use it in GitHub Desktop.
Save TattdCodeMonkey/7e834c269f0dcaa0dbb9b79e8f5538b3 to your computer and use it in GitHub Desktop.
NashElixir - 10/18/2016 - Hacking with Flow v0.7
# Directory whose files are passed to OurTest.count_words/1 by this script.
dir_of_wiki = "Archive"
# Path to a single text file. It is not referenced by the rest of the visible
# script; presumably kept as input for OurTest.count_words_single/1 and
# OurTest.count_words_single_stream/1 when experimenting -- TODO confirm.
file_name = "fixtures/war_and_peace.txt"
alias Experimental.Flow
defmodule OurTest do
  @moduledoc """
  Word-frequency counting written three ways, for comparison:

    * `count_words_single/1` - eager, using `Enum` on one file
    * `count_words_single_stream/1` - lazy, using `Stream` on one file
    * `count_words/1` - parallel, using `Flow` over every file in a directory

  Every function returns a list of `{token, count}` tuples ordered by
  descending count.
  """

  # Tokens that count_words/1 drops before counting.
  @common_words [" ", "\n", ". ", "the", "to", "and", "a", "of", "in", "|"]

  @doc """
  Counts the tokens of `file_name` eagerly.

  Each line is split on regex word boundaries (`\\b`), so the whitespace and
  punctuation runs between words are tokens too and are counted alongside the
  words. Tokens are downcased before counting.

  Raises `File.Error` if the file cannot be read.
  """
  @spec count_words_single(Path.t()) :: [{String.t(), pos_integer()}]
  def count_words_single(file_name) do
    file_name
    |> File.stream!()
    |> Enum.flat_map(&String.split(&1, ~r(\b), trim: true))
    |> Enum.map(&String.downcase/1)
    |> Enum.reduce(%{}, &increment/2)
    |> sort_by_count_desc()
  end

  @doc """
  Same contract as `count_words_single/1`, but the split and downcase steps
  are lazy `Stream` stages, so no intermediate token list is built.

  Raises `File.Error` if the file cannot be read.
  """
  @spec count_words_single_stream(Path.t()) :: [{String.t(), pos_integer()}]
  def count_words_single_stream(file_name) do
    file_name
    |> File.stream!()
    |> Stream.flat_map(&String.split(&1, ~r(\b), trim: true))
    |> Stream.map(&String.downcase/1)
    |> Enum.reduce(%{}, &increment/2)
    |> sort_by_count_desc()
  end

  @doc """
  Returns one line-based `File.stream!/2` (with a 100_000 byte read-ahead
  buffer) for every regular file directly inside `dir`.

  Sub-directories and other non-regular entries are skipped: streaming them
  would raise as soon as the stream is enumerated.

  Raises `File.Error` if `dir` cannot be listed.
  """
  @spec get_files(Path.t()) :: [Enumerable.t()]
  def get_files(dir) do
    for file <- File.ls!(dir),
        path = Path.join(dir, file),
        File.regular?(path) do
      File.stream!(path, read_ahead: 100_000)
    end
  end

  @doc """
  Counts tokens across every file in `dir_name` in parallel with `Flow`.

  Lines are split on single spaces, tokens listed in `@common_words` are
  dropped, and the remaining tokens are counted after partitioning.

  NOTE(review): unlike the single-file variants, tokens are not downcased here,
  and because `File.stream!/2` yields lines with their terminator, the last
  token of each line keeps its trailing "\\n". Confirm whether that is intended.
  """
  @spec count_words(Path.t()) :: [{String.t(), pos_integer()}]
  def count_words(dir_name) do
    # Compile the separator once so every String.split/2 call reuses it.
    empty_space = :binary.compile_pattern(" ")

    dir_name
    |> get_files()
    |> Flow.from_enumerables()
    |> Flow.flat_map(&String.split(&1, empty_space))
    # Partition so that the reduce below sees all events for a given token
    # in the same partition.
    |> Flow.partition()
    |> Flow.filter(fn word -> not(word in @common_words) end)
    # The original marked this step "ETS" -- presumably a reminder that an ETS
    # table could replace the map accumulator; TODO confirm.
    |> Flow.reduce(&Map.new/0, &increment/2)
    |> Enum.to_list()
    |> sort_by_count_desc()
  end

  # Bumps the counter stored under `word`, starting at 1 for unseen words.
  defp increment(word, counts) do
    Map.update(counts, word, 1, &(&1 + 1))
  end

  # Orders `{word, count}` pairs from the highest count to the lowest.
  defp sort_by_count_desc(counts) do
    Enum.sort(counts, fn {_, left}, {_, right} -> left > right end)
  end
end
# Run the Flow-based counter over the directory, then print the sorted
# {token, count} pairs followed by the number of distinct tokens.
result = OurTest.count_words(dir_of_wiki)
IO.inspect(result)

result
|> Enum.count()
|> IO.inspect()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment