Skip to content

Instantly share code, notes, and snippets.

@alvises
Last active March 30, 2019 01:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alvises/c9fe2cfcac9f1528c5ba3dc3c8c27db9 to your computer and use it in GitHub Desktop.
Save alvises/c9fe2cfcac9f1528c5ba3dc3c8c27db9 to your computer and use it in GitHub Desktop.
Chunks to Lines
defmodule Chunktolines do
  @moduledoc """
  Three strategies for turning a stream of arbitrary binary chunks into lines,
  each finished by summing the integer found at the start of every line.

  Every `test_*` function buffers the unfinished tail of a chunk and prepends it
  to the next chunk. Once the input is exhausted the buffered tail is flushed as
  a final line, so input that does not end with a newline is still fully summed.
  """

  ## 1. regex split (preserving "\n")
  @doc """
  Sums the integers in `enum`, a stream of binary chunks, splitting lines with a
  look-behind regex so that each emitted line keeps its trailing newline.
  """
  @spec test_1_regex_split(Enumerable.t()) :: integer()
  def test_1_regex_split(enum) do
    enum
    |> with_eof()
    |> Stream.transform("", fn
      :eof, partial ->
        {flush(partial), ""}

      chunk, partial ->
        # The look-behind matches the empty position right after each newline,
        # so complete lines keep their terminator and the last element is the
        # still-unfinished remainder.
        parts = Regex.split(~r/(?<=\n)/, partial <> chunk)
        split_off_partial(parts)
    end)
    |> stream_sum()
  end

  ## 2. recursive
  @doc """
  Sums the integers in `enum`, a stream of binary chunks, splitting lines with
  the hand-written recursive scanner `next_line/2` (newlines are dropped).
  """
  @spec test_2_recursive(Enumerable.t()) :: integer()
  def test_2_recursive(enum) do
    enum
    |> with_eof()
    |> Stream.transform("", fn
      :eof, partial -> {flush(partial), ""}
      chunk, partial -> next_line(chunk, partial)
    end)
    |> stream_sum()
  end

  @doc """
  Scans `chunk`, continuing the unfinished line `current_line`.

  Returns `{complete_lines, unfinished_line}`; the complete lines are in input
  order and do not include the newline that terminated them.
  """
  @spec next_line(binary(), binary()) :: {[binary()], binary()}
  def next_line(chunk, current_line \\ ""), do: next_line(chunk, current_line, [])

  @doc """
  Worker for `next_line/2`; `lines` holds the completed lines in reverse order.
  """
  @spec next_line(binary(), binary(), [binary()]) :: {[binary()], binary()}
  def next_line(<<?\n, rest::binary>>, current_line, lines) do
    next_line(rest, "", [current_line | lines])
  end

  # Scan byte by byte rather than by UTF-8 codepoint: fixed-size chunks can cut
  # a multi-byte character in half, and a `::utf8` match would then raise
  # FunctionClauseError. The newline byte never occurs inside a multi-byte UTF-8
  # sequence, so the resulting lines are identical for valid UTF-8 input.
  def next_line(<<byte, rest::binary>>, current_line, lines) do
    next_line(rest, <<current_line::binary, byte>>, lines)
  end

  def next_line(<<>>, current_line, lines), do: {Enum.reverse(lines), current_line}

  ## 3. String.split (losing "\n")
  @doc """
  Sums the integers in `enum`, a stream of binary chunks, splitting lines with
  `String.split/2` (newlines are dropped).
  """
  @spec test_3_string_split(Enumerable.t()) :: integer()
  def test_3_string_split(enum) do
    enum
    |> with_eof()
    |> Stream.transform("", fn
      :eof, partial ->
        {flush(partial), ""}

      chunk, partial ->
        parts = String.split(partial <> chunk, "\n")
        split_off_partial(parts)
    end)
    |> stream_sum()
  end

  @doc """
  Parses the leading integer of every line in `enum` and returns their sum.

  Raises `MatchError` when a line does not start with an integer (for example
  an empty line), because `Integer.parse/1` returns `:error` for it.
  """
  @spec stream_sum(Enumerable.t()) :: integer()
  def stream_sum(enum) do
    enum
    |> Stream.map(fn line ->
      {num, _} = Integer.parse(line)
      num
    end)
    |> Enum.sum()
  end

  # Appends an end-of-input marker. `Stream.transform/3` throws its final
  # accumulator away, so the reducers need one extra element on which to emit
  # the buffered tail. Chunks are binaries, so the atom cannot clash with data.
  defp with_eof(enum), do: Stream.concat(enum, [:eof])

  # Turns the buffered tail into the final emitted lines; an empty tail means
  # the input ended exactly on a newline and nothing is left to emit.
  defp flush(""), do: []
  defp flush(partial), do: [partial]

  # Separates the last element (the unfinished line) from the complete lines.
  defp split_off_partial(parts) do
    [partial | complete_reversed] = Enum.reverse(parts)
    {Enum.reverse(complete_reversed), partial}
  end
end
# Two lazy views of the same input file: one yields whole lines, the other
# yields raw 2048-byte chunks that the Chunktolines strategies must re-split.
numbers_small_lines = File.stream!("numbers_small.txt", [], :line)
numbers_small_chunks = File.stream!("numbers_small.txt", [], 2048)

# Job name => zero-arity function to measure. "0_lines" is the baseline that
# lets File.stream!/3 do the line splitting itself.
jobs = %{
  "0_lines" => fn -> Chunktolines.stream_sum(numbers_small_lines) end,
  "1_regex_split" => fn -> Chunktolines.test_1_regex_split(numbers_small_chunks) end,
  "2_recursive" => fn -> Chunktolines.test_2_recursive(numbers_small_chunks) end,
  "3_string_split" => fn -> Chunktolines.test_3_string_split(numbers_small_chunks) end
}

# `time` is Benchee's measurement duration for each job.
Benchee.run(jobs, time: 10)
defmodule Chunktolines.MixProject do
  use Mix.Project

  # Mix project definition for the chunk-to-lines benchmark.
  def project do
    [
      app: :chunktolines,
      version: "0.1.0",
      elixir: "~> 1.8",
      # `:start_permanent` is a boolean flag. A bare atom such as `:dev` is
      # truthy and would turn permanent mode on in every environment, so use
      # the standard "only in production" expression instead.
      start_permanent: Mix.env() == :prod,
      deps: deps()
    ]
  end

  # Benchee is the only dependency; it drives the benchmark script.
  defp deps do
    [{:benchee, "~> 1.0"}]
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment