Skip to content

Instantly share code, notes, and snippets.

@alan-mushi
Last active July 15, 2020 22:55
Show Gist options
  • Save alan-mushi/38618e77ad44a9394bb3321d9b4fc136 to your computer and use it in GitHub Desktop.
Save alan-mushi/38618e77ad44a9394bb3321d9b4fc136 to your computer and use it in GitHub Desktop.
Ripe inetnum elixir parsing (OOM-killer)
# Requires `flow` as a Mix dependency (add it to `deps` in mix.exs).
# Known issue: when run, this test gets killed by the OOM-killer.
defmodule RipeTest do
  use ExUnit.Case
  doctest Ripe

  # Gzip-compressed inetnum dump, from https://ftp.ripe.net/ripe/dbase/split/
  @filename "./assets/ripe.db.inetnum.gz"
  # :dets is an Erlang module and takes the file name as a charlist.
  @outfile ~c"inetnum.dets"

  # Stream filter predicate: false for comment lines (those starting with "#"
  # or "%"), true for every other line.
  def filter_comment("#" <> _line), do: false
  def filter_comment("%" <> _line), do: false
  def filter_comment(_line), do: true

  # Turns the lines of one record into a map of attribute name => list of
  # values, with the values in the order they appear in the record.
  #
  # A line whose first character is a space, a tab or "+" is a continuation
  # line: its trimmed text is appended as one more value of the most recent
  # attribute, even when that text contains a ":" (URLs, times, ...). A line
  # without any ":" is treated as a continuation as well. A continuation that
  # precedes every attribute is filed under the `nil` key.
  def parse_record(lines) do
    {_last_key, attrs} =
      Enum.reduce(lines, {nil, %{}}, fn line, state ->
        line
        |> String.normalize(:nfc)
        |> classify_line()
        |> put_line(state)
      end)

    # Values were prepended while folding; restore source order once here.
    Map.new(attrs, fn {key, values} -> {key, Enum.reverse(values)} end)
  end

  # Classifies a line as {:attribute, key, value} or {:continuation, value}.
  defp classify_line(<<marker, rest::binary>>) when marker in [?\s, ?\t, ?+] do
    {:continuation, String.trim(rest)}
  end

  defp classify_line(line) do
    # `parts: 2` keeps any further ":" inside the value.
    case String.split(line, ":", parts: 2) do
      [key, value] -> {:attribute, String.trim(key), String.trim(value)}
      [text] -> {:continuation, String.trim(text)}
    end
  end

  # Folds one classified line into the {last_key, attrs} accumulator.
  defp put_line({:attribute, key, value}, {_last_key, attrs}) do
    {key, add_value(attrs, key, value)}
  end

  defp put_line({:continuation, value}, {last_key, attrs}) do
    {last_key, add_value(attrs, last_key, value)}
  end

  # Prepends `value` to the (reversed) value list stored under `key`.
  defp add_value(attrs, key, value) do
    Map.update(attrs, key, [value], &[value | &1])
  end

  # Writes one parsed record to the :inetnum table, keyed by its first
  # "inetnum" value. Raises when the record has no "inetnum" attribute or when
  # :dets reports an error, so a failed write is never silently dropped.
  defp store_record(%{"inetnum" => [range | _]} = record) do
    :ok = :dets.insert(:inetnum, {range, record})
    record
  end

  @tag timeout: :infinity
  test "do it manually for a bit" do
    {:ok, _} = :dets.open_file(:inetnum, [{:type, :duplicate_bag}, {:file, @outfile}])

    try do
      @filename
      |> File.stream!([:compressed])
      |> Stream.filter(&filter_comment/1)
      # Records are separated by blank lines: group the lines, then drop the
      # groups that consist of separators.
      |> Stream.chunk_by(&(&1 == "\n"))
      |> Stream.reject(&match?(["\n" | _], &1))
      |> Flow.from_enumerable()
      |> Flow.map(&parse_record/1)
      |> Flow.map(&store_record/1)
      |> Flow.run()

      :ok = :dets.sync(:inetnum)
      IO.puts("Number of records: #{:dets.info(:inetnum, :size)}")
    after
      # Always release the table, including when a pipeline stage raised.
      :dets.close(:inetnum)
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment