Skip to content

Instantly share code, notes, and snippets.

@KiaraGrouwstra
Created September 14, 2018 15:37
Show Gist options
  • Save KiaraGrouwstra/5adaae00eb28f2cc6f9b7266c496679b to your computer and use it in GitHub Desktop.
Save KiaraGrouwstra/5adaae00eb28f2cc6f9b7266c496679b to your computer and use it in GitHub Desktop.
scraping with elixir
# Agent:
# static:
# declare
defmodule Mix.TasksServer do
  @moduledoc """
  Agent holding the set of `{task, project}` pairs recorded so far.

  The agent is registered under this module's name, so callers do not need
  to keep its pid around.
  """

  @doc """
  Starts the agent with an empty set, registered under this module's name.
  """
  def start_link do
    # MapSet supersedes the deprecated HashSet/Set modules.
    Agent.start_link(fn -> MapSet.new() end, name: __MODULE__)
  end

  @doc """
  Adds `{task, project}` to the set. Returns `:ok`; adding the same pair
  twice leaves the set unchanged.
  """
  def put_task(task, project) do
    item = {task, project}
    Agent.update(__MODULE__, &MapSet.put(&1, item))
  end
end
# use
Mix.TasksServer.put_task(task, project)
# dynamic: just make those `__MODULE__`s an extra name variable, then fetch one:
# start_link returns {:ok, pid}; unwrap it so `agent` is the pid that Agent.get/2 expects
{:ok, agent} = Mix.TasksServer.start_link
# dynamic implementation:
defmodule Chat.Socket do
  @moduledoc """
  Agent registered under a caller-chosen name, so several instances can coexist.
  """

  @doc """
  Starts an agent registered as `name`. `ws` is handed to `Agent.start_link/2`
  unchanged, so it has to be a zero-arity function producing the initial state.
  """
  def start_link(ws, name), do: Agent.start_link(ws, name: name)

  @doc """
  Returns the complete state of the agent registered as `name`.
  """
  def get(name), do: Agent.get(name, fn state -> state end)
end
# keep around the agent's name from creation
# (worker here is presumably Supervisor.Spec.worker — TODO confirm the import)
worker(KV.Registry, [:my_manager_name, [name: :my_worker_name]])
# then call, plugging it into the added param
# NOTE(review): this three-argument put_task assumes the module was changed to take
# the agent as its first parameter, as the "dynamic" note above the call suggests
Mix.TasksServer.put_task(worker, task, project)
# do expensive processing outside the agent:
# fetch the state with an identity function, then run the slow work in the caller
Agent.get(agent, &(&1)) |> do_something_expensive()
# GenServer:
# Counter server declared with ExActor's macros.
# NOTE(review): the per-line notes describe what the macro names and arguments show;
# confirm the generated client/callback pairs against the ExActor docs.
defmodule Calculator do
use ExActor.GenServer
# start_link/0: the initial state is 0
defstart start_link, do: initial_state(0)
# inc(server, x): cast; the new state is state + x
defcast inc(x), state: state, do: new_state(state + x)
# get(server): call; replies with the current state
defcall get, state: state, do: reply(state)
# stop(server): cast; stops the server with reason :normal
defcast stop, do: stop_server(:normal)
end
# call dynamic:
{:ok, pid} = Calculator.start_link
Calculator.inc(pid, 10)
# call static:
# NOTE(review): `id:` only identifies the child spec inside the supervisor; it does not
# register the process as :my_calc, so the call below presumably needs the server to be
# started with a `name:` option — confirm before relying on it.
worker(Calculator, [], id: :my_calc)
Calculator.inc(:my_calc, 10)
# GenEvent
defmodule LoggerHandler do
  # GenEvent handler that collects logged values, newest first, in its state.
  use GenEvent

  # Prepends the logged entry to the accumulated entries.
  def handle_event({:log, entry}, entries), do: {:ok, [entry | entries]}

  # Replies with the entries in arrival order and resets the state to [].
  def handle_call(:messages, entries), do: {:ok, Enum.reverse(entries), []}
end
# use
{:ok, pid} = GenEvent.start_link()
GenEvent.add_handler(pid, LoggerHandler, [])
# each {:log, x} event is prepended to the handler's state
GenEvent.notify(pid, {:log, 1})
GenEvent.notify(pid, {:log, 2})
# :messages replies with the events in arrival order and empties the state
GenEvent.call(pid, LoggerHandler, :messages)
#=> [1, 2]
# the state was reset by the previous call, so nothing is left
GenEvent.call(pid, LoggerHandler, :messages)
#=> []
# Tasks:
# tell (fire and forget):
Task.start_link(fn -> IO.puts "ok" end)
# ask (run concurrently, then collect the result):
task = Task.async(fn -> do_some_work() end)
res = do_some_other_work()
res + Task.await(task)
# supervised, loop:
worker(Task, [fn -> IO.puts "ok" end])
# supervised, once:
worker(Task, [fn -> IO.puts "ok" end], restart: :transient)
# register supervisor class:
supervisor(Task.Supervisor, [[name: MyApp.TaskSupervisor]])
# then dynamically tell:
Task.Supervisor.start_child(MyApp.TaskSupervisor, fn ->
# Do something
end)
# or dynamically ask:
Task.Supervisor.async(MyApp.TaskSupervisor, fn ->
# Do something
end) |> Task.await()
# RabbitMQ drop-in (persistent queue by domain) for record structures rather than having to deal with serialization crap:
# read(key): lookup key on index
# read_at(key, index): lookup for given secondary index
# first(): first key in table -- deterministic if using autoincrement on the key (default first) column in a table with type: :ordered_set
# delete(key): delete row with a given key value -- if only this were row count I could just plug in 1.
# QLC: pop first
# Intent: open a cursor over the :Urls table, take one answer, release the cursor.
# NOTE(review): `lc … inlist` looks like pre-1.0 comprehension syntax (the commented
# `for x in …` variant further down in these notes hints at the same) — confirm it
# still compiles, and that the pipe attaches to the comprehension rather than to `x`.
cursor = lc x inlist :mnesia.table(:Urls), do: x
|> :qlc.q() #{:unique, true}
|> :qlc.cursor()
result = :qlc.next_answers(cursor, 1)
:ok = :qlc.delete_cursor(cursor)
result
# Amnesia: pop first
# deftable Urls, [{ :id, autoincrement }, :url], type: :ordered_set
# Read the row under the first key, delete it, and return the row.
key = Urls.first()
result = Urls.read(key)
:ok = Urls.delete(key)
result
# Amnesia: select, which was suggested in thread
# Ask for a single match; the pattern binds the second column to '$0'.
selection = Urls.select(1, {'_', '$0'}) # or nil
{[result], continuation} = selection
# selection |> Amnesia.Selection.values |> Enum.each &IO.puts(&1.content)
# NOTE(review): Elixir's List module has no head/1; hd/1 or List.first/1 is presumably
# what was meant, depending on the shape of `result` — confirm.
key = result |> List.head() #List?
:ok = Urls.delete(key)
result
# Amnesia: read-iterate?
# QLC: read-iterate?
# RabbitMQ: Erlang queue (in/out) per domain on Amnesia
# One row per domain; the :queue column holds the value handled with :queue.in/out below.
deftable Queues, [:domain, :queue]
# Appends `item` to the persisted queue of this server's domain.
# The server state is the domain key used for the table lookups.
defcast push(item), state: domain do
  queue = get_queue(domain)
  q2 = :queue.in(item, queue)
  set_queue(domain, q2)
  # q2 = update_queue(domain, fn(q) -> q2 = :queue.in(item, q); {q2, q2} end)
  # Keep the domain as state: the queue itself lives in the Queues table, and storing
  # q2 here would make the next call look up a queue value as if it were a domain.
  new_state domain
end
# Removes the oldest item from the persisted queue of this server's domain.
# Replies with :empty or {:value, item}, exactly as :queue.out/1 reports it.
defcall pop, state: domain do
  queue = get_queue(domain)
  {out, q2} = :queue.out(queue) # either :empty or {:value, item}
  set_queue(domain, q2)
  # {out, q2} = update_queue(domain, fn(q) -> {_, q2} = res = :queue.out(queue); {res, q2} end)
  # Reply without touching the state: it must remain the domain key, not the queue.
  reply(out)
end
#defp get_queue(domain) do
# Amnesia.transaction do
# Queue.read(domain)
# end
#end
# Reads the persisted queue for `domain` inside an Amnesia transaction.
# A domain without a row yields an empty queue so that the first push works.
# NOTE(review): assumes Queues.read/1 returns nil for a missing key — confirm against Amnesia.
defp get_queue(domain) do
  # Block form on purpose: `do: Amnesia.transaction, do: ...` passes two `do:` keys,
  # so the transaction never receives its body.
  Amnesia.transaction do
    case Queues.read(domain) do
      nil -> :queue.new()
      row -> row.queue
    end
  end
end

# Writes `queue` as the row for `domain` inside an Amnesia transaction.
# The struct is %Queues{}, matching the table declared with deftable.
defp set_queue(domain, queue) do
  Amnesia.transaction do
    Queues.write(%Queues{ domain: domain, queue: queue })
  end
end
#defp update_queue(domain, fun) do
# q1 = get_queue(domain)
# {res, q2} = fun.(q1)
# set_queue(domain, q2)
# res
#end
# Usage: the first argument is the server (here addressed as :queues).
Api.QueueStore.push(:queues, item)
# pop replies with what :queue.out/1 reported: :empty or {:value, item}
item = Api.QueueStore.pop(:queues)
# ditto in DETS while keeping {domain, queue} in the GenServer too?
# def init(), do: _tbl = :ets.new(:Queue, [:public, :named_table])
# Appends `item` to the in-memory queue and mirrors the new queue to DETS.
# The head must declare `item`; without the parameter the body refers to an unbound name.
defcast push(item), state: {domain, queue} do
  q2 = :queue.in(item, queue)
  dets_update(domain, q2)
  new_state {domain, q2}
end
# Takes the oldest item off the in-memory queue, mirrors the remainder to DETS,
# and replies with :empty or {:value, item}.
defcall pop, state: {domain, queue} do
  {result, remaining} = :queue.out(queue)
  dets_update(domain, remaining)
  set_and_reply({domain, remaining}, result)
end
# Stores `queue` under `domain` in the :Queues DETS table.
# update_element exists only in :ets; :dets.insert/2 replaces the row with the same key
# in a set table, and also creates the row when the domain has none yet.
defp dets_update(domain, queue), do: :dets.insert(:Queues, {domain, queue})
# read: lookup/2
# seems like double work (and huge copying overhead of immutable copying), more elegant if done once within Amnesia?
# where are the update functions in (d)ets/(a)mnesia?
# (d)ets (update_element and update_counter/4 are ETS-only; DETS offers update_counter/3 and insert/2):
# - update_element(Tab, Key, ElementSpec :: {Pos, Value}) -> boolean()
# - update_counter(Tab, Key, UpdateOp) -> Result
# - update_counter(Tab, Key, UpdateOp, Default) -> Result
# (a)mnesia:
# - merge-update multiple properties (set tables, not bag):
Amnesia.transaction do
new = %{email: "john@gmail.com"}
# read the row with key 1, overlay the new fields, write it back
User.read(1) |> Map.merge(new) |> User.write
# %{User.read(1) | email: "john@gmail.com"} |> User.write
end
# - with lenses (for updating one deep property):
import Elins
Amnesia.transaction do
# NOTE(review): without parentheses the call to `set` takes everything to its right as
# arguments, so `"John" |> User.write` is evaluated as the second argument — presumably
# `set([:name], "John")` with explicit parentheses was intended; confirm against Elins.
User.read(1) |> set [:name], "John" |> User.write
# User.read(1) |> edit [:name], fn(s) -> String.upcase(s) end |> User.write
end
# - overwrite on key (won't do partial maps for merge-update):
Amnesia.transaction do
%User{id: 1, name: "John", email: "john@example.com"} |> User.write
end
# - dirty_update_counter(Tab, Key, Incr) -> NewVal | exit({aborted, Reason})
# - http://stackoverflow.com/questions/10821930/erlang-dynamic-record-editing
# - ets(Fun, [, Args]) -> ResultOfFun | exit(Reason)
# could (a)mnesia even do nested records? what of (d)ets?
# - (D)ETS really does no schema whatsoever, just insert tuples and get them by key.
# - mnesia: does key-name schemas
# Create table :Bar whose rows are :MsgIn records with the record's declared fields.
:mnesia.create_table(:Bar, [
{:record_name, :MsgIn},
{:attributes, record_info(:fields, :MsgIn)}
])
# NOTE(review): the write targets :Foo while the table created above is :Bar, and the
# value is a map rather than a record tuple — presumably illustrative only; confirm.
fun = fn() -> :mnesia.write(:Foo, %{key: 1, val: "foo"}, :write) end
:mnesia.transaction(fun)
# ^ can't use in terminal cuz record_info is fake, only works during compilation (haven't seen Elixir equivalents aside from Amnesia itself). Anyway, it seems mnesia itself only enforces the structure of the top-level struct, not its values.
# - Amnesia also does optional type annotations
# Amnesia: do operations inside? - not really inside, but see above.
# Agents?: Agent.get(agent, fn state -> expensive(state) end)
use Amnesia
# Sketch of "pop the first row" inside an Amnesia transaction: read via Urls.first,
# delete via Urls.delete!, return what was read. The commented lines are the earlier
# raw :mnesia/:qlc attempt that the Amnesia calls replace.
# NOTE(review): the `...` arguments are placeholders, so this does not compile as written.
def pop() do
#:mnesia.start()
#:mnesia.transaction(
Amnesia.transaction do
#fn() ->
#cursor =
## for x in :mnesia.table(planemo), do: x
#lc x inlist :mnesia.table(planemo), do: x
#|> :qlc.q() #{:unique, true}
#|> :qlc.cursor()
result =
#:qlc.next_answers(cursor, 1)
Urls.first(...)
:ok =
#:qlc.delete_cursor(cursor)
Urls.delete!(...)
result
#end
#)
end
end

Handle state:

  • GenServer: persistent stateful server; common for embedding state in request-response.
  • GenEvent: event handler, has state too, seems similar to GenServer
  • GenFsm: FSM, simpler than GenServer (which can also store state in its State variable).
  • plain_fsm: OTP like gen_fsm but with selective receive (reduces FSM complexity).
  • Task: one action, no communication, do tasks from queue but not for periodic tasks (1.0: can't get system messages yet)
  • Agent: accessible (shared) mutable state; do expensive operations outside or they'll block.

Pooling:

  • Poolboy or a finite number of actors?
  • ^ Pooler 'protects members (gen_servers/gen_fsms) from being used concurrently'; I imagine Tasks may not need this?
  • Or.. is there an 'unconstrained' Poolboy? Just high overflow even if the default number is low?
  • This pooling assumes I don't care about their differences in state; isn't 'anonymous worker' the domain of Tasks?
    • Can keep static references to specific workers in [app].ex's children[] as well.

^ what of mailboxes as queue?

Flow diagram (GenEvent?):

  • constant?
    • constant
  • variable:
    • private?
      • temp?
        • variable
      • semi-persistent:
        • Task
    • public:
      • no processing?
        • need persistence?
          • (parts) processed in order?
            • Queue
          • random access:
            • k/v store
        • no persistence:
          • Agent
      • processing (+ persistent?):
        • state simple?
          • want selective receive?
            • plain_fsm
          • simple receive:
            • GenFsm
        • complex state:
          • GenServer

Use cases:

  • holding queue channel: Agent (or GenServer but doesn't need to do processing)

    • if messages big, queue connection small: Agent
      • pass channel copy to workers on creation
        • mem worth asking only once (right?)
      • on reboot possibly need new AMQP/socket channels though
    • if messages small, queue connection big: GenServer
  • managing websockets:

    - responding to client events:

    - responding to queue events:

    • ^ client events handled by channel, question is just about accessing socket after async processing
    • idiomatic approach: instead use sync processing and rely on many threads?
      • would work normally but not for scraping
    • send events through the channel's send -> handle_info?
    • Agent storing socket[s?] for retrieval?
    • GenServer storing socket to send events?
    • Task consuming queue events?
      • one task per socket? seems elegant.
        • also easier to kill canceled tasks?
      • holding all sockets? no, memory overhead.
      • retrieve sockets from Agent each time? meh, CPU overhead.
    • so Task vs. GenServer... try GenServer due to the ability to persist state across restarts?
    • ^ if messages small, queue connection big: GenServer
    • if messages big, queue connection small: Agent storing all sockets; fetcher:
      • Task? -- I don't think so, this isn't so much about active consuming, in the sense you subscribe to the AMQP topic and need to handle some different communication from the AMQP server.
      • GenServer
  • scraping/processing: Tasks (or GenServers but don't need to ask stuff) consuming queue

  • check if metadata can make round-trip so as to give response ^ uh-oh, Kafka doesn't do message-level custom metadata, try AMQP instead?

    • later: reconstruct request-reply to correlate response
  • port scraper (fetchir)

  • integrate front/back

RabbitMQ drop-in: FIFO queues, persistent + by domain

  • GenServers - shouldn't survive reboot
  • Agents (can block) - shouldn't survive reboot
  • (D)ETS:
    • select: -> Selection; supposedly faster??
    • match: returns bindings -> [Match]
    • match_object: returns objects -> Objects
    • all /3 (table = domain, pattern = '$1', n = 1); use continuation with /1 until end
    • select/match {x, Continuation} | '$end_of_table' | {error, Reason}
    • safe_fixtable(Name, Fix) -> ok delays resizing until released.
    • table type ordered_set 30~40% faster than set (no DETS); using key faster too.
    • pattern: wildcard '_', pattern variables $0/$1/... to match results.
      • [{ #person{idno='', name='', age='$1'}, [], ['$1']}]
      • {'_',dog,'$1'}
  • Mnesia: RAM/disk (ETS/DETS) combination with sharding, transactions, and distribution.
% Open a cursor over the answers of both query handles with duplicates removed,
% take up to 5 answers, release the cursor, and return the answers.
C = qlc:cursor(qlc:q([X || X <- qlc:append(QH1, QH2)],
{unique,true})),
R = qlc:next_answers(C, 5),
ok = qlc:delete_cursor(C),
R.
- maybe check how RabbitMQ uses mnesia for this?
	- uses native Erlang OTP queue module, though I'll need to tag on persistence...

Rate limiting: Maar denk dat ik concurrency in Elixir nog niet goed genoeg begrijp... weet b.v. niet zeker of er nou al actors zijn waarvoor het concurrency b.v. al automatisch afhandelt ofzo. Want voor state-loze delen (alles buiten GenServers e.a.) lijkt dat me ergens een logische stap in al deze Erlang zooi. Maar daarnaast zijn er nog wel andere dingen waarvan ik nog weinig weet. Voor rate limiting erbij zeg maar, ik weet nog niet eens zeker of gewoon sleep() gebruiken b.v. je qua CPU cycles keihard verneukt ofzo (danwel voorkomt dat zo'n actor als een GenServer messages in z'n mailbox kan afhandelen). Wellicht ook niet de meest standaard use-case, al lijkt iets met focus op concurrency als dit er wel minder slecht voor dan... andere zooi. Had nu een throttler (GenServer) die met scheduling (send_interval) credits bijtelde ja. Had die zeg maar centraal b.w.v. garantie dat het globaal was i.p.v. dat verschillende threads (over meerdere nodes?) elk eigen credits konden maken, al kan dat wellicht ook anders. Bestaande rate-limit libs leken nl. veelal boolean "ja je mag al" vs. "nee nog niet". Wat me dus vragen gaf over het voorkomen dat je CPU cycles allemaal gealloceerd worden aan "are we there yet?" danwel blocking sleeps ofzo. Maak je maar 1 consumer dan gaat schedulen in het proces inderdaad ook wel (i.i.g. geen "are we there yet?"), maar werk (URLs) zo laat mogelijk over workers (scrapers) alloceren was eigenlijk juist gunstig i.v.m. potentieel verschil in prioriteit, allocatie over een variabel aantal workers, binnen dat RabbitMQ paradigma ack timeouts, ... . Dus komt wel het een en ander bij kijken qua overwegingen... Tests (concurrency project): sleeping threads blocken niet elkaar (extern); voor sleeping genserver wel inkomende messages (intern)? concurrent: ``` spawn(fn -> IO.puts(CounterAgent.sleep_client(c)) end) def sleep_client(pid) do n = Agent.get(pid, &(&1)); :timer.sleep(1000); n end

sequential: ```
spawn(fn -> IO.puts(CounterAgent.sleep_server(c)) end)
def sleep_server(pid) do
  Agent.get(pid, fn n -> :timer.sleep(1000); n end)
end

so sleeping seems fine. so I'll use sequential blocking sleeps within the Throttler, with one Throttler instance per domain (per node). so far I only had one instance. names/ids seem to solve this. what's the difference?

  • names: substitute for process ID: ```

create process with name

{:ok, pid} = GenServer.start_link(Api.Throttler, nil, name: :foo)

call from pid

GenServer.call(pid, {:get, "a"})

call from name

GenServer.call(:foo, {:get, "a"})

check if alive

Process.whereis(:foo) # :: pid | port | nil

call from name over cluster

GenServer.call({MyStack, :"node@10.0.3.179"}, :pop)

- ids: defined at the top level as an option in `worker()`: ```
# :id - a name used to identify the child specification internally by the supervisor; defaults to the given module name
worker(Api.AmqpSub, [args], id: :responder)

if the throttler sleeps ('server' block, sequential) I'd need a separate throttler per domain. alt, current route: make the client sleep; can no longer have more clients per throttler.

# Scrape Alibaba result pages 1..87 for S355J2H: pull the JSON handed to
# page.setPageData(...) out of every page, collect the supplier names from each
# page's "normalList", and write both collections to /vagrant as JSON.
fetch_dec = Fun.flow([
  fn url -> Api.Utils.fetch!(url, [], [ follow_redirects: true, timeout: 10_000 ]) end,
  &Api.Utils.decode/1,
  fn response -> response.body end
])

items =
  1..87
  |> Enum.map(fn page ->
    url = "https://www.alibaba.com/products/F0/S355J2H/----------------------50/#{page}.html"
    html = fetch_dec.(url)

    # No fallback clause: a page without the setPageData call raises CaseClauseError.
    case Regex.run(~r/page\.setPageData\((.*?)\);/, html) do
      [_whole, payload] -> Poison.decode!(payload)
      #_ -> []
    end
  end)
  |> List.flatten()

suppliers =
  items
  |> Enum.map(fn meta ->
    # fetch! raises when the key is absent; only an explicit nil value maps to [].
    case Map.fetch!(meta, "normalList") do
      nil -> []
      entries -> Enum.map(entries, fn entry -> Map.fetch!(entry, "supplierName") end)
    end
  end)
  |> List.flatten()

fname = "/vagrant/S355J2H.json"
# Curried File.write!/2: save.(path).(content)
save = Fun.curry(&File.write!/2)
save.(fname).(Poison.encode!(items))
save.("/vagrant/S355J2H_suppliers.json").(Poison.encode!(suppliers))
# Scrape Taobao phone search pages 0..7 (offset 48 per page): per page, take the prices
# from the embedded "spus" JSON, fetch the detail record of every product id found in
# "vertical_from_pos", pair details with prices, and write everything to phones.json.
fetch_dec = Fun.flow([&(Api.Utils.fetch!(&1, [], [ follow_redirects: true, timeout: 10_000 ])), &Api.Utils.decode/1, &(&1.body)])
items = Enum.to_list(0..7) |> Enum.map(fn(page) ->
# `s` in the query string is the result offset
skip = 48 * page
url = "https://s.taobao.com/search?q=%E6%89%8B%E6%9C%BA&cps=yes&ppath=20573%3A102132332%3B446%3A370632889%3B446%3A569762281%3B446%3A591510374%3B33187%3A549512232%3B33187%3A45852278%3B33187%3A11138181%3B33187%3A22448952%3B933%3A145161329%3B933%3A373042500%3B933%3A33610188%3B933%3A145161342%3B933%3A33610187%3B933%3A33610189%3B933%3A78284%3B933%3A33610184%3B933%3A6577325%3B933%3A170384958%3B12304004%3A21401%3B12304004%3A21971%3B12304004%3A21402%3B10004%3A649836912%3B10004%3A591194881%3B10004%3A569662099%3B10004%3A686948029%3B10004%3A695920003&app=vproduct&vlist=1&cat=1512&s=#{skip}"
data = fetch_dec.(url)
# prices: the "price" field of every entry, converted with String.to_integer
# (no fallback clause: a page without the "spus" block raises CaseClauseError)
prices = case Regex.run(~r/"spus":(.*?)},"export"/, data) do
[_match, json] -> json
|> Poison.decode!()
|> Enum.map(fn(x) -> x
|> Map.fetch!("price")
|> String.to_integer()
end)
#_ -> []
end
# arr: one detail record per entry; the product id is the part after the last "_"
arr = case Regex.run(~r/vertical_from_pos":(.*?),"at/, data) do
[_match, json] -> json
|> Poison.decode!()
|> Enum.map(fn(s) -> spu = s
|> String.split("_")
|> List.last()
"https://s.taobao.com/api?m=spudetail&detail_tab=params&pspuid=#{spu}"
|> fetch_dec.()
|> Poison.decode!()
|> Api.Utils.to_atoms()
end)
#_ -> []
end
# Pair each detail record with its price by position (List.zip stops at the shorter list),
# then turn the :params list of %{pname, pvalue} maps into a pname => pvalue map.
# NOTE(review): Elins.edit(path, fun) is used as a curried updater here — confirm against Elins.
merged = List.zip([arr, prices])
|> Enum.map(fn({stats, price}) -> stats
|> Map.put(:price, price)
|> Elins.edit([:params], fn(lst) -> lst
|> Enum.into(%{}, fn(map) -> {
Map.fetch!(map, :pname),
Map.fetch!(map, :pvalue)
} end)
end).()
end)
end) |> List.flatten()
#modded = items |> Enum.map(fn(x) -> x
#end)
fname = "/vagrant/phones.json"
# Curried File.write!/2; the pipe below ends up calling save.(fname).(json)
save = Fun.curry(&File.write!/2)
(items
|> Poison.encode!()
|> save.(fname).())
# Scrape a Taobao shop: list pages 1..11 give the item ids, then every item page is
# fetched and parsed into a map of the fields named in the selector spec below.
# NOTE(review): the selector-spec strings are interpreted by Api.Parsing.parse — their
# exact syntax is not visible here; confirm against that module.
fetch_dec = Fun.flow([&(Api.Utils.fetch!(&1, [], [ follow_redirects: true, timeout: 10_000 ])), &Api.Utils.decode/1, &(&1.body)])
# factor applied to scraped prices further down in the script
multiplier = 1.00
fname = "/vagrant/shoes.json"
data = Enum.to_list(1..11) |> Enum.map(fn(page) ->
# selector spec for one list page, passed to the parser as JSON
json = %{ "items(dl)": [ %{ id: "dl@data-id", pic: "img@src", "title": ".item-name", "price": ".c-price", "sold": ".sale-num", comments: ".title span" } ] } |> Poison.encode!()
"https://602307356.taobao.com/i/asynSearch.htm?mid=w-13113220629-0&pageNo=#{page}"
|> fetch_dec.()
|> Api.Utils.de_jsonp()
|> Api.Parsing.parse(json)
|> Api.Utils.to_atoms()
|> Map.fetch!(:items)
end) |> List.flatten() |> Enum.map(fn(x) ->
# selector spec for one item detail page
json = %{
title: "h3",
price: "em.tb-rmb-num",
spm: "#J_Pine@data-spm",
sellerid: "#J_Pine@data-sellerid",
catid: "#J_Pine@data-catid",
rootid: "#J_Pine@data-rootid",
itemid: "#J_Pine@data-itemid",
"attrs(ul.attributes-list li)": [ %{ val: "li@title", kv: "li" } ],
descUrlScript: "script",
"gallery(ul#J_UlThumb li)": [ %{ url: "img@data-src" } ]
} |> Poison.encode!()
"https://item.taobao.com/item.htm?id=#{x.id}"
|> fetch_dec.()
|> Api.Parsing.parse(json)
|> Api.Utils.to_atoms()
end)
#data = File.read!(fname) |> Poison.decode!() |> Api.Utils.to_atoms()
# peek at the first scraped item
data |> Enum.at(0)
# debugging tap: prints the inspected value and passes it through unchanged
put_go = fn(x) -> IO.puts(inspect(x)); x end
#|> put_go.()
# Translation dictionary built from two tab-separated files (one pair per "\r\n"-terminated
# row). Every non-empty row must split into exactly two columns, otherwise Map.new/1 raises.
load_pairs = fn path ->
  path
  |> File.read!()
  |> String.split("\r\n")
  |> Enum.filter(fn row -> String.length(row) > 0 end)
  |> Enum.map(fn row -> row |> String.split("\t") |> List.to_tuple() end)
  |> Map.new()
end

# Enum.reduce/2 calls Map.merge(element, accumulator), so on a duplicate key the entry
# from the earlier file wins.
dict =
  ["/vagrant/attrs.tsv", "/vagrant/titles.tsv"]
  |> Enum.map(load_pairs)
  |> Enum.reduce(&Map.merge/2)

# Looks `str` up in the dictionary; unknown terms come back unchanged.
translate = fn str -> Map.get(dict, str, str) end
# Normalise every scraped item: add its url, trim and translate the title, scale the
# price, enlarge gallery thumbnails, turn the attribute list into a translated map,
# and resolve and fetch the description.
# NOTE(review): presumably Elins.editVals applies each function to the value under the
# same key, passing the whole item as second argument to two-arity functions — confirm.
fixed = data |> Enum.map(
  Elins.editVals(%{
    url: fn(_x, item) -> "https://item.taobao.com/item.htm?id=#{item.itemid}" end,
    # Trim, then translate. The translate call has to sit inside the function: piping the
    # fn itself into translate.() merely looked the function up in dict and handed it
    # back unchanged, so titles were never translated.
    title: fn(str) -> str |> String.strip() |> translate.() end,
    # String.to_float raises unless the text contains a decimal point
    price: fn(str) -> (multiplier * String.to_float(str)) end, # |> Float.to_string([decimals: 2, compact: false])
    # prefix the protocol-relative url and request the 400x400 rendition instead of the thumbnail
    gallery: fn(tbl) -> tbl |> Enum.map(fn(x) -> "http:" <> x.url |> String.replace("50x50", "400x400") end) end,
    attrs: fn(attrs) -> attrs |> Enum.map(fn(x) -> {
      # key
      String.split(x.kv, ":") |> Enum.at(0) |> translate.(),
      # val: a single word stays a string, several words become a list
      case x.val |> String.split(" ") do
        [v] -> v |> translate.()
        arr -> arr |> Enum.map(translate)
      end
    } end) |> Enum.into(%{}) end,
    # no fallback clause: a script without the expected snippet raises CaseClauseError
    descUrl: fn(_x, item) -> case Regex.run(~r/'http:'\ \?\ '([^\']*)'/, item.descUrlScript) do
      [_match, url] -> "http:" <> url
    end end,
    desc: fn(_x, item) -> item.descUrl |> fetch_dec.() end
  })
)
# Interactive inspection: first raw item and first normalised item.
data |> Enum.at(0)
fixed |> Enum.at(0)
#translated |> Enum.at(0)
# Same two items with the bulky fields set to nil for readability.
data |> Enum.at(0) |> Elins.setVals(%{ gallery: nil, descUrlScript: nil }).()
fixed |> Enum.at(0) |> Elins.setVals(%{ gallery: nil, descUrlScript: nil, descUrl: nil, desc: nil }).()
# Second pass with every edit commented out, so the map handed to editVals is empty.
# NOTE(review): presumably a no-op kept as a scratch area for re-translating — confirm.
fixed = fixed |> Enum.map(
Elins.editVals(%{
# title: translate
# attrs: fn(attrs) -> attrs |> Enum.map(fn({k,v}) -> {
# k |> translate.(),
# case v do
# arr when is_list(arr) -> arr |> Enum.map(translate)
# v -> v |> translate.()
# end
# } end) |> Enum.into(%{}) end
})
)
# Returns the first normalised item whose inspected form contains `str`, or nil.
find = fn str ->
  Enum.find(fixed, fn entry -> String.contains?(inspect(entry), str) end)
end
# |> Map.fetch!(:url)
# fn(str) -> fixed |> Enum.filter(fn(x) -> x |> inspect() |> String.contains?(str) end) |> Enum.map(fn(x) -> x.attrs |> Enum.filter(fn({k,v}) -> inspect({k,v}) |> String.contains?(str) end) end) end.("")
# Write the results to /vagrant: raw and normalised items as JSON, then titles,
# descriptions, and the sorted unique attribute terms as newline-separated text.
# save is a curried File.write!/2: save.(path).(content)
save = Fun.curry(&File.write!/2)

save.(fname).(Poison.encode!(data))
save.("/vagrant/fixed.json").(Poison.encode!(fixed))

titles = fixed |> Enum.map(fn item -> item.title end) |> Enum.join("\n")
save.("/vagrant/titles.txt").(titles)

descs = fixed |> Enum.map(fn item -> item.desc end) |> Enum.join("\n")
save.("/vagrant/descs.txt").(descs)

# De-duplicate by a round trip through MapSet.
uniq = Fun.flow([&MapSet.new/1, &MapSet.to_list/1])

attr_terms =
  fixed
  |> Enum.map(fn item -> item.attrs end)
  |> Enum.map(fn attrs -> Map.keys(attrs) ++ Map.values(attrs) end)
  |> List.flatten()
  |> uniq.()
  |> Enum.sort()
  |> Enum.join("\n")

save.("/vagrant/attrs.txt").(attr_terms)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment