Handle state:
- GenServer: persistent stateful server; common for embedding state in request-response.
- DRYer macros: https://github.com/sasa1977/exactor
- GenEvent: event handler, has state too, seems similar to GenServer
- GenFsm: FSM, simpler than GenServer (which can also store state in its State variable).
- plain_fsm: OTP like gen_fsm but with selective receive (reduces FSM complexity).
- Task: one-off action, no communication; good for consuming tasks from a queue but not for periodic tasks (as of 1.0: can't receive system messages yet)
- Agent: accessible (shared) mutable state; do expensive operations outside or they'll block.
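As a minimal sketch of the three non-FSM options above (the `Counter` module is hypothetical, just standard Agent/Task/GenServer API):

```elixir
# Agent: accessible shared mutable state; keep expensive work outside
# the Agent or it blocks other callers.
{:ok, agent} = Agent.start_link(fn -> 0 end)
Agent.update(agent, &(&1 + 1))
count = Agent.get(agent, & &1)   # do expensive work on `count` here, not inside get/2

# Task: one action, no communication; result collected once via await.
doubled = Task.async(fn -> count * 2 end) |> Task.await()

# GenServer: persistent stateful server, request-response.
defmodule Counter do
  use GenServer
  def init(n), do: {:ok, n}
  def handle_call(:get, _from, n), do: {:reply, n, n}
  def handle_cast(:inc, n), do: {:noreply, n + 1}
end

{:ok, server} = GenServer.start_link(Counter, 0)
GenServer.cast(server, :inc)
GenServer.call(server, :get)   # => 1 (call is serialized after the cast)
```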
Pooling:
- Poolboy or a finite number of actors?
- concurrent example: https://gist.github.com/henrik/ceede9c4d9bf3fcb4dd5
- ^ Pooler 'protects members (gen_servers/gen_fsms) from being used concurrently'; I imagine Tasks may not need this?
- Or.. is there an 'unconstrained' Poolboy? Just high overflow even if the default number is low?
- This pooling assumes I don't care about their differences in state; isn't 'anonymous worker' the domain of Tasks?
- Can keep static references to specific workers in [app].ex's children[] as well.
^ what of mailboxes as queue?
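On the "high overflow even if the default number is low" idea: Poolboy's child spec takes `:size` (resident workers) plus `:max_overflow` (temporary burst workers), so a near-unconstrained pool would look roughly like this (worker module name hypothetical; config sketch, not run against a real poolboy dep):

```elixir
# :size is the resident pool; :max_overflow allows extra temporary
# workers under load. MyApp.Scraper is a hypothetical worker module.
pool_opts = [
  name: {:local, :scraper_pool},
  worker_module: MyApp.Scraper,
  size: 2,            # low default number of workers
  max_overflow: 50    # effectively 'unconstrained' burst capacity
]

children = [
  :poolboy.child_spec(:scraper_pool, pool_opts, [])
]

# Checkout hands a member to one caller at a time, which is the
# 'protects members from being used concurrently' behavior:
# :poolboy.transaction(:scraper_pool, fn pid -> GenServer.call(pid, :work) end)
```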
Flow diagram (GenEvent?):
- constant?
- constant
- variable:
- private?
- temp?
- variable
- semi-persistent:
- Task
- temp?
- public:
- no processing?
- need persistence?
- (parts) processed in order?
- Queue
- random access:
- k/v store
- (parts) processed in order?
- no persistence:
- Agent
- need persistence?
- processing (+ persistent?):
- state simple?
- want selective receive?
- plain_fsm
- simple receive:
- GenFsm
- want selective receive?
- complex state:
- GenServer
- state simple?
- no processing?
- private?
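The "random access / no persistence" leaf of the diagram (Agent as k/v store) can be sketched as:

```elixir
# Agent as an in-memory k/v store: random access, no persistence,
# no processing beyond get/put.
{:ok, store} = Agent.start_link(fn -> %{} end)
Agent.update(store, &Map.put(&1, "example.com", :throttled))
Agent.get(store, &Map.get(&1, "example.com"))   # => :throttled
```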
Use cases:
- holding queue channel: Agent (or GenServer but doesn't need to do processing)
- if messages big, queue connection small: Agent
- pass channel copy to workers on creation
- mem worth asking only once (right?)
- on reboot possibly need new AMQP/socket channels though
- pass channel copy to workers on creation
- if messages small, queue connection big: GenServer
- if messages big, queue connection small: Agent
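A sketch of the "pass channel copy to workers on creation" route, with the channel value as a stand-in for a real AMQP channel struct:

```elixir
# The Agent holds the (small) channel handle; each worker asks once at
# creation and keeps its own copy, so the Agent isn't on the hot path.
channel = %{chan: :fake_amqp_channel}   # stand-in for a real channel
{:ok, holder} = Agent.start_link(fn -> channel end)

start_worker = fn work ->
  chan = Agent.get(holder, & &1)   # asked only once, at worker creation
  Task.async(fn -> {chan, work} end)
end

Task.await(start_worker.(:scrape))   # => {%{chan: :fake_amqp_channel}, :scrape}
```

On reboot the holder would need to be re-seeded with a fresh channel, matching the note above.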
- managing websockets:
- ^ client events handled by channel, question is just about accessing socket after async processing
- idiomatic approach: instead use sync processing and rely on many threads?
- would work normally but not for scraping
- send events through the channel's send -> handle_info?
- Agent storing socket[s?] for retrieval?
- GenServer storing socket to send events?
- Task consuming queue events?
- one task per socket? seems elegant.
- also easier to kill canceled tasks?
- holding all sockets? no, memory overhead.
- retrieve sockets from Agent each time? meh, CPU overhead.
- one task per socket? seems elegant.
- so Task vs. GenServer... try GenServer due to the ability to persist state across restarts?
- ^ if messages small, queue connection big: GenServer
- if messages big, queue connection small: Agent storing all sockets; fetcher:
- Task? -- I don't think so, this isn't so much about active consuming, in the sense you subscribe to the AMQP topic and need to handle some different communication from the AMQP server.
- GenServer
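The "GenServer storing socket to send events" option could look like this sketch; `push/2` is a stand-in for whatever actually writes to the socket (e.g. a channel push or :gen_tcp.send):

```elixir
defmodule SocketHolder do
  use GenServer
  # Holds one socket per server; queue events arrive as plain messages
  # (handle_info) after async processing and get pushed out.
  def start_link(socket), do: GenServer.start_link(__MODULE__, socket)
  def init(socket), do: {:ok, socket}

  def handle_info({:queue_event, payload}, socket) do
    push(socket, payload)
    {:noreply, socket}
  end

  # Stand-in: here the 'socket' is just a pid we message.
  defp push(socket, payload), do: send(socket, {:pushed, payload})
end

# Usage: the current process plays the role of the socket.
{:ok, holder} = SocketHolder.start_link(self())
send(holder, {:queue_event, "hello"})

payload =
  receive do
    {:pushed, p} -> p
  after
    1000 -> :timeout
  end
```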
- scraping/processing: Tasks (or GenServers but don't need to ask stuff) consuming queue
- check if metadata can make round-trip so as to give response ^ uh-oh, Kafka doesn't do message-level custom metadata, try AMQP instead?
- later: reconstruct request-reply to correlate response
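Reconstructing request-reply generally means attaching a correlation id to the outgoing message and keeping a map of pending ids; a library-agnostic sketch (the AMQP equivalent would carry the id in message properties):

```elixir
# Keep a map of pending correlation ids so a reply can be routed back
# to its requester; here the 'consumer' just echoes the id back.
corr_id = :erlang.unique_integer([:positive]) |> Integer.to_string()
pending = %{corr_id => self()}          # corr_id => who is waiting

# ...the consumer processes the request and replies with the same id:
waiter = Map.fetch!(pending, corr_id)
send(waiter, {:response, corr_id, :done})
pending = Map.delete(pending, corr_id)
0 = map_size(pending)                   # nothing left outstanding

result =
  receive do
    {:response, ^corr_id, r} -> r
  end
```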
- port scraper (fetchir)
- integrate front/back
RabbitMQ drop-in: FIFO queues, persistent + by domain
- GenServers - shouldn't survive reboot
- Agents (can block) - shouldn't survive reboot
- (D)ETS:
- select: -> Selection; supposedly faster??
- match: returns bindings -> [Match]
- match_object: returns objects -> Objects
- all /3 (table = domain, pattern = '$1', n = 1); use continuation with /1 until end
- select/match {x, Continuation} | '$end_of_table' | {error, Reason}
- safe_fixtable(Name, Fix) -> ok: delays resizing until released.
- table type ordered_set: 30~40% faster than set (no DETS); using key faster too.
- pattern: wildcard '_', pattern variables $0/$1/... to match results.
- [{ #person{idno='', name='', age='$1'}, [], ['$1']}]
- {'_',dog,'$1'}
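The match/match_object/select distinction above, tried from Elixir (`:ets` works the same; `:"$1"` atoms are the pattern variables, and a select spec is `{Head, Guards, Result}`):

```elixir
# match returns variable bindings; match_object returns whole objects;
# select takes a full match spec.
t = :ets.new(:pets, [:set, :public])
:ets.insert(t, {1, :dog, 4})
:ets.insert(t, {2, :cat, 2})

[[1]]          = :ets.match(t, {:"$1", :dog, :_})        # bindings only
[{1, :dog, 4}] = :ets.match_object(t, {:_, :dog, :_})    # full objects
[4]            = :ets.select(t, [{{:_, :_, :"$1"}, [{:>, :"$1", 3}], [:"$1"]}])

# select/3 with a limit pages: {Matches, Continuation} | :"$end_of_table";
# feed the continuation back to :ets.select/1 until the end.
{[_], _cont} = :ets.select(t, [{{:"$1", :_, :_}, [], [:"$1"]}], 1)
```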
- Mnesia: RAM/disk (ETS/DETS) combination with sharding, transactions, and distribution.
- http://www.erlang.org/doc/man/mnesia.html
- ETS works on tuples so no field names; Mnesia uses records.
- has wrappers of (D)ETS's select/3 and match_object
- http://erlang.org/pipermail/erlang-questions/2008-February/032742.html
- ^ use mnesia:select(Tab, MatchSpec, NObjs, Lock) (not a popping transaction?) or QLC (includes delete but much more costly than limited select()):
```
C = qlc:cursor(qlc:q([X || X <- qlc:append(QH1, QH2)],
                     {unique,true})),
R = qlc:next_answers(C, 5),
ok = qlc:delete_cursor(C),
R.
```
- maybe check how RabbitMQ uses mnesia for this?
- uses native Erlang OTP queue module, though I'll need to tag on persistence...
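For reference, the OTP queue module is a functional FIFO (so persistence would indeed have to be bolted on separately):

```elixir
# :queue is immutable; every in/out returns a new queue value.
q = :queue.new()
q = :queue.in(:a, q)
q = :queue.in(:b, q)

{{:value, :a}, q} = :queue.out(q)   # FIFO: first in, first out
{{:value, :b}, q} = :queue.out(q)
{:empty, _}       = :queue.out(q)
```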
Rate limiting:
But I don't think I understand concurrency in Elixir well enough yet... e.g. I'm not sure whether there are already actor types for which concurrency is handled automatically or something. For stateless parts (everything outside GenServers and the like) that seems like a logical step in all this Erlang stuff.
Besides that, there are other things I still know little about. Take rate limiting: I'm not even sure whether a plain sleep() hammers you hard on CPU cycles (or prevents an actor like a GenServer from handling the messages in its mailbox). Maybe not the most standard use case, though something focused on concurrency like this seems less badly suited to it than... other stuff.
I had a throttler (GenServer) that added credits on a schedule (send_interval). Kept it central to guarantee it was global, rather than different threads (across multiple nodes?) each minting their own credits, though perhaps that could be done differently too.
Existing rate-limit libs mostly seemed to answer a boolean "yes, go ahead" vs. "no, not yet". Which raised questions about preventing all CPU cycles getting allocated to "are we there yet?" polling, or to blocking sleeps.
If you only create one consumer, scheduling inside the process does also work (at least no "are we there yet?"), but allocating work (URLs) across workers (scrapers) as late as possible was actually desirable, given potential differences in priority, allocation over a variable number of workers, ack timeouts within that RabbitMQ paradigm, and so on. So there are quite a few considerations involved...
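A sketch of that credit-scheme throttler (module and option names hypothetical): credits refill on send_interval, callers block in call/3, and the server itself never sleeps, so its mailbox keeps draining:

```elixir
defmodule Throttler do
  use GenServer
  # Credits refill on a timer; when none are left, callers are parked
  # in a queue and answered later via GenServer.reply/2 -- no blocking
  # sleep inside the server, no "are we there yet?" polling.

  def start_link(opts \\ []), do: GenServer.start_link(__MODULE__, opts)
  def acquire(pid), do: GenServer.call(pid, :acquire, :infinity)

  def init(opts) do
    :timer.send_interval(Keyword.get(opts, :interval_ms, 100), :credit)
    {:ok,
     %{credits: Keyword.get(opts, :credits, 1),
       max: Keyword.get(opts, :max, 5),
       waiting: :queue.new()}}
  end

  # Credit available: grant immediately.
  def handle_call(:acquire, _from, %{credits: c} = s) when c > 0,
    do: {:reply, :ok, %{s | credits: c - 1}}

  # No credit: park the caller; it gets a deferred reply on the next tick.
  def handle_call(:acquire, from, s),
    do: {:noreply, %{s | waiting: :queue.in(from, s.waiting)}}

  def handle_info(:credit, s) do
    case :queue.out(s.waiting) do
      {{:value, from}, rest} ->
        GenServer.reply(from, :ok)
        {:noreply, %{s | waiting: rest}}

      {:empty, _} ->
        {:noreply, %{s | credits: min(s.credits + 1, s.max)}}
    end
  end
end

{:ok, t} = Throttler.start_link(credits: 1, interval_ms: 50)
:ok = Throttler.acquire(t)   # immediate (one credit available)
:ok = Throttler.acquire(t)   # parked ~50ms until the next credit
```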
Tests (concurrency project): sleeping processes don't block each other (external); but for a sleeping GenServer, incoming messages are blocked (internal)?
concurrent:
```
spawn(fn -> IO.puts(CounterAgent.sleep_client(c)) end)

def sleep_client(pid) do
  n = Agent.get(pid, &(&1)); :timer.sleep(1000); n
end
```
sequential:
```
spawn(fn -> IO.puts(CounterAgent.sleep_server(c)) end)

def sleep_server(pid) do
  Agent.get(pid, fn n -> :timer.sleep(1000); n end)
end
```
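To make the snippets above actually runnable, a minimal CounterAgent (hypothetical module; sleeps shortened to 100ms):

```elixir
defmodule CounterAgent do
  def start_link, do: Agent.start_link(fn -> 0 end)

  # Client sleeps: the Agent answers instantly, only the caller blocks,
  # so concurrent callers overlap.
  def sleep_client(pid) do
    n = Agent.get(pid, & &1)
    :timer.sleep(100)
    n
  end

  # Server sleeps: the sleep runs inside the Agent, blocking its
  # mailbox, so concurrent callers are serialized.
  def sleep_server(pid) do
    Agent.get(pid, fn n -> :timer.sleep(100); n end)
  end
end

{:ok, c} = CounterAgent.start_link()

# Two client-side sleeps run concurrently (~100ms wall-clock total):
t1 = Task.async(fn -> CounterAgent.sleep_client(c) end)
t2 = Task.async(fn -> CounterAgent.sleep_client(c) end)
[0, 0] = Enum.map([t1, t2], &Task.await/1)
```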
so sleeping seems fine. so I'll use sequential blocking sleeps within the Throttler, with one Throttler instance per domain (per node). so far I only had one instance. names/ids seem to solve this. what's the difference?
- names: substitute for process ID: ```
{:ok, pid} = GenServer.start_link(Api.Throttler, nil, name: :foo)
GenServer.call(pid, {:get, "a"})
GenServer.call(:foo, {:get, "a"})
Process.whereis(:foo) # :: pid | port | nil
GenServer.call({MyStack, :"node@10.0.3.179"}, :pop)
- ids: defined at the top level as an option in `worker()`: ```
# :id - a name used to identify the child specification internally by the supervisor; defaults to the given module name
worker(Api.AmqpSub, [args], id: :responder)
if the throttler sleeps ('server' block, sequential) I'd need a separate throttler per domain. alternative, current route: make the client sleep; but then I can no longer have multiple clients per throttler.
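The "separate throttler per domain" variant could use the name registration shown above, deriving one registered name per domain (sketch; Agents stand in for the throttler, and the `to_name` helper is hypothetical -- note String.to_atom/1 leaks atoms if the set of domains is unbounded):

```elixir
# One named process per domain, so each domain throttles independently.
to_name = fn domain -> String.to_atom("throttler_" <> domain) end

for domain <- ["example.com", "example.org"] do
  {:ok, _} = Agent.start_link(fn -> 0 end, name: to_name.(domain))
end

pid1 = Process.whereis(:"throttler_example.com")
pid2 = Process.whereis(:"throttler_example.org")
true = is_pid(pid1) and pid1 != pid2   # distinct process per domain
```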