Skip to content

Instantly share code, notes, and snippets.

@by77er
Last active January 24, 2020 01:54
Show Gist options
  • Save by77er/301d851623e1630b7be38357f743eb4d to your computer and use it in GitHub Desktop.
Save by77er/301d851623e1630b7be38357f743eb4d to your computer and use it in GitHub Desktop.
Sequentially scrapes DeviantArt for Wix image URLs (obsolete).
defmodule DA.API do
  @moduledoc """
  Command-line scraper that probes a range of numeric IDs against
  `orig01.deviantart.net` and reports the HTTP outcome for each one.

  Each ID is rendered in base 36, embedded in a fixed URL template and
  requested with redirects disabled. A `301` is reported together with its
  `Location` target, a `404` is reported as-is, and every other outcome
  (other status codes, transport errors, timed-out tasks) is dropped.
  """

  @doc """
  Script entry point.

  Expects `args` to hold `<start-id> <end-id>` (decimal integers with
  `0 <= start < end`) and optionally `--out <filename>` / `-o <filename>`.
  Prints the usage text and halts the VM when the arguments are invalid.

  Results are written to the given file, or to stdout (each line prefixed
  with `"[>] "`) when no output file is requested.
  """
  @spec main([String.t()]) :: :ok
  def main(args) do
    # :httpc lives in the :inets application, which must be running first.
    setup()

    {named, unnamed, _invalid} =
      OptionParser.parse(args, aliases: [o: :out], strict: [out: :string])

    {first, last} = parse_range(unnamed)

    first_id = Integer.to_charlist(first, 36)
    last_id = Integer.to_charlist(last, 36)
    IO.puts("[*] Scraping from ID #{first_id} to #{last_id}...")

    # Requests are I/O bound, so run many more tasks than schedulers.
    num_tasks = System.schedulers_online() * 20

    results =
      first..last
      |> Task.async_stream(&do_request/1, max_concurrency: num_tasks, on_timeout: :kill_task)
      |> Stream.map(&sort_result/1)
      |> Stream.reject(&(&1 == :error))
      |> Enum.to_list()

    IO.puts("[*] Finished scraping")
    write_results(results, named)
  end

  @doc """
  Prints the usage line and halts the VM with exit status 1.
  """
  @spec print_help() :: no_return()
  def print_help do
    IO.puts("Usage: elixir da_scrape.exs <start-id> <end-id (> start-id)> [--out <filename>]")
    System.halt(1)
  end

  @doc """
  Starts the `:inets` application so that `:httpc` can be used.
  """
  def setup do
    :inets.start()
  end

  @doc """
  Requests the URL derived from the integer `i` without following redirects.

  Returns `{url, 301, location}` for a 301 that carries a `Location` header,
  `{url, 404}` for a 404, and `:error` for everything else, including a 301
  that has no `Location` header.
  """
  @spec do_request(non_neg_integer()) ::
          {String.t(), 301, charlist()} | {String.t(), 404} | :error
  def do_request(i) do
    url = "http://orig01.deviantart.net/1234/-d#{Integer.to_charlist(i, 36)}.png"

    case :httpc.request(:get, {String.to_charlist(url), []}, [{:autoredirect, false}], []) do
      {:ok, {{_version, 301, _reason}, headers, _body}} ->
        # Header names are matched as charlists. A redirect without a
        # Location header must not raise: the task is linked to the caller,
        # so a crash here would abort the whole scrape.
        case List.keyfind(headers, ~c"location", 0) do
          {_name, location} -> {url, 301, location}
          nil -> :error
        end

      {:ok, {{_version, 404, _reason}, _headers, _body}} ->
        {url, 404}

      _ ->
        :error
    end
  end

  @doc """
  Converts one `Task.async_stream/3` element into a printable line.

  Returns `"<url> -> 301 -> <redirect>"` or `"<url> -> 404"` for successful
  probes and `:error` for anything else (failed probes, killed tasks).
  """
  @spec sort_result(term()) :: String.t() | :error
  def sort_result({:ok, {url, 301, redirect}}), do: "#{url} -> 301 -> #{redirect}"
  def sort_result({:ok, {url, 404}}), do: "#{url} -> 404"
  def sort_result(_other), do: :error

  # Validates the two positional arguments and converts them to integers;
  # prints the usage text and halts on anything else.
  defp parse_range([first, last]) do
    case {Integer.parse(first), Integer.parse(last)} do
      {{f, ""}, {l, ""}} when f >= 0 and f < l -> {f, l}
      _ -> print_help()
    end
  end

  defp parse_range(_unnamed), do: print_help()

  # Writes the result lines to the requested file, or to stdout when no
  # output file was given.
  defp write_results(results, [out: filename]) do
    # File.write!/2 opens, writes and closes in one step, so no handle can
    # leak, and a failure raises a descriptive File.Error.
    File.write!(filename, Enum.map(results, &[&1, ?\n]))
    IO.puts("[*] Output written to " <> filename)
  end

  defp write_results(results, _named) do
    Enum.each(results, &IO.puts("[>] " <> &1))
  end
end
# Script entry point: hand the command-line arguments to the scraper.
cli_args = System.argv()
DA.API.main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment