Skip to content

Instantly share code, notes, and snippets.

@meadsteve
Forked from michalmuskala/downloader.exs
Last active August 29, 2015 14:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save meadsteve/d4b959877811b31dd7a5 to your computer and use it in GitHub Desktop.
Save meadsteve/d4b959877811b31dd7a5 to your computer and use it in GitHub Desktop.
Parallel downloads in Elixir. Code for the post http://michal.muskala.eu/2015/08/06/parallel-downloads-in-elixir.html
#!/usr/bin/env elixir
defmodule Committer do
  @moduledoc """
  A commit author (name and e-mail) read from `git log`, together with helpers
  to list the unique committers of a repository and to download a committer's
  Gravatar image.
  """

  defstruct [:name, :email]

  @type t :: %__MODULE__{name: String.t() | nil, email: String.t() | nil}

  @base_url "http://www.gravatar.com/avatar/"
  @url_params "?d=identicon&s=200"

  # Byte placed between the author name and e-mail on every log line. It must
  # match the `%x00` placeholder used in `from_repo/1`. A NUL byte is used
  # instead of a printable character such as "|" so that author names which
  # themselves contain that character are not split in the wrong place.
  @field_separator <<0>>

  @doc """
  Returns a lazy stream of the committers of the git repository at `repo`,
  unique by e-mail address.

  `git log` itself is executed eagerly when this function is called; only the
  parsing of its output is lazy. Raises `RuntimeError` when `git` exits with a
  non-zero status.
  """
  @spec list(Path.t()) :: Enumerable.t()
  def list(repo) do
    repo
    |> from_repo()
    |> parse_log()
  end

  @doc """
  Parses the output produced by `from_repo/1` into a lazy stream of
  `Committer` structs, keeping only the first committer seen for each e-mail.

  `output` holds one committer per line, with the name and the e-mail
  separated by a single NUL byte. Empty lines are ignored.
  """
  @spec parse_log(String.t()) :: Enumerable.t()
  def parse_log(output) do
    output
    |> String.splitter("\n", trim: true)
    |> Stream.map(&to_committer/1)
    |> Stream.uniq_by(& &1.email)
  end

  @doc """
  Downloads the Gravatar image of the committer in the given `format`
  (`:png` by default).

  Returns `{:ok, body}` with the raw image on an HTTP 200 response,
  `{:error, "response code N"}` for any other status and `{:error, reason}`
  when the request itself fails. The request is made with `:httpc`, so the
  `:inets` application has to be running.
  """
  @spec fetch_gravatar(t, atom) :: {:ok, binary} | {:error, term}
  def fetch_gravatar(%__MODULE__{email: email}, format \\ :png) do
    request = {gravatar_url(email, format), []}
    http_opts = [timeout: 5000]
    # `full_result: false` makes :httpc return `{status_code, body}` rather
    # than the full status line and headers.
    opts = [body_format: :binary, full_result: false]

    case :httpc.request(:get, request, http_opts, opts) do
      {:ok, {200, body}} -> {:ok, body}
      {:ok, {status, _body}} -> {:error, "response code #{status}"}
      {:error, _reason} = error -> error
    end
  end

  # Turns one log line into a struct. `Enum.zip/2` stops at the shorter list,
  # so a line without a separator yields a committer whose e-mail is nil.
  defp to_committer(line) do
    fields = String.split(line, @field_separator, parts: 2)
    struct(__MODULE__, Enum.zip([:name, :email], fields))
  end

  # :httpc expects the URL as a charlist, not as a binary.
  defp gravatar_url(email, format) do
    String.to_charlist("#{@base_url}#{email_hash(email)}.#{format}#{@url_params}")
  end

  # Lower-case hex MD5 digest of the trimmed, lower-cased e-mail address.
  defp email_hash(email) do
    normalized =
      email
      |> String.trim()
      |> String.downcase()

    Base.encode16(:crypto.hash(:md5, normalized), case: :lower)
  end

  # Runs `git log` inside `repo` and returns its raw output: one
  # "name<NUL>email" line per commit.
  defp from_repo(repo) do
    args = ["log", ~S{--pretty=format:%an%x00%ae}, "--encoding=UTF-8"]

    case System.cmd("git", args, cd: repo) do
      {committers, 0} ->
        committers

      {_, code} ->
        raise RuntimeError, "Getting commiters failed with code #{code}"
    end
  end
end
defmodule Download do
  @moduledoc """
  Command-line entry point: downloads the Gravatar of every committer of a git
  repository into an output directory, fetching several images concurrently.
  """

  require Logger

  # Upper bound on simultaneous downloads and the time each one may take.
  @max_concurrency 50
  @task_timeout 10_000

  @doc """
  Runs the download for the given command-line arguments.

  `args` must contain exactly two positional arguments: the path of the git
  repository and the output directory (created when missing). Every image is
  written to `<output_dir>/<committer name>.png`. Download and write failures
  are logged and do not stop the remaining downloads.

  Returns `:ok`. Raises when the arguments are invalid or the output
  directory cannot be created.
  """
  def run(args) do
    # :httpc (used by Committer.fetch_gravatar/2) lives in the :inets app.
    {:ok, _started} = Application.ensure_all_started(:inets)
    {repo, out} = parse_args(args)
    File.mkdir_p!(out)

    # The working directory is deliberately left alone: changing into `out`
    # first would make a relative `repo` path resolve against `out`.
    repo
    |> Committer.list()
    |> Task.async_stream(&fetch_and_save(&1, out),
      max_concurrency: @max_concurrency,
      timeout: @task_timeout
    )
    |> Stream.run()
  end

  # Downloads one committer's gravatar and stores it in `out`, logging the
  # outcome either way.
  defp fetch_and_save(%Committer{name: name} = committer, out) do
    case Committer.fetch_gravatar(committer, :png) do
      {:ok, image} ->
        save(image, name, out)

      {:error, reason} ->
        Logger.error "failed to download gravatar for #{name}, because: #{inspect reason}"
    end
  end

  # Writes the image without raising, so that one unwritable file does not
  # take down the other downloads that are still in flight.
  defp save(image, name, out) do
    case File.write(image_path(out, name), image) do
      :ok ->
        Logger.info "downloaded gravatar for #{name}"

      {:error, reason} ->
        Logger.error "failed to save gravatar for #{name}, because: #{inspect reason}"
    end
  end

  # Committer names come straight from the git history and may contain path
  # separators; those are replaced so the file always lands inside `out`.
  defp image_path(out, name) do
    file_name = String.replace("#{name}", ["/", "\\"], "_") <> ".png"
    Path.join(out, file_name)
  end

  # Extracts `{repo, out}` from the arguments, or prints the usage and raises.
  defp parse_args(args) do
    # `switches: []` keeps the permissive parsing of a bare `parse/1` call
    # while stating explicitly that no switches are defined.
    case OptionParser.parse(args, switches: []) do
      {_, [repo, out], _} ->
        {repo, out}

      _ ->
        IO.puts "Usage: download repository output_dir\n"
        raise "Wrong arguments given to `download`"
    end
  end
end
# Script entry point: hand the command-line arguments to the downloader.
argv = System.argv()
Download.run(argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment