Skip to content

Instantly share code, notes, and snippets.

@michalmuskala
Last active April 1, 2021 11:25
Show Gist options
  • Save michalmuskala/1a3729074f31d9957f51 to your computer and use it in GitHub Desktop.
Save michalmuskala/1a3729074f31d9957f51 to your computer and use it in GitHub Desktop.
Parallel downloads in Elixir. Code for the post http://michal.muskala.eu/2015/08/06/parallel-downloads-in-elixir.html
#!/usr/bin/env elixir
defmodule Committer do
  @moduledoc """
  A commit author of a git repository, identified by `name` and `email`.

  Provides helpers to list the unique committers of a repository and to
  download the Gravatar image that belongs to a committer's e-mail address.
  """

  defstruct [:name, :email]

  @type t :: %__MODULE__{name: String.t() | nil, email: String.t() | nil}

  # Pieces of the avatar URL: base, then the hex MD5 of the e-mail, the image
  # format extension and finally this fixed query string.
  @base_url "http://www.gravatar.com/avatar/"
  @url_params "?d=identicon&s=200"

  @doc """
  Returns a lazy stream of the committers of the git repository at `repo`.

  `git log` is executed immediately (with `repo` as its working directory);
  only the parsing of its output is lazy. Committers are de-duplicated by
  e-mail address, keeping the first occurrence.

  Raises `RuntimeError` when `git` exits with a non-zero status.
  """
  @spec list(Path.t()) :: Enumerable.t()
  def list(repo) do
    repo
    |> from_repo()
    |> parse()
  end

  @doc """
  Parses `git log` output formatted as one `name|email` entry per line.

  Returns a lazy stream of `%Committer{}` structs, unique by e-mail address.
  Empty lines are skipped. Each line is split on the first `|` only, so any
  further `|` characters end up in the e-mail field.
  """
  @spec parse(String.t()) :: Enumerable.t()
  def parse(log) when is_binary(log) do
    log
    |> Stream.unfold(&next_line/1)
    |> Stream.map(&String.split(&1, "|", parts: 2))
    |> Stream.map(&Enum.zip([:name, :email], &1))
    |> Stream.map(&struct(__MODULE__, &1))
    |> Stream.uniq_by(& &1.email)
  end

  @doc """
  Downloads the Gravatar image of `committer` in the given `format`.

  Returns `{:ok, body}` with the raw image binary on an HTTP 200 response,
  `{:error, "response code <status>"}` for any other HTTP status and the
  `{:error, reason}` tuple from `:httpc` when the request itself fails.

  The `:inets` application must be started before calling this function.
  """
  @spec fetch_gravatar(t(), atom() | String.t()) :: {:ok, binary()} | {:error, term()}
  def fetch_gravatar(%__MODULE__{email: email}, format \\ :png) do
    request = {gravatar_url(email, format), []}
    http_opts = [timeout: 5000]
    # `full_result: false` makes :httpc return just `{status_code, body}`.
    opts = [body_format: :binary, full_result: false]

    case :httpc.request(:get, request, http_opts, opts) do
      {:ok, {200, body}} -> {:ok, body}
      {:ok, {status, _body}} -> {:error, "response code #{status}"}
      {:error, _reason} = error -> error
    end
  end

  # Pops the next non-empty line off `rest` for `Stream.unfold/2`, returning
  # `{line, remaining_text}` or `nil` once no lines are left.
  defp next_line(rest) do
    case String.split(rest, "\n", parts: 2, trim: true) do
      [] -> nil
      [line] -> {line, ""}
      [line, remainder] -> {line, remainder}
    end
  end

  # Builds the avatar URL as a charlist, the string type :httpc works with.
  defp gravatar_url(email, format) do
    String.to_charlist("#{@base_url}#{email_hash(email)}.#{format}#{@url_params}")
  end

  # Lower-case hex MD5 digest of the trimmed, lower-cased e-mail address.
  defp email_hash(email) do
    email
    |> String.trim()
    |> String.downcase()
    |> hash()
    |> Base.encode16(case: :lower)
  end

  defp hash(data), do: :crypto.hash(:md5, data)

  # Runs `git log` inside `repo` and returns its raw output, one
  # `author name|author email` entry per commit.
  defp from_repo(repo) do
    args = ["log", ~S{--pretty=format:%an|%ae}, "--encoding=UTF-8"]

    case System.cmd("git", args, cd: repo) do
      {committers, 0} ->
        committers

      {_output, code} ->
        raise RuntimeError, "Getting commiters failed with code #{code}"
    end
  end
end
defmodule Download do
  @moduledoc """
  Command-line entry point that downloads the Gravatar of every committer of
  a git repository into an output directory.

  Usage: `download repository output_dir`
  """

  require Logger

  # Number of committers whose avatars are fetched concurrently.
  @batch_size 50
  # Milliseconds to wait for each download task before giving up.
  @await_timeout 10_000

  @doc """
  Runs the download for the given command-line `args`.

  `args` must contain exactly two positional arguments: the path of the git
  repository and the output directory (created when missing). Raises when the
  arguments are wrong, when `git` fails, when a file cannot be written or
  when a download task does not finish in time.
  """
  def run(args) do
    {:ok, _started} = Application.ensure_all_started(:inets)
    {repo, out} = parse_args(args)

    # Resolve the repository path before changing directory: `git` is run
    # inside the `File.cd!/2` callback, where a relative `repo` would
    # otherwise be interpreted relative to `out`.
    repo = Path.expand(repo)

    File.mkdir_p!(out)

    File.cd!(out, fn ->
      repo
      |> Committer.list()
      |> Stream.chunk_every(@batch_size)
      |> Stream.each(&fetch_and_save_batch/1)
      |> Stream.run()
    end)
  end

  # Downloads one batch concurrently: one task per committer, then waits for
  # all of them to finish.
  defp fetch_and_save_batch(committers) do
    committers
    |> Enum.map(&Task.async(fn -> fetch_and_save(&1) end))
    |> Enum.map(&Task.await(&1, @await_timeout))
  end

  # Fetches a single avatar and writes it to "<name>.png" in the current
  # directory, logging the outcome.
  defp fetch_and_save(%Committer{name: name} = committer) do
    case Committer.fetch_gravatar(committer, :png) do
      {:ok, image} ->
        File.write!("#{safe_filename(name)}.png", image)
        Logger.info("downloaded gravatar for #{name}")

      {:error, reason} ->
        Logger.error("failed to download gravatar for #{name}, because: #{inspect(reason)}")
    end
  end

  # Author names come from the repository history, so path separators are
  # replaced to keep every file inside the output directory.
  defp safe_filename(name) do
    name
    |> to_string()
    |> String.replace(["/", "\\"], "_")
  end

  # Extracts `{repo, out}` from the positional arguments, or prints the usage
  # line and raises.
  defp parse_args(args) do
    case OptionParser.parse(args) do
      {_switches, [repo, out], _invalid} ->
        {repo, out}

      _ ->
        IO.puts("Usage: download repository output_dir\n")
        raise "Wrong arguments given to `download`"
    end
  end
end
# Script entry point: pass the command-line arguments to the downloader.
argv = System.argv()
Download.run(argv)
@mgwidmann
Copy link

The file doesn't actually need to be called "downloader.exs". As long as it's executable and you have the first line (as you do), then you can run as just "downloader" (like the usage error message says).

@meadsteve
Copy link

Firstly, great blog post! I really enjoyed it.

One small change I might suggest is updating the unfold. I'd not used it before, and the use of `List.to_tuple` threw me for a moment. How about the following instead, to make the intent clearer for the post:

https://gist.github.com/meadsteve/d4b959877811b31dd7a5#file-downloader-exs-L9

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment