Skip to content

Instantly share code, notes, and snippets.

@1player
Created September 30, 2015 15:32
Show Gist options
  • Save 1player/d3bc6f35b0ac9d0c00ed to your computer and use it in GitHub Desktop.
Save 1player/d3bc6f35b0ac9d0c00ed to your computer and use it in GitHub Desktop.
defmodule Crawlerex do
  @moduledoc """
  Fetches an HTML listing of animals and prints the distinct animal names in it.

  Typical usage is two steps: `download/0` saves the remote page to a local
  file, then `run/0` parses that file and prints one name per line.
  """

  @url "http://lib.colostate.edu/wildlife/atoz.php?letter=ALL"
  @fname "/tmp/animals.html"

  @doc """
  Downloads the page at the configured URL and writes its body to the local cache file.

  Returns `:ok`. Raises if the HTTP request or the file write fails, since the
  bang variants of `HTTPoison.get!/1` and `File.write!/2` are used.
  """
  @spec download() :: :ok
  def download do
    %HTTPoison.Response{body: body} = HTTPoison.get!(@url)
    File.write!(@fname, body)
  end

  @doc """
  Reads the cached page, extracts the animal names and prints each distinct one.

  A name is kept when it is a single word, or when its first word ends with a
  comma (in which case only that first word, without the comma, is kept).
  All other names are dropped.

  Returns the list of `IO.puts/1` results (one `:ok` per printed name).
  Raises if the cache file cannot be read; run `download/0` first.
  """
  @spec run() :: [:ok]
  def run do
    @fname
    |> File.read!()
    |> parse_animals()
    |> Stream.map(&String.split/1)
    |> Stream.map(&filter_animal/1)
    # filter_animal/1 yields nil for names it rejects; drop those.
    |> Stream.filter(& &1)
    |> Stream.uniq()
    |> Enum.map(&IO.puts/1)
  end

  # Lazily yields the text of every other `td > a` link (indices 0, 2, 4, ...).
  # NOTE(review): presumably each animal row carries two links and only the
  # first holds the name -- confirm against the page markup.
  # The match in the last step is deliberately strict: a link with attributes
  # other than a single `href`, or with more than one child node, raises.
  defp parse_animals(body) do
    body
    |> Floki.find("td > a")
    |> Stream.with_index()
    |> Stream.filter(fn {_link, index} -> rem(index, 2) == 0 end)
    |> Stream.map(fn {{"a", [{"href", _}], [name]}, _index} -> name end)
  end

  # Reduces a whitespace-split name to the single word to print, or nil to skip it.
  #
  # String.split/1 returns [] for an empty or whitespace-only name; skip it
  # instead of crashing with a FunctionClauseError.
  defp filter_animal([]), do: nil

  # A one-word name is used as-is.
  defp filter_animal([name]), do: name

  # A multi-word name is kept only when its first word ends with a comma;
  # the trailing comma is stripped. Anything else falls through to nil.
  defp filter_animal([name | _rest]) do
    if String.ends_with?(name, ",") do
      String.trim_trailing(name, ",")
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment