Skip to content

Instantly share code, notes, and snippets.

@iskeld
Created September 24, 2017 15:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iskeld/c8195ff5b813f50b3f44039485b763ef to your computer and use it in GitHub Desktop.
Save iskeld/c8195ff5b813f50b3f44039485b763ef to your computer and use it in GitHub Desktop.
Gildan shirt image fetcher in Elixir
defmodule ShirtFetcher do
  @moduledoc """
  Downloads product images linked from a shirt listing page.

  `fetch/1` reads a listing page, follows every `div.itemWrap > a` link to a
  product page, extracts the image link and the color label from each product
  page, and stores every image as `<color>.jpg` inside the `woman_shirts`
  directory (relative to the current working directory).
  """

  import Meeseeks.CSS

  @output_dir "woman_shirts"

  # Per-task budget for the concurrent product-page fetches. The default of
  # `Task.async_stream/3` is 5 seconds, and a task that exceeds it makes the
  # calling process exit, so one slow page would abort the whole run.
  @page_timeout 30_000

  @doc """
  Fetches the listing page at `url` and saves the image of every linked product.

  Product pages are fetched concurrently; the images themselves are downloaded
  and written one after another.

  Returns a list with one `File.write/2` result (`:ok` or `{:error, reason}`)
  per saved shirt.

  Raises if an HTTP request fails (`HTTPoison.get!/1`) or if the output
  directory cannot be created (`File.mkdir_p!/1`). A product page on which the
  image or color selector matches nothing also brings the call down.
  """
  @spec fetch(String.t()) :: [:ok | {:error, File.posix()}]
  def fetch(url) do
    root = URI.parse(url)
    listing = HTTPoison.get!(url)

    # Anchors without an `href` are skipped by the filter clause; the remaining
    # links are resolved against the listing URL so relative links work too.
    product_urls =
      for anchor <- Meeseeks.all(listing.body, css("div.itemWrap > a")),
          href = Meeseeks.Result.attr(anchor, "href") do
        root |> URI.merge(href) |> to_string()
      end

    shirts =
      for {:ok, shirt} <-
            Task.async_stream(product_urls, &fetch_color_and_image/1, timeout: @page_timeout),
          do: shirt

    # Idempotent: succeeds when the directory already exists and raises on a
    # real failure instead of letting every later write fail with :enoent.
    File.mkdir_p!(@output_dir)

    for shirt <- shirts, do: save_shirt(shirt)
  end

  # Downloads the image behind `url` and writes it to "<output dir>/<color>.jpg".
  # Returns the result of `File.write/2`.
  defp save_shirt(%{url: url, color: color}) do
    image = HTTPoison.get!(url)
    path = Path.join(@output_dir, "#{safe_file_name(color)}.jpg")
    File.write(path, image.body)
  end

  # Replaces path separators so that a color such as "Black/White" stays a
  # single file name instead of pointing into a non-existent subdirectory.
  defp safe_file_name(name), do: String.replace(name, ["/", "\\"], "-")

  # Fetches one product page and returns a map with the image link (`:url`)
  # and the color label (`:color`) found on it.
  defp fetch_color_and_image(url) do
    page = HTTPoison.get!(url)
    document = Meeseeks.parse(page.body)
    %{url: fetch_image(document), color: fetch_color(document)}
  end

  # Returns the `href` of the first anchor whose `rel` is "prettyPhoto[pp_gal_col]".
  defp fetch_image(document) do
    document
    |> Meeseeks.one(css("a[rel=\"prettyPhoto[pp_gal_col]\"]"))
    |> Meeseeks.Result.attr("href")
  end

  # Returns the text of the first `p.product-detail-info-item` element without
  # its "Kolor:" label and without surrounding whitespace.
  defp fetch_color(document) do
    document
    |> Meeseeks.one(css("p.product-detail-info-item"))
    |> Meeseeks.Result.text()
    # Trim first so leading whitespace cannot hide the "Kolor:" prefix.
    |> String.trim()
    |> String.trim_leading("Kolor:")
    |> String.trim()
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment