@iantbutler01
Last active February 5, 2020 06:53
defmodule CrawlerExample.Worker do
  alias CrawlerExample.Queue

  def work(depth_limit \\ 5) do
    case Queue.pop() do
      :empty ->
        :ok

      {:value, [url, depth]} ->
        unless depth > depth_limit do
          case request_page(url) do
            # You can add more robust error handling here. Typically, if the
            # error is an HTTP error, the URL is likely not crawlable and not
            # worth retrying, but I usually break it out by status code and at
            # least log the specific error.
            {:error, _} ->
              :ok

            {:ok, body} ->
              url
              |> get_children_urls(body)
              |> Enum.each(fn c_url ->
                # Enqueue each child URL one level deeper; the element order
                # matches the [url, depth] pattern Queue.pop/0 is matched
                # against above.
                Queue.push([c_url, depth + 1])
              end)

              :ok
          end
        else
          :ok
        end
    end
  end
end
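
The worker calls request_page/1 and get_children_urls/2, which the gist leaves undefined. Here is a minimal sketch of what they might look like, assuming HTTPoison as the HTTP client and Floki for HTML parsing; both libraries and the {:error, {:http, code}} shape are assumptions, not part of the original gist:

defmodule CrawlerExample.Worker do
  # ... work/1 as above ...

  # Fetch a page body, normalizing HTTPoison's result into the
  # {:ok, body} | {:error, reason} shape that work/1 matches on.
  defp request_page(url) do
    case HTTPoison.get(url) do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} -> {:ok, body}
      {:ok, %HTTPoison.Response{status_code: code}} -> {:error, {:http, code}}
      {:error, %HTTPoison.Error{reason: reason}} -> {:error, reason}
    end
  end

  # Extract anchor hrefs and resolve them against the current URL so
  # relative links become absolute, crawlable URLs.
  defp get_children_urls(url, body) do
    body
    |> Floki.parse_document!()
    |> Floki.find("a")
    |> Floki.attribute("href")
    |> Enum.map(fn href -> url |> URI.merge(href) |> URI.to_string() end)
  end
end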
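
CrawlerExample.Queue also isn't shown. Any FIFO exposing push/1 and a pop/0 that returns {:value, item} | :empty will fit the worker; one minimal sketch, assuming an Agent wrapping Erlang's :queue (again an assumption about the original's implementation):

defmodule CrawlerExample.Queue do
  use Agent

  # Start a named Agent holding an empty Erlang queue.
  def start_link(_opts \\ []) do
    Agent.start_link(fn -> :queue.new() end, name: __MODULE__)
  end

  # Append an item to the back of the queue.
  def push(item) do
    Agent.update(__MODULE__, &:queue.in(item, &1))
  end

  # Remove the front item, returning {:value, item} or :empty —
  # exactly the shapes Worker.work/1 matches on.
  def pop do
    Agent.get_and_update(__MODULE__, fn q ->
      case :queue.out(q) do
        {{:value, item}, rest} -> {{:value, item}, rest}
        {:empty, rest} -> {:empty, rest}
      end
    end)
  end
end

With these pieces in place you would start the queue, seed it with the entry URL at depth 0 (e.g. Queue.push(["https://example.com", 0])), and then call CrawlerExample.Worker.work/1 repeatedly, potentially from several processes, since each call drains at most one queued URL and the Agent serializes queue access.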