This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import colorama | |
import time | |
from PIL import Image, ImageSequence | |
from list2term import Lines | |
import argparse | |
import traceback | |
import logging | |
import sys | |
import os | |
from ascii_magic import AsciiArt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def work(depth_limit \\ 5) do | |
case Queue.pop() do | |
:empty -> | |
:ok | |
{:value, [url, depth]} -> | |
unless depth > depth_limit do | |
case request_page(url) do | |
# You can add more robust error handling here. Typically, if the error is an HTTP error, | |
# it means the URL is likely not crawlable and not worth retrying, but | |
# I usually break it out by status code and at least log the specific error. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@impl true | |
def init(_init_arg) do | |
schedule_write_to_disk(@write_interval) | |
schedule_work(@work_interval) | |
{:ok, :queue.new} | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def handle_info(:schedule_work, state) do | |
unless :queue.is_empty(state) do | |
for _ <- 0..System.schedulers_online() do | |
Task.Supervisor.async_nolink(CrawlerExample.WorkerSupervisor, fn -> | |
CrawlerExample.Worker.work(@depth_limit) | |
end) | |
end | |
end | |
schedule_work(@work_interval) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CrawlerExample.Worker do | |
alias CrawlerExample.Queue | |
def work(depth_limit \\ 5) do | |
case Queue.pop() do | |
:empty -> | |
:ok | |
{:value, [url, depth]} -> | |
unless depth > depth_limit do | |
case request_page(url) do |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defp request_page(url) do | |
case HTTPoison.get(url) do | |
{:error, res} -> | |
{:error, res.reason} | |
{:ok, res} -> | |
case res do | |
%HTTPoison.Response{status_code: 404} -> {:error, 404} | |
%HTTPoison.Response{status_code: 401} -> {:error, 401} | |
%HTTPoison.Response{status_code: 403} -> {:error, 403} | |
%HTTPoison.Response{status_code: 200, body: body} -> {:ok, body} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defp get_children_urls(body) do | |
Floki.find(body, "div#readme") | |
|> Floki.find("div.Box-body") | |
|> Floki.find("a") | |
|> Floki.attribute("href") | |
|> Enum.filter(fn url -> | |
%URI{host: host, path: path} = URI.parse(url) | |
case host do | |
nil -> false | |
_ -> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CrawlerExample.Worker do | |
alias CrawlerExample.Queue | |
def work(depth_limit \\ 5) do | |
case Queue.pop() do | |
:empty -> | |
:ok | |
{:value, [url, depth]} -> | |
unless depth > depth_limit do | |
case request_page(url) do |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CrawlerExample.Queue do | |
use GenServer | |
#client | |
def start_link(opts \\ []) do | |
GenServer.start_link(__MODULE__, nil, opts) | |
end | |
def load_from_file(file_path) do | |
GenServer.cast(__MODULE__, {:load_state_from_disk, file_path}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CrawlerExample.Queue do | |
use GenServer | |
#client | |
def start_link(opts \\ []) do | |
GenServer.start_link(__MODULE__, nil, opts) | |
end | |
#server | |
@impl true |
NewerOlder