Skip to content

Instantly share code, notes, and snippets.

@zstix
Last active August 31, 2020 23:22
Show Gist options
  • Save zstix/cb29c4ac16a7c2b5e3bb85c76a019294 to your computer and use it in GitHub Desktop.
Save zstix/cb29c4ac16a7c2b5e3bb85c76a019294 to your computer and use it in GitHub Desktop.
A quick script to request every page listed in a sitemap and check the HTTP status codes
defmodule Crawl do
  @moduledoc """
  Fetches a sitemap from a locally running copy of the site, rewrites every
  listed URL to point at that local server, requests each page concurrently
  and reports the HTTP status code received for each one.
  """

  # Host that appears in the sitemap's <loc> entries.
  @remote_host "https://developer.newrelic.com"
  # Host the pages are actually requested from. No scheme is included here;
  # NOTE(review): this presumably relies on the HTTP client defaulting the
  # scheme - confirm against the HTTPoison version in use.
  @local_host "localhost:8000"
  @sitemap_url "http://localhost:8000/sitemap.xml"

  @doc """
  Parses the body of a successful HTTP response into a map using
  `XmlToMap.naive_map/1`.

  Only `{:ok, response}` tuples are accepted; any other value (for example a
  failed request) raises `FunctionClauseError`, which aborts the run.
  """
  @spec process_resp({:ok, map()}) :: map()
  def process_resp({:ok, %{body: xml}}) do
    XmlToMap.naive_map(xml)
  end

  @doc """
  Extracts the `"loc"` value of every `"url"` entry under `"urlset"`.

  The `"url"` value is normalised with `List.wrap/1` so that a sitemap with a
  single entry (a bare map instead of a list) or with no entries at all
  (`nil`) is handled instead of crashing.
  """
  @spec get_urls(map()) :: [term()]
  def get_urls(xml) do
    xml
    |> Map.get("urlset")
    |> Map.get("url")
    # A naive XML-to-map conversion cannot know that <url> is a repeated tag,
    # so one entry is presumably returned as a map and zero entries as nil.
    |> List.wrap()
    |> Enum.map(&Map.get(&1, "loc"))
  end

  @doc """
  Rewrites a production URL so that it targets the local server and makes
  sure the result ends with exactly one trailing slash.
  """
  @spec update_url(String.t()) :: String.t()
  def update_url(url) do
    local = String.replace(url, @remote_host, @local_host)

    # Only append the slash when it is missing, so ".../page/" does not
    # become ".../page//".
    if String.ends_with?(local, "/"), do: local, else: local <> "/"
  end

  @doc """
  Requests `url` and returns `{status_code, url}` when a response is
  received, or `{:error, url}` for any other outcome of the request.
  """
  @spec get_page_status(String.t()) :: {integer() | :error, String.t()}
  def get_page_status(url) do
    case HTTPoison.get(url) do
      {:ok, %{status_code: code}} -> {code, url}
      _ -> {:error, url}
    end
  end

  @doc """
  Runs the check and returns a list of `{status_code | :error, url}` tuples.

  ## Options

    * `:limit` - maximum number of sitemap entries to check, or `:infinity`
      to check all of them. Defaults to `5`.
    * `:max_concurrency` - number of pages requested at the same time.
      Defaults to `5`.
  """
  @spec run(keyword()) :: [{integer() | :error, String.t()}]
  def run(opts \\ []) do
    limit = Keyword.get(opts, :limit, 5)
    max_concurrency = Keyword.get(opts, :max_concurrency, 5)

    @sitemap_url
    |> HTTPoison.get()
    |> process_resp()
    |> get_urls()
    |> take_limit(limit)
    |> Enum.map(&update_url/1)
    # The stream's default 5s task timeout would exit the whole run on one
    # slow page. Disable it and let the HTTP client's own timeouts bound each
    # task, so a slow page is reported as {:error, url} instead.
    |> Task.async_stream(&get_page_status/1,
      max_concurrency: max_concurrency,
      timeout: :infinity
    )
    |> Enum.map(fn {:ok, result} -> result end)
  end

  # Keeps at most `limit` entries; `:infinity` keeps them all.
  defp take_limit(urls, :infinity), do: urls

  defp take_limit(urls, limit) when is_integer(limit) and limit >= 0 do
    Enum.take(urls, limit)
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment