Last active
June 27, 2016 12:55
-
-
Save eidge/02ea5672e9f5cbe63e5150b70eaa928c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Weather.NOAA.DirectoryListingHTMLParser do
  @moduledoc """
  Implements utilities to parse NOAA's cycle directory at
  http://nomads.ncep.noaa.gov/cgi-bin/filter_<model-name>.pl
  """

  @doc """
  Takes an HTML input and returns a list of available cycles.

  Args:
    - html - The HTML input to be parsed, string

  Returns a list of cycle maps (%{date: Timex.Date, cycle: integer}), one per
  matching anchor, in the order the anchors appear in `html`.

  Only anchors whose text is `gfs.` followed by an eight-digit date and at
  least one cycle digit are considered; any other anchor is ignored, so an
  input without such anchors yields `[]`.

  `Timex.parse!/2` is used for the date part, so a matching anchor whose
  digits cannot be parsed as a date still raises.
  """
  @spec cycles(String.t()) :: [%{date: term(), cycle: integer()}]
  def cycles(html) do
    # The regex itself separates the date (first eight digits) from the cycle
    # (remaining digits), so incomplete entries such as "gfs." or
    # "gfs.20160627" never reach the parsers below.
    ~r/<a[^<]*>gfs\.(\d{8})(\d+)<\/a>/
    |> Regex.scan(html, capture: :all_but_first)
    |> Enum.map(&parse_date_and_cycle/1)
  end

  # Builds a cycle map from the two captured groups of a single regex match.
  defp parse_date_and_cycle([date, cycle]) do
    %{date: parse_date(date), cycle: parse_cycle(cycle)}
  end

  # Converts the eight-digit date string into a date via Timex.
  defp parse_date(str) do
    str
    |> Timex.parse!("{YYYY}{0M}{D}")
    |> Timex.to_date()
  end

  # Converts the cycle digits (e.g. "06") into an integer.
  defp parse_cycle(str) do
    String.to_integer(str)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment