Last active
August 23, 2021 00:44
-
-
Save nattaylor/005e65f675bb20b21a9a to your computer and use it in GitHub Desktop.
Area Forecast Discussion parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Parse the NWS Area Forecast Discussion (AFD)
 *
 * Adds casing and formatting to the AFD, which is published as an
 * all uppercase `<pre>` block with hard line breaks and no formatting.
 *
 * Usage: <?php $afd = new AreaForecastDiscussion("BOX"); echo $afd->generateHTML(); ?>
 */
class AreaForecastDiscussion {
    // One regular expression per extractable section, looked up by name as NEEDLE_<SECTION>.
    const NEEDLE_META = "/^[0-9]{3}?$\\n^.*?$\\n^.*?$\\n\\n/m";
    const NEEDLE_HEADERS = "/^AREA FORECAST DISCUSSION?$\\n^.*?$\\n^.*?$\\n\\n/m";
    const NEEDLE_SYNOPSIS = "/^\\.SYNOPSIS\\.\\.\\.$.*?^&&$/sm";
    const NEEDLE_NEARTERM = "/^\\.NEAR TERM.*?\\.\\.\\.$.*?^&&$/sm";
    const NEEDLE_SHORTTERM = "/^\\.SHORT TERM.*?\\.\\.\\.$.*?^&&$/sm";
    const NEEDLE_LONGTERM = "/^\\.LONG TERM.*?\\.\\.\\.$.*?^&&$/sm";
    const NEEDLE_AVIATION = "/^\\.AVIATION.*?\\.\\.\\.$.*?^&&$/sm";
    const NEEDLE_MARINE = "/^\\.MARINE.*?\\.\\.\\.$.*?^&&$/sm";
    const NEEDLE_BOX = "/^\\.BOX WATCHES.*?\\.\\.\\.$.*?^&&$/sm";
    const NEEDLE_AUTHORS = "/^\\$\\$.*/ms";
    const DAYS = "/Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday/i";
    const SECTIONS = "META,HEADERS,SYNOPSIS,NEARTERM,SHORTTERM,LONGTERM,AVIATION,MARINE,BOX,AUTHORS";
    const AFD_BASEURL = 'http://forecast.weather.gov/product.php?product=AFD&format=TXT&version=1&site=%1$s&issuedby=%1$s';

    /** @var array Content sections keyed by lowercase section name; each has "heading" and "text". */
    public $sections = array();

    /** @var array Raw text of the META, HEADERS and AUTHORS sections keyed by lowercase name. */
    public $meta = array();

    /** @var string Plain-text AFD as extracted from the product page ("" when the fetch failed). */
    public $afd = "";

    /** @var string Office identifier the discussion was requested for. */
    public $location = "";

    /**
     * Fetch the discussion for an office and split it into sections.
     *
     * @param string $location     office identifier substituted into AFD_BASEURL
     * @param string $sectionsList comma separated section names (see SECTIONS)
     */
    public function __construct($location = "BOX", $sectionsList = self::SECTIONS) {
        $this->location = $location;
        $this->afd = $this->getAfd($location);
        $this->sections = $this->splitSections(explode(",", $sectionsList));
    }

    /**
     * Download the product page and return the text inside its `<pre>` block.
     *
     * Failures are reported through trigger_error() and yield an empty string
     * rather than reading an undefined match.
     *
     * @param string $location office identifier
     * @return string text between the first line of the `<pre>` block and `</pre>`
     */
    private function getAfd($location) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, sprintf(self::AFD_BASEURL, urlencode($location)));
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.48 Safari/537.36");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        $response = curl_exec($ch);

        if ($response === false) {
            trigger_error("Unable to fetch the AFD for \"$location\": " . curl_error($ch), E_USER_WARNING);
            return "";
        }
        if (preg_match("/(<pre.*<\\/pre>)/s", $response, $matches) !== 1) {
            trigger_error("No <pre> block found in the AFD response for \"$location\".", E_USER_WARNING);
            return "";
        }

        // Start at the first newline (presumably to skip the opening <pre ...> tag line)
        // and drop the 6 characters of the closing "</pre>".
        $start = strpos($matches[0], "\n");
        if ($start === false) {
            $start = 0;
        }
        return substr($matches[0], $start, -6);
    }

    /**
     * Resolve the regular expression for a section name.
     *
     * The watches/warnings section ("BOX") is headed by the office identifier,
     * so its pattern is rewritten to use the requested location. This assumes
     * the NWS heading convention ".XXX WATCHES/WARNINGS/ADVISORIES...".
     *
     * @param string $section upper-case section name, e.g. "SYNOPSIS"
     * @return string|null the pattern, or null when no NEEDLE_<section> constant exists
     */
    private function patternFor($section) {
        $constantName = __CLASS__ . "::NEEDLE_" . $section;
        if (!defined($constantName)) {
            return null;
        }
        $pattern = constant($constantName);
        if ($section === 'BOX' && $this->location !== "") {
            $office = preg_quote(strtoupper($this->location), "/");
            $pattern = str_replace("\\.BOX WATCHES", "\\." . $office . " WATCHES", $pattern);
        }
        return $pattern;
    }

    /**
     * Split the plaintext AFD into sections as follows
     * * Meta (stored raw in $this->meta):
     * .META
     * .HEADERS
     * .AUTHORS
     * * Sections (returned):
     * .SYNOPSIS
     * .NEAR TERM
     * .SHORT TERM
     * .LONG TERM
     * .AVIATION
     * .MARINE
     * .BOX WATCHES
     *
     * A section that is unknown or absent from the text is reported with
     * trigger_error() and skipped; the remaining sections are still processed.
     *
     * @param array $sectionsList list of sections to extract
     * @return array map of lowercase section name to array("heading" => ..., "text" => ...)
     */
    private function splitSections($sectionsList) {
        $sections = array();
        foreach ($sectionsList as $section) {
            $section = strtoupper(trim($section));
            $pattern = $this->patternFor($section);
            if ($pattern === null) {
                trigger_error("Unknown section \"$section\".", E_USER_WARNING);
                continue;
            }
            if (preg_match($pattern, $this->afd, $matches) !== 1) {
                trigger_error ( "Section \"$section\" not found." );
                continue;
            }

            $key = strtolower($section);
            if (in_array($section, array('META', 'HEADERS', 'AUTHORS'), true)) {
                $this->meta[$key] = $matches[0];
                continue;
            }

            // The first line is the heading; the body runs up to the trailing "\n&&" (3 characters).
            $lineEnd = strpos($matches[0], "\n");
            if ($lineEnd === false) {
                $lineEnd = strlen($matches[0]);
            }
            $sections[$key] = array(
                "heading" => substr($matches[0], 0, $lineEnd),
                "text" => strtolower((string) substr($matches[0], $lineEnd + 1, -3)),
            );
        }
        return $sections;
    }

    /**
     * Render every extracted content section as an `<h2>` heading followed by a `<p>` body.
     *
     * @return string HTML fragment
     */
    public function generateHTML() {
        $html = "";
        foreach($this->sections as $section) {
            $heading = $this->formatSectionHeading($section['heading']);
            $text = $this->formatSectionText($section['text']);
            $html .= "<h2>$heading</h2>\n";
            $html .= "<p>$text</p>\n\n";
        }
        return $html;
    }

    /**
     * Turn a raw heading such as ".NEAR TERM /THROUGH TONIGHT/..." into title-cased HTML.
     *
     * @param string $heading raw heading line
     * @return string e.g. "Near Term <br />Through Tonight"
     */
    private function formatSectionHeading($heading) {
        //Remove dots
        $heading = preg_replace("/^\\./", "", preg_replace("/\\.\\.\\.$/", "", $heading));

        // Slashes in the watches/warnings heading are separators, not a time range.
        $isWatches = stripos($heading, "WATCHES") !== false;

        // Title-case before inserting markup, treating "/" as a word boundary so the
        // word following a slash is capitalised too.
        $heading = ucwords(strtolower($heading), " \t\r\n\f\v/");

        //Handle slashes e.g. TITLE /TIME THROUGH TIME/
        if (!$isWatches && substr_count($heading, "/") === 2) {
            list($title, $range) = explode("/", $heading, 2);
            // Drop the closing slash first so the "/" inside "<br />" is never touched.
            $heading = $title . "<br />" . str_replace("/", "", $range);
        }
        return $heading;
    }

    /**
     * Restore sentence casing and paragraph breaks in lower-cased section text.
     *
     * @param string $text lower-cased section body with hard line breaks
     * @return string text with capitalised sentence starts and day names, and `<br />` breaks
     */
    private function formatSectionText($text) {
        $upper = function($matches) {return strtoupper($matches[0]);};

        //Casing
        $text = ucfirst($text);
        // First character of the text and of each paragraph.
        $text = preg_replace_callback("/^\\D|\\n\\n\\D/", $upper, $text);
        $text = preg_replace_callback("/^\\n\\D/", $upper, $text);
        // Letter following a period, including across spaces and hard line breaks.
        $text = preg_replace_callback("/\\.\\s*[a-z]/", $upper, $text);
        $text = preg_replace_callback(self::DAYS, function($matches) {return ucfirst($matches[0]);}, $text);

        //Breaks
        $text = preg_replace( "/\\n\\n/", "<br /><br />", $text);
        $text = preg_replace( "/\\.\\.\\.$/m", "...<br /><br />", $text);
        $text = preg_replace( "/confidence\\./", "confidence.<br /><br />", $text);
        return $text;
    }
}
// Fetch and parse the discussion, then derive the source link from the same
// URL template and office the class used, so the two cannot drift apart.
$afd = new AreaForecastDiscussion("BOX");
$sourceUrl = sprintf(AreaForecastDiscussion::AFD_BASEURL, urlencode($afd->location));
?>
<!DOCTYPE html>
<html>
<head>
<title>AFD</title>
<style>
body {
    margin: 0 auto;
    max-width: 30em;
    font-size: 1.2rem;
    line-height: 1.8rem;
}
</style>
</head>
<body>
<h1>Area Forecast Discussion</h1>
<p><a href="<?php echo htmlspecialchars($sourceUrl, ENT_QUOTES, 'UTF-8'); ?>">From the NWS</a>.</p>
<?php echo $afd->generateHTML(); ?>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment