Created
May 16, 2011 12:01
-
-
Save omundy/974324 to your computer and use it in GitHub Desktop.
Advanced scraping demo with "regex" parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* Advanced scraping demo with "regex" parsing. Retrieves current | |
* weather in any city and colors the background accordingly. | |
* The math below for normalization could use some work. | |
* Owen Mundy Copyright 2011 GNU/GPL */ | |
?> | |
<html>
<head>
<style>
body { margin:20px; font:1.0em/1.4em Arial, Helvetica, sans-serif; }
.text { font:10.0em/1.0em Arial, Helvetica, sans-serif; color:#000; font-weight:bold; }
.navlist { list-style:none; margin:0; position:absolute; top:20px; left:200px }
.navlist li { float:left; margin-right:10px; }
</style>
</head>
<body onLoad="document.f.q.focus();">
<?php
// Both values below come from the request and are echoed into HTML attributes,
// so they must be escaped: PHP_SELF can carry attacker-supplied path info
// (e.g. /page.php/"><script>...) and q is raw user input.
$form_action = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8');

// q may be missing (first visit) or an array (?q[]=x); fall back to an empty
// box in both cases instead of raising a notice or printing "Array".
$form_query = (isset($_GET['q']) && is_string($_GET['q']))
    ? htmlspecialchars($_GET['q'], ENT_QUOTES, 'UTF-8')
    : '';
?>
<!-- search form: submits the city back to this same script as ?q=... -->
<form method="GET" action="<?php print $form_action; ?>" name="f">
<input type="text" name="q" value="<?php print $form_query; ?>" />
<input type="submit" />
</form>
<!-- shortcut links for a few sample cities -->
<ul class="navlist">
<li><a href="?q=anchorage+alaska">anchorage</a></li>
<li><a href="?q=toronto+canada">toronto</a></li>
<li><a href="?q=new+york+ny">nyc</a></li>
<li><a href="?q=london+uk">london</a></li>
<li><a href="?q=houston+texas">houston</a></li>
<li><a href="?q=linz+austria">linz</a></li>
<li><a href="?q=rome+italy">rome</a></li>
<li><a href="?q=cairo+egypt">cairo</a></li>
<li><a href="?q=new+delhi+india">new delhi</a></li>
<li><a href="?q=mars">mars</a></li>
</ul>
<?php
/**
 * Finds the first temperature reading in a chunk of HTML.
 *
 * Celsius readings are preferred; Fahrenheit is only used when no Celsius
 * reading exists anywhere in the text. The degree sign is accepted as a
 * UTF-8 sequence, a single Latin-1 byte, or an HTML entity, because the
 * encoding of the fetched page is not known in advance.
 *
 * @param string $html Raw page markup to search.
 * @return array|null array('value' => float, 'scale' => 'C'|'F'), or null
 *                    when the text contains no temperature.
 */
function weather_extract_temperature($html)
{
    // "°" / "º" as UTF-8 (C2 B0 / C2 BA), as Latin-1 (B0 / BA), or as an entity.
    // The pattern runs in byte mode on purpose so that both encodings match.
    $degree = '(?:\xC2?[\xB0\xBA]|&deg;|&#176;|&ordm;|&#186;)';

    foreach (array('C', 'F') as $scale)
    {
        // The lookbehind stops us from matching the tail of a longer number
        // and from reading the dash in a range such as "60-72" as a minus sign.
        $pattern = '/(?<![0-9])(-?[0-9]{1,3}(?:\.[0-9]+)?)' . $degree . $scale . '/';
        if (preg_match($pattern, $html, $found) === 1)
        {
            return array('value' => (float) $found[1], 'scale' => $scale);
        }
    }

    return null;
}

/**
 * Maps a Fahrenheit temperature onto an index of a cool-to-warm palette.
 *
 * Temperatures at or below $min map to the first entry, temperatures at or
 * above $max map to the last one, and everything in between is spread
 * linearly, so the result is always a valid integer index.
 *
 * @param float $tempf Temperature in degrees Fahrenheit.
 * @param int   $steps Number of palette entries (must be at least 1).
 * @param float $min   Temperature mapped to the coolest color.
 * @param float $max   Temperature mapped to the warmest color.
 * @return int Index in the range 0 .. $steps - 1.
 */
function weather_color_index($tempf, $steps, $min = -20, $max = 120)
{
    $ratio = ($tempf - $min) / ($max - $min);
    $index = (int) round($ratio * ($steps - 1));

    return max(0, min($steps - 1, $index));
}

// make sure the form has been sent (and that q is a plain string, not q[]=...)
if (isset($_GET['q']) && is_string($_GET['q']))
{
    // urlencode() turns spaces into "+" like the old str_replace() did, and
    // also escapes "&", "#", "%" etc. so the city cannot alter the request.
    $url = 'http://www.google.com/search?q=' . urlencode('weather in ' . $_GET['q']);

    // get contents of url as a string
    $str = file_get_contents($url);
    if ($str !== false && $str !== '')
    {
        // use regular expressions to extract only what we need
        $reading = weather_extract_temperature($str);

        // test for "a reading was found", not for a truthy number:
        // 0 degrees is a perfectly valid temperature
        if ($reading !== null)
        {
            if ($reading['scale'] === 'C')
            {
                // convert ºC to ºF
                $tempc = $reading['value'];
                $tempf = ($tempc * 1.8) + 32;
            }
            else
            {
                // convert ºF to ºC
                $tempf = $reading['value'];
                $tempc = ($tempf - 32) / 1.8;
            }

            // cool -> warm
            // scale -20 to: 120
            $color_scale = array(
                '0, 0,255',
                '0,128,255',
                '0,255,255',
                '0,255,128',
                '0,255,0',
                '128,255,0',
                '255,255,0',
                '255,128,0',
                '255, 0,0'
            );

            // normalize the temperature to a valid palette index
            $color = weather_color_index($tempf, count($color_scale));
?>
<style> body { background:rgb(<?php print $color_scale[$color] ?>) }</style>
<div class="text"><?php print round($tempc,1) ?>&deg;C </div>
<?php print round($tempf,1) ?>&deg;F
<?php
        }
        else
        {
            print "city not found";
        }
    }
}
?>
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment