Skip to content

Instantly share code, notes, and snippets.

@rileydutton
Created June 13, 2011 22:51
Show Gist options
  • Save rileydutton/1023932 to your computer and use it in GitHub Desktop.
Save rileydutton/1023932 to your computer and use it in GitHub Desktop.
Website Scraper
<?php
// Scrapes a remote HTML table with phpQuery and prints the latest reading
// (elevation, percent full, release) plus the averages row (elevation, release).

// phpQuery is mandatory for everything below, so fail immediately if it is missing.
require_once 'phpQuery-onefile.php';

// Remote page that contains the data table.
$file = 'http://www.usbr.gov/lc/region/g4000/riverdata/gage-month-table.cfm?GAGE=3';

// Load the remote page as the current phpQuery document; pq() calls below query it,
// much like jQuery queries the current page in a browser.
phpQuery::newDocumentFileHTML($file);

// Defaults so the prints at the bottom never touch an undefined variable when the
// page layout changes and an expected cell is not found.
$elevation = '';
$pctfull = '';
$release = '';
$avgelevation = '';
$avgrelease = '';

// Work our way through the DOM tree: start two rows before the table's last-child row.
// NOTE(review): confirm that [0] on a phpQuery object selects the first matched
// element as intended here; eq(0) is the index accessor to compare against.
$lastTr = pq("table tr:last-child")->prev("tr")->prev("tr");
$testTd = $lastTr[0]->find("td");

// Is that row empty? If so, use the row before it.
if ($testTd[1]->text() == "") {
    $lastTr = $lastTr->prev('tr');
}

// Get the td's inside the chosen table row.
$infoTd = $lastTr[0]->find("td");

// The cells are picked out by iterating and checking the key, because (per the
// original author) accessing the indexes directly did not work.
foreach ($infoTd as $k => $td) {
    $content = trim(pq($td)->text());
    if ($k == 1) {
        $elevation = $content;
    } elseif ($k == 3) {
        $pctfull = $content;
    } elseif ($k == 4) {
        $release = $content;
    }
}

// The averages are read from the <th> cells of the row just before the last-child row.
$avgTr = pq("table tr:last-child")->prev("tr");
$avgTd = $avgTr[0]->find("th");
foreach ($avgTd as $k => $td) {
    $content = trim(pq($td)->text());
    if ($k == 1) {
        $avgelevation = $content;
    } elseif ($k == 4) {
        $avgrelease = $content;
    }
}

// Every label uses the same "Label: value" form and ends with a line break so the
// readings no longer run together in the output.
print("Elevation: " . $elevation . PHP_EOL);
print("Percent Full: " . $pctfull . PHP_EOL);
print("Release: " . $release . PHP_EOL);
print("Avg. Elevation: " . $avgelevation . PHP_EOL);
print("Avg. Release: " . $avgrelease . PHP_EOL);
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment