Skip to content

Instantly share code, notes, and snippets.

@sasezaki
Created March 27, 2009 11:17
Show Gist options
  • Save sasezaki/86644 to your computer and use it in GitHub Desktop.
Save sasezaki/86644 to your computer and use it in GitHub Desktop.
<?php
//php scraperHanko.php -a 03 -m 50810100
require_once 'Zend/Loader.php';
Zend_Loader::registerautoload();
try {
    // Command-line options: --mastercode/-m and --areacode/-a, each taking a
    // string value. The array values are the per-option help texts.
    $getOpt = new Zend_Console_Getopt(array(
        'mastercode|m=s' => 'mastercode',
        'areacode|a=s'   => 'areacode',
    ));
    $getOpt->parse();

    // Request Hyou0.aspx first with a cookie jar attached to the client; the
    // same client is then handed to Diggin_Scraper for the Hyou2.aspx request
    // below. Presumably Hyou2.aspx relies on the session cookie set here.
    $client = new Zend_Http_Client(getRequestUrlToGetCookie($getOpt->areacode, $getOpt->mastercode));
    $client->setCookieJar();
    $response = $client->request();
    if ($response->isError()) {
        // Fail early instead of scraping a page without a valid session.
        throw new Exception('Cookie request failed with HTTP status ' . $response->getStatus());
    }
    Diggin_Scraper::setHttpClient($client);

    // Per-row scraper: maps the text of the 1st..7th <td> to a named field.
    $col = new Diggin_Scraper();
    $col->process('//td[1]', 'hour => TEXT')
        ->process('//td[2]', 'pollen => TEXT')
        ->process('//td[3]', 'wd => TEXT')
        ->process('//td[4]', 'ws => TEXT')
        ->process('//td[5]', 'temp => TEXT')
        ->process('//td[6]', 'prec => TEXT')
        ->process('//td[7]', 'prec_bool => TEXT');

    // Page scraper: applies $col to every <tr> under table#dgd1 except those
    // at positions 1 and 2 (presumably header rows) and collects the results
    // under "rows".
    $scraper = new Diggin_Scraper();
    $scraper->process('//table[@id="dgd1"]//tr[position()!=1 and position() !=2]', array('rows[]' => $col))
            ->scrape('http://kafun.taiki.go.jp/Hyou2.aspx');

    $rows = $scraper->rows;
    // today: every scraped row
    var_dump('today', $rows);
    // now: the last scraped row
    var_dump('now', end($rows));
} catch (Exception $e) {
    // Any failure (option parsing, validation, HTTP, scraping) is reported on
    // stderr with a non-zero exit status so a calling shell can detect it.
    fwrite(STDERR, $e . PHP_EOL);
    exit(1);
}
// Adapted (reused) from Services_Hanako.
/**
 * Build the Hyou0.aspx URL for the given area and master codes.
 *
 * The script requests this URL with a cookie jar attached before scraping
 * Hyou2.aspx.
 *
 * @param string $area_code   Area code; must be exactly 2 digits.
 * @param string $master_code Master code; must be exactly 8 digits.
 * @return string The request URL with MstCode and AreaCode query parameters.
 * @throws Exception If either code does not have the required format.
 */
function getRequestUrlToGetCookie($area_code, $master_code) {
    // The "D" modifier makes "$" match only at the very end of the subject.
    // Without it, a value with a trailing newline (e.g. "03\n") would pass
    // validation and end up inside the URL. The (string) cast keeps a missing
    // (null) option on the normal "invalid" path.
    if (!preg_match('#^\d{2}$#D', (string) $area_code)) {
        throw new Exception('Invalid area code : [' . $area_code . ']');
    }
    if (!preg_match('#^\d{8}$#D', (string) $master_code)) {
        throw new Exception('Invalid master code : [' . $master_code . ']');
    }

    return sprintf(
        'http://kafun.taiki.go.jp/Hyou0.aspx?MstCode=%s&AreaCode=%s',
        $master_code,
        $area_code
    );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment