Skip to content

Instantly share code, notes, and snippets.

@okonomi
Created May 11, 2009 01:48
Show Gist options
  • Save okonomi/109834 to your computer and use it in GitHub Desktop.
Save okonomi/109834 to your computer and use it in GitHub Desktop.
Diggin_Scraperでドコモの機種情報をスクレイピング
<?php
require_once 'Diggin/Scraper.php';
/**
 * Extract the device name from a cell value.
 *
 * The name is the first run of characters that contains no opening
 * parenthesis, so a trailing annotation such as "(...)" or "（...）" is dropped.
 * Both the ASCII and the full-width opening parenthesis end the name.
 *
 * @param string $value Cell text to parse.
 * @return string|null The device name, or null when no such run exists
 *                     (for example an empty string).
 */
function getDevice($value)
{
    // The parentheses sit inside a character class so they are matched as
    // literal characters rather than acting as regex grouping operators.
    if (!preg_match('/[^（(]+/u', $value, $match)) {
        return null;
    }

    return $match[0];
}
/**
 * Parse every "WIDTH×HEIGHT" pair found in a cell value.
 *
 * A pair may be directly preceded by a label wrapped in parentheses
 * (ASCII or full-width). Labelled pairs are keyed by that label; unlabelled
 * pairs are keyed by their zero-based position among all pairs found.
 *
 * @param string $value Cell text to parse.
 * @return array Map of label|index => array('width' => string, 'height' => string).
 */
function getFont($value)
{
    // Group 1: optional label, group 2: width, group 3: height.
    // The delimiters are written as character classes so they match literal
    // parentheses instead of opening extra capture groups.
    $pattern = '/(?:[（(]([^）)]*)[）)])?(\d+)×(\d+)/u';

    $fonts = array();
    if (!preg_match_all($pattern, $value, $match)) {
        return $fonts;
    }

    foreach (array_keys($match[0]) as $i) {
        // An unmatched optional group is reported as an empty string.
        $key = $match[1][$i] !== '' ? $match[1][$i] : $i;
        $fonts[$key] = array(
            'width'  => $match[2][$i],
            'height' => $match[3][$i],
        );
    }

    return $fonts;
}
/**
 * Parse every number found in a cell value, keyed by an optional label.
 *
 * A number may be preceded by a label wrapped in parentheses (ASCII or
 * full-width), optionally followed by one newline. Labelled numbers are keyed
 * by the label; unlabelled numbers are keyed by their zero-based position
 * among all numbers found.
 *
 * @param mixed $value Cell value; it is cast to string before matching.
 * @return array Map of label|index => numeric string.
 */
function getCharactor($value)
{
    $value = (string) $value;

    // Group 1: optional label, group 2: the digits.
    // The delimiters are character classes so they match literal parentheses
    // and do not shift the capture-group numbering.
    $pattern = '/(?:[（(]([^）)\n]*)[）)]\n?)?(\d+)/u';

    $characters = array();
    if (!preg_match_all($pattern, $value, $match)) {
        return $characters;
    }

    foreach (array_keys($match[0]) as $i) {
        // An unmatched optional group is reported as an empty string.
        $key = $match[1][$i] !== '' ? $match[1][$i] : $i;
        $characters[$key] = $match[2][$i];
    }

    return $characters;
}
/**
 * Parse every width/height pair found in a cell value.
 *
 * A pair is two numbers separated by at least one non-digit character. It may
 * be directly followed by a digit-free label wrapped in parentheses (ASCII or
 * full-width). Labelled pairs are keyed by the label; unlabelled pairs are
 * keyed by their zero-based position among all pairs found.
 *
 * @param string $value Cell text to parse.
 * @return array Map of label|index => array('width' => string, 'height' => string).
 */
function getBrowser($value)
{
    // Group 1: width, group 2: height, group 3: optional trailing label.
    // The delimiters are character classes so the label is captured without
    // its surrounding parentheses.
    $pattern = '/(\d+)[^\d]+(\d+)(?:[（(]([^\d）)]*)[）)])?/u';

    $screens = array();
    if (!preg_match_all($pattern, $value, $match)) {
        return $screens;
    }

    foreach (array_keys($match[0]) as $i) {
        // In the default pattern-order result an unmatched trailing group is
        // padded with an empty string.
        $key = $match[3][$i] !== '' ? $match[3][$i] : $i;
        $screens[$key] = array(
            'width'  => $match[1][$i],
            'height' => $match[2][$i],
        );
    }

    return $screens;
}
/**
 * Parse every "WIDTH×HEIGHT" pair found in a cell value.
 *
 * A pair may be directly preceded by a digit-free label, which may itself be
 * wrapped in parentheses (ASCII or full-width; each parenthesis is optional).
 * Labelled pairs are keyed by the label; unlabelled pairs are keyed by their
 * zero-based position among all pairs found.
 *
 * @param string $value Cell text to parse.
 * @return array Map of label|index => array('width' => string, 'height' => string).
 */
function getDisplay($value)
{
    // Group 1: label (possibly empty), group 2: width, group 3: height.
    // Writing the optional delimiters as character classes avoids the "(?("
    // sequence, which PCRE parses as the start of a conditional subpattern.
    $pattern = '/[（(]?([^\d）)]*)[）)]?(\d+)×(\d+)/u';

    $screens = array();
    if (!preg_match_all($pattern, $value, $match)) {
        return $screens;
    }

    foreach (array_keys($match[0]) as $i) {
        $key = $match[1][$i] !== '' ? $match[1][$i] : $i;
        $screens[$key] = array(
            'width'  => $match[2][$i],
            'height' => $match[3][$i],
        );
    }

    return $screens;
}
/**
 * Parse the colour type and the number that follows it from a cell value.
 *
 * Only the first occurrence of "白黒" or "カラー" immediately followed by
 * digits is used.
 *
 * @param string $value Cell text to parse.
 * @return array array('type' => string|null, 'depth' => string|null); both
 *               entries are null when the value contains no such occurrence.
 */
function getColor($value)
{
    $color = array(
        'type'  => null,
        'depth' => null,
    );

    // preg_match stops at the first hit, which is the only one that is read;
    // guarding on its result avoids indexing into an empty match array.
    if (preg_match('/(白黒|カラー)(\d+)/iu', $value, $match)) {
        $color['type']  = $match[1];
        $color['depth'] = $match[2];
    }

    return $color;
}
// Scrape the page at $url and dump the collected results.
// Any Diggin_Scraper_Exception raised while building or running the scrapers
// ends the script with the exception message.
try{
$url = 'http://www.nttdocomo.co.jp/service/imode/make/content/spec/screen_area/index.html';
// Row-level scraper: each XPath selects a <span class="txt"> inside one of the
// last six <td> cells, counted back from the final cell (last()-0).
// NOTE(review): each rule string presumably reads
// 'result key => "value type", callback function' — confirm against the
// Diggin_Scraper documentation.
$profile = new Diggin_Scraper();
$profile->process('/td[last()-5]/span[@class="txt"]', 'device => "TEXT", getDevice')
->process('/td[last()-4]/span[@class="txt"]', 'font => "TEXT", getFont')
->process('/td[last()-3]/span[@class="txt"]', 'charactor => "RAW", getCharactor')
->process('/td[last()-2]/span[@class="txt"]', 'browser => "TEXT", getBrowser')
->process('/td[last()-1]/span[@class="txt"]', 'display => "TEXT", getDisplay')
->process('/td[last()-0]/span[@class="txt"]', 'color => "TEXT", getColor');
// Page-level scraper: hands $profile every <tr class="acenter"> found under a
// <table>. NOTE(review): the 'profile[]' key presumably collects one result
// per matching row — confirm.
$scraper = new Diggin_Scraper();
$scraper->process('//table/tr[@class="acenter"]', array('profile[]' => $profile))
->scrape($url);
} catch (Diggin_Scraper_Exception $e) {
// Print the failure reason and stop; nothing below runs in that case.
die($e->getMessage());
}
// Zend_Debug is loaded only here, just before the results are dumped.
require_once 'Zend/Debug.php';
Zend_Debug::dump($scraper->getResults());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment