Skip to content

Instantly share code, notes, and snippets.

@tohokuaiki
Created October 23, 2019 08:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tohokuaiki/e95dc17fdc2b06e8095e65f06c037972 to your computer and use it in GitHub Desktop.
Save tohokuaiki/e95dc17fdc2b06e8095e65f06c037972 to your computer and use it in GitHub Desktop.
Tリーグの試合結果からデータを一覧で取得
<?php
/**
 * Scrape the T.League match listing pages and print one CSV line per match.
 *
 * Output columns, in order: date, time, sex, home, attendance, away, arena.
 * The attendance figure is read from each match's detail page, which is
 * reached through the link inside the listing's "result" cell.
 *
 * Pages are fetched through getPage(), defined later in this file.
 */
$urls=<<<EOF
https://tleague.jp/match/?season=2018&month=201810
https://tleague.jp/match/?season=2018&month=201811
https://tleague.jp/match/?season=2018&month=201812
https://tleague.jp/match/?season=2018&month=201901
https://tleague.jp/match/?season=2018&month=201902
https://tleague.jp/match/?season=2018&month=201903
https://tleague.jp/match/?season=2019&month=201910
https://tleague.jp/match/?season=2019&month=201911
https://tleague.jp/match/?season=2019&month=201912
EOF;

/**
 * Download a page and parse it into a DOMDocument.
 *
 * @param string $url Absolute URL to fetch.
 * @return DOMDocument|null Parsed document, or null when the page is empty
 *                          or cannot be parsed.
 */
function tleagueLoadDom($url)
{
    $html = getPage($url);
    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8),
    // so treat an empty download as "page unavailable".
    if (!is_string($html) || trim($html) === '') {
        return null;
    }

    // Collect libxml parse warnings internally instead of silencing with "@".
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument("1.0");
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $loaded ? $dom : null;
}

/**
 * Read the attendance figure from a match detail page.
 *
 * @param string $match_url Absolute URL of the match detail page.
 * @return int|null Attendance; 0 when the page has no attendance entry;
 *                  null when the page itself could not be loaded.
 */
function tleagueAttendance($match_url)
{
    $page = tleagueLoadDom($match_url);
    if ($page === null) {
        return null;
    }

    $xpage = new DOMXPath($page);
    // Scan every spec item rather than assuming a fixed position in the list.
    foreach ($xpage->query('//ul[@class="item-spec reset"]//li') as $item) {
        // Accept both the ASCII and the fullwidth colon after the label.
        if (preg_match('/入場者数\s*[:：]\s*([\d,]+)\s*人/u', $item->textContent, $m)) {
            return intval(str_replace(',', '', $m[1]));
        }
    }

    return 0;
}

// Listing cells that end up in the output, in output order.
$columns = ['date', 'time', 'sex', 'home', 'result', 'away', 'arena'];

foreach (explode("\n", $urls) as $url) {
    $url = trim($url);
    if ($url === '') {
        continue;
    }

    $dom = tleagueLoadDom($url);
    if ($dom === null) {
        error_log('Could not load match list: ' . $url);
        continue;
    }

    $xpath = new DOMXPath($dom);
    foreach ($xpath->query('//li/div[@class="field"]') as $k => $line) {
        // The first "field" row on each page is skipped (presumably the
        // table header -- confirm against the live markup).
        if ($k < 1) {
            continue;
        }

        $row = [];
        foreach ($columns as $column) {
            $cell = $xpath->query('div[@class="cell-' . $column . '"]', $line)->item(0);
            // A missing cell yields an empty field instead of a fatal error.
            $text = ($cell === null) ? '' : trim($cell->textContent);

            switch ($column) {
                case 'date':
                    // Keep only the leading 10 bytes of the date cell.
                    $row[$column] = substr($text, 0, 10);
                    break;

                case 'time':
                    // Drop any parenthesised annotation (ASCII or fullwidth
                    // parentheses) that follows the start time.
                    $stripped = preg_replace('@[(（].*?[)）]@u', '', $text);
                    $row[$column] = trim($stripped === null ? $text : $stripped);
                    break;

                case 'result':
                    // Default keeps the column present even when the match
                    // has no detail link or the detail page is unavailable.
                    $row[$column] = '';
                    if ($cell === null) {
                        break;
                    }
                    $link = $xpath->query('div/a', $cell)->item(0);
                    if ($link === null) {
                        break;
                    }
                    $href = $link->getAttribute('href');
                    if ($href === '') {
                        break;
                    }
                    // Relative links are resolved against the site origin.
                    $match_url = preg_match('@^https?://@i', $href)
                        ? $href
                        : 'https://tleague.jp' . $href;
                    $attendance = tleagueAttendance($match_url);
                    if ($attendance !== null) {
                        $row[$column] = $attendance;
                    }
                    break;

                default:
                    // sex, home, away, arena: plain cell text.
                    $row[$column] = $text;
                    break;
            }
        }

        echo implode(',', $row) . "\n";
    }
}
/**
 * Return the body of a URL, using an on-disk cache under ./cache.
 *
 * The cache file name is the MD5 hash of the URL. On a cache miss the page
 * is downloaded with wget after a one-second pause, and the result is stored
 * for later calls.
 *
 * @param string $url Absolute URL to fetch.
 * @return string Page body, or an empty string when the download produced
 *                no output.
 */
function getPage($url)
{
    $cache_dir = __DIR__.'/cache';
    $cache_file = $cache_dir.'/'.md5($url).'.html';

    if (is_file($cache_file)) {
        $cached = file_get_contents($cache_file);
        // An unreadable or empty cache file is treated as a miss so that an
        // earlier failed download does not stick forever.
        if ($cached !== false && $cached !== '') {
            return $cached;
        }
    }

    // Pause before each real request to avoid hammering the remote server.
    sleep(1);

    // system() prints the command's stdout; capture it through the output
    // buffer. The URL is shell-escaped so that characters such as quotes,
    // "$" or backticks cannot alter the command.
    ob_start();
    system('wget -O - '.escapeshellarg($url));
    $html = (string) ob_get_clean();

    // Cache only successful (non-empty) downloads.
    if ($html !== '') {
        if (!is_dir($cache_dir)) {
            mkdir($cache_dir, 0777, true);
        }
        file_put_contents($cache_file, $html);
    }

    return $html;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment