Created
October 23, 2019 08:36
-
-
Save tohokuaiki/e95dc17fdc2b06e8095e65f06c037972 to your computer and use it in GitHub Desktop.
Tリーグの試合結果からデータを一覧で取得
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Scrapes the T League match listing pages and prints one comma-separated
 * line per match to stdout:
 *
 *   date,time,sex,home,attendance,away,arena
 *
 * The "attendance" column is read from each match's detail page (the number
 * found in the "入場者数:…人" text). It is 0 when the match has no result link,
 * the detail page cannot be loaded, or the text is not found.
 *
 * Pages are fetched through getPage().
 */

/**
 * Parses an HTML string and returns a DOMXPath bound to the parsed document.
 *
 * @param mixed $html HTML source; anything that is not a non-blank string is rejected.
 * @return DOMXPath|null null when there is nothing to parse or parsing fails.
 */
function loadXPath($html)
{
    // DOMDocument::loadHTML() rejects an empty source, so bail out early.
    if (!is_string($html) || trim($html) === '') {
        return null;
    }

    // Collect libxml parse warnings internally instead of printing them.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument("1.0");
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $loaded ? new DOMXPath($dom) : null;
}

/**
 * Reads the attendance figure from a match detail page.
 *
 * @param string $match_url Absolute URL of the match detail page.
 * @return int Attendance, or 0 when it cannot be determined.
 */
function fetchAttendance($match_url)
{
    $xpage = loadXPath(getPage($match_url));
    if ($xpage === null) {
        return 0;
    }

    // The third <li> of the spec list is the one inspected for the attendance text.
    $item = $xpage->query('//ul[@class="item-spec reset"]//li')->item(2);
    if ($item === null) {
        return 0;
    }

    if (preg_match('/入場者数:([\d,]+)人/', $item->textContent, $m)) {
        // Drop thousands separators before converting.
        return intval(str_replace(',', '', $m[1]));
    }

    return 0;
}

// Monthly listing pages to scrape.
$urls = [
    'https://tleague.jp/match/?season=2018&month=201810',
    'https://tleague.jp/match/?season=2018&month=201811',
    'https://tleague.jp/match/?season=2018&month=201812',
    'https://tleague.jp/match/?season=2018&month=201901',
    'https://tleague.jp/match/?season=2018&month=201902',
    'https://tleague.jp/match/?season=2018&month=201903',
    'https://tleague.jp/match/?season=2019&month=201910',
    'https://tleague.jp/match/?season=2019&month=201911',
    'https://tleague.jp/match/?season=2019&month=201912',
];

// Cells emitted per match, in output order. The "media" and "ticket" cells
// were previously read but never printed, so they are no longer queried.
$props = ['date', 'time', 'sex', 'home', 'result', 'away', 'arena'];

foreach ($urls as $url) {
    $xpath = loadXPath(getPage(trim($url)));
    if ($xpath === null) {
        error_log('Could not load match list: ' . $url);
        continue;
    }

    foreach ($xpath->query('//li/div[@class="field"]') as $k => $line) {
        // The first matching element is skipped (presumably the header row - confirm).
        if ($k < 1) {
            continue;
        }

        $tmp = [];
        foreach ($props as $prop) {
            // A row may lack a cell; treat that as empty text instead of crashing.
            $cell = $xpath->query('div[@class="cell-'.$prop.'"]', $line)->item(0);
            $d = ($cell === null) ? '' : trim($cell->textContent);

            switch ($prop) {
                case 'date':
                    // Keep only the first 10 bytes of the cell text.
                    $tmp[$prop] = substr($d, 0, 10);
                    break;

                case 'time':
                    // NOTE(review): as written this pattern only ever matches the
                    // empty string, so the replacement leaves $d unchanged. It was
                    // presumably meant to strip a parenthesised part - confirm the
                    // intended pattern before changing it.
                    $tmp[$prop] = trim(preg_replace('@(.*?)@', '', $d));
                    break;

                case 'sex':
                case 'home':
                case 'away':
                case 'arena':
                    $tmp[$prop] = $d;
                    break;

                case 'result':
                    // Always fill this column so the CSV layout stays stable even
                    // when the match has no detail link or the page is unavailable.
                    $tmp[$prop] = 0;
                    if ($cell === null) {
                        break;
                    }
                    $link = $xpath->query('div/a', $cell)->item(0);
                    if ($link === null) {
                        break;
                    }
                    $href = $link->getAttribute('href');
                    if ($href === '') {
                        break;
                    }
                    $tmp[$prop] = fetchAttendance('https://tleague.jp' . $href);
                    break;
            }
        }

        echo implode(',', $tmp). "\n";
    }
}
/**
 * Returns the body of a URL, using an on-disk cache keyed by md5($url).
 *
 * On a cache miss the function sleeps one second, downloads the page with
 * `wget -O -`, and stores the body in the cache directory. Only successful,
 * non-empty downloads are cached, so a transient failure is retried on the
 * next call instead of being served from cache forever.
 *
 * @param string      $url       URL to fetch.
 * @param string|null $cache_dir Directory for cache files; defaults to
 *                               __DIR__ . '/cache'. Created if missing.
 * @return string Page body, or '' when the download failed.
 */
function getPage($url, $cache_dir = null)
{
    if ($cache_dir === null) {
        $cache_dir = __DIR__.'/cache';
    }
    $cache_file = rtrim($cache_dir, '/').'/'.md5($url).'.html';

    if (is_file($cache_file)) {
        $html = file_get_contents($cache_file);
        // An unreadable or empty cache file is treated as a miss.
        if ($html !== false && $html !== '') {
            return $html;
        }
    }

    // Wait one second before every real request.
    sleep(1);

    // system() passes the command's stdout through, so capture it with a buffer.
    // The URL may come from scraped markup; quote it so the shell cannot
    // interpret any part of it.
    $status = 1;
    ob_start();
    system('wget -O - '.escapeshellarg($url), $status);
    $html = ob_get_clean();
    if (!is_string($html)) {
        $html = '';
    }

    if ($status === 0 && $html !== '') {
        if (!is_dir($cache_dir)) {
            mkdir($cache_dir, 0777, true);
        }
        if (is_dir($cache_dir)) {
            file_put_contents($cache_file, $html);
        }
    }

    return $html;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment