Simple sport results parser in PHP using XPath. For more information visit http://martinsikora.com/parsing-html-pages-using-xpath
<?php
/**
 * Downloads the livescore.com England soccer page, walks the results table
 * with XPath and prints the matches grouped by league via print_r().
 *
 * Resulting structure:
 *   array(
 *     '<league>' => array(
 *       array(
 *         'status'     => finished|penalties|postponed|upcoming|pending|unknown,
 *         'begin'      => 'hh:mm'  (only for upcoming matches),
 *         'time'       => 'mm'     (only for pending matches),
 *         'team1'      => string,
 *         'team2'      => string,
 *         'team1score' => string,
 *         'team2score' => string,
 *         'date'       => 'YYYY-MM-DD' or null when no date row preceded it,
 *       ),
 *       ...
 *     ),
 *   )
 */

$curl = curl_init('http://www.livescore.com/soccer/england/');
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10');
$html = curl_exec($curl);
curl_close($curl);

// curl_exec() returns false on failure; an empty body cannot be parsed either.
if (!$html) {
    die("something's wrong!");
}

// Collect markup warnings inside libxml instead of silencing everything with "@".
$dom = new DOMDocument();
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$xpath = new DOMXPath($dom);

$scores = array();
// State carried from heading/date rows to the match rows that follow them.
// Initialised so a match row appearing first does not read undefined variables.
$league = '';
$date = null;

$tableRows = $xpath->query('//table[1]//tr[4]//table//tr[1]/td[5]//table//tr');
foreach ($tableRows as $row) {
    // fetch all 'tds' inside this 'tr'
    $cells = $xpath->query('td', $row);

    if ($cells->length === 1 && $xpath->query('td/b', $row)->length === 1) {
        // League heading: the second text node holds the league name after a
        // 3-character prefix; cut the country name and leave just the league.
        $leagueNode = $xpath->query('td/text()', $row)->item(1);
        if ($leagueNode !== null) {
            $league = (string) substr($leagueNode->textContent, 3);
        } else {
            // Unexpected heading layout: fall back to the whole cell text.
            $league = trim($cells->item(0)->textContent);
        }
        // Do not wipe matches already collected if the same heading repeats.
        if (!isset($scores[$league])) {
            $scores[$league] = array();
        }
    } elseif ($cells->length === 2) {
        // Date row: "<MonthName> <day>" in the second cell, without a year.
        $dateText = trim($cells->item(1)->textContent);
        $spacePos = strpos($dateText, ' ');
        if ($spacePos === false) {
            $date = null;
            continue;
        }

        // date_parse() does not depend on today's date, unlike
        // strtotime('February'), which overflows into March on the 29th-31st.
        $parsedMonth = date_parse(substr($dateText, 0, $spacePos));
        $monthNumber = $parsedMonth['month'];
        $dayNumber = (int) preg_replace('/[^0-9]/', '', substr($dateText, $spacePos + 1));
        if ($monthNumber === false || $dayNumber < 1) {
            $date = null;
            continue;
        }

        // The page omits the year: pick whichever of last/this/next year puts
        // the date closest to today, so December results seen in January map
        // to last year while last month's results stay in the current year.
        $today = strtotime('today');
        $thisYear = (int) date('Y');
        $bestYear = $thisYear;
        $bestDistance = null;
        foreach (array($thisYear - 1, $thisYear, $thisYear + 1) as $candidateYear) {
            $distance = abs(mktime(0, 0, 0, $monthNumber, $dayNumber, $candidateYear) - $today);
            if ($bestDistance === null || $distance < $bestDistance) {
                $bestDistance = $distance;
                $bestYear = $candidateYear;
            }
        }
        $date = sprintf('%04d-%02d-%02d', $bestYear, $monthNumber, $dayNumber);
    } elseif ($cells->length === 4) {
        // Match row: status | team 1 | score | team 2
        $match = array();

        /**
         * first column contains match status. This can be:
         * FT - match finished
         * Pen. - match finished after penalties
         * Postp. - match postponed to another day
         * hh:mm - upcoming match
         * mm' - pending match
         */
        $status = preg_replace('/[^a-zA-Z0-9\'\.:]*/i', '', $cells->item(0)->textContent);
        if ($status === 'FT') {
            $match['status'] = 'finished';
        } elseif ($status === 'Pen.') {
            $match['status'] = 'penalties';
        } elseif ($status === 'Postp.') {
            $match['status'] = 'postponed';
        } elseif (preg_match('/[0-9]{2}:[0-9]{2}/', $status)) {
            $match['status'] = 'upcoming';
            $match['begin'] = $status;
        } elseif (strpos($status, "'") !== false) {
            $match['status'] = 'pending';
            $match['time'] = trim($status, "'");
        } else {
            $match['status'] = 'unknown';
        }

        // Pad so a score cell without a dash yields empty scores instead of
        // an undefined-offset notice.
        $scoreParts = array_pad(explode('-', $cells->item(2)->textContent), 2, '');

        $match['team1'] = trim($cells->item(1)->textContent);
        $match['team2'] = trim($cells->item(3)->textContent);
        $match['team1score'] = trim($scoreParts[0]);
        $match['team2score'] = trim($scoreParts[1]);
        $match['date'] = $date;

        $scores[$league][] = $match;
    }
}

print_r($scores);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment