Skip to content

Instantly share code, notes, and snippets.

@martinsik
Created October 2, 2012 19:45
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 11 You must be signed in to fork a gist
  • Save martinsik/3822834 to your computer and use it in GitHub Desktop.
Save martinsik/3822834 to your computer and use it in GitHub Desktop.
Simple sport results parser in PHP using XPath. For more information visit http://martinsikora.com/parsing-html-pages-using-xpath
<?php
// Download the LiveScore page for English soccer. Redirects are followed and a
// browser-like User-Agent header is sent with the request.
$curl = curl_init('http://www.livescore.com/soccer/england/');
if ($curl === false) {
    die("something's wrong!");
}
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
// Without explicit limits cURL can wait indefinitely on an unresponsive host.
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10');
$html = curl_exec($curl);
// The error text has to be read before the handle is closed.
$curlError = curl_error($curl);
curl_close($curl);
// curl_exec() yields false on failure; an empty body is treated as a failure too.
if (!$html) {
    die("something's wrong!" . ($curlError !== '' ? ' ' . $curlError : ''));
}
// Parse the downloaded markup. Parser warnings are collected by libxml and then
// discarded, instead of being silenced with the @ operator (which would also
// hide any unrelated error raised by the call).
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);
$xpath = new DOMXPath($dom);
// Result container: league name => list of matches.
$scores = array();
// Purely positional path into the page's nested tables; it depends on the exact
// markup of the page and selects the <tr> elements that are examined row by row.
$tableRows = $xpath->query('//table[1]//tr[4]//table//tr[1]/td[5]//table//tr');
/**
 * Turns the text of a date row (month name, a space, then the day) into a
 * "Y-m-d" string.
 *
 * The label carries no year. The current year is used unless the label's month
 * is more than six months away from the current month, in which case the
 * neighbouring year is chosen (e.g. a December row read in January belongs to
 * the previous year, a January row read in December to the next one).
 *
 * @param string   $label text content of the date cell
 * @param int|null $now   Unix timestamp treated as "today"; defaults to time()
 * @return string|null the resolved date, or null when the label cannot be parsed
 */
function resolveRowDate($label, $now = null)
{
    $label = trim($label);
    $spaceAt = strpos($label, ' ');
    if ($spaceAt === false) {
        return null;
    }

    // date_parse() only reads the month name. strtotime('February') would fill
    // in today's day-of-month and roll over into March on the 29th-31st.
    $parsed = date_parse(substr($label, 0, $spaceAt));
    $dayDigits = preg_replace('/[^0-9]/', '', substr($label, $spaceAt + 1));
    if (empty($parsed['month']) || $dayDigits === '') {
        return null;
    }

    $month = (int) $parsed['month'];
    $day = (int) $dayDigits;
    if ($day < 1 || $day > 31) {
        return null;
    }

    if ($now === null) {
        $now = time();
    }
    $year = (int) date('Y', $now);
    $monthDelta = $month - (int) date('n', $now);
    if ($monthDelta > 6) {
        $year--;
    } elseif ($monthDelta < -6) {
        $year++;
    }

    return sprintf('%04d-%02d-%02d', $year, $month, $day);
}

/**
 * Translates the first column of a match row into status fields.
 *
 * Recognised values of the cleaned-up cell text:
 *   FT     - match finished
 *   Pen.   - match finished after penalties
 *   Postp. - match postponed to another day
 *   hh:mm  - upcoming match (kick-off time is returned under 'begin')
 *   mm'    - match in progress (elapsed minutes are returned under 'time')
 *
 * @param string $rawStatus text content of the status cell
 * @return array 'status' plus, depending on the status, 'begin' or 'time'
 */
function describeMatchStatus($rawStatus)
{
    // Keep only letters, digits, apostrophes, dots and colons.
    $status = preg_replace('/[^a-zA-Z0-9\'.:]+/', '', $rawStatus);

    if ($status === 'FT') {
        return array('status' => 'finished');
    }
    if ($status === 'Pen.') {
        return array('status' => 'penalties');
    }
    if ($status === 'Postp.') {
        return array('status' => 'postponed');
    }
    if (preg_match('/[0-9]{2}:[0-9]{2}/', $status)) {
        return array('status' => 'upcoming', 'begin' => $status);
    }
    if (strpos($status, "'") !== false) {
        return array('status' => 'pending', 'time' => trim($status, "'"));
    }

    return array('status' => 'unknown');
}

// Context carried from heading/date rows to the match rows that follow them.
// Both start out defined so a match row appearing before any heading or date
// row does not read an unset variable.
$league = '';
$date = null;

foreach ($tableRows as $row) {
    // fetch all 'td' cells directly inside this 'tr'
    $td = $xpath->query('td', $row);

    if ($td->length === 1 && $xpath->query('td/b', $row)->length === 1) {
        // League heading: the league name is taken from the cell's second
        // direct text node with its first three characters dropped
        // (presumably the separator following the bold country name).
        $leagueText = $xpath->query('td/text()', $row)->item(1);
        $league = $leagueText === null ? '' : (string) substr($leagueText->textContent, 3);
        // Do not wipe matches already collected if the same heading shows up again.
        if (!isset($scores[$league])) {
            $scores[$league] = array();
        }
    } elseif ($td->length === 2) {
        // Date row: applies to every match row until the next date row.
        $date = resolveRowDate($td->item(1)->textContent);
    } elseif ($td->length === 4) {
        // Match row: status | home team | score | away team
        $match = describeMatchStatus($td->item(0)->textContent);
        $match['team1'] = $td->item(1)->textContent;
        // Pad so that a score cell without '-' still yields two values.
        list($score1, $score2) = array_pad(explode('-', $td->item(2)->textContent), 2, '');
        $match['team2'] = $td->item(3)->textContent;
        $match['team1score'] = trim($score1);
        $match['team2score'] = trim($score2);
        $match['date'] = $date;
        $scores[$league][] = $match;
    }
}

print_r($scores);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment