Skip to content

Instantly share code, notes, and snippets.

@johanlaidlaw
Created January 11, 2012 20:35
Show Gist options
  • Save johanlaidlaw/1596626 to your computer and use it in GitHub Desktop.
Save johanlaidlaw/1596626 to your computer and use it in GitHub Desktop.
Scraper for transfermarkt
<?php
include_once('../simple_html_dom.php');
/**
 * Scrape a transfermarkt player page and print the result to stdout.
 *
 * First prints the text of the second cell of every row in the second
 * ".tabelle_spieler" table, then a "Transfer History" section made of
 * columns 0, 1, 3 and 5 of every row in the second ".standard_tabelle" table.
 *
 * @param string $url Address of the page to scrape.
 * @return void
 * @throws RuntimeException When the page cannot be loaded or one of the
 *                          expected tables is not present in the document.
 */
function scraping($url) {
// Create HTML DOM. A falsy result means the page could not be loaded,
// so stop here instead of calling methods on a non-object.
$html = file_get_html($url);
if (!$html) {
throw new RuntimeException('Could not load HTML from ' . $url);
}

// There are two tables of each class on the page and we want the second one,
// therefore index 1 (indexes start at 0). find() with an index gives null
// when no such element exists.
$table = $html->find('.tabelle_spieler', 1);
$transfer_table = $html->find('.standard_tabelle', 1);
if (!$table || !$transfer_table) {
$missing = !$table ? '.tabelle_spieler' : '.standard_tabelle';
// Release the DOM before bailing out so the failure path does not leak it.
$html->clear();
unset($html);
throw new RuntimeException('Expected a second "' . $missing . '" table in ' . $url);
}

// Go through each <tr> in the player table and print its second <td>.
foreach ($table->find('tr') as $row) {
$cell = $row->find('td', 1);
// Rows with fewer than two <td> cells (e.g. header rows) still produce
// an empty line, but no longer read a property of null.
echo ($cell !== null ? $cell->plaintext : '') . "\n";
}

// Only these column positions of the transfer table are printed.
$index_to_show = array(0, 1, 3, 5);
echo "\n\nTransfer History";
foreach ($transfer_table->find('tr') as $tr) {
foreach ($tr->find('td') as $index => $td) {
// Strict comparison: both the keys and the wanted indexes are integers.
if (in_array($index, $index_to_show, true)) {
echo str_replace("&nbsp;", "", $td->plaintext) . " - ";
}
}
echo "\n";
}

// Clean up memory
$html->clear();
unset($html);
}
// Entry point: scrape the transfer page of a single player and print the result.
$player_transfers_url = 'http://www.transfermarkt.co.uk/en/marcus-hahnemann/transfers/spieler_4140.html';
scraping($player_transfers_url);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment