Skip to content

Instantly share code, notes, and snippets.

@alexstone
Created June 8, 2014 17:27
Show Gist options
  • Save alexstone/26efb6290923e9052e03 to your computer and use it in GitHub Desktop.
Save alexstone/26efb6290923e9052e03 to your computer and use it in GitHub Desktop.
Parse through a Comichron page for comic book sales figures
<?php
/**
 * Download one Comichron monthly sales page and print a line per comic:
 * title, issue number, rank, estimated copies sold, cover price and the
 * estimated gross (copies x cover price).
 *
 * The sales listing is assumed to be the table with the most <tr> rows on
 * the page. Processing stops at the row whose second cell reads
 * "Trade Paperback Title", i.e. where the trade paperback listing begins.
 *
 * Intended to be run from a terminal (it clears the screen first). Errors
 * are written to stderr and the script exits with status 1.
 */
system("clear");

$url = "http://www.comichron.com/monthlycomicssales/2014/2014-04.html";

$data = file_get_contents($url);
if ($data === false || $data === "") {
    file_put_contents("php://stderr", "Unable to download $url\n");
    exit(1);
}

$dom = new DOMDocument;
// Must be set before loading to have any chance of taking effect.
$dom->preserveWhiteSpace = false;

// Real-world HTML is rarely valid. Let libxml collect its parse errors
// internally rather than muting every warning with the @ operator.
$previous_libxml_setting = libxml_use_internal_errors(true);
$loaded = $dom->loadHTML($data);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

if (!$loaded) {
    file_put_contents("php://stderr", "Unable to parse the HTML returned by $url\n");
    exit(1);
}

$tables = $dom->getElementsByTagName("table");

// Find the largest table to isolate the table we want to work with
$top_row_count = 0;
$data_rows = null;
foreach ($tables as $table) {
    $rows = $table->getElementsByTagName("tr");
    if ($rows->length > $top_row_count) {
        $top_row_count = $rows->length;
        $data_rows = $rows;
    }
}

if ($data_rows === null) {
    file_put_contents("php://stderr", "No table rows found at $url\n");
    exit(1);
}

foreach ($data_rows as $data_row) {
    $cols = $data_row->getElementsByTagName("td");

    // Collapse line breaks and runs of whitespace inside the title cell so
    // that words split across lines are neither glued together nor padded.
    $title_cell = $cols->item(1);
    $title = "";
    if ($title_cell !== null) {
        $title = trim(preg_replace("/\s+/", " ", $title_cell->nodeValue));
    }

    // Stop once we get to the trade paperbacks. Checked before the column
    // count guard so the sentinel row is honoured whatever its width.
    if (strtolower($title) === "trade paperback title") {
        break;
    }

    // Header, spacer and separator rows do not carry all six data cells.
    if ($cols->length < 6) {
        continue;
    }

    $rank = trim($cols->item(0)->nodeValue);
    $issue_no = trim($cols->item(2)->nodeValue);
    $cover_price = preg_replace("/[^0-9.]/", "", $cols->item(3)->nodeValue);
    $publisher = trim($cols->item(4)->nodeValue);
    // Keep digits only: drops thousands separators, whitespace and any
    // header text, leaving "" for rows without a sales figure.
    $est_sales = preg_replace("/[^0-9]/", "", $cols->item(5)->nodeValue);

    // Make sure we only work with validated rows
    $sales_count = (int) $est_sales;
    if ($sales_count <= 0) {
        continue;
    }

    // Convert explicitly: multiplying raw non-numeric strings is an error
    // on PHP 8. A missing or malformed price counts as 0.
    $price_value = is_numeric($cover_price) ? (float) $cover_price : 0.0;
    $gross = round($sales_count * $price_value, 2);

    echo "$title #$issue_no, ranked #$rank, selling $est_sales copies at $cover_price\t($" . $gross . ")\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment