Skip to content

Instantly share code, notes, and snippets.

@nash-ye
Created May 2, 2016 21:10
Show Gist options
  • Save nash-ye/f2587c622a4dca6c7fb222498834131d to your computer and use it in GitHub Desktop.
Save nash-ye/f2587c622a4dca6c7fb222498834131d to your computer and use it in GitHub Desktop.
<?php
/**
 * Tallies how often each host appears in the URL cell (the first cell) of
 * every row of the element with id "history" in report-2015.html, then writes
 * the counts, ordered from most to least frequent, to top-hosts-2015.json as
 * a JSON object mapping host => count.
 *
 * Rows whose first cell is missing, empty, or does not contain a URL with a
 * host component are skipped.
 *
 * Throws RuntimeException when the report cannot be loaded, the history table
 * is absent, or the result cannot be encoded or written.
 */

$hosts = [];

// Real-world HTML is rarely well-formed; have libxml collect its parse
// complaints internally instead of emitting a PHP warning for each one, and
// restore the caller's setting afterwards.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$doc = new DOMDocument();
$loaded = $doc->loadHTMLFile("report-2015.html");
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

if ($loaded === false) {
    throw new RuntimeException('Unable to load report-2015.html');
}

$historyTable = $doc->getElementById('history');
if ($historyTable === null) {
    throw new RuntimeException('No element with id "history" found in report-2015.html');
}

foreach ($historyTable->getElementsByTagName('tr') as $historyTableRow) {
    // The URL lives in the row's first cell. Step over non-element children
    // (e.g. whitespace text nodes between <tr> and the cell) to find it.
    $urlCell = null;
    foreach ($historyTableRow->childNodes as $childNode) {
        if ($childNode->nodeType === XML_ELEMENT_NODE) {
            $urlCell = $childNode;
            break;
        }
    }

    // Rows without cells, or with an empty first cell, carry no URL.
    if ($urlCell === null || $urlCell->firstChild === null) {
        continue;
    }

    $url = trim($urlCell->firstChild->textContent);

    // parse_url() yields null when there is no host and false when the URL is
    // seriously malformed; neither can be counted.
    $urlHost = parse_url($url, PHP_URL_HOST);
    if (! is_string($urlHost) || $urlHost === '') {
        continue;
    }

    $hosts[$urlHost] = isset($hosts[$urlHost]) ? $hosts[$urlHost] + 1 : 1;
}

// Highest count first, keeping the host => count association.
arsort($hosts, SORT_NUMERIC);

// JSON_FORCE_OBJECT keeps the output an object even when no hosts were found
// (an empty PHP array would otherwise serialise as "[]").
$json = json_encode($hosts, JSON_FORCE_OBJECT);
if ($json === false) {
    throw new RuntimeException('Unable to encode host counts as JSON: ' . json_last_error_msg());
}

if (file_put_contents('top-hosts-2015.json', $json) === false) {
    throw new RuntimeException('Unable to write top-hosts-2015.json');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment