-
-
Save vasi/6e907535a04a565c3e93 to your computer and use it in GitHub Desktop.
Generate a histogram of HTML elements used on a web page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Recursive iterator over a list of DOM nodes that yields only element nodes.
 *
 * Non-element nodes (text, comments, doctype, ...) are filtered out, and each
 * yielded element can be descended into through getChildren(), which makes the
 * class usable with RecursiveIteratorIterator to walk a whole document tree.
 */
class ElementIterator extends FilterIterator implements RecursiveIterator {
    /**
     * @param Traversable $nodelist Nodes to iterate, e.g. a DOMNodeList taken
     *                              from DOMNode::$childNodes.
     */
    public function __construct(Traversable $nodelist) {
        // FilterIterator needs an Iterator; IteratorIterator adapts any
        // Traversable (such as DOMNodeList) to that interface.
        parent::__construct(new IteratorIterator($nodelist));
    }

    /**
     * Build an iterator over the child nodes of the current element.
     */
    public function getChildren(): RecursiveIterator {
        return new self($this->current()->childNodes);
    }

    /**
     * Tell whether the current element has any child nodes to descend into.
     *
     * Uses hasChildNodes() rather than empty() on the childNodes property:
     * that property holds a DOMNodeList object, and an object is never
     * considered empty, so the emptiness test could not detect leaf elements.
     */
    public function hasChildren(): bool {
        return $this->current()->hasChildNodes();
    }

    /**
     * Accept only element nodes; every other node type is skipped.
     */
    public function accept(): bool {
        return $this->current() instanceof DOMElement;
    }
}
/**
 * Count how often each element tag occurs in an HTML document.
 *
 * @param string $url Location passed to DOMDocument::loadHTMLFile().
 * @return array Map of tag name => number of occurrences, ordered by count
 *               from highest to lowest.
 */
function count_tags($url) {
    $document = new DOMDocument();
    // Diagnostics raised while loading are silenced by the @ operator.
    @$document->loadHTMLFile($url);

    // SELF_FIRST visits every element before descending into its children.
    $walker = new RecursiveIteratorIterator(
        new ElementIterator($document->childNodes),
        RecursiveIteratorIterator::SELF_FIRST
    );

    $histogram = [];
    foreach ($walker as $element) {
        $name = $element->tagName;
        if (isset($histogram[$name])) {
            $histogram[$name]++;
        } else {
            $histogram[$name] = 1;
        }
    }

    // Highest counts first; keys (tag names) stay attached to their counts.
    arsort($histogram);

    return $histogram;
}
// Command-line entry point: print one line per tag with its occurrence count.
// Without the argument check, a missing argument would surface as an
// undefined-index warning followed by a failure inside the HTML loader.
if (!isset($argv[1]) || $argv[1] === '') {
    $script = isset($argv[0]) ? $argv[0] : 'script.php';
    fwrite(STDERR, sprintf("Usage: php %s <url-or-file>\n", $script));
    exit(1);
}

$url = $argv[1];
$histogram = count_tags($url);
foreach ($histogram as $tag => $count) {
    // Tag name left-aligned in 10 columns, count right-aligned in 5.
    printf("%-10s %5d\n", $tag, $count);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment