Skip to content

Instantly share code, notes, and snippets.

@vasi
Created January 5, 2015 21:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vasi/6e907535a04a565c3e93 to your computer and use it in GitHub Desktop.
Save vasi/6e907535a04a565c3e93 to your computer and use it in GitHub Desktop.
Generate a histogram of HTML elements used on a web page
<?php
/**
 * Recursive iterator over a list of DOM nodes that yields only element nodes.
 *
 * Nodes that expose no tagName property (text, comments, doctype, ...) are
 * filtered out by accept(). Wrapped in a RecursiveIteratorIterator it walks
 * every element of a document tree.
 */
class ElementIterator extends FilterIterator implements RecursiveIterator {
    /**
     * @param Traversable $nodelist Nodes to iterate, e.g. a DOMNodeList such
     *                              as DOMNode::$childNodes.
     */
    public function __construct($nodelist) {
        // Constructors have no return value, so just delegate to the parent.
        parent::__construct(new IteratorIterator($nodelist));
    }

    /**
     * Iterator over the element children of the current node.
     *
     * The attribute below silences the PHP 8.1+ return-type deprecation; on
     * older PHP versions the line is parsed as a plain comment.
     *
     * @return ElementIterator
     */
    #[\ReturnTypeWillChange]
    public function getChildren() {
        return new self($this->current()->childNodes);
    }

    /**
     * Whether the current node has any child nodes to descend into.
     *
     * A DOMNodeList object is never "empty" in the empty() sense, so the
     * node is asked directly instead of testing its childNodes property.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function hasChildren() {
        $node = $this->current();
        return $node !== null && $node->hasChildNodes();
    }

    /**
     * Accept only nodes that carry a tag name, i.e. elements.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function accept() {
        return isset($this->current()->tagName);
    }
}
/**
 * Build a histogram of the element tag names found in an HTML document.
 *
 * @param string $url Location handed to DOMDocument::loadHTMLFile().
 * @return array Map of tag name => number of occurrences, sorted by count
 *               in descending order.
 */
function count_tags($url) {
    $document = new DOMDocument();
    // Warnings raised while loading are silenced; the result of the load
    // is not inspected, so a failed load simply yields no elements.
    @$document->loadHTMLFile($url);

    // SELF_FIRST visits each element before its descendants.
    $walker = new RecursiveIteratorIterator(
        new ElementIterator($document->childNodes),
        RecursiveIteratorIterator::SELF_FIRST
    );

    // Tally tag names in order of first appearance.
    $tally = array();
    foreach ($walker as $element) {
        $name = $element->tagName;
        if (isset($tally[$name])) {
            $tally[$name]++;
        } else {
            $tally[$name] = 1;
        }
    }

    arsort($tally);
    return $tally;
}
// Command-line entry point: print a tag histogram for the page given as the
// first argument.
if (!isset($argv[1]) || $argv[1] === '') {
    // Without an argument there is nothing to load; explain usage instead of
    // passing an undefined value on to the loader.
    $script = isset($argv[0]) ? $argv[0] : 'script.php';
    fwrite(STDERR, "Usage: php {$script} <url-or-file>\n");
    exit(1);
}

$url = $argv[1];
$histogram = count_tags($url);

if (!$histogram) {
    // count_tags() returns an empty array when no elements were found,
    // which is also what a failed load produces.
    fwrite(STDERR, "No HTML elements found in {$url} (the document may have failed to load)\n");
    exit(1);
}

foreach ($histogram as $tag => $count) {
    printf("%-10s %5d\n", $tag, $count);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment