Skip to content

Instantly share code, notes, and snippets.

@NeilMasters
Created October 27, 2016 12:13
Show Gist options
  • Save NeilMasters/215b54a7aa97840e42551ac5a0773b60 to your computer and use it in GitHub Desktop.
Save NeilMasters/215b54a7aa97840e42551ac5a0773b60 to your computer and use it in GitHub Desktop.
<?php
/**
 * Stand-in for a parsed sitemap when a single URL is passed on the command
 * line. It exposes the same `->url` list that the main loop iterates over.
 */
class Xml
{
    /** @var Url[] Entries to audit; starts empty so appending never relies on null auto-vivification. */
    public $url = array();
}

/**
 * One sitemap entry; `loc` holds the page address.
 */
class Url
{
    /** @var string|null */
    public $loc;
}

if (isset($argv[1])) {
    // Single-URL mode: wrap the CLI argument so it looks like a one-entry sitemap.
    $url = new Url();
    $url->loc = $argv[1];

    $xml = new Xml();
    $xml->url[] = $url;
} else {
    // Sitemap mode: read sitemap.xml from the current working directory.
    $sitemap = "sitemap.xml";

    $sitemapContents = is_readable($sitemap) ? file_get_contents($sitemap) : false;
    if ($sitemapContents === false) {
        // Fail with a clear message instead of handing `false` to SimpleXMLElement,
        // which would only report that the string could not be parsed as XML.
        fwrite(STDERR, "Unable to read {$sitemap}; pass a URL as the first argument or provide a sitemap file.\n");
        exit(1);
    }

    $xml = new SimpleXMLElement($sitemapContents);
}
/**
 * Collect the text content of every element with the given tag name in an
 * HTML document.
 *
 * @param string|false $string  HTML markup. Anything that is not a non-blank
 *                              string (for example the `false` returned by a
 *                              failed file_get_contents) yields an empty list.
 * @param string       $tagname Tag name to look for, e.g. "h1".
 *
 * @return string[] The textContent of each matching element, in the order
 *                  getElementsByTagName() yields them.
 */
function getTextBetweenTags($string, $tagname){
    // DOMDocument::loadHTML() throws a ValueError on an empty string in PHP 8
    // (the @ operator never suppressed that), so bail out before parsing.
    if (!is_string($string) || trim($string) === '') {
        return array();
    }

    // Collect libxml parse warnings internally instead of silencing them with @,
    // then restore whatever error mode the caller had.
    $previousErrorMode = libxml_use_internal_errors(true);
    $d = new DOMDocument();
    $d->loadHTML($string);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $return = array();
    foreach ($d->getElementsByTagName($tagname) as $item) {
        $return[] = $item->textContent;
    }

    return $return;
}
// Audit each URL: fetch the page, collect its h1-h4 headings and print one
// CSV row of the form: uri,"h1 lines","h2 lines","h3 lines","h4 lines".
foreach ($xml->url as $url) {
    // Cast the <loc> node to a string; calling reset() on an object is
    // deprecated as of PHP 8.1. Sitemap values are trimmed because the
    // element text may carry surrounding whitespace.
    $uri = isset($argv[1])
        ? $argv[1]
        : trim((string) $url->loc);

    $pageContents = file_get_contents($uri);

    // A failed or empty fetch still produces a row, just with empty heading
    // columns; the parser is not called because it cannot handle empty input.
    $hasBody = is_string($pageContents) && $pageContents !== '';

    $columns = array();
    foreach (array('h1', 'h2', 'h3', 'h4') as $tagName) {
        $label = strtoupper($tagName);
        $headings = $hasBody ? getTextBetweenTags($pageContents, $tagName) : array();

        $column = '';
        foreach ($headings as $heading) {
            // Trim before testing so whitespace-only headings are reported as
            // empty, and compare against '' so a heading reading "0" is kept.
            $text = trim($heading);
            $column .= $label . ': ' . ($text !== '' ? $text : 'Empty') . "\n";
        }

        // Double any embedded quotes so the quoted CSV field stays well-formed.
        $columns[] = str_replace('"', '""', $column);
    }

    printf(
        "%s,\"%s\",\"%s\",\"%s\",\"%s\"\n",
        $uri,
        $columns[0],
        $columns[1],
        $columns[2],
        $columns[3]
    );

    // Brief pause between requests: usleep() takes microseconds, so this is 0.5 ms.
    // NOTE(review): if a 500 ms delay was intended this should be 500000 - confirm.
    usleep(500);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment