Skip to content

Instantly share code, notes, and snippets.

@pothi
Created December 16, 2017 11:24
Show Gist options
  • Save pothi/ac9ac97928d4568d0270acabe5d21377 to your computer and use it in GitHub Desktop.
Save pothi/ac9ac97928d4568d0270acabe5d21377 to your computer and use it in GitHub Desktop.
Fetch URLs from sitemap!
<?php
// Root sitemap (index) that the crawl starts from.
$sitemapURL = 'https://www.tinywp.in/sitemap.xml';

/**
 * Fetch all URLs from sitemap.xml.
 *
 * simplexml_load_file() returns false when the document cannot be fetched or
 * parsed; nothing below can work without it, so report the problem and stop
 * instead of continuing with a boolean in place of the XML tree.
 */
$xmlobject = simplexml_load_file($sitemapURL);
if ($xmlobject === false) {
    echo 'Unable to load sitemap: ' . $sitemapURL . PHP_EOL;
    exit(1);
}
/**
 * Collect the <loc> values found beneath an XML node as a flat list.
 *
 * Every child of $xml is inspected: a child that itself contains <sitemap>
 * elements is descended into recursively; any other child contributes the
 * string value of its <loc> element (an empty string when it has none).
 *
 * @param SimpleXMLElement $xml    Node whose children are scanned.
 * @param string           $parent Not used by the function; kept so existing
 *                                 call sites that pass it keep working.
 *
 * @return string[] Flat list of <loc> values, in iteration order.
 */
function findChildXMLs($xml, $parent='') {
    $child_xmls = [];
    foreach ($xml as $key => $value) {
        if (count($value->sitemap) > 0) {
            // Merge instead of append: appending the returned array would nest
            // it as a single element, and callers treat every entry as a URL string.
            $child_xmls = array_merge($child_xmls, findChildXMLs($value, $key));
        } else {
            $child_xmls[] = (string)$value->loc;
        }
    }
    return $child_xmls;
}
// Locations of the child sitemaps listed in the document loaded above.
$sitemaps = findChildXMLs($xmlobject);
$totalSitemaps = count($sitemaps);

// Page URLs gathered from every child sitemap.
$urls = [];

// 1-based position of the sitemap being processed, for the progress line.
$count = 1;
foreach ($sitemaps as $xml) {
    echo "Processing... ($count/$totalSitemaps) " . (string)$xml . PHP_EOL;
    $count++;

    $individual_sitemap = simplexml_load_file($xml);
    if ($individual_sitemap === false) {
        // A child sitemap that cannot be fetched or parsed yields false;
        // skip it so the remaining sitemaps are still processed.
        echo 'Skipping (could not load): ' . (string)$xml . PHP_EOL;
        continue;
    }

    foreach ($individual_sitemap->url as $url) {
        $urls[] = (string)$url->loc;
    }
    // Debug aid: uncomment to stop after the first sitemap.
    // break;
}
echo 'Total URLs: ' . count($urls) . PHP_EOL;
// Debug aid: uncomment to dump every collected URL.
// print_r($urls);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment