Last active
January 18, 2023 21:43
-
-
Save artlung/613e6ac577a170bf2b4b9045486f129e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 *
 * This is a quick way to turn a simple text file containing a very long
 * list of URLs (sitemap-urls.txt, one URL per line) into valid XML Sitemaps:
 * http://en.wikipedia.org/wiki/Sitemaps
 * "Very long" means the expected number of URLs is greater than 10,000.
 * If loaded without a valid "page" query parameter, this script outputs a
 * sitemap index; otherwise it outputs the individual XML sitemap for that
 * page, containing up to 10,000 URLs.
 * Put this file (sitemap.xml.php) and sitemap-urls.txt at
 * the webroot, e.g. http://example.com/sitemap.xml.php
 * Then add the text in quotes below to your robots.txt file as a new line:
 * "Sitemap: http://example.com/sitemap.xml.php"
 *
 * Questions? email joe@artlung.com
 *
 * Based on https://gist.github.com/artlung/210438
 */
// Maximum number of URLs emitted per individual sitemap page.
$per_page = 10000;
// Plain-text list of URLs, one per line.
$filename = 'sitemap-urls.txt';

// file() and filectime() return false (and emit a warning) when the list is
// missing or unreadable. Treat that case as an empty list rather than passing
// a non-array on to array_map().
$readable = is_readable($filename);
$urls = $readable ? file($filename) : false;
if ($urls === false) {
    $urls = array();
}
$filectime = $readable ? filectime($filename) : false;
if ($filectime === false) {
    // No usable timestamp: fall back to "now" so later date() calls get an int.
    $filectime = time();
}
$urls = array_map('trim', $urls);

// Requested 1-based page number; 0 means "no page requested".
// A non-scalar value (e.g. ?page[]=x) is ignored instead of being cast to 1.
$page = (isset($_GET['page']) && is_scalar($_GET['page'])) ? (int)$_GET['page'] : 0;

// Every entry shares the same metadata, so compute the date once, not per URL.
$file_date = date('Y-m-d', $filectime);
$priority = '0.5';
$sitemap = array();
foreach ($urls as $url) {
    // Skip blank lines (including the one left by a trailing newline).
    if ($url === '') {
        continue;
    }
    $sitemap[] = array(
        'loc' => $url,
        'lastmod' => $file_date,
        'changefreq' => 'weekly',
        'priority' => $priority,
    );
}

// Split the entries into pages of at most $per_page URLs each.
$pages = array_chunk($sitemap, $per_page);
// range(1, 0) would yield [1, 0], so only build the list of valid 1-based
// page numbers when at least one page exists.
$page_numbers = count($pages) > 0 ? range(1, count($pages)) : array();
// Everything emitted below is XML; send the content type before any output.
header('Content-Type: text/xml');
echo '<?xml version=\'1.0\' encoding=\'UTF-8\'?>';
echo "\n";

// Rebuild this script's own URL (without any query string) so that the
// sitemap index can link to each page as "<script-url>?page=N".
$path = explode('?', $_SERVER['REQUEST_URI']);
$path = array_shift($path);
$scheme = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off') ? 'https' : 'http';
$url = $scheme . '://' . $_SERVER['HTTP_HOST'] . $path . '?page=';
$lastmod = date('Y-m-d', $filectime);

// Escape for XML, not HTML: htmlentities() produces named entities such as
// &eacute; that are undefined in XML and make the document unparseable.
$xml_flags = ENT_QUOTES | ENT_XML1;

// $page is 1-based while $pages is a 0-based list, so page N lives at index N-1.
if (!isset($pages[$page - 1])) {
    // No page, or an out-of-range page, was requested: output the sitemap
    // index, which lists one <sitemap> entry per available page.
    echo '<sitemapindex xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd">';
    echo "\n";
    $page_count = count($pages);
    for ($pg_num = 1; $pg_num <= $page_count; $pg_num++) {
        echo "\t<sitemap>\n";
        echo "\t\t<loc>" . htmlspecialchars($url . $pg_num, $xml_flags, 'UTF-8') . "</loc>\n";
        echo "\t\t<lastmod>{$lastmod}</lastmod>\n";
        echo "\t</sitemap>\n";
    }
    echo '</sitemapindex>';
} else {
    // A valid page number was requested: output that page's URL set.
    echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">';
    echo "\n";
    foreach ($pages[$page - 1] as $link) {
        echo "\t<url>\n";
        echo "\t\t<loc>" . htmlspecialchars($link['loc'], $xml_flags, 'UTF-8') . "</loc>\n";
        echo "\t\t<lastmod>{$link['lastmod']}</lastmod>\n";
        echo "\t\t<changefreq>{$link['changefreq']}</changefreq>\n";
        echo "\t\t<priority>{$link['priority']}</priority>\n";
        echo "\t</url>\n";
    }
    echo '</urlset>';
}
@Vision20202 I updated to use the null coalescing operator available in PHP7. If you are using something older that won't work. What version of PHP are you using?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Reported by @Vision20202:
Code updated to address this issue.