Skip to content

Instantly share code, notes, and snippets.

@artlung
Created February 3, 2017 16:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save artlung/fa3d4410a77973986aaa4ff4a74443d6 to your computer and use it in GitHub Desktop.
Save artlung/fa3d4410a77973986aaa4ff4a74443d6 to your computer and use it in GitHub Desktop.
keyword1
keyword2
<?php
/**
*
* This is a quick way to turn a simple text file containing a very long
* list of keywords, one per line (keywords.txt), into valid XML Sitemaps:
* http://en.wikipedia.org/wiki/Sitemaps
* "Very long" means the expected number of URLs is greater than 10,000.
* If loaded without a valid "page" query parameter, this script outputs a
* sitemap index listing every page; otherwise it outputs the individual
* XML sitemap for that page, containing up to 10,000 URLs.
* Put this file (as sitemap.xml.php) and keywords.txt at the webroot,
* e.g. http://example.com/sitemap.xml.php
* Then add the text in quotes below to your robots.txt file as a new line
* (use the name you actually gave this script):
* "Sitemap: http://example.com/sitemap.xml.php"
*
* Questions? email joe@artlung.com
*
* Based on https://gist.github.com/artlung/210438
*/
/**
 * Build the public page URL for a single keyword.
 *
 * The keyword is inserted verbatim between the site prefix and the ".html"
 * suffix; no URL-encoding or XML-escaping is applied here.
 *
 * @param string $keyword Keyword taken from the keywords file.
 * @return string Absolute URL of the keyword's page.
 */
function makeUrl($keyword) {
    $prefix = 'http://example.com/';
    $suffix = '.html';

    return $prefix . $keyword . $suffix;
}
// ---------------------------------------------------------------------------
// Configuration and input
// ---------------------------------------------------------------------------
$per_page = 10000;          // maximum number of URLs per individual sitemap
$filename = 'keywords.txt'; // one keyword per line

// file() returns false (and raises a warning) when the file cannot be read.
// Fail with a 500 instead of emitting a malformed or misleading sitemap.
$urls = is_readable($filename) ? file($filename) : false;
if ($urls === false) {
    header('Content-Type: text/plain', true, 500);
    echo "Unable to read {$filename}";
    exit;
}

$filectime = filectime($filename);
// The same date applies to every entry, so format it once.
$lastmod = date('Y-m-d', $filectime);
$urls = array_map('trim', $urls);

// "page" is optional. A missing or non-scalar value becomes 0, which is
// never a valid page number and therefore selects the sitemap index.
$page = (isset($_GET['page']) && is_scalar($_GET['page'])) ? (int)$_GET['page'] : 0;

// ---------------------------------------------------------------------------
// Build one sitemap entry per non-blank keyword, then split into pages
// ---------------------------------------------------------------------------
$sitemap = array();
foreach ($urls as $keyword) {
    if ($keyword !== '') {
        $sitemap[] = array(
            'loc' => $keyword,
            'lastmod' => $lastmod,
            'changefreq' => 'weekly',
            'priority' => '0.5',
        );
    }
}
$pages = array_chunk($sitemap, $per_page);

// range(1, 0) counts downwards and yields array(1, 0), so an empty keyword
// list must be handled explicitly to avoid advertising non-existent pages.
$page_numbers = count($pages) > 0 ? range(1, count($pages)) : array();

// ---------------------------------------------------------------------------
// Output
// ---------------------------------------------------------------------------
header('Content-Type: text/xml');
echo '<?xml version=\'1.0\' encoding=\'UTF-8\'?>';
echo "\n";

// Rebuild this script's own URL (without its query string) so the index can
// link to each page as "<script>?page=N".
$path = explode('?', $_SERVER['REQUEST_URI']);
$path = array_shift($path);
$scheme = (!empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off') ? 'https' : 'http';
$url = $scheme . '://' . $_SERVER['HTTP_HOST'] . $path . '?page=';

// XML only predefines five entities; htmlentities() would emit HTML-only
// names (e.g. &eacute;) that are invalid in a sitemap, so escape for XML.
$xml_flags = ENT_QUOTES | ENT_XML1;

if (!in_array($page, $page_numbers, true)) {
    // No valid page number was requested: output the sitemap index.
    echo '<sitemapindex xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n"
        . 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' . "\n"
        . 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd">';
    echo "\n";
    foreach ($page_numbers as $pg_num) {
        echo "\t<sitemap>\n";
        echo "\t\t<loc>" . htmlspecialchars($url . $pg_num, $xml_flags, 'UTF-8') . "</loc>\n";
        echo "\t\t<lastmod>{$lastmod}</lastmod>\n";
        echo "\t</sitemap>\n";
    }
    echo '</sitemapindex>';
} else {
    // Valid page number: output that page's sitemap (pages are 1-based,
    // the chunk array is 0-based).
    echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' . "\n"
        . 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n"
        . 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">';
    echo "\n";
    foreach ($pages[$page - 1] as $link) {
        echo "\t<url>\n";
        echo "\t\t<loc>" . htmlspecialchars(makeUrl($link['loc']), $xml_flags, 'UTF-8') . "</loc>\n";
        echo "\t\t<lastmod>{$link['lastmod']}</lastmod>\n";
        echo "\t\t<changefreq>{$link['changefreq']}</changefreq>\n";
        echo "\t\t<priority>{$link['priority']}</priority>\n";
        echo "\t</url>\n";
    }
    echo '</urlset>';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment