Skip to content

Instantly share code, notes, and snippets.

@WinterSilence
Last active May 6, 2016 09:41
Show Gist options
  • Save WinterSilence/2c1dfcc6b8df3c329c96a9c1ca4707dd to your computer and use it in GitHub Desktop.
Save WinterSilence/2c1dfcc6b8df3c329c96a9c1ca4707dd to your computer and use it in GitHub Desktop.
Sitemap refactoring
<?php
namespace Sitemap;
/**
 * Facade for building sitemaps: every operation is delegated to the injected writer.
 *
 * @see http://sitemaps.org
 */
class Sitemap
{
    /**
     * @var WriterInterface backend that all calls are forwarded to
     */
    private $writer;

    /**
     * Stores the writer used by all other methods.
     *
     * @param WriterInterface $writer
     */
    public function __construct(WriterInterface $writer)
    {
        $this->writer = $writer;
    }

    /**
     * Forwards one URL entry to the writer.
     *
     * @param string $location URL of the page
     * @param array $optional optional elements/tags, passed to the writer unchanged
     * @return self this instance, so calls can be chained
     */
    public function addUrl($location, array $optional = array())
    {
        $backend = $this->writer;
        $backend->writeUrl($location, $optional);

        return $this;
    }

    /**
     * Asks the writer to finish writing and hands back whatever it reports.
     *
     * @return array result of the writer's save() call
     */
    public function save()
    {
        $result = $this->writer->save();

        return $result;
    }

    /**
     * Maps every file reported by the writer to its public URL.
     *
     * The URL is built by appending the file's basename directly to $baseUrl;
     * no separator is inserted, so $baseUrl has to end with one (e.g. "/").
     *
     * @param string $baseUrl base URL of all the sitemaps written
     * @return array URLs of the sitemaps written, keyed by the file path the writer returned
     */
    public function getSitemapUrls($baseUrl)
    {
        $urlsByFile = [];
        $files = $this->writer->getFiles();

        foreach ($files as $path) {
            $name = pathinfo($path, PATHINFO_BASENAME);
            $urlsByFile[$path] = $baseUrl . $name;
        }

        return $urlsByFile;
    }
}
<?php
namespace Sitemap;
/**
 * Writes sitemap XML files, starting a new file when the URL or size limit is hit.
 *
 * Entries are collected in an in-memory XMLWriter and appended to the current
 * file on every flush. The first file uses the configured path; every further
 * file gets "_<number>" inserted before the extension.
 */
class Writer implements WriterInterface
{
    /**
     * Maximum number of characters accepted for a "loc" value.
     */
    const MAX_URL_LENGTH = 2047;
    /**
     * Upper bound (and default) for the number of URLs per file.
     */
    const MAX_URLS = 50000;
    /**
     * Upper bound (and default) for the bytes per file: 10485760 (10MB) minus 10240.
     */
    const MAX_FILE_SIZE = 10475520;
    /**
     * Valid values of "changefreq" element.
     */
    const ALWAYS = 'always';
    const HOURLY = 'hourly';
    const DAILY = 'daily';
    const WEEKLY = 'weekly';
    const MONTHLY = 'monthly';
    const YEARLY = 'yearly';
    const NEVER = 'never';

    /**
     * @var array supported child elements of "url" mapped to the method that
     * validates and formats them; the order is the order they are written in
     */
    private static $elementFormatters = array(
        'loc' => 'formatLoc',
        'lastmod' => 'formatLastmod',
        'changefreq' => 'formatChangefreq',
        'priority' => 'formatPriority',
    );
    /**
     * @var integer Maximum allowed number of bytes per single file.
     */
    private $maxFileSize = self::MAX_FILE_SIZE;
    /**
     * @var integer Number of bytes flushed to the current file so far
     */
    private $fileSize = 0;
    /**
     * @var integer Maximum allowed number of URLs in a single file.
     */
    private $maxUrls = self::MAX_URLS;
    /**
     * @var integer number of URLs added across all files
     */
    private $urlsCount = 0;
    /**
     * @var string path to the file to be written
     */
    private $filePath = './sitemap.xml';
    /**
     * @var integer number of files started
     */
    private $fileCount = 0;
    /**
     * @var array path of files written
     */
    private $writtenFilePaths = [];
    /**
     * @var integer number of URLs to be kept in memory before writing it to file
     */
    private $bufferSize = 500;
    /**
     * @var \XMLWriter|null in-memory writer of the current file, null while no file is open
     */
    private $writer;

    /**
     * Applies configuration through the matching private setters.
     *
     * Each key must name a property of this class that has a "set<Property>"
     * method (filePath, maxUrls, bufferSize, maxFileSize).
     *
     * @param array $config property name => value
     * @throws \InvalidArgumentException if a key is unknown, has no setter, or its value is rejected
     */
    public function __construct(array $config = array())
    {
        foreach ($config as $property => $value) {
            if (!property_exists($this, $property)) {
                throw new \InvalidArgumentException(
                    'Property "' . $property . '" not defined.'
                );
            }
            $method = 'set' . ucfirst($property);
            if (!method_exists($this, $method)) {
                throw new \InvalidArgumentException(
                    'Property "' . $property . '" not writable.'
                );
            }
            $this->{$method}($value);
        }
    }

    /**
     * Sets the path of the (first) file to write to.
     *
     * @param string $path path of the file to write to
     * @throws \InvalidArgumentException if the directory of the path does not exist
     */
    private function setFilePath($path)
    {
        $dir = dirname($path);
        if (!is_dir($dir)) {
            throw new \InvalidArgumentException(
                'Directory "' . $dir . '" not exists.'
            );
        }
        $this->filePath = $path;
    }

    /**
     * Sets maximum allowed number of URLs in a single file. No more than 50000 URLs.
     *
     * @param int $number integer between 1 and self::MAX_URLS (inclusive)
     * @throws \InvalidArgumentException if the value is not an integer in that range
     */
    private function setMaxUrls($number)
    {
        // FILTER_VALIDATE_INT rejects non-numeric input that a plain cast would turn into 0.
        $value = filter_var($number, FILTER_VALIDATE_INT);
        if ($value === false || $value < 1 || $value > self::MAX_URLS) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $number . ') of max URLs.'
            );
        }
        $this->maxUrls = $value;
    }

    /**
     * Sets number of URLs to be kept in memory before writing it to file.
     *
     * @param int $number integer of at least 1
     * @throws \InvalidArgumentException if the value is not an integer of at least 1
     */
    private function setBufferSize($number)
    {
        // A buffer size of 0 would cause a modulo by zero in writeUrl().
        $value = filter_var($number, FILTER_VALIDATE_INT);
        if ($value === false || $value < 1) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $number . ') of buffer size.'
            );
        }
        $this->bufferSize = $value;
    }

    /**
     * Sets maximum allowed number of bytes per single file.
     *
     * @param int $bytes integer greater than 10240 and no more than self::MAX_FILE_SIZE
     * @throws \InvalidArgumentException if the value is not an integer in that range
     */
    private function setMaxFileSize($bytes)
    {
        $value = filter_var($bytes, FILTER_VALIDATE_INT);
        if ($value === false || $value <= 10240 || $value > self::MAX_FILE_SIZE) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $bytes . ') of max file size.'
            );
        }
        $this->maxFileSize = $value;
    }

    /**
     * Starts a new file: removes a previous file at that path and opens the "urlset" element.
     */
    private function createNewFile()
    {
        $this->fileSize = 0;
        $this->fileCount++;
        $filePath = $this->getCurrentFilePath();
        $this->writtenFilePaths[] = $filePath;
        // flush() appends, so a leftover file from an earlier run has to go first.
        if (is_file($filePath)) {
            unlink($filePath);
        }
        $this->writer = new \XMLWriter;
        $this->writer->openMemory();
        $this->writer->startDocument('1.0', 'UTF-8');
        $this->writer->setIndent(true);
        $this->writer->startElement('urlset');
        $this->writer->writeAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
    }

    /**
     * Writes closing tags to current file and releases the in-memory writer.
     *
     * Does nothing when no file is open, so it is safe to call repeatedly
     * (save() followed by the destructor).
     */
    private function finishFile()
    {
        if ($this->writer !== null) {
            $this->writer->endElement();
            $this->writer->endDocument();
            $this->flush();
            // Marks the file as closed; a later writeUrl() starts a new file.
            $this->writer = null;
        }
    }

    /**
     * Flushes buffer into file and adds the flushed bytes to the current file size.
     */
    private function flush()
    {
        $buffer = $this->writer->flush(true);
        $this->fileSize += mb_strlen($buffer, '8bit');
        file_put_contents($this->getCurrentFilePath(), $buffer, FILE_APPEND);
    }

    /**
     * Builds the path of the file currently being written.
     *
     * @return string the configured path for the first file, otherwise the
     * configured path with "_<file number>" inserted before the extension
     */
    private function getCurrentFilePath()
    {
        if ($this->fileCount < 2) {
            return $this->filePath;
        }
        $parts = pathinfo($this->filePath);
        // pathinfo() omits "extension" when the file name has none.
        $extension = isset($parts['extension']) ? '.' . $parts['extension'] : '';
        return $parts['dirname'] . DIRECTORY_SEPARATOR . $parts['filename'] . '_' . $this->fileCount . $extension;
    }

    /**
     * Finishes writing.
     *
     * @return array paths of all files started so far
     */
    public function save()
    {
        $this->finishFile();
        return $this->writtenFilePaths;
    }

    /**
     * Validates the value of the "loc" element.
     *
     * @param string $location
     * @return string the location, unchanged
     * @throws \InvalidArgumentException if the URL is invalid or too long
     */
    private function formatLoc($location)
    {
        if (false === filter_var($location, FILTER_VALIDATE_URL)) {
            throw new \InvalidArgumentException(
                'The location URL "' . $location . '" is invalid.'
            );
        }
        if (mb_strlen($location, 'UTF-8') > self::MAX_URL_LENGTH) {
            throw new \InvalidArgumentException(
                'The location must be less than ' . (self::MAX_URL_LENGTH + 1) . ' characters.'
            );
        }
        return (string)$location;
    }

    /**
     * Formats the value of the "lastmod" element as an ISO 8601 date.
     *
     * @param string|int $date Unix timestamp (integer or numeric string) or
     * a date string understood by strtotime()
     * @return string date formatted with date('c')
     * @throws \InvalidArgumentException if the value cannot be turned into a timestamp
     */
    private function formatLastmod($date)
    {
        if (is_numeric($date)) {
            $timestamp = (int)$date;
        } else {
            $timestamp = is_string($date) ? strtotime($date) : false;
        }
        if ($timestamp === false) {
            throw new \InvalidArgumentException(
                'Invalid value (' . (is_scalar($date) ? $date : gettype($date)) . ') of "lastmod" element.'
            );
        }
        return date('c', $timestamp);
    }

    /**
     * Returns valid values of "changefreq" element.
     *
     * @return array
     */
    public function getValidFrequencies()
    {
        return array(
            self::ALWAYS,
            self::HOURLY,
            self::DAILY,
            self::WEEKLY,
            self::MONTHLY,
            self::YEARLY,
            self::NEVER
        );
    }

    /**
     * Validates the value of the "changefreq" element.
     *
     * @param string $frequency one of the values of getValidFrequencies()
     * @return string the frequency, unchanged
     * @throws \InvalidArgumentException if the value is not a valid frequency
     */
    private function formatChangefreq($frequency)
    {
        if (!in_array($frequency, $this->getValidFrequencies(), true)) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $frequency . ') of "changefreq" element.'
            );
        }
        return $frequency;
    }

    /**
     * Formats the value of the "priority" element with one decimal place.
     *
     * @param string|float $priority number between 0 and 1 (inclusive)
     * @return string
     * @throws \InvalidArgumentException if the value is not a number in that range
     */
    private function formatPriority($priority)
    {
        if (!is_numeric($priority) || $priority < 0 || $priority > 1) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $priority . ') of "priority" element.'
            );
        }
        return number_format((float)$priority, 1, '.', '');
    }

    /**
     * Validates all elements of one entry and returns them formatted, in writing order.
     *
     * Element names are matched case-insensitively; when the same element is
     * given more than once the first occurrence is used.
     *
     * @param array $elements element name => raw value
     * @return array element name => formatted string, ordered as in self::$elementFormatters
     * @throws \InvalidArgumentException if an element is unknown or its value is invalid
     */
    private function prepareElements(array $elements)
    {
        $given = [];
        foreach ($elements as $element => $value) {
            $name = strtolower($element);
            // Only whitelisted names are accepted, so a key can never reach another method.
            if (!isset(self::$elementFormatters[$name])) {
                throw new \InvalidArgumentException(
                    'Element "' . $element . '" not defined.'
                );
            }
            if (!array_key_exists($name, $given)) {
                $given[$name] = $value;
            }
        }
        $prepared = [];
        foreach (self::$elementFormatters as $name => $formatter) {
            if (array_key_exists($name, $given)) {
                $prepared[$name] = $this->{$formatter}($given[$name]);
            }
        }
        return $prepared;
    }

    /**
     * Adds one "url" entry, starting a new file first when a limit has been reached.
     *
     * All values are validated before anything is written, so a rejected entry
     * leaves the files and counters untouched. The size limit is checked against
     * the bytes already flushed; entries still held in the buffer are not counted.
     *
     * @param string $url value of the "loc" element
     * @param array $optional further elements (lastmod, changefreq, priority) as name => value
     * @throws \InvalidArgumentException if an element is unknown or a value is invalid
     */
    public function writeUrl($url, array $optional = array())
    {
        $elements = $this->prepareElements(array('loc' => $url) + $optional);
        if ($this->writer === null) {
            // First entry, or first entry after save().
            $this->createNewFile();
        } elseif ($this->urlsCount % $this->maxUrls == 0 || $this->fileSize >= $this->maxFileSize) {
            $this->finishFile();
            $this->createNewFile();
        }
        if ($this->urlsCount % $this->bufferSize == 0) {
            $this->flush();
        }
        $this->writer->startElement('url');
        foreach ($elements as $element => $value) {
            $this->writer->writeElement($element, $value);
        }
        $this->writer->endElement();
        $this->urlsCount++;
    }

    /**
     * Returns the paths of all files started so far.
     *
     * @return array
     */
    public function getFiles()
    {
        return $this->writtenFilePaths;
    }

    /**
     * Finishes writing.
     */
    public function __destruct()
    {
        $this->finishFile();
    }
}
<?php
namespace Sitemap;
/**
 * Contract of the backend that Sitemap delegates all writing to.
 */
interface WriterInterface
{
/**
 * Creates the writer.
 *
 * @param array $config implementation-specific settings
 */
public function __construct(array $config = array());
/**
 * Writes one URL entry. Sitemap::addUrl() forwards its arguments to this method unchanged.
 *
 * @param string $url location of the page
 * @param array $optional optional elements of the entry
 */
public function writeUrl($url, array $optional = array());
/**
 * Returns the files written. Sitemap::getSitemapUrls() iterates the result
 * and uses the basename of every item, so items are expected to be file paths.
 *
 * @return array
 */
public function getFiles();
/**
 * Finishes writing. Sitemap::save() returns the result of this call to its caller.
 *
 * @return array
 */
public function save();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment