Skip to content

Instantly share code, notes, and snippets.

@dlundgren
Last active February 15, 2017 16:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dlundgren/490c6c86e30f6b5f2283 to your computer and use it in GitHub Desktop.
Save dlundgren/490c6c86e30f6b5f2283 to your computer and use it in GitHub Desktop.
Emulated file stream that is Unicode-aware
<?php
namespace Feral;
/**
 * A Unicode-aware file whose contents are held entirely in memory.
 *
 * The given contents are written to a php://memory stream, the stream is
 * rewound to its beginning, and the open handle is passed to the parent
 * constructor.
 *
 * NOTE(review): the parent class UnicodeFile is not visible next to this
 * class; confirm that its constructor accepts a raw stream resource.
 *
 * @package Feral
 */
class MemoryFile extends UnicodeFile
{
    /**
     * @param string $contents The data to expose as a file
     */
    public function __construct($contents)
    {
        // Opened read/write so the contents can be stored and then read back.
        $handle = fopen('php://memory', 'r+');

        fwrite($handle, $contents);

        // Reading must begin at the first byte, not after what was just written.
        rewind($handle);

        parent::__construct($handle);
    }
}
<?php
namespace Feral;
/**
 * Represents an open file handle to be used in place of an SplFileObject.
 *
 * Every method forwards to the PHP stream function of the same purpose, so the
 * wrapper offers the subset of the SplFileObject API (eof, rewind, fseek,
 * fgetc, ftell) that is needed to read a stream character by character.
 *
 * @package Feral
 */
class ResourceStreamWrapper
{
    /**
     * The wrapped stream handle
     *
     * @var resource
     */
    private $handle;

    /**
     * @param resource $handle An open stream resource (e.g. from fopen())
     *
     * @throws \InvalidArgumentException if $handle is not an open stream resource
     */
    public function __construct($handle)
    {
        // Fail at construction time rather than on the first read: is_resource()
        // is false for non-resources and for handles that are already closed.
        if (!is_resource($handle) || get_resource_type($handle) !== 'stream') {
            throw new \InvalidArgumentException('Handle must be an open stream resource');
        }

        $this->handle = $handle;
    }

    /**
     * Tests for end-of-file on a file pointer
     *
     * @see http://php.net/manual/en/function.feof.php
     *
     * @return bool true if the file pointer is at EOF or an error occurs (including socket timeout); otherwise returns false.
     */
    public function eof()
    {
        return feof($this->handle);
    }

    /**
     * Rewind the position of a file pointer
     *
     * @see http://php.net/manual/en/function.rewind.php
     *
     * @return bool true on success or false on failure.
     */
    public function rewind()
    {
        return rewind($this->handle);
    }

    /**
     * Seeks on a file pointer
     *
     * @see http://php.net/manual/en/function.fseek.php
     *
     * @param int $offset Offset in bytes, interpreted according to $whence
     * @param int $whence [optional] One of SEEK_SET, SEEK_CUR or SEEK_END
     * @return int Upon success, returns 0; otherwise, returns -1. Note that seeking past EOF is not considered an error.
     */
    public function fseek($offset, $whence = SEEK_SET)
    {
        return fseek($this->handle, $offset, $whence);
    }

    /**
     * Gets character (a single byte) from file pointer
     *
     * @see http://php.net/manual/en/function.fgetc.php
     *
     * @return string|false a string containing a single byte read from the handle. Returns false on EOF.
     */
    public function fgetc()
    {
        return fgetc($this->handle);
    }

    /**
     * Returns the current position of the file read/write pointer
     *
     * @see http://php.net/manual/en/function.ftell.php
     *
     * @return int|false The position in bytes. If an error occurs, returns false.
     */
    public function ftell()
    {
        return ftell($this->handle);
    }
}
<?php
namespace Feral;
/**
 * Reads a UTF-8 encoded stream one character (code point) at a time.
 *
 * Javascript is UTF-8 aware and we need to process it as such, and regular string functions in PHP
 * are not UTF-8 aware.
 *
 * This code does not use mb_* functions to help prevent the users settings from interfering.
 *
 * Units: length() counts characters, whereas index(), advance() and charAt()
 * work with byte offsets because they map directly onto ftell()/fseek().
 *
 * @package Feral
 */
class Unicode
{
    const EOF = -1;

    /**
     * The file object
     *
     * @var \SplFileObject|ResourceWrapper|ResourceStreamWrapper
     */
    protected $stream;

    /**
     * The length of the file, in characters
     *
     * @var int
     */
    protected $length = 0;

    /**
     * The length of the file, in bytes (the stream position once everything was read)
     *
     * @var int
     */
    protected $byteLength = 0;

    /**
     * Wraps the stream and validates it by decoding it once from its current
     * position to the end, counting characters on the way. The stream is
     * rewound afterwards.
     *
     * @param \SplFileObject|ResourceWrapper|ResourceStreamWrapper $stream
     *
     * @throws \InvalidArgumentException if $stream has an unsupported type or contains malformed UTF-8
     */
    public function __construct($stream)
    {
        // instanceof never triggers autoloading, so naming a wrapper class that
        // is not loaded is harmless; both wrapper names are accepted.
        $supported = $stream instanceof ResourceWrapper
            || $stream instanceof ResourceStreamWrapper
            || $stream instanceof \SplFileObject;
        if (!$supported) {
            throw new \InvalidArgumentException(
                "Stream must be an instance of ResourceWrapper, ResourceStreamWrapper or \\SplFileObject"
            );
        }

        $this->stream = $stream;

        // pre-calculate the length based on unicode...
        for (; ;) {
            $char = $this->readChar();
            if ($char === null) {
                break; // clean end of stream
            }
            if ($char === false) {
                // Decided from the decoder result, not from eof(): some streams
                // report EOF as soon as the last byte has been read, which would
                // hide a malformed final sequence.
                throw new \InvalidArgumentException("Invalid unicode around character $this->length");
            }
            $this->length++;
        }

        // Remember the size in bytes so that charAt() can bounds-check byte offsets.
        $end = $this->stream->ftell();
        $this->byteLength = $end === false ? 0 : $end;

        $this->stream->rewind();
    }

    /**
     * Returns the length of the file in characters
     *
     * @return int
     */
    public function length()
    {
        return $this->length;
    }

    /**
     * Peeks at the next character without moving the stream position
     *
     * @return string|false The character, or false at EOF or on a malformed sequence
     */
    public function peekNextChar()
    {
        return $this->nextChar(true);
    }

    /**
     * Encodes a Unicode code point as a UTF-8 string.
     *
     * Implemented by hand so that it does not depend on mbstring.
     *
     * @param int $u The code point
     * @return string The UTF-8 bytes; U+FFFD (replacement character) when $u is
     *                negative, above U+10FFFF or a surrogate (U+D800..U+DFFF)
     */
    public function fromCharCode($u)
    {
        $cp = intval($u);

        if ($cp < 0 || $cp > 0x10FFFF || ($cp >= 0xD800 && $cp <= 0xDFFF)) {
            return "\xEF\xBF\xBD";
        }
        if ($cp < 0x80) {
            return chr($cp);
        }
        if ($cp < 0x800) {
            return chr(0xC0 | ($cp >> 6))
                . chr(0x80 | ($cp & 0x3F));
        }
        if ($cp < 0x10000) {
            return chr(0xE0 | ($cp >> 12))
                . chr(0x80 | (($cp >> 6) & 0x3F))
                . chr(0x80 | ($cp & 0x3F));
        }

        return chr(0xF0 | ($cp >> 18))
            . chr(0x80 | (($cp >> 12) & 0x3F))
            . chr(0x80 | (($cp >> 6) & 0x3F))
            . chr(0x80 | ($cp & 0x3F));
    }

    /**
     * Returns the next character
     *
     * @see https://en.wikipedia.org/wiki/UTF-8
     * @param bool $peek When true the stream position is restored after reading
     * @return string|false The character, or false at EOF or on a malformed sequence
     */
    public function nextChar($peek = false)
    {
        $start = $peek ? $this->stream->ftell() : false;

        $char = $this->readChar();

        // Restore the exact starting position however many bytes were consumed.
        // Nothing is consumed at EOF, so no seek happens there and the stream's
        // EOF state is left untouched.
        if ($start !== false && $this->stream->ftell() !== $start) {
            $this->stream->fseek($start);
        }

        return $char === null ? false : $char;
    }

    /**
     * Returns whether or not the file is eof
     *
     * @return bool
     */
    public function eof()
    {
        return $this->stream->eof();
    }

    /**
     * Returns the position (byte offset) in the file that the stream is currently at
     *
     * @return int The position, or 0 when it cannot be determined
     */
    public function index()
    {
        $idx = $this->stream->ftell();

        return $idx === false ? 0 : $idx;
    }

    /**
     * Gets the character that starts at the given byte offset
     *
     * The stream position is restored before returning.
     *
     * @param int $offset Byte offset from the start of the stream
     * @return string|false|int The character; self::EOF when $offset is negative or beyond
     *                          the end of the stream; false when no valid character starts
     *                          at $offset (including exactly at the end)
     */
    public function charAt($offset)
    {
        // The offset is fed to fseek(), so it has to be checked against the size
        // in bytes; the character count is smaller for multi-byte content.
        if ($offset < 0 || $offset > $this->byteLength) {
            return self::EOF;
        }

        $curPos = $this->stream->ftell();
        $this->stream->fseek($offset);
        $c = $this->nextChar();
        $this->stream->fseek($curPos);

        return $c;
    }

    /**
     * Advances the stream by the given number of bytes
     *
     * @param int $len Number of bytes to move relative to the current position
     * @return bool True on success
     */
    public function advance($len)
    {
        return $this->stream->fseek($len, SEEK_CUR) === 0;
    }

    /**
     * Reads and validates one UTF-8 sequence at the current stream position.
     *
     * Rejects stray continuation bytes, truncated sequences, overlong forms,
     * surrogates and values above U+10FFFF. The bytes read so far stay consumed
     * when a sequence is rejected.
     *
     * @return string|false|null The character's bytes, false for a malformed
     *                           sequence, null when already at the end of the stream
     */
    private function readChar()
    {
        $lead = $this->stream->fgetc();
        if ($lead === false) {
            return null; // EOF
        }

        $n = ord($lead);
        if ($n < 0x80) {
            return $lead; // ASCII
        }

        // The lead byte fixes the number of continuation bytes and the smallest
        // code point that may legitimately be encoded with that many bytes.
        if ($n >= 0xC2 && $n <= 0xDF) {
            $extra = 1;
            $min = 0x80;
            $cp = $n & 0x1F;
        } elseif ($n >= 0xE0 && $n <= 0xEF) {
            $extra = 2;
            $min = 0x800;
            $cp = $n & 0x0F;
        } elseif ($n >= 0xF0 && $n <= 0xF4) {
            $extra = 3;
            $min = 0x10000;
            $cp = $n & 0x07;
        } else {
            // continuation byte used as a lead byte, overlong lead (0xC0/0xC1) or out of range
            return false;
        }

        $bytes = $lead;
        for ($i = 0; $i < $extra; $i++) {
            $next = $this->stream->fgetc();
            // A continuation byte must look like 10xxxxxx; false means the
            // stream ended in the middle of the sequence.
            if ($next === false || (ord($next) & 0xC0) !== 0x80) {
                return false;
            }
            $bytes .= $next;
            $cp = ($cp << 6) | (ord($next) & 0x3F);
        }

        if ($cp < $min || $cp > 0x10FFFF || ($cp >= 0xD800 && $cp <= 0xDFFF)) {
            return false;
        }

        // The validated bytes already are the UTF-8 encoding of the character.
        return $bytes;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment