Last active
February 15, 2017 16:00
-
-
Save dlundgren/490c6c86e30f6b5f2283 to your computer and use it in GitHub Desktop.
Emulated file stream that is Unicode aware
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Feral; | |
/**
 * Represents the given contents as a file.
 *
 * The contents are copied into a php://memory stream, the stream is rewound
 * to the start, and the open handle is handed to the parent constructor.
 *
 * NOTE(review): the Unicode class published alongside this one rejects
 * anything that is not a stream wrapper object or an \SplFileObject. Confirm
 * that UnicodeFile (not visible here) really accepts a raw stream resource.
 *
 * @package Feral
 */
class MemoryFile
    extends UnicodeFile
{
    /**
     * @param string $contents The data to expose as a file
     *
     * @throws \RuntimeException If the memory stream cannot be opened, written or rewound
     */
    public function __construct($contents)
    {
        $memFile = fopen('php://memory', 'r+');
        if ($memFile === false) {
            throw new \RuntimeException('Unable to open php://memory');
        }

        // Do not hand a half-initialised stream to the parent: release it and fail loudly.
        if (fwrite($memFile, $contents) === false || !rewind($memFile)) {
            fclose($memFile);
            throw new \RuntimeException('Unable to write the contents to php://memory');
        }

        parent::__construct($memFile);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Feral; | |
/**
 * Represents an open file handle to be used in place of an SplFileObject
 *
 * Every method forwards to the PHP stream function of the same purpose,
 * passing the wrapped handle as the first argument.
 *
 * @package Feral
 */
class ResourceStreamWrapper
{
    /**
     * @var resource
     */
    private $handle;

    /**
     * @param resource $handle An open stream resource (for example the result of fopen())
     *
     * @throws \InvalidArgumentException If $handle is not an open stream resource
     */
    public function __construct($handle)
    {
        // Fail here rather than on the first read, where the cause is much harder to trace.
        if (!is_resource($handle) || get_resource_type($handle) !== 'stream') {
            throw new \InvalidArgumentException('Handle must be an open stream resource');
        }

        $this->handle = $handle;
    }

    /**
     * Tests for end-of-file on a file pointer
     *
     * @see https://www.php.net/manual/en/function.feof.php
     *
     * @return bool true if the file pointer is at EOF or an error occurs (including socket timeout); otherwise returns false.
     */
    public function eof()
    {
        return feof($this->handle);
    }

    /**
     * Rewind the position of a file pointer
     *
     * @see https://www.php.net/manual/en/function.rewind.php
     *
     * @return bool true on success or false on failure.
     */
    public function rewind()
    {
        return rewind($this->handle);
    }

    /**
     * Seeks on a file pointer
     *
     * @see https://www.php.net/manual/en/function.fseek.php
     *
     * @param int $offset
     * @param int $whence [optional] One of SEEK_SET, SEEK_CUR or SEEK_END
     * @return int Upon success, returns 0; otherwise, returns -1. Note that seeking past EOF is not considered an error.
     */
    public function fseek($offset, $whence = SEEK_SET)
    {
        return fseek($this->handle, $offset, $whence);
    }

    /**
     * Gets character from file pointer
     *
     * @see https://www.php.net/manual/en/function.fgetc.php
     *
     * @return string|false a string containing a single byte read from the handle, or false on EOF.
     */
    public function fgetc()
    {
        return fgetc($this->handle);
    }

    /**
     * Returns the current position of the file read/write pointer
     *
     * @see https://www.php.net/manual/en/function.ftell.php
     *
     * @return int|false The position as a byte offset, or false if an error occurs.
     */
    public function ftell()
    {
        return ftell($this->handle);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Feral; | |
/**
 * Reads a stream one UTF-8 encoded character at a time
 *
 * The regular string functions in PHP work on bytes, so the decoding is done
 * by hand here.
 *
 * This code does not use mb_* functions to help prevent the users settings from interfering.
 *
 * Positions (index(), advance(), and the offset given to charAt()) are byte
 * offsets because they are passed straight to the stream; length() counts
 * characters.
 *
 * @package Feral
 */
class Unicode
{
    const EOF = -1;

    /**
     * The file object
     *
     * @var \SplFileObject|ResourceStreamWrapper
     */
    protected $stream;

    /**
     * The length of the file, in characters
     *
     * @var int
     */
    protected $length = 0;

    /**
     * Wraps the stream and counts the characters between its current position
     * and the end, validating the encoding on the way. The stream is rewound
     * afterwards.
     *
     * @param \SplFileObject|ResourceStreamWrapper $stream
     *
     * @throws \InvalidArgumentException If the stream is of an unsupported type or contains invalid UTF-8
     */
    public function __construct($stream)
    {
        // ResourceWrapper is the name the original check used; it is still
        // honoured in case a class of that name exists elsewhere in the project.
        $supported = $stream instanceof \SplFileObject
            || $stream instanceof ResourceStreamWrapper
            || $stream instanceof ResourceWrapper;
        if (!$supported) {
            throw new \InvalidArgumentException(
                "Stream must be an instance of ResourceStreamWrapper or \\SplFileObject"
            );
        }

        $this->stream = $stream;

        // pre-calculate the length based on unicode...
        for (; ;) {
            $char = $this->readChar();
            if ($char !== false && $char !== null) {
                $this->length++;
                continue;
            }

            // A clean end of stream stops the count; a malformed sequence
            // (including one cut short by the end of the stream) or a read
            // failure that is not EOF is an error.
            if ($char === false && $this->stream->eof()) {
                break;
            }

            throw new \InvalidArgumentException("Invalid unicode around character $this->length");
        }

        $this->stream->rewind();
    }

    /**
     * Returns the length of the file
     *
     * @return int The number of characters counted by the constructor
     */
    public function length()
    {
        return $this->length;
    }

    /**
     * Peeks at the next character
     *
     * @return string|bool The character, or false at EOF or on an invalid sequence
     */
    public function peekNextChar()
    {
        return $this->nextChar(true);
    }

    /**
     * Encodes a code point as UTF-8
     *
     * Values in the surrogate range (0xD800-0xDFFF) are encoded with the
     * three byte layout as given; the result is not well-formed UTF-8.
     *
     * @param int $u The code point
     * @return string
     *
     * @throws \InvalidArgumentException If the value is outside 0..0x10FFFF
     */
    public function fromCharCode($u)
    {
        $code = intval($u);
        if ($code < 0 || $code > 0x10FFFF) {
            throw new \InvalidArgumentException("Invalid unicode code point $code");
        }

        if ($code < 0x80) {
            return chr($code);
        }

        if ($code < 0x800) {
            return chr(0xC0 | ($code >> 6))
                . chr(0x80 | ($code & 0x3F));
        }

        if ($code < 0x10000) {
            return chr(0xE0 | ($code >> 12))
                . chr(0x80 | (($code >> 6) & 0x3F))
                . chr(0x80 | ($code & 0x3F));
        }

        return chr(0xF0 | ($code >> 18))
            . chr(0x80 | (($code >> 12) & 0x3F))
            . chr(0x80 | (($code >> 6) & 0x3F))
            . chr(0x80 | ($code & 0x3F));
    }

    /**
     * Returns the next character
     *
     * @see https://en.wikipedia.org/wiki/UTF-8
     * @param bool $peek When true the stream position is put back where it was
     * @return string|bool The character, or false at EOF or on an invalid sequence
     */
    public function nextChar($peek = false)
    {
        $start = $peek ? $this->stream->ftell() : false;
        $char  = $this->readChar();

        // Restore by absolute position: the number of bytes consumed varies
        // when a sequence is malformed or cut short. Nothing is consumed at a
        // clean EOF, and seeking there would reset the stream's EOF state.
        if ($peek && $char !== false && $start !== false) {
            $this->stream->fseek($start);
        }

        return $char === null ? false : $char;
    }

    /**
     * Returns whether or not the file is eof
     *
     * @return bool
     */
    public function eof()
    {
        return $this->stream->eof();
    }

    /**
     * Returns the position in the file that the stream is currently at
     *
     * @return int The byte offset, or 0 when the stream cannot report one
     */
    public function index()
    {
        $idx = $this->stream->ftell();

        return $idx === false ? 0 : $idx;
    }

    /**
     * Gets the character at the given offset
     *
     * The stream position is restored afterwards.
     *
     * NOTE(review): $offset is handed to fseek() (a byte offset) but is
     * bounds-checked against the character count; the two only agree for
     * single-byte content. Confirm which unit callers pass.
     *
     * @param int $offset
     * @return string|bool|int The character, false when none can be decoded there, or self::EOF when out of bounds
     */
    public function charAt($offset)
    {
        if ($offset < 0 || $offset > $this->length) {
            return self::EOF;
        }

        $curPos = $this->stream->ftell();
        if ($this->stream->fseek($offset) !== 0) {
            return self::EOF;
        }

        $c = $this->nextChar();

        // Only restore a position that was actually reported.
        if ($curPos !== false) {
            $this->stream->fseek($curPos);
        }

        return $c;
    }

    /**
     * Advances the stream by length
     *
     * @param int $len The number of bytes to move, relative to the current position
     * @return bool True on success
     */
    public function advance($len)
    {
        return $this->stream->fseek($len, SEEK_CUR) === 0;
    }

    /**
     * Reads and decodes one UTF-8 sequence from the current stream position
     *
     * @return string|bool|null The character, false when no byte could be read, null on a malformed sequence
     */
    private function readChar()
    {
        $lead = $this->stream->fgetc();
        if ($lead === false) {
            return false;
        }

        $byte = ord($lead);
        if ($byte < 0x80) {
            return $lead;
        }

        // 0x80-0xBF are continuation bytes, 0xC0/0xC1 can only start overlong
        // forms, and nothing above 0xF4 can start a code point <= 0x10FFFF.
        if ($byte < 0xC2 || $byte > 0xF4) {
            return null;
        }

        if ($byte < 0xE0) {
            $extra   = 1;
            $minimum = 0x80;
        } elseif ($byte < 0xF0) {
            $extra   = 2;
            $minimum = 0x800;
        } else {
            $extra   = 3;
            $minimum = 0x10000;
        }

        // The lead byte contributes 5, 4 or 3 payload bits respectively.
        $code = $byte & (0x3F >> $extra);
        for ($i = 0; $i < $extra; $i++) {
            $next = $this->stream->fgetc();
            if ($next === false || (ord($next) & 0xC0) !== 0x80) {
                // cut short, or not a continuation byte
                return null;
            }
            $code = ($code << 6) | (ord($next) & 0x3F);
        }

        // Reject overlong encodings, surrogates and values beyond the Unicode range.
        if ($code < $minimum || $code > 0x10FFFF || ($code >= 0xD800 && $code <= 0xDFFF)) {
            return null;
        }

        return $this->fromCharCode($code);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment