Skip to content

Instantly share code, notes, and snippets.

@AlexanderKomkov
Last active November 29, 2020 11:36
Show Gist options
  • Save AlexanderKomkov/6f7ef3ecc8dc31997a826eaa55b11ee6 to your computer and use it in GitHub Desktop.
Save AlexanderKomkov/6f7ef3ecc8dc31997a826eaa55b11ee6 to your computer and use it in GitHub Desktop.
Читатель больших Excel XLSX файлов на PHP
<?php
$inputFileName = public_path() . '/test.xlsx';
$xlsxReader = \App\Repositories\Xlsx\Reader::create();
$xlsxReader->setStorageSharedString('file');
$xlsxReader->open($inputFileName);
$xlsxReader->reading(function($sheet, $row, $cells) {
if (in_array($row, [1,2,3,4,5])) {
dump($cells);
};
});
$xlsxReader->close();
dump($xlsxReader->getMaxMemoryUsage());
dump($xlsxReader->getTime());
<?php
namespace App\Repositories\Xlsx;
class Reader
{
/** @var integer memory_get_usage() **/
protected $start_memory = 0;
/** @var integer memory_get_usage() **/
protected $max_memory = 0;
/** @var integer mmicrotime(true) **/
protected $start_time = 0;
/** @var \ZipArchive **/
protected $zip;
/** @var boolean **/
protected $is_open_zip = false;
/** @var array **/
protected $zip_files = [];
/** @var string ram|file **/
protected $storage_shared_string = 'ram';
/** @var string **/
protected $dir_shared_string = '';
/** @var string **/
protected $name_shared_string = '';
/** @var string **/
protected $file_shared_string = '';
/** @var string **/
protected $shared_string = '';
/** @var array **/
protected $sheets = [];
/** @var sresource **/
protected $stream_shared_string;
/** @var integer **/
protected $bytes_shared_string = 200;
/** @var XML Resource **/
protected $parser = null;
/** @var string **/
protected $encodingXmlParser = 'UTF-8';
/** @var string **/
protected $current_file = '';
/** @var string **/
protected $current_content = '';
/** @var string **/
protected $current_tag = '';
/** @var array **/
protected $current_attribs = [];
/** @var array **/
protected $current_row = [];
/** @var array **/
protected $current_cell_attribs = [];
/** @var integer **/
protected $shared_string_index = 0;
/** @var integer **/
protected $current_sheet_index = 1;
/** @var array **/
protected $shared_string_data = [];
/** @var array **/
protected $rows = [];
/**
* Construct
*
* @return void
*/
private function __construct()
{
$this->start_memory = memory_get_usage();
$this->start_time = microtime(true);
}
/**
* Create Reader
*
* @return \App\Repositories\Xlsx\Reader
*/
public static function create()
{
$class = static::class;
return new $class;
}
/**
* Start Tag
*
* @param [object] $parser
* @param [string] $name
* @param [array] $attribs
*
* @return [void]
*/
protected function startTag($parser, $name, $attribs)
{
$this->current_tag = $name;
$this->current_attribs = $attribs;
switch ($name)
{
case 'ROW':
$this->current_row['attribs'] = $attribs;
break;
case 'C':
$this->current_cell_attribs = $attribs;
break;
case 'T':
case 'V':
$this->current_content = '';
break;
}
}
/**
* End Tag
*
* @param [object] $parser
* @param [string] $name
*
* @return [void]
*/
protected function endTag($parser, $name)
{
$this->current_tag = '';
$this->current_attribs = [];
switch ($name)
{
case 'ROW':
$this->saveRowSheet();
$this->current_row = [];
break;
case 'C':
$this->current_cell_attribs = [];
break;
case 'T':
$this->saveSharedStringData($this->current_content);
$this->shared_string_index++;
break;
case 'V':
$this->current_row['cells'][] = [
'data' => $this->current_content,
'attribs' => $this->current_cell_attribs
];
break;
}
}
/**
* Contents
*
* @param [type] $parser [description]
* @param [type] $data [description]
*
* @return [void]
*/
protected function contents($parser, $data)
{
switch ($this->current_tag)
{
case 'T':
case 'V':
$this->current_content .= $data;
break;
}
}
/**
* Start Save Shared Strings
*
* @return [void]
*/
protected function startSaveSharedStrings()
{
switch ($this->storage_shared_string)
{
case 'file':
if (!is_dir($this->dir_shared_string)) mkdir($dir, 0777, true);
$this->stream_shared_string = fopen($this->file_shared_string, 'w');
break;
}
}
/**
* End Save Shared Strings
*
* @return [void]
*/
protected function endSaveSharedStrings()
{
switch ($this->storage_shared_string)
{
case 'file':
fclose($this->stream_shared_string);
break;
}
}
/**
* Save Shared Strings
*
* @param [sresource] $[stream]
*
* @return [void]
*/
protected function saveSharedStrings($stream)
{
if ($stream)
{
$this->current_file = $this->shared_string;
$this->parser = xml_parser_create($this->encodingXmlParser);
xml_set_object($this->parser, $this);
xml_set_element_handler($this->parser, "startTag", "endTag");
xml_set_character_data_handler($this->parser, "contents");
$this->startSaveSharedStrings();
while (!feof($stream))
{
$data = fread($stream, 4096);
xml_parse($this->parser, $data, feof($stream));
}
$this->endSaveSharedStrings();
xml_parser_free($this->parser);
$this->parser = null;
}
}
/**
* Set Storage Shared String
*
* @param [string] $[storage]
*
* @return [void]
*/
public function setStorageSharedString($storage)
{
$this->storage_shared_string = (in_array($storage, ['ram', 'file'])) ? $storage : 'ram';
}
/**
* Set Dir Shared String
*
* @param [string] $[dir]
*
* @return [void]
*/
public function setDirSharedString($dir)
{
if (is_dir($dir)) {
$this->dir_shared_string = $dir;
$this->file_shared_string = $this->dir_shared_string . '/' . $this->name_shared_string . '.txt';
}
}
/**
* Set Name Shared String
*
* @param [string] $[name]
*
* @return [void]
*/
public function setNameSharedString($name)
{
if (is_string($name) && !empty($name)) {
$this->name_shared_string = $dir;
$this->file_shared_string = $this->dir_shared_string . '/' . $this->name_shared_string . '.txt';
}
}
/**
* Set Encoding Xml Parser
*
* @param [string] $[encoding]
*
* @return [void]
*/
public function setEncodingXmlParser($encoding)
{
$this->encodingXmlParser = $encoding;
}
/**
* Set Bytes Shared String
*
* @param [integer] $[bytes]
*
* @return [void]
*/
public function setBytesSharedString($bytes)
{
$this->bytes_shared_string = $bytes;
}
/**
* Open XLSX
*
* @param [string] $[path] [<path to XLSX>]
*
* @return [void]
*/
public function open($path)
{
if (!empty($path) && file_exists($path) && ends_with($path, '.xlsx'))
{
if (empty($this->dir_shared_string)) {
$this->dir_shared_string = dirname($path);
}
if (empty($this->name_shared_string)) {
$this->name_shared_string = 'shared_string_' . str_slug(str_replace('.', ' ', basename($path)), '_');
}
$this->file_shared_string = $this->dir_shared_string . '/' . $this->name_shared_string . '.txt';
$this->zip = new \ZipArchive;
if ($this->zip->open($path))
{
$this->is_open_zip = true;
for ($i = 0; $i < $this->zip->numFiles; $i++)
{
$filename = $this->zip->getNameIndex($i);
if ($filename == 'xl/sharedStrings.xml') {
$this->shared_string = $filename;
}
if (starts_with($filename, 'xl/worksheets/')) {
$this->sheets[] = $filename;
}
}
$this->sheets = array_values(array_sort($this->sheets));
if (!empty($this->shared_string))
{
$stream = $this->zip->getStream($this->shared_string);
if ($stream)
{
$this->saveSharedStrings($stream);
fclose($stream);
}
$this->updateMaxMemory();
}
}
}
}
/**
* Save Shared String Data
*
* @param [string] $data [description]
*
* @return [void]
*/
protected function saveSharedStringData($data)
{
$data = trim($data);
switch ($this->storage_shared_string)
{
case 'file':
$str = $this->shared_string_index . ':' . $data;
$bytes = strlen($str) + 2;
if ($this->bytes_shared_string <= $bytes) {
$str = $this->shared_string_index . ':';
$bytes = strlen($str) + 2;
}
$str = $str . str_repeat(' ', $this->bytes_shared_string - $bytes) . "\r\n";
fwrite($this->stream_shared_string, $str);
break;
default:
$this->shared_string_data[$this->shared_string_index] = $data;
break;
}
}
/**
* Get Shared Data To Index
*
* @param [string] $index [index]
*
* @return [void]
*/
protected function getSharedData($index)
{
$index = (int) trim($index);
switch ($this->storage_shared_string)
{
case 'file':
fseek($this->stream_shared_string, $index * $this->bytes_shared_string);
if (($data = fgets($this->stream_shared_string)) !== false)
{
$start = (string) $index . ':';
if (starts_with($data, $start))
{
return trim(str_replace($start, '', $data));
}
}
return '';
break;
}
return (isset($this->shared_string_data[$index])) ? $this->shared_string_data[$index] : '';
}
/**
* Save Row Sheet To Cache
*
* @param [string] $data [description]
*
* @return [void]
*/
protected function saveRowSheet()
{
$index = $this->current_row['attribs']['R'];
$row = [];
if (isset($this->current_row['cells']))
{
foreach ($this->current_row['cells'] as $cell)
{
$t = isset($cell['attribs']['T']) ? $cell['attribs']['T'] : false;
$r = isset($cell['attribs']['R']) ? $cell['attribs']['R'] : false;
$sym_cell = (!empty($r)) ? str_replace($index, '', $r) : false;
$value = (!empty($t) && $t == 's') ? $this->getSharedData($cell['data']) : trim($cell['data']);
$row[$sym_cell] = $value;
}
}
$this->rows[$index] = $row;
}
/**
* Get Memory Usage
*
* @return [string]
*/
protected function updateMaxMemory()
{
$bytes = memory_get_usage() - $this->start_memory;
if ($bytes > $this->max_memory) {
$this->max_memory = $bytes;
}
}
/**
* Format Bytes
*
* @param [integer] $[bytes]
* @return [string]
*/
protected function formatBytes($bytes)
{
if ($bytes < 1000 * 1024) {
return number_format($bytes / 1024, 2) . " KB";
}
elseif ($bytes < 1000 * 1048576) {
return number_format($bytes / 1048576, 2) . " MB";
}
elseif ($bytes < 1000 * 1073741824) {
return number_format($bytes / 1073741824, 2) . " GB";
}
else {
return number_format($bytes / 1099511627776, 2) . " TB";
}
}
/**
* Get Sheets
*
* @return [boolean]
*/
public function reading(\Closure $callback)
{
$this->updateMaxMemory();
if (!empty($this->sheets))
{
foreach ($this->sheets as $i => $sheet)
{
$streamSheet = $this->zip->getStream($sheet);
if ($streamSheet)
{
$this->stream_shared_string = fopen($this->file_shared_string, 'r');
$this->current_file = $sheet;
$this->current_sheet_index = $i + 1;
$this->parser = xml_parser_create($this->encodingXmlParser);
xml_set_object($this->parser, $this);
xml_set_element_handler($this->parser, "startTag", "endTag");
xml_set_character_data_handler($this->parser, "contents");
while (!feof($streamSheet))
{
$data = fread($streamSheet, 4096);
xml_parse($this->parser, $data, feof($streamSheet));
if (!empty($this->rows))
{
foreach ($this->rows as $row => $cells)
{
$callback($this->current_sheet_index, $row, $cells);
}
}
$this->updateMaxMemory();
$this->rows = [];
}
xml_parser_free($this->parser);
$this->parser = null;
fclose($this->stream_shared_string);
fclose($streamSheet);
}
}
}
return true;
}
/**
* Get Time
*
* @return [string]
*/
public function getTime()
{
$time = microtime(true) - $this->start_time;
return 'Временная метка: ' . round($time, 3) . ' s';
}
/**
* Get Memory Usage
*
* @return [string]
*/
public function getMemoryUsage()
{
$bytes = memory_get_usage() - $this->start_memory;
$f_bytes = $this->formatBytes($bytes);
return 'Использовано памяти: ' . $f_bytes;
}
/**
* Get Memory Usage
*
* @return [string]
*/
public function getMaxMemoryUsage()
{
$this->updateMaxMemory();
$bytes = $this->max_memory;
$f_bytes = $this->formatBytes($bytes);
return 'Максимальное значение использованной памяти: ' . $f_bytes;
}
/**
* Close XLSX
*
* @return [void]
*/
public function close()
{
$this->updateMaxMemory();
if ($this->is_open_zip) {
if ($this->zip->close()) $this->is_open_zip = false;
}
if ($this->storage_shared_string == 'file' && file_exists($this->file_shared_string)) {
unlink($this->file_shared_string);
}
$this->shared_string_data = [];
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment