Last active
November 29, 2020 11:36
-
-
Save AlexanderKomkov/6f7ef3ecc8dc31997a826eaa55b11ee6 to your computer and use it in GitHub Desktop.
Читатель больших Excel XLSX файлов на PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
$inputFileName = public_path() . '/test.xlsx'; | |
$xlsxReader = \App\Repositories\Xlsx\Reader::create(); | |
$xlsxReader->setStorageSharedString('file'); | |
$xlsxReader->open($inputFileName); | |
$xlsxReader->reading(function($sheet, $row, $cells) { | |
if (in_array($row, [1,2,3,4,5])) { | |
dump($cells); | |
}; | |
}); | |
$xlsxReader->close(); | |
dump($xlsxReader->getMaxMemoryUsage()); | |
dump($xlsxReader->getTime()); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Repositories\Xlsx; | |
class Reader | |
{ | |
/** @var integer memory_get_usage() **/ | |
protected $start_memory = 0; | |
/** @var integer memory_get_usage() **/ | |
protected $max_memory = 0; | |
/** @var integer mmicrotime(true) **/ | |
protected $start_time = 0; | |
/** @var \ZipArchive **/ | |
protected $zip; | |
/** @var boolean **/ | |
protected $is_open_zip = false; | |
/** @var array **/ | |
protected $zip_files = []; | |
/** @var string ram|file **/ | |
protected $storage_shared_string = 'ram'; | |
/** @var string **/ | |
protected $dir_shared_string = ''; | |
/** @var string **/ | |
protected $name_shared_string = ''; | |
/** @var string **/ | |
protected $file_shared_string = ''; | |
/** @var string **/ | |
protected $shared_string = ''; | |
/** @var array **/ | |
protected $sheets = []; | |
/** @var sresource **/ | |
protected $stream_shared_string; | |
/** @var integer **/ | |
protected $bytes_shared_string = 200; | |
/** @var XML Resource **/ | |
protected $parser = null; | |
/** @var string **/ | |
protected $encodingXmlParser = 'UTF-8'; | |
/** @var string **/ | |
protected $current_file = ''; | |
/** @var string **/ | |
protected $current_content = ''; | |
/** @var string **/ | |
protected $current_tag = ''; | |
/** @var array **/ | |
protected $current_attribs = []; | |
/** @var array **/ | |
protected $current_row = []; | |
/** @var array **/ | |
protected $current_cell_attribs = []; | |
/** @var integer **/ | |
protected $shared_string_index = 0; | |
/** @var integer **/ | |
protected $current_sheet_index = 1; | |
/** @var array **/ | |
protected $shared_string_data = []; | |
/** @var array **/ | |
protected $rows = []; | |
/** | |
* Construct | |
* | |
* @return void | |
*/ | |
private function __construct() | |
{ | |
$this->start_memory = memory_get_usage(); | |
$this->start_time = microtime(true); | |
} | |
/** | |
* Create Reader | |
* | |
* @return \App\Repositories\Xlsx\Reader | |
*/ | |
public static function create() | |
{ | |
$class = static::class; | |
return new $class; | |
} | |
/** | |
* Start Tag | |
* | |
* @param [object] $parser | |
* @param [string] $name | |
* @param [array] $attribs | |
* | |
* @return [void] | |
*/ | |
protected function startTag($parser, $name, $attribs) | |
{ | |
$this->current_tag = $name; | |
$this->current_attribs = $attribs; | |
switch ($name) | |
{ | |
case 'ROW': | |
$this->current_row['attribs'] = $attribs; | |
break; | |
case 'C': | |
$this->current_cell_attribs = $attribs; | |
break; | |
case 'T': | |
case 'V': | |
$this->current_content = ''; | |
break; | |
} | |
} | |
/** | |
* End Tag | |
* | |
* @param [object] $parser | |
* @param [string] $name | |
* | |
* @return [void] | |
*/ | |
protected function endTag($parser, $name) | |
{ | |
$this->current_tag = ''; | |
$this->current_attribs = []; | |
switch ($name) | |
{ | |
case 'ROW': | |
$this->saveRowSheet(); | |
$this->current_row = []; | |
break; | |
case 'C': | |
$this->current_cell_attribs = []; | |
break; | |
case 'T': | |
$this->saveSharedStringData($this->current_content); | |
$this->shared_string_index++; | |
break; | |
case 'V': | |
$this->current_row['cells'][] = [ | |
'data' => $this->current_content, | |
'attribs' => $this->current_cell_attribs | |
]; | |
break; | |
} | |
} | |
/** | |
* Contents | |
* | |
* @param [type] $parser [description] | |
* @param [type] $data [description] | |
* | |
* @return [void] | |
*/ | |
protected function contents($parser, $data) | |
{ | |
switch ($this->current_tag) | |
{ | |
case 'T': | |
case 'V': | |
$this->current_content .= $data; | |
break; | |
} | |
} | |
/** | |
* Start Save Shared Strings | |
* | |
* @return [void] | |
*/ | |
protected function startSaveSharedStrings() | |
{ | |
switch ($this->storage_shared_string) | |
{ | |
case 'file': | |
if (!is_dir($this->dir_shared_string)) mkdir($dir, 0777, true); | |
$this->stream_shared_string = fopen($this->file_shared_string, 'w'); | |
break; | |
} | |
} | |
/** | |
* End Save Shared Strings | |
* | |
* @return [void] | |
*/ | |
protected function endSaveSharedStrings() | |
{ | |
switch ($this->storage_shared_string) | |
{ | |
case 'file': | |
fclose($this->stream_shared_string); | |
break; | |
} | |
} | |
/** | |
* Save Shared Strings | |
* | |
* @param [sresource] $[stream] | |
* | |
* @return [void] | |
*/ | |
protected function saveSharedStrings($stream) | |
{ | |
if ($stream) | |
{ | |
$this->current_file = $this->shared_string; | |
$this->parser = xml_parser_create($this->encodingXmlParser); | |
xml_set_object($this->parser, $this); | |
xml_set_element_handler($this->parser, "startTag", "endTag"); | |
xml_set_character_data_handler($this->parser, "contents"); | |
$this->startSaveSharedStrings(); | |
while (!feof($stream)) | |
{ | |
$data = fread($stream, 4096); | |
xml_parse($this->parser, $data, feof($stream)); | |
} | |
$this->endSaveSharedStrings(); | |
xml_parser_free($this->parser); | |
$this->parser = null; | |
} | |
} | |
/** | |
* Set Storage Shared String | |
* | |
* @param [string] $[storage] | |
* | |
* @return [void] | |
*/ | |
public function setStorageSharedString($storage) | |
{ | |
$this->storage_shared_string = (in_array($storage, ['ram', 'file'])) ? $storage : 'ram'; | |
} | |
/** | |
* Set Dir Shared String | |
* | |
* @param [string] $[dir] | |
* | |
* @return [void] | |
*/ | |
public function setDirSharedString($dir) | |
{ | |
if (is_dir($dir)) { | |
$this->dir_shared_string = $dir; | |
$this->file_shared_string = $this->dir_shared_string . '/' . $this->name_shared_string . '.txt'; | |
} | |
} | |
/** | |
* Set Name Shared String | |
* | |
* @param [string] $[name] | |
* | |
* @return [void] | |
*/ | |
public function setNameSharedString($name) | |
{ | |
if (is_string($name) && !empty($name)) { | |
$this->name_shared_string = $dir; | |
$this->file_shared_string = $this->dir_shared_string . '/' . $this->name_shared_string . '.txt'; | |
} | |
} | |
/** | |
* Set Encoding Xml Parser | |
* | |
* @param [string] $[encoding] | |
* | |
* @return [void] | |
*/ | |
public function setEncodingXmlParser($encoding) | |
{ | |
$this->encodingXmlParser = $encoding; | |
} | |
/** | |
* Set Bytes Shared String | |
* | |
* @param [integer] $[bytes] | |
* | |
* @return [void] | |
*/ | |
public function setBytesSharedString($bytes) | |
{ | |
$this->bytes_shared_string = $bytes; | |
} | |
/** | |
* Open XLSX | |
* | |
* @param [string] $[path] [<path to XLSX>] | |
* | |
* @return [void] | |
*/ | |
public function open($path) | |
{ | |
if (!empty($path) && file_exists($path) && ends_with($path, '.xlsx')) | |
{ | |
if (empty($this->dir_shared_string)) { | |
$this->dir_shared_string = dirname($path); | |
} | |
if (empty($this->name_shared_string)) { | |
$this->name_shared_string = 'shared_string_' . str_slug(str_replace('.', ' ', basename($path)), '_'); | |
} | |
$this->file_shared_string = $this->dir_shared_string . '/' . $this->name_shared_string . '.txt'; | |
$this->zip = new \ZipArchive; | |
if ($this->zip->open($path)) | |
{ | |
$this->is_open_zip = true; | |
for ($i = 0; $i < $this->zip->numFiles; $i++) | |
{ | |
$filename = $this->zip->getNameIndex($i); | |
if ($filename == 'xl/sharedStrings.xml') { | |
$this->shared_string = $filename; | |
} | |
if (starts_with($filename, 'xl/worksheets/')) { | |
$this->sheets[] = $filename; | |
} | |
} | |
$this->sheets = array_values(array_sort($this->sheets)); | |
if (!empty($this->shared_string)) | |
{ | |
$stream = $this->zip->getStream($this->shared_string); | |
if ($stream) | |
{ | |
$this->saveSharedStrings($stream); | |
fclose($stream); | |
} | |
$this->updateMaxMemory(); | |
} | |
} | |
} | |
} | |
/** | |
* Save Shared String Data | |
* | |
* @param [string] $data [description] | |
* | |
* @return [void] | |
*/ | |
protected function saveSharedStringData($data) | |
{ | |
$data = trim($data); | |
switch ($this->storage_shared_string) | |
{ | |
case 'file': | |
$str = $this->shared_string_index . ':' . $data; | |
$bytes = strlen($str) + 2; | |
if ($this->bytes_shared_string <= $bytes) { | |
$str = $this->shared_string_index . ':'; | |
$bytes = strlen($str) + 2; | |
} | |
$str = $str . str_repeat(' ', $this->bytes_shared_string - $bytes) . "\r\n"; | |
fwrite($this->stream_shared_string, $str); | |
break; | |
default: | |
$this->shared_string_data[$this->shared_string_index] = $data; | |
break; | |
} | |
} | |
/** | |
* Get Shared Data To Index | |
* | |
* @param [string] $index [index] | |
* | |
* @return [void] | |
*/ | |
protected function getSharedData($index) | |
{ | |
$index = (int) trim($index); | |
switch ($this->storage_shared_string) | |
{ | |
case 'file': | |
fseek($this->stream_shared_string, $index * $this->bytes_shared_string); | |
if (($data = fgets($this->stream_shared_string)) !== false) | |
{ | |
$start = (string) $index . ':'; | |
if (starts_with($data, $start)) | |
{ | |
return trim(str_replace($start, '', $data)); | |
} | |
} | |
return ''; | |
break; | |
} | |
return (isset($this->shared_string_data[$index])) ? $this->shared_string_data[$index] : ''; | |
} | |
/** | |
* Save Row Sheet To Cache | |
* | |
* @param [string] $data [description] | |
* | |
* @return [void] | |
*/ | |
protected function saveRowSheet() | |
{ | |
$index = $this->current_row['attribs']['R']; | |
$row = []; | |
if (isset($this->current_row['cells'])) | |
{ | |
foreach ($this->current_row['cells'] as $cell) | |
{ | |
$t = isset($cell['attribs']['T']) ? $cell['attribs']['T'] : false; | |
$r = isset($cell['attribs']['R']) ? $cell['attribs']['R'] : false; | |
$sym_cell = (!empty($r)) ? str_replace($index, '', $r) : false; | |
$value = (!empty($t) && $t == 's') ? $this->getSharedData($cell['data']) : trim($cell['data']); | |
$row[$sym_cell] = $value; | |
} | |
} | |
$this->rows[$index] = $row; | |
} | |
/** | |
* Get Memory Usage | |
* | |
* @return [string] | |
*/ | |
protected function updateMaxMemory() | |
{ | |
$bytes = memory_get_usage() - $this->start_memory; | |
if ($bytes > $this->max_memory) { | |
$this->max_memory = $bytes; | |
} | |
} | |
/** | |
* Format Bytes | |
* | |
* @param [integer] $[bytes] | |
* @return [string] | |
*/ | |
protected function formatBytes($bytes) | |
{ | |
if ($bytes < 1000 * 1024) { | |
return number_format($bytes / 1024, 2) . " KB"; | |
} | |
elseif ($bytes < 1000 * 1048576) { | |
return number_format($bytes / 1048576, 2) . " MB"; | |
} | |
elseif ($bytes < 1000 * 1073741824) { | |
return number_format($bytes / 1073741824, 2) . " GB"; | |
} | |
else { | |
return number_format($bytes / 1099511627776, 2) . " TB"; | |
} | |
} | |
/** | |
* Get Sheets | |
* | |
* @return [boolean] | |
*/ | |
public function reading(\Closure $callback) | |
{ | |
$this->updateMaxMemory(); | |
if (!empty($this->sheets)) | |
{ | |
foreach ($this->sheets as $i => $sheet) | |
{ | |
$streamSheet = $this->zip->getStream($sheet); | |
if ($streamSheet) | |
{ | |
$this->stream_shared_string = fopen($this->file_shared_string, 'r'); | |
$this->current_file = $sheet; | |
$this->current_sheet_index = $i + 1; | |
$this->parser = xml_parser_create($this->encodingXmlParser); | |
xml_set_object($this->parser, $this); | |
xml_set_element_handler($this->parser, "startTag", "endTag"); | |
xml_set_character_data_handler($this->parser, "contents"); | |
while (!feof($streamSheet)) | |
{ | |
$data = fread($streamSheet, 4096); | |
xml_parse($this->parser, $data, feof($streamSheet)); | |
if (!empty($this->rows)) | |
{ | |
foreach ($this->rows as $row => $cells) | |
{ | |
$callback($this->current_sheet_index, $row, $cells); | |
} | |
} | |
$this->updateMaxMemory(); | |
$this->rows = []; | |
} | |
xml_parser_free($this->parser); | |
$this->parser = null; | |
fclose($this->stream_shared_string); | |
fclose($streamSheet); | |
} | |
} | |
} | |
return true; | |
} | |
/** | |
* Get Time | |
* | |
* @return [string] | |
*/ | |
public function getTime() | |
{ | |
$time = microtime(true) - $this->start_time; | |
return 'Временная метка: ' . round($time, 3) . ' s'; | |
} | |
/** | |
* Get Memory Usage | |
* | |
* @return [string] | |
*/ | |
public function getMemoryUsage() | |
{ | |
$bytes = memory_get_usage() - $this->start_memory; | |
$f_bytes = $this->formatBytes($bytes); | |
return 'Использовано памяти: ' . $f_bytes; | |
} | |
/** | |
* Get Memory Usage | |
* | |
* @return [string] | |
*/ | |
public function getMaxMemoryUsage() | |
{ | |
$this->updateMaxMemory(); | |
$bytes = $this->max_memory; | |
$f_bytes = $this->formatBytes($bytes); | |
return 'Максимальное значение использованной памяти: ' . $f_bytes; | |
} | |
/** | |
* Close XLSX | |
* | |
* @return [void] | |
*/ | |
public function close() | |
{ | |
$this->updateMaxMemory(); | |
if ($this->is_open_zip) { | |
if ($this->zip->close()) $this->is_open_zip = false; | |
} | |
if ($this->storage_shared_string == 'file' && file_exists($this->file_shared_string)) { | |
unlink($this->file_shared_string); | |
} | |
$this->shared_string_data = []; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment