Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
<?php
/*
SimpleXLSX php class v0.6.8
MS Excel 2007 workbooks reader
Example 1:
$xlsx = new SimpleXLSX('book.xlsx');
print_r( $xlsx->rows() );
Example 2:
$xlsx = new SimpleXLSX('book.xlsx');
print_r( $xlsx->rowsEx() );
Example 3:
$xlsx = new SimpleXLSX('book.xlsx');
print_r( $xlsx->rows(2) ); // second worksheet
Example 4.1:
$xlsx = new SimpleXLSX('book.xlsx');
print_r( $xlsx->sheetNames() ); // array( 1 => 'Sheet 1', 3 => 'Catalog' );
Example 4.2:
$xlsx = new SimpleXLSX('book.xlsx');
echo 'Sheet Name 2 = '.$xlsx->sheetName(2);
Example 5:
$xlsx = new SimpleXLSX('book.xlsx');
if ($xslx->success())
print_r( $xlsx->rows() );
else
echo 'xlsx error: '.$xslx->error();
Example 6:
$xslx = new SimpleXLSX( file_get_contents('http://www.example.com/example.xlsx'), true);
list($num_cols, $num_rows) = $xlsx->dimension(2);
echo $xlsx->sheetName(2).':'.$num_cols.'x'.$num_rows;
v0.6.8 (2013-10-13) fixed dimension() where 1 row only, fixed rowsEx() empty cells indexes (Daniel Stastka)
v0.6.7 (2013-08-10) fixed unzip (mac), added $debug param to _constructor to display errors
v0.6.6 (2013-06-03) +entryExists(),
v0.6.5 (2013-03-18) fixed sheetName()
v0.6.4 (2013-03-13) rowsEx(), _parse(): fixed date column type & format detection
v0.6.3 (2013-03-13) rowsEx(): fixed formulas, added date type 'd', added format 'format'
dimension(): fixed empty sheet dimension
+ sheetNames() - returns array( sheet_id => sheet_name, sheet_id2 => sheet_name2 ...)
v0.6.2 (2012-10-04) fixed empty cells, rowsEx() returns type and formulas now
v0.6.1 (2012-09-14) removed "raise exception" and fixed _unzip
v0.6 (2012-09-13) success(), error(), __constructor( $filename, $is_data = false )
v0.5.1 (2012-09-13) sheetName() fixed
v0.5 (2012-09-12) sheetName()
v0.4 sheets(), sheetsCount(), unixstamp( $excelDateTime )
v0.3 - fixed empty cells (Gonzo patch)
*/
class SimpleXLSX {
// Don't remove this string! Created by Sergey Schuchkin from http://www.sibvision.ru - professional php developers team 2010-2013
private $workbook;
private $sheets;
private $styles;
private $hyperlinks;
private $package = array(
'filename' => '',
'mtime' => 0,
'size' => 0,
'comment' => '',
'entries' => array()
);
private $sharedstrings;
private $error = false;
private $debug = false;
// scheme
const SCHEMA_REL_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';
const SCHEMA_REL_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
const SCHEMA_REL_WORKSHEET = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';
const SCHEMA_REL_STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles';
private $workbook_cell_formats = array();
private $built_in_cell_formats = array(
0 => 'General',
1 => '0',
2 => '0.00',
3 => '#,##0',
4 => '#,##0.00',
9 => '0%',
10 => '0.00%',
11 => '0.00E+00',
12 => '# ?/?',
13 => '# ??/??',
14 => 'mm-dd-yy',
15 => 'd-mmm-yy',
16 => 'd-mmm',
17 => 'mmm-yy',
18 => 'h:mm AM/PM',
19 => 'h:mm:ss AM/PM',
20 => 'h:mm',
21 => 'h:mm:ss',
22 => 'm/d/yy h:mm',
37 => '#,##0 ;(#,##0)',
38 => '#,##0 ;[Red](#,##0)',
39 => '#,##0.00;(#,##0.00)',
40 => '#,##0.00;[Red](#,##0.00)',
44 => '_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)',
45 => 'mm:ss',
46 => '[h]:mm:ss',
47 => 'mmss.0',
48 => '##0.0E+0',
49 => '@',
27 => '[$-404]e/m/d',
30 => 'm/d/yy',
36 => '[$-404]e/m/d',
50 => '[$-404]e/m/d',
57 => '[$-404]e/m/d',
59 => 't0',
60 => 't0.00',
61 => 't#,##0',
62 => 't#,##0.00',
67 => 't0%',
68 => 't0.00%',
69 => 't# ?/?',
70 => 't# ??/??',
);
function __construct( $filename, $is_data = false, $debug = false ) {
$this->debug = $debug;
$this->_unzip( $filename, $is_data );
$this->_parse();
}
function sheets() {
return $this->sheets;
}
function sheetsCount() {
return count($this->sheets);
}
function sheetName( $worksheet_id) {
foreach( $this->workbook->sheets->sheet as $s ) {
if ( $s->attributes('r',true)->id == 'rId'.$worksheet_id)
return (string) $s['name'];
}
return false;
}
function sheetNames() {
$result = array();
foreach( $this->workbook->sheets->sheet as $s ) {
$result[ substr( $s->attributes('r',true)->id, 3) ] = (string) $s['name'];
}
return $result;
}
function worksheet( $worksheet_id ) {
if ( isset( $this->sheets[ $worksheet_id ] ) ) {
$ws = $this->sheets[ $worksheet_id ];
if (isset($ws->hyperlinks)) {
$this->hyperlinks = array();
foreach( $ws->hyperlinks->hyperlink as $hyperlink ) {
$this->hyperlinks[ (string) $hyperlink['ref'] ] = (string) $hyperlink['display'];
}
}
return $ws;
} else {
$this->error( 'Worksheet '.$worksheet_id.' not found. Try $xlsx->rows('.implode(') or $xlsx->rows(', array_keys($this->sheets)).')' );
return false;
}
}
function dimension( $worksheet_id = 1 ) {
if (($ws = $this->worksheet( $worksheet_id)) === false)
return false;
$ref = (string) $ws->dimension['ref'];
if( strpos( $ref, ':') !== false ) {
$d = explode(':', $ref);
if(!isset($d[1]))
return array(0,0);
$index = $this->_columnIndex( $d[1] );
return array( $index[0]+1, $index[1]+1);
} else if ( strlen( $ref ) ) { // 0.6.8
$index = $this->_columnIndex( $ref );
return array( $index[0]+1, $index[1]+1);
} else
return array(0,0);
}
// sheets numeration: 1,2,3....
function rows( $worksheet_id = 1 ) {
if (($ws = $this->worksheet( $worksheet_id)) === false)
return false;
$rows = array();
$curR = 0;
list($cols,) = $this->dimension( $worksheet_id );
foreach ($ws->sheetData->row as $row) {
// echo 'row<br />';
foreach ($row->c as $c) {
list($curC,) = $this->_columnIndex((string) $c['r']);
$rows[ $curR ][ $curC ] = $this->value($c);
}
for ($i = 0; $i < $cols; $i++)
if (!isset($rows[$curR][$i]))
$rows[ $curR ][ $i ] = '';
ksort( $rows[ $curR ] );
$curR++;
}
return $rows;
}
function rowsEx( $worksheet_id = 1 ) {
if (($ws = $this->worksheet( $worksheet_id)) === false)
return false;
$rows = array();
$curR = 0;
list($cols,) = $this->dimension( $worksheet_id );
foreach ($ws->sheetData->row as $row) {
foreach ($row->c as $c) {
list($curC,) = $this->_columnIndex((string) $c['r']);
$t = (string)$c['t'];
$s = (int)$c['s'];
if ($s > 0 && isset($this->workbook_cell_formats[ $s ])) {
$format = $this->workbook_cell_formats[ $s ]['format'];
if ( strpos($format,'m') !== false )
$t = 'd';
} else
$format = '';
$rows[ $curR ][ $curC ] = array(
'type' => $t,
'name' => (string) $c['r'],
'value' => $this->value($c),
'href' => $this->href( $c ),
'f' => (string) $c->f,
'format' => $format
);
}
for ($i = 0; $i < $cols; $i++) {
if ( !isset($rows[$curR][$i]) ) {
// 0.6.8
for ($c = '', $j = $i; $j >= 0; $j = intval($j / 26) - 1)
$c = chr( $j % 26 + 65 ) . $c;
$rows[ $curR ][$i] = array(
'type' => '',
// 'name' => chr($i + 65).($curR+1),
'name' => $c.($curR+1),
'value' => '',
'href' => '',
'f' => '',
'format' => ''
);
}
}
ksort( $rows[ $curR ] );
$curR++;
}
return $rows;
}
// thx Gonzo
private function _columnIndex( $cell = 'A1' ) {
if (preg_match("/([A-Z]+)(\d+)/", $cell, $matches)) {
$col = $matches[1];
$row = $matches[2];
$colLen = strlen($col);
$index = 0;
for ($i = $colLen-1; $i >= 0; $i--)
$index += (ord($col{$i}) - 64) * pow(26, $colLen-$i-1);
return array($index-1, $row-1);
} else
throw new Exception("Invalid cell index.");
}
function value( $cell ) {
// Determine data type
$dataType = (string) $cell['t'];
switch ($dataType) {
case "s":
// Value is a shared string
if ((string)$cell->v != '') {
$value = $this->sharedstrings[intval($cell->v)];
} else {
$value = '';
}
break;
case "b":
// Value is boolean
$value = (string)$cell->v;
if ($value == '0') {
$value = false;
} else if ($value == '1') {
$value = true;
} else {
$value = (bool)$cell->v;
}
break;
case "inlineStr":
// Value is rich text inline
$value = $this->_parseRichText($cell->is);
break;
case "e":
// Value is an error message
if ((string)$cell->v != '') {
$value = (string)$cell->v;
} else {
$value = '';
}
break;
default:
// Value is a string
$value = (string)$cell->v;
// Check for numeric values
if (is_numeric($value) && $dataType != 's') {
if ($value == (int)$value) $value = (int)$value;
elseif ($value == (float)$value) $value = (float)$value;
}
}
return $value;
}
function href( $cell ) {
return isset( $this->hyperlinks[ (string) $cell['r'] ] ) ? $this->hyperlinks[ (string) $cell['r'] ] : '';
}
function styles() {
return $this->styles;
}
function _unzip( $filename, $is_data = false ) {
// Clear current file
$this->datasec = array();
if ($is_data) {
$this->package['filename'] = 'default.xlsx';
$this->package['mtime'] = time();
$this->package['size'] = strlen( $filename );
$vZ = $filename;
} else {
if (!is_readable($filename)) {
$this->error( 'File not found' );
return false;
}
// Package information
$this->package['filename'] = $filename;
$this->package['mtime'] = filemtime( $filename );
$this->package['size'] = filesize( $filename );
// Read file
$oF = fopen($filename, 'rb');
$vZ = fread($oF, $this->package['size']);
fclose($oF);
}
// Cut end of central directory
/* $aE = explode("\x50\x4b\x05\x06", $vZ);
if (count($aE) == 1) {
$this->error('Unknown format');
return false;
}
*/
if ( ($pcd = strrpos( $vZ, "\x50\x4b\x05\x06" )) === false ) {
$this->error('Unknown format');
return false;
}
$aE = array(
0 => substr( $vZ, 0, $pcd ),
1 => substr( $vZ, $pcd + 3 )
);
// Normal way
$aP = unpack('x16/v1CL', $aE[1]);
$this->package['comment'] = substr($aE[1], 18, $aP['CL']);
// Translates end of line from other operating systems
$this->package['comment'] = strtr($this->package['comment'], array("\r\n" => "\n", "\r" => "\n"));
// Cut the entries from the central directory
$aE = explode("\x50\x4b\x01\x02", $vZ);
// Explode to each part
$aE = explode("\x50\x4b\x03\x04", $aE[0]);
// Shift out spanning signature or empty entry
array_shift($aE);
// Loop through the entries
foreach ($aE as $vZ) {
$aI = array();
$aI['E'] = 0;
$aI['EM'] = '';
// Retrieving local file header information
// $aP = unpack('v1VN/v1GPF/v1CM/v1FT/v1FD/V1CRC/V1CS/V1UCS/v1FNL', $vZ);
$aP = unpack('v1VN/v1GPF/v1CM/v1FT/v1FD/V1CRC/V1CS/V1UCS/v1FNL/v1EFL', $vZ);
// Check if data is encrypted
// $bE = ($aP['GPF'] && 0x0001) ? TRUE : FALSE;
$bE = false;
$nF = $aP['FNL'];
$mF = $aP['EFL'];
// Special case : value block after the compressed data
if ($aP['GPF'] & 0x0008) {
$aP1 = unpack('V1CRC/V1CS/V1UCS', substr($vZ, -12));
$aP['CRC'] = $aP1['CRC'];
$aP['CS'] = $aP1['CS'];
$aP['UCS'] = $aP1['UCS'];
// 2013-08-10
$vZ = substr($vZ, 0, -12);
if (substr($vZ,-4) === "\x50\x4b\x07\x08")
$vZ = substr($vZ, 0, -4);
}
// Getting stored filename
$aI['N'] = substr($vZ, 26, $nF);
if (substr($aI['N'], -1) == '/') {
// is a directory entry - will be skipped
continue;
}
// Truncate full filename in path and filename
$aI['P'] = dirname($aI['N']);
$aI['P'] = $aI['P'] == '.' ? '' : $aI['P'];
$aI['N'] = basename($aI['N']);
$vZ = substr($vZ, 26 + $nF + $mF);
if ( strlen($vZ) != $aP['CS'] ) { // check only if availabled
$aI['E'] = 1;
$aI['EM'] = 'Compressed size is not equal with the value in header information.';
} else {
if ($bE) {
$aI['E'] = 5;
$aI['EM'] = 'File is encrypted, which is not supported from this class.';
} else {
switch($aP['CM']) {
case 0: // Stored
// Here is nothing to do, the file ist flat.
break;
case 8: // Deflated
$vZ = gzinflate($vZ);
break;
case 12: // BZIP2
if (! extension_loaded('bz2')) {
if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') {
@dl('php_bz2.dll');
} else {
@dl('bz2.so');
}
}
if (extension_loaded('bz2')) {
$vZ = bzdecompress($vZ);
} else {
$aI['E'] = 7;
$aI['EM'] = "PHP BZIP2 extension not available.";
}
break;
default:
$aI['E'] = 6;
$aI['EM'] = "De-/Compression method {$aP['CM']} is not supported.";
}
if (! $aI['E']) {
if ($vZ === FALSE) {
$aI['E'] = 2;
$aI['EM'] = 'Decompression of data failed.';
} else {
if (strlen($vZ) != $aP['UCS']) {
$aI['E'] = 3;
$aI['EM'] = 'Uncompressed size is not equal with the value in header information.';
} else {
if (crc32($vZ) != $aP['CRC']) {
$aI['E'] = 4;
$aI['EM'] = 'CRC32 checksum is not equal with the value in header information.';
}
}
}
}
}
}
$aI['D'] = $vZ;
// DOS to UNIX timestamp
$aI['T'] = mktime(($aP['FT'] & 0xf800) >> 11,
($aP['FT'] & 0x07e0) >> 5,
($aP['FT'] & 0x001f) << 1,
($aP['FD'] & 0x01e0) >> 5,
($aP['FD'] & 0x001f),
(($aP['FD'] & 0xfe00) >> 9) + 1980);
//$this->Entries[] = &new SimpleUnzipEntry($aI);
$this->package['entries'][] = array(
'data' => $aI['D'],
'error' => $aI['E'],
'error_msg' => $aI['EM'],
'name' => $aI['N'],
'path' => $aI['P'],
'time' => $aI['T']
);
} // end for each entries
}
function getPackage() {
return $this->package;
}
function entryExists( $name ) { // 0.6.6
$dir = dirname( $name );
$name = basename( $name );
foreach( $this->package['entries'] as $entry)
if ( $entry['path'] == $dir && $entry['name'] == $name)
return true;
return false;
}
function getEntryData( $name ) {
$dir = dirname( $name );
$name = basename( $name );
foreach( $this->package['entries'] as $entry)
if ( $entry['path'] == $dir && $entry['name'] == $name)
return $entry['data'];
$this->error('Unknown format');
return false;
}
function getEntryXML( $name ) {
if ( ($entry_xml = $this->getEntryData( $name ))
&& ($entry_xmlobj = simplexml_load_string( $entry_xml )))
return $entry_xmlobj;
$this->error('Entry not found: '.$name );
return false;
}
function unixstamp( $excelDateTime ) {
$d = floor( $excelDateTime ); // seconds since 1900
$t = $excelDateTime - $d;
return ($d > 0) ? ( $d - 25569 ) * 86400 + $t * 86400 : $t * 86400;
}
function error( $set = false ) {
if ($set) {
$this->error = $set;
if ($this->debug)
trigger_error( __CLASS__.': '.$set, E_USER_WARNING );
} else {
return $this->error;
}
}
function success() {
return !$this->error;
}
function _parse() {
// Document data holders
$this->sharedstrings = array();
$this->sheets = array();
// $this->styles = array();
// Read relations and search for officeDocument
if ( $relations = $this->getEntryXML("_rels/.rels" ) ) {
foreach ($relations->Relationship as $rel) {
if ($rel["Type"] == SimpleXLSX::SCHEMA_REL_OFFICEDOCUMENT) {
// echo 'workbook found<br />';
// Found office document! Read workbook & relations...
// Workbook
if ( $this->workbook = $this->getEntryXML( $rel['Target'] )) {
// echo 'workbook read<br />';
if ( $workbookRelations = $this->getEntryXML( dirname($rel['Target']) . '/_rels/workbook.xml.rels' )) {
// echo 'workbook relations<br />';
// Loop relations for workbook and extract sheets...
foreach ($workbookRelations->Relationship as $workbookRelation) {
$path = dirname($rel['Target']) . '/' . $workbookRelation['Target'];
if ($workbookRelation['Type'] == SimpleXLSX::SCHEMA_REL_WORKSHEET) { // Sheets
// echo 'sheet<br />';
if ( $sheet = $this->getEntryXML( $path ) ) {
$this->sheets[ str_replace( 'rId', '', (string) $workbookRelation['Id']) ] = $sheet;
// echo '<pre>'.htmlspecialchars( print_r( $sheet, true ) ).'</pre>';
}
} else if ($workbookRelation['Type'] == SimpleXLSX::SCHEMA_REL_SHAREDSTRINGS && $this->entryExists( $path )) { // 0.6.6
// echo 'sharedstrings<br />';
if ( $sharedStrings = $this->getEntryXML( $path ) ) {
foreach ($sharedStrings->si as $val) {
if (isset($val->t)) {
$this->sharedstrings[] = (string)$val->t;
} elseif (isset($val->r)) {
$this->sharedstrings[] = $this->_parseRichText($val);
}
}
}
} else if ($workbookRelation['Type'] == SimpleXLSX::SCHEMA_REL_STYLES) {
$this->styles = $this->getEntryXML( $path );
$nf = array();
if ( $this->styles->numFmts->numFmt != NULL )
foreach( $this->styles->numFmts->numFmt as $v )
$nf[ (int) $v['numFmtId'] ] = (string) $v['formatCode'];
if ( $this->styles->cellXfs->xf != NULL )
foreach( $this->styles->cellXfs->xf as $v ) {
$v = (array) $v->attributes();
$v = $v['@attributes'];
if (isset($this->built_in_cell_formats[ $v['numFmtId'] ]) )
$v['format'] = $this->built_in_cell_formats[ $v['numFmtId'] ];
else if (isset($nf[ $v['numFmtId'] ]))
$v['format'] = $nf[ $v['numFmtId'] ];
else
$v['format'] = '';
$this->workbook_cell_formats[] = $v;
}
// print_r( $this->workbook_cell_formats );
}
}
break;
}
}
}
}
}
// Sort sheets
ksort($this->sheets);
}
private function _parseRichText($is = null) {
$value = array();
if (isset($is->t)) {
$value[] = (string)$is->t;
} else {
foreach ($is->r as $run) {
$value[] = (string)$run->t;
}
}
return implode(' ', $value);
}
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.