<?php

/**
 * I had to parse an XLSX spreadsheet (which should damn well have been a CSV!)
 * but the usual tools were hitting the memory limit pretty quick. I found that
 * manually parsing the XML worked pretty well. Note that this, most likely,
 * won't work if cells contain anything more than text or a number (so formulas,
 * graphs, etc ..., I don't know what'd happen).
 */

$inputFile = '/path/to/spreadsheet.xlsx';
$dir = '/path/to/tmp/dir';

// Unzip
$zip = new ZipArchive();
$zip->open($inputFile);
$zip->extractTo($dir);

// Open up shared strings & the first worksheet
$strings = simplexml_load_file($dir . '/xl/sharedStrings.xml');
$sheet   = simplexml_load_file($dir . '/xl/worksheets/sheet1.xml');

// Parse the rows
$xlrows = $sheet->sheetData->row;

foreach ($xlrows as $xlrow) {
    $arr = array();
    
    // In each row, grab it's value
    foreach ($xlrow->c as $cell) {
        $v = (string) $cell->v;
        
        // If it has a "t" (type?) of "s" (string?), use the value to look up string value
        if (isset($cell['t']) && $cell['t'] == 's') {
            $s  = array();
            $si = $strings->si[(int) $v];
            
            // Register & alias the default namespace or you'll get empty results in the xpath query
            $si->registerXPathNamespace('n', 'http://schemas.openxmlformats.org/spreadsheetml/2006/main');

            // Cat together all of the 't' (text?) node values
            foreach($si->xpath('.//n:t') as $t) {
                $s[] = (string) $t;
            }

            $v = implode($s);
        }
        
        $arr[] = $v;
    }
    
    // Assuming the first row are headers, stick them in the headers array
    if (count($headers) == 0) {
        $headers = $arr;
    } else {
        // Combine the row with the headers - make sure we have the same column count
        $values = array_pad($arr, count($headers), '');
        $row    = array_combine($headers, $values);
        
        /**
         * Here, do whatever you like with the [header => value] assoc array in $row.
         * It might be useful just to run this script without any code here, to watch
         * memory usage simply iterating over your spreadsheet.
         */
    }
}

@unlink($dir);
@unlink($inputFile);