Skip to content

Instantly share code, notes, and snippets.

@pizzavomito
Last active May 4, 2021 04:45
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pizzavomito/072d45ee10317261c04c7c5021aa368c to your computer and use it in GitHub Desktop.
Save pizzavomito/072d45ee10317261c04c7c5021aa368c to your computer and use it in GitHub Desktop.
Reading CSV file with SplFileObject. Define a header or manage the header line from CSV file. Returns rows in arrays with header values as array keys.
<?php
/**
* MIT License
*
* Copyright (c) 2017 Pascal Roux
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Class CsvReader
*
* Reading CSV file with SplFileObject.
* Define a header or manage the header line from CSV file.
* Returns rows in arrays with header values as array keys.
*
* If a header is defined then for each row we will check that
* the number of columns corresponds to the number of header elements.
* If this does not match then the line is considered invalid.
* An invalid line in a CSV file will be returned as an array
* comprising a single null field unless using skipInvalid.
*
* Enable safeMode to skip this validation: every row is then returned,
* padded with null or truncated to the number of header elements.
*/
class CsvReader implements \Iterator, \Countable
{
    /**
     * Row built by the last call to buildData(), or null when there is no current row.
     *
     * @var array|null
     */
    private $data;

    /**
     * Whether the first line of the file is a header line (and therefore not a data row).
     *
     * @var bool
     */
    private $hasHeader = false;

    /**
     * Header values used as keys of the returned rows.
     *
     * @var array
     */
    private $header = [];

    /**
     * Number of validated header elements; 0 means "not computed yet / no header".
     *
     * @var int
     */
    private $countHeader = 0;

    /**
     * @var string
     */
    private $filename;

    /**
     * @var \SplFileObject
     */
    private $file;

    /**
     * Cached result of count(); -1 means "not computed yet".
     *
     * @var int
     */
    private $count = -1;

    /**
     * Validation messages indexed by the line number of the offending line.
     *
     * @var array
     */
    private $invalids = [];

    /**
     * @var bool
     */
    private $skipInvalid = false;

    /**
     * @var bool
     */
    private $safeMode = false;

    /**
     * CsvReader constructor.
     *
     * @param string $filename  Path of the CSV file, opened read-only.
     * @param string $delimiter
     * @param string $enclosure
     * @param string $escape
     */
    public function __construct($filename, $delimiter = ',', $enclosure = '"', $escape = '\\')
    {
        $this->filename = $filename;
        $this->file = new \SplFileObject($this->filename, 'r');
        $this->file->setFlags(
            \SplFileObject::READ_CSV
            | \SplFileObject::READ_AHEAD
            | \SplFileObject::SKIP_EMPTY
            | \SplFileObject::DROP_NEW_LINE
        );
        $this->file->setCsvControl($delimiter, $enclosure, $escape);
    }

    /**
     * Release the underlying file object.
     */
    public function __destruct()
    {
        $this->file = null;
    }

    /**
     * Load (when the file has a header line) and validate the header, once.
     *
     * Must only be called while the file has a current line.
     *
     * @throws \Exception When the header is not a flat array of unique strings.
     */
    private function computeHeader()
    {
        if ($this->countHeader !== 0) {
            // Already computed.
            return;
        }

        if ($this->hasHeader === true) {
            // Read line 0, then restore the position the caller was at.
            $currentPosition = $this->file->key();
            $this->file->seek(0);
            $header = $this->file->current();
            $this->file->seek($currentPosition);
            $this->header = is_array($header) ? $header : [];
        }

        if (count($this->header) > 0) {
            $header = array_unique(array_filter($this->header, 'is_string'));
            if ($this->header !== $header) {
                throw new \Exception('The header must be empty or a flat array with unique string values');
            }
            $this->header = $header;
            $this->countHeader = count($this->header);
        }
    }

    /**
     * Build $this->data from the current line of the file.
     *
     * - Steps over the header line when the file has one.
     * - Unless safeMode is on, validates the line against the header; an invalid
     *   line is recorded in $this->invalids and either yields [null] or, with
     *   skipInvalid, is stepped over until a valid line (or the end) is reached.
     * - Leaves $this->data at null when no row is available.
     *
     * @throws \Exception When the header is invalid.
     */
    private function buildData()
    {
        $this->data = null;

        if ($this->file->valid() === false) {
            return;
        }

        $this->computeHeader();

        // The header line is never a data row.
        if ($this->hasHeader === true && $this->file->key() === 0) {
            $this->file->next();
        }

        // Re-check validity on every pass: the header may have been the only
        // line, and skipping invalid lines may run past the end of the file.
        while ($this->file->valid()) {
            $line = $this->file->current();

            if ($this->safeMode === false) {
                try {
                    $this->ensureIsValid($line);
                } catch (\LogicException $e) {
                    $this->invalids[$this->file->key()] = $e->getMessage();
                    if ($this->skipInvalid === true) {
                        $this->file->next();
                        continue;
                    }
                    $this->data = [null];
                    return;
                }
            }

            $this->data = $this->combineHeader($line);
            return;
        }
    }

    /**
     * Key a raw CSV line by the header values.
     *
     * The line is padded with null / truncated to the header size first, which
     * only has an effect in safeMode (otherwise the sizes already match).
     * Without a header the line is returned unchanged.
     *
     * @param array $line
     * @return array
     */
    private function combineHeader(array $line)
    {
        if ($this->countHeader === 0) {
            return $line;
        }

        $line = array_slice(array_pad($line, $this->countHeader, null), 0, $this->countHeader);

        return array_combine($this->header, $line);
    }

    /**
     * Reads the whole file and returns true if at least one invalid line has
     * been recorded. False otherwise.
     *
     * No line is ever recorded as invalid while safeMode is enabled.
     *
     * @return bool
     */
    public function check()
    {
        $this->read();

        return count($this->invalids) > 0;
    }

    /**
     * Ensure the line has as many columns as the header (when a header is known).
     *
     * @param array $line
     * @throws \LogicException When the column count differs from the header count.
     */
    private function ensureIsValid($line)
    {
        if ($this->countHeader > 0) {
            $countColumn = count($line);
            if ($this->countHeader !== $countColumn) {
                throw new \LogicException(
                    sprintf(
                        'header has %d elements. Found %d columns on line %d.',
                        $this->countHeader,
                        $countColumn,
                        $this->file->key()
                    )
                );
            }
        }
    }

    /**
     * Read the entire file.
     *
     * Invalid lines appear as [null] unless skipInvalid is enabled, in which
     * case they are omitted. The file is rewound afterwards.
     *
     * @return array List of rows.
     */
    public function read()
    {
        $rows = [];
        $this->file->rewind();
        $this->buildData();
        while ($this->file->valid()) {
            $rows[] = $this->data;
            $this->file->next();
            $this->buildData();
        }
        $this->file->rewind();

        return $rows;
    }

    /**
     * Number of lines in the file, not counting the header line.
     *
     * Invalid lines are counted too. The result is cached.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        if (-1 === $this->count) {
            $this->rewind();
            $count = iterator_count($this->file);
            if ($count > 0 && $this->hasHeader === true) {
                $count--;
            }
            $this->count = $count;
            $this->rewind();
        }

        return $this->count;
    }

    /**
     * Return the first row.
     *
     * @return array|null Null when the file has no data row.
     */
    public function first()
    {
        return $this->rewind()->current();
    }

    /**
     * Move to specified line.
     *
     * @param int $position Zero-based line number in the file (the header line, if any, is line 0).
     * @return $this
     */
    public function move($position)
    {
        $this->file->seek($position);

        return $this;
    }

    /**
     * Rewind iterator to the first element.
     *
     * @return $this
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        if ($this->file) {
            $this->file->rewind();
        }
        $this->buildData();

        return $this;
    }

    /**
     * Return the current row.
     *
     * The row is rebuilt from the file position, so this may step over the
     * header line and, with skipInvalid, over invalid lines.
     *
     * @return array|null Null when there is no current row.
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        $this->buildData();

        return $this->data;
    }

    /**
     * Return the key of the current row.
     *
     * @return int Line number reported by the underlying file object.
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->file->key();
    }

    /**
     * Advance to the next line and return its row.
     *
     * @return array|null Null when the end of the file has been reached.
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->file->next();
        $this->buildData();

        return $this->data;
    }

    /**
     * Check if current position is valid.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->file->valid();
    }

    /**
     * @param bool $hasHeader
     * @return CsvReader
     */
    public function setHasHeader($hasHeader)
    {
        $this->hasHeader = $hasHeader;
        // Both cached values depend on this setting: force them to be recomputed.
        $this->countHeader = 0;
        $this->count = -1;

        return $this;
    }

    /**
     * Define the header manually.
     *
     * It is replaced by the first line of the file when hasHeader is enabled.
     *
     * @param array $header
     * @return CsvReader
     */
    public function setHeader(array $header)
    {
        $this->header = $header;
        // Force the new header to be validated and counted on the next read.
        $this->countHeader = 0;

        return $this;
    }

    /**
     * @param bool $skipInvalid
     * @return CsvReader
     */
    public function setSkipInvalid($skipInvalid)
    {
        $this->skipInvalid = $skipInvalid;

        return $this;
    }

    /**
     * @param bool $safeMode
     * @return CsvReader
     */
    public function setSafeMode($safeMode)
    {
        $this->safeMode = $safeMode;

        return $this;
    }

    /**
     * Validation messages collected so far, indexed by line number.
     *
     * @return array
     */
    public function getInvalids()
    {
        return $this->invalids;
    }

    /**
     * Header as currently known. A header taken from the file is only
     * available once a row has been read.
     *
     * @return array
     */
    public function getHeader()
    {
        return $this->header;
    }
}
@ibering
Copy link

ibering commented Oct 28, 2020

Hi @pizzavomito

could you please add some license information 🙂

Best regards
Irina

@pizzavomito
Copy link
Author

Hi @ibering

it's done :)

@ibering
Copy link

ibering commented Dec 18, 2020

Hi @pizzavomito

Thanks :)

@trungx
Copy link

trungx commented May 4, 2021

awesome class!!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment