Skip to content

Instantly share code, notes, and snippets.

@juanparati
Created February 19, 2018 10:54
Show Gist options
  • Save juanparati/97db4c7c0f93689337f542dff1aea31a to your computer and use it in GitHub Desktop.
Save juanparati/97db4c7c0f93689337f542dff1aea31a to your computer and use it in GitHub Desktop.
CSV reader that supports field maps
<?php
/**
* Class Model_CSVReader
*
* Parse and read CSV files as a stream keeping a low memory footprint
*/
class Model_CSVReader
{
    /**
     * @var null|resource CSV file pointer
     */
    protected $fp = null;

    /**
     * @var string Charset of the CSV file; values are converted from it to UTF-8
     * @link http://php.net/manual/en/function.mb-convert-encoding.php
     */
    protected $charset;

    /**
     * Decimal separator handed to Sanitize::check_currency()
     *
     * @var string
     */
    protected $decimal_sep;

    /**
     * @var string CSV column delimiter
     * @link http://php.net/manual/en/function.fgetcsv.php
     */
    protected $delimiter;

    /**
     * @var string CSV field enclosure character
     * @link http://php.net/manual/en/function.fgetcsv.php
     */
    protected $closure_char;

    /**
     * Field name -> column index map.
     * An entry is false when a named header column could not be found.
     *
     * @var array
     */
    protected $fieldmap = [];

    /**
     * Field properties (Only when field map is used)
     *
     * @var array
     */
    protected $field_props = [];

    /**
     * Model_CSVReader constructor.
     *
     * @param string $file          Path of the CSV file to open for reading
     * @param string $delimiter     Column delimiter
     * @param string $closure_char  Field enclosure; an empty value falls back to '"'
     * @param string $charset       Charset of the file contents
     * @param string $decimal_sep   Decimal separator used when checking currency values
     * @throws Exception            When the file cannot be opened
     */
    public function __construct($file, $delimiter = ';', $closure_char = '"', $charset = 'UTF-8', $decimal_sep = ',')
    {
        // NOTE(review): this ini setting is deprecated as of PHP 8.1; kept so
        // files with legacy line endings keep being split the same way.
        ini_set('auto_detect_line_endings', true);

        $this->fp = fopen($file, "r");

        if (!$this->fp)
        {
            Log::instance()->add(Log::ERROR, 'Unable to read CSV file: ' . $file);
            throw new Exception('Unable to read CSV file: ' . $file);
        }

        $this->delimiter    = $delimiter;
        $this->closure_char = empty($closure_char) ? '"' : $closure_char;
        $this->charset      = $charset;
        $this->decimal_sep  = $decimal_sep;
    }

    /**
     * Set the field mapping (Used with CSV that have header columns)
     *
     * Each entry of $fields is keyed by the output field name. An entry with
     * an integer 'column' maps straight to that column index; any other
     * 'column' value is looked up by name in the header row. Entries that are
     * false or have no 'column' key are not mapped.
     *
     * @param array    $fields      Field definitions keyed by output field name
     * @param int|bool $header_row  Line holding the header; false reads the header
     *                              from the current pointer position
     * @return bool    False when the header line is missing or has fewer than 2 columns
     */
    public function set_mapfield($fields, $header_row = 0)
    {
        // Reset fieldmap and properties
        $this->fieldmap    = [];
        $this->field_props = [];

        // Reset pointer position
        if ($header_row !== false)
            $this->seekLine($header_row);

        $columns = $this->fetch_columns();

        // Ignore empty header line
        if (empty($columns))
            return false;

        // Ignore lines with less than 2 columns
        if (count($columns) < 2)
            return false;

        // Encode columns
        $columns = array_map([$this, 'encode'], $columns);

        // Map fields
        foreach ($fields as $k => $field)
        {
            if ($field === false || !isset($field['column']))
                continue;

            if (is_int($field['column']))
            {
                $this->fieldmap[$k] = $field['column'];
                continue;
            }

            // Strict search: header names are strings, and a loose comparison
            // would treat numeric-looking names such as "1e3" and "1000" as equal.
            // A header that is not found is stored as false and skipped on read.
            $this->fieldmap[$k] = array_search((string) $field['column'], $columns, true);
        }

        $this->field_props = $fields;

        return true;
    }

    /**
     * Read entire data from the CSV and return it structured according to the map fields.
     * It is not recommended to use this function for big CSV files.
     *
     * Rows that readLine() reports as blank are left out of the result.
     *
     * @param int $header_row  Line number where reading starts
     * @return array
     */
    public function read($header_row = 1)
    {
        $this->seekLine($header_row);

        $records = [];

        // Compare against false explicitly: a mapped row may legitimately be an
        // empty array, which must not be mistaken for the end of the file.
        while (($row = $this->readLine()) !== false)
        {
            // readLine() answers true for blank lines; those are not records.
            if (is_array($row))
                $records[] = $row;
        }

        return $records;
    }

    /**
     * Read the CSV file line by line
     *
     * Without a field map the raw columns are returned wrapped in a
     * one-element array. With a field map the result is keyed by field name;
     * the key 'exclude' is set to true when a value matches the field's
     * exclusion list.
     *
     * @return array|bool  The row, true for a line with a single column
     *                     (blank line), false when no more rows can be read
     */
    public function readLine()
    {
        $columns = $this->fetch_columns();

        if (!$columns)
            return false;

        // Detect empty lines
        if (count($columns) === 1)
            return true;

        if (empty($this->fieldmap))
            return [$columns];

        $frow = [];

        foreach ($this->fieldmap as $k => $columnmap)
        {
            // A header that was not found is stored as false. Used as an array
            // key, false is cast to 0 and would silently read the first column.
            if ($columnmap === false || !isset($columns[$columnmap]))
                continue;

            $props = isset($this->field_props[$k]) ? $this->field_props[$k] : [];
            $value = $this->transform_value($props, $columns[$columnmap]);

            // Apply exclusion list
            if (!empty($props['exclude']))
            {
                if (Arr::expression_found($props['exclude'], $value))
                    $frow['exclude'] = true;
            }

            // Convert decimal values
            $currency = Sanitize::check_currency($value, $this->decimal_sep);

            if ($currency !== false)
                $frow[$k] = $currency;
            elseif (is_string($value))
                $frow[$k] = $this->encode($value);
            else
                $frow[$k] = $value; // Keep the type produced by an int/float cast
        }

        // Set static values
        foreach ($this->field_props as $k => $props)
        {
            if (is_array($props) && array_key_exists('static_value', $props))
                $frow[$k] = $props['static_value'];
        }

        return $frow;
    }

    /**
     * Seek the file pointer to a specific line
     *
     * Lines are skipped with fgets(), so a quoted field that spans several
     * physical lines counts as several lines here.
     *
     * @param int $line  Zero-based line number (compared strictly as an integer)
     * @return bool      False when the file ends before the line is reached
     */
    public function seekLine($line)
    {
        // Reset file pointer position
        rewind($this->fp);

        $current = 0;

        do
        {
            if ($line === $current)
                return true;

            $current++;
        } while (fgets($this->fp) !== false);

        return false;
    }

    /**
     * Encode a text to UTF-8
     *
     * @param string $text
     * @return string
     */
    protected function encode($text)
    {
        if ($this->charset === 'UTF-8')
            return $text;

        return mb_convert_encoding($text, 'UTF-8', $this->charset);
    }

    /**
     * Fetch the next CSV row from the file pointer.
     *
     * @return array|false|null  Whatever fgetcsv() returns for the next row
     */
    protected function fetch_columns()
    {
        // The escape character is passed explicitly (same value as the
        // historical default) because relying on the default is deprecated
        // as of PHP 8.4.
        return fgetcsv($this->fp, 0, $this->delimiter, $this->closure_char, '\\');
    }

    /**
     * Apply the 'remove', 'replace', 'segment' and 'cast' properties of a field to a value.
     *
     * @param array|mixed $props  Properties of the field as given to set_mapfield()
     * @param string      $value  Raw column value
     * @return mixed              Transformed value
     */
    protected function transform_value($props, $value)
    {
        // Remove characters
        if (!empty($props['remove']))
            $value = str_replace($props['remove'], '', $value);

        // Replace characters
        if (!empty($props['replace']))
        {
            foreach ($props['replace'] as $search_str => $replace_str)
                $value = str_replace($search_str, $replace_str, $value);
        }

        // Extract word segments
        if (isset($props['segment']) && is_int($props['segment']))
        {
            $segments = explode(' ', $value);

            // isset() rather than empty(): a segment holding "0" is real data.
            $value = isset($segments[$props['segment']]) ? $segments[$props['segment']] : '';
        }

        // Cast
        if (!empty($props['cast']))
        {
            switch ($props['cast'])
            {
                case 'int':
                case 'integer':
                    $value = (int) $value;
                    break;

                case 'float':
                    // NOTE(review): the cast does not honour $decimal_sep, so
                    // "12,50" becomes 12.0 - confirm whether that is intended.
                    $value = (float) $value;
                    break;

                case 'string':
                    $value = (string) $value;
                    break;
            }
        }

        return $value;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment