Created
February 19, 2018 10:54
-
-
Save juanparati/97db4c7c0f93689337f542dff1aea31a to your computer and use it in GitHub Desktop.
CSV reader that supports field maps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Class Model_CSVReader
 *
 * Parse and read CSV files as a stream keeping a low memory footprint.
 *
 * Rows can be returned as they are in the file or re-keyed through a field
 * map (see set_mapfield()). The class relies on three helpers declared
 * outside this file: Log, Arr::expression_found() and
 * Sanitize::check_currency().
 */
class Model_CSVReader
{
    /**
     * @var null|resource CSV file pointer
     */
    protected $fp = null;

    /**
     * @var string CSV charset encoding
     * @link http://php.net/manual/en/function.mb-convert-encoding.php
     */
    protected $charset;

    /**
     * Default decimal separator (handed to Sanitize::check_currency())
     *
     * @var string
     */
    protected $decimal_sep;

    /**
     * @var string CSV column delimiter
     * @link http://php.net/manual/en/function.fgetcsv.php
     */
    protected $delimiter;

    /**
     * @var string CSV field enclosure character
     * @link http://php.net/manual/en/function.fgetcsv.php
     */
    protected $closure_char;

    /**
     * Field name -> column index map
     *
     * @var array
     */
    protected $fieldmap = [];

    /**
     * Field properties (Only when field map is used)
     *
     * @var array
     */
    protected $field_props = [];

    /**
     * Model_CSVReader constructor.
     *
     * @param string $file          Path of the CSV file to open for reading
     * @param string $delimiter     Column delimiter
     * @param string $closure_char  Field enclosure; an empty value falls back to '"'
     * @param string $charset       Charset of the file content
     * @param string $decimal_sep   Decimal separator used by the currency check
     * @throws Exception When the file cannot be opened
     */
    public function __construct($file, $delimiter = ';', $closure_char = '"', $charset = 'UTF-8', $decimal_sep = ',')
    {
        // Lets PHP recognise CR-only line endings.
        // NOTE(review): this ini setting is deprecated since PHP 8.1.
        ini_set('auto_detect_line_endings', true);

        $this->fp = fopen($file, "r");

        if (!$this->fp)
        {
            Log::instance()->add(Log::ERROR, 'Unable to read CSV file: ' . $file);
            throw new Exception('Unable to read CSV file: ' . $file);
        }

        $this->delimiter    = $delimiter;
        $this->closure_char = empty($closure_char) ? '"' : $closure_char;
        $this->charset      = $charset;
        $this->decimal_sep  = $decimal_sep;
    }

    /**
     * Set the field mapping (Used with CSV that have header columns)
     *
     * Each entry of $fields is keyed by the output field name. An entry with
     * an integer 'column' maps straight to that column index; any other
     * 'column' value is looked up by name in the header row. Entries that are
     * false or have no 'column' are not mapped, and neither are names that
     * are missing from the header.
     *
     * @param array    $fields      Field properties keyed by field name
     * @param int|bool $header_row  Line holding the header; false reads the
     *                              header from the current pointer position
     * @return bool False when the header line is missing or has fewer than 2 columns
     */
    public function set_mapfield($fields, $header_row = 0)
    {
        // Reset fieldmap and properties
        $this->fieldmap    = [];
        $this->field_props = [];

        // Reset pointer position
        if ($header_row !== false)
            $this->seekLine($header_row);

        $columns = $this->fetchRow();

        // Ignore a missing header line and lines with less than 2 columns
        if (empty($columns) || count($columns) < 2)
            return false;

        // Encode columns
        $columns = array_map([$this, 'encode'], $columns);

        // A UTF-8 byte order mark would stick to the first header name and
        // prevent it from being matched by name.
        if (is_string($columns[0]) && strncmp($columns[0], "\xEF\xBB\xBF", 3) === 0)
            $columns[0] = (string) substr($columns[0], 3);

        // Map fields
        foreach ($fields as $k => $field)
        {
            if ($field === false || !isset($field['column']))
                continue;

            if (is_int($field['column']))
            {
                $this->fieldmap[$k] = $field['column'];
                continue;
            }

            $index = array_search($field['column'], $columns, true);

            // A false index used as an array key would address column 0,
            // so names that are not in the header are left unmapped.
            if ($index !== false)
                $this->fieldmap[$k] = $index;
        }

        $this->field_props = $fields;

        return true;
    }

    /**
     * Read entire data from the CSV and return it structured according to the map fields.
     * It is not recommended to use this function for big CSV files.
     *
     * Lines that readLine() reports as empty are skipped.
     *
     * @param int $header_row Line number where reading starts
     * @return array
     */
    public function read($header_row = 1)
    {
        $this->seekLine($header_row);

        $records = [];

        // Only false marks the end of the file; an empty mapped row must not stop the loop.
        while (($row = $this->readLine()) !== false)
        {
            if ($row === true)
                continue;

            $records[] = $row;
        }

        return $records;
    }

    /**
     * Read the CSV file line by line
     *
     * Without a field map the parsed columns are returned wrapped in a
     * one-element array (shape kept for existing callers). With a field map
     * the result is keyed by field name, may carry an 'exclude' => true flag,
     * and includes every configured 'static_value'.
     *
     * @return array|bool False when no more rows can be read, true for a line
     *                    with a single column (empty line), the row otherwise
     */
    public function readLine()
    {
        $columns = $this->fetchRow();

        if (!$columns)
            return false;

        // Detect empty lines
        if (count($columns) === 1)
            return true;

        if (empty($this->fieldmap))
            return [$columns];

        $frow = [];

        foreach ($this->fieldmap as $k => $columnmap)
        {
            if (!isset($columns[$columnmap]))
                continue;

            $props = isset($this->field_props[$k]) ? $this->field_props[$k] : [];
            $value = $this->transformValue($columns[$columnmap], $props);

            // Apply exclusion list
            if (!empty($props['exclude']) && Arr::expression_found($props['exclude'], $value))
                $frow['exclude'] = true;

            // Convert decimal values
            $currency = Sanitize::check_currency($value, $this->decimal_sep);

            // Save value or string
            $frow[$k] = $currency === false ? $this->encode($value) : $currency;
        }

        // Set static values
        foreach ($this->field_props as $k => $props)
        {
            if (is_array($props) && array_key_exists('static_value', $props))
                $frow[$k] = $props['static_value'];
        }

        return $frow;
    }

    /**
     * Seek the file pointer to an specific line
     *
     * Lines are counted with fgets(), i.e. physical lines of the file.
     *
     * @param int $line Zero-based line number; numeric strings are accepted
     * @return bool False when the file ends before the line is reached
     */
    public function seekLine($line)
    {
        // A numeric string such as "1" would never be identical to the integer counter.
        if (!is_int($line) && is_numeric($line))
            $line = (int) $line;

        // Reset file pointer position
        rewind($this->fp);

        for ($current = 0; $current !== $line; $current++)
        {
            if (fgets($this->fp) === false)
                return false;
        }

        return true;
    }

    /**
     * Encode a text to UTF-8
     *
     * Values that are not strings (for instance values already cast to int or
     * float) are returned untouched.
     *
     * @param mixed $text
     * @return mixed
     */
    protected function encode($text)
    {
        if (!is_string($text) || $this->charset === 'UTF-8')
            return $text;

        return mb_convert_encoding($text, 'UTF-8', $this->charset);
    }

    /**
     * Read and parse the next CSV row from the file pointer.
     *
     * The escape character is passed explicitly (it equals the historical
     * default) because relying on the implicit default is deprecated since PHP 8.4.
     *
     * @return array|false|null Whatever fgetcsv() returns
     */
    protected function fetchRow()
    {
        return fgetcsv($this->fp, 0, $this->delimiter, $this->closure_char, '\\');
    }

    /**
     * Apply the 'remove', 'replace', 'segment' and 'cast' properties of a field to a raw value.
     *
     * @param string $value Raw column value
     * @param array  $props Properties of the field
     * @return mixed Transformed value
     */
    protected function transformValue($value, $props)
    {
        // Remove characters
        if (!empty($props['remove']))
            $value = str_replace($props['remove'], '', $value);

        // Replace characters
        if (!empty($props['replace']))
        {
            foreach ($props['replace'] as $search_str => $replace_str)
                $value = str_replace($search_str, $replace_str, $value);
        }

        // Extract word segments
        if (isset($props['segment']) && is_int($props['segment']))
        {
            $segments = explode(' ', $value);

            // isset() instead of empty(): a segment made of "0" is a valid value.
            $value = isset($segments[$props['segment']]) ? $segments[$props['segment']] : '';
        }

        // Cast
        if (!empty($props['cast']))
        {
            switch ($props['cast'])
            {
                case 'int':
                case 'integer':
                    $value = (int) $value;
                    break;

                case 'float':
                    $value = (float) $value;
                    break;

                case 'string':
                    $value = (string) $value;
                    break;
            }
        }

        return $value;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment