Skip to content

Instantly share code, notes, and snippets.

@damncabbage
Created August 31, 2012 00:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save damncabbage/3546822 to your computer and use it in GitHub Desktop.
Save damncabbage/3546822 to your computer and use it in GitHub Desktop.
CSV via File Handle
<?php
/**
 * Sequential reader for a CSV file, exposed through a plain file handle.
 *
 * Each data record is returned as an object whose property names come from an
 * explicit column list or, when none is given, from the last header row of
 * the file.
 */
class CSVStream {
  /**
   * Path of the CSV file, exactly as passed to the constructor.
   *
   * Public because earlier versions created it dynamically, which made it
   * publicly readable.
   *
   * @var string
   */
  public $file;

  /**
   * Options passed to the constructor, unmodified.
   *
   * Public for the same backward-compatibility reason as $file.
   *
   * @var array
   */
  public $options = array();

  /**
   * Column definitions: integer column index => array(name, description).
   *
   * Public for the same backward-compatibility reason as $file.
   *
   * @var array
   */
  public $csvcolumns = array();

  /**
   * Caller-supplied field descriptions (field name => description).
   *
   * @var array
   */
  protected $fields = array();

  /**
   * Parameters for the fgetcsv() call.
   *
   * Keys: length, delimiter, enclosure, escape.
   *
   * @var array
   */
  protected $fgetcsv = array();

  /**
   * File handle for the CSV file being iterated, or NULL when closed.
   *
   * @var resource|null
   */
  protected $csvHandle = NULL;

  /**
   * The number of rows in the CSV file before the data starts.
   *
   * @var integer
   */
  protected $headerRows = 0;

  /**
   * Opens the file, skips its header rows and works out the column names.
   *
   * After construction the stream is positioned on the first data record, so
   * getNextRow() can be called straight away.
   *
   * @param string $path
   *   The path to the source file.
   * @param array $csvcolumns
   *   Keys are integers. Values are array(field name, description). When
   *   empty and header rows exist, names are read from the last header row.
   * @param array $options
   *   Options applied to this source: header_rows, embedded_newlines, and the
   *   fgetcsv() settings length, delimiter, enclosure and escape.
   * @param array $fields
   *   Optional - keys are field names, values are descriptions. Use to
   *   override the default descriptions, or to add additional fields.
   *
   * @throws RuntimeException
   *   When the file cannot be opened.
   */
  public function __construct($path, array $csvcolumns = array(), array $options = array(), array $fields = array()) {
    $this->file = $path;
    $this->options = $options;
    $this->fields = $fields;
    $this->headerRows = empty($options['header_rows']) ? 0 : (int) $options['header_rows'];

    // fgetcsv specific options.
    $defaults = array('length' => NULL, 'delimiter' => ',', 'enclosure' => '"', 'escape' => '\\');
    foreach ($defaults as $key => $default) {
      $this->fgetcsv[$key] = isset($options[$key]) ? $options[$key] : $default;
    }

    $this->csvHandle = $this->openFile();

    // Always consume the header rows so the first getNextRow() yields data,
    // whether or not the column names are taken from them.
    $header = $this->skipHeaderRows($this->csvHandle);

    if (!empty($csvcolumns)) {
      $this->csvcolumns = $csvcolumns;
    }
    elseif (is_array($header)) {
      foreach ($header as $name) {
        // A blank header line is parsed as array(NULL); cast before trim().
        $name = trim((string) $name);
        $this->csvcolumns[] = array($name, $name);
      }
    }
  }

  /**
   * Releases the file handle when the object is destroyed.
   */
  public function __destruct() {
    $this->closeHandle();
  }

  /**
   * Return a string representing the source: the file path.
   *
   * @return string
   */
  public function __toString() {
    return (string) $this->file;
  }

  /**
   * Returns the list of fields available from this source.
   *
   * @return array
   *   Keys: machine names of the fields.
   *   Values: Human-friendly descriptions of the fields.
   */
  public function fields() {
    $fields = array();
    foreach ($this->csvcolumns as $values) {
      // Fall back to the name when no description was supplied.
      $fields[$values[0]] = isset($values[1]) ? $values[1] : $values[0];
    }
    // Any caller-specified fields with the same names as extracted fields
    // will override them; any others will be added.
    if ($this->fields) {
      $fields = $this->fields + $fields;
    }
    return $fields;
  }

  /**
   * Return a count of all available source records.
   *
   * Counting uses its own file handle, so the position of the iteration
   * handle is left untouched.
   *
   * @return int
   *   Number of records after the header rows; never negative.
   *
   * @throws RuntimeException
   *   When the file cannot be opened.
   */
  public function computeCount() {
    $handle = $this->openFile();
    $count = 0;

    if (!empty($this->options['embedded_newlines'])) {
      // A record may span several physical lines, so records have to be
      // parsed with fgetcsv() to be counted correctly.
      $this->skipHeaderRows($handle);
      while ($this->getNextLine($handle) !== FALSE) {
        $count++;
      }
    }
    else {
      // One record per physical line: count lines without loading the file
      // into memory, then discount the header rows.
      while (fgets($handle) !== FALSE) {
        $count++;
      }
      $count = max(0, $count - $this->headerRows);
    }

    fclose($handle);
    return $count;
  }

  /**
   * Restarts iteration at the first data record.
   *
   * @return void
   *
   * @throws RuntimeException
   *   When the file cannot be reopened.
   */
  public function performRewind() {
    // Close any previously-opened handle, then reopen and skip the header(s).
    $this->closeHandle();
    $this->csvHandle = $this->openFile();
    $this->skipHeaderRows($this->csvHandle);
  }

  /**
   * Return the next record of the CSV file as an object.
   *
   * Columns listed in $this->csvcolumns are exposed under their field name;
   * a listed column missing from the record yields NULL. Columns that are not
   * listed keep their integer index. Once the end of the file is reached the
   * handle is closed and every further call returns NULL until
   * performRewind() is called.
   *
   * @return null|object
   */
  public function getNextRow() {
    $line = $this->getNextLine();
    if (!is_array($line)) {
      $this->closeHandle();
      return NULL;
    }

    // Remove the mapped integer indexes first, then copy values from the
    // untouched $line. Reading and writing the same array would corrupt
    // records whose field names are themselves numeric (e.g. "0" or "2").
    $record = $line;
    foreach ($this->csvcolumns as $index => $values) {
      unset($record[$index]);
    }
    foreach ($this->csvcolumns as $index => $values) {
      $record[$values[0]] = isset($line[$index]) ? $line[$index] : NULL;
    }
    return (object) $record;
  }

  /**
   * Reads one CSV record using the configured fgetcsv() parameters.
   *
   * @param resource|null $handle
   *   Handle to read from; defaults to the iteration handle.
   *
   * @return array|false
   *   The parsed record, or FALSE at end of file or when no handle is open.
   */
  protected function getNextLine($handle = NULL) {
    if ($handle === NULL) {
      $handle = $this->csvHandle;
    }
    if (!is_resource($handle)) {
      return FALSE;
    }
    // escape parameter was added in PHP 5.3.
    if (version_compare(phpversion(), '5.3', '<')) {
      return fgetcsv($handle, $this->fgetcsv['length'],
        $this->fgetcsv['delimiter'], $this->fgetcsv['enclosure']);
    }
    return fgetcsv($handle, $this->fgetcsv['length'],
      $this->fgetcsv['delimiter'], $this->fgetcsv['enclosure'],
      $this->fgetcsv['escape']);
  }

  /**
   * Opens $this->file for reading.
   *
   * @return resource
   *
   * @throws RuntimeException
   *   When fopen() fails.
   */
  protected function openFile() {
    $handle = fopen($this->file, 'r');
    if ($handle === FALSE) {
      throw new RuntimeException('Unable to open CSV file: ' . $this->file);
    }
    return $handle;
  }

  /**
   * Closes the iteration handle if it is open.
   */
  protected function closeHandle() {
    if (is_resource($this->csvHandle)) {
      fclose($this->csvHandle);
    }
    $this->csvHandle = NULL;
  }

  /**
   * Consumes the configured number of header records from a handle.
   *
   * @param resource $handle
   *   Handle positioned at the start of the file.
   *
   * @return array|null
   *   The last header record, or NULL when there are no header rows or the
   *   file ends before all of them were read.
   */
  protected function skipHeaderRows($handle) {
    $line = NULL;
    for ($i = 0; $i < $this->headerRows; $i++) {
      $line = $this->getNextLine($handle);
      if (!is_array($line)) {
        return NULL;
      }
    }
    return $line;
  }
}
URL Response Code News Error Detected Category
http://example/1 404 11/1/11 Not found
http://example/2 404 10/28/11 Not found
http://example/3 404 6/30/12 Not found
http://example/4 404 6/30/12 Not found
Array
(
[URL] => URL
[Response Code] => Response Code
[News Error] => News Error
[Detected] => Detected
[Category] => Category
)
stdClass Object
(
[URL] => http://example/1
[Response Code] => 404
[News Error] =>
[Detected] => 11/1/11
[Category] => Not found
)
stdClass Object
(
[URL] => http://example/2
[Response Code] => 404
[News Error] =>
[Detected] => 10/28/11
[Category] => Not found
)
stdClass Object
(
[URL] => http://example/3
[Response Code] => 404
[News Error] =>
[Detected] => 6/30/12
[Category] => Not found
)
// Load the class definition that sits next to this script.
require dirname(__FILE__).'/csv_stream.php';
// The class is named CSVStream (not CSV). No explicit column list is given,
// so the field names are read from the single header row.
$c = new CSVStream('./example.csv', array(), array('header_rows' => 1));
// Dump the field list, then the first three data records.
print_r($c->fields());
print_r($c->getNextRow());
print_r($c->getNextRow());
print_r($c->getNextRow());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment