Skip to content

Instantly share code, notes, and snippets.

@ethanpil
Created July 9, 2015 13:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ethanpil/8d430fc8c8d7b82c11ab to your computer and use it in GitHub Desktop.
Save ethanpil/8d430fc8c8d7b82c11ab to your computer and use it in GitHub Desktop.
Useful CSV Parsing Function
//Usage: read the whole of 'csv.file' into a string and hand it to csvParse().
//The second argument (true) is $hasHeaders: the first CSV row is then taken as the
//column names and used as the keys of every returned row instead of being returned as data.
//NOTE(review): the file_get_contents() result is passed straight through without being checked.
$myData = csvParse(file_get_contents('csv.file'),true);
/**
 * Parse CSV text into a rectangular ("squarified") two-dimensional array.
 *
 * Fields are comma separated and rows are newline separated (a "\r" before the
 * newline is removed by the trimming below). A field may be wrapped in double
 * quotes, in which case it can contain commas and newlines; a literal quote
 * inside a quoted field is written either as a doubled quote ("") or as a
 * backslash-escaped quote (\"). Every value is passed through trim(), quoted
 * or not.
 *
 * Rows shorter than the widest row are padded with NULL so that every returned
 * row has the same number of cells.
 *
 * @param string     $in                 Raw CSV text.
 * @param bool       $hasHeaders         When true the first row is treated as column names
 *                                       and each returned row is keyed by those names.
 * @param array|null $returnHeadersList  Output parameter, only written when $hasHeaders is
 *                                       true: the header names, extended with numeric
 *                                       indexes for columns that have no header.
 *
 * @return array List of rows. Rows are keyed 0..n-1, or by header name when
 *               $hasHeaders is true (duplicate header names: the last column wins).
 */
function csvParse($in, $hasHeaders=false, &$returnHeadersList=NULL){
    $in = (string)$in;
    $length = strlen($in);

    $rows = array();    // Completed data rows (the header row is never stored here)
    $headers = NULL;    // Header row once it has been read; NULL until then
    $row = array();     // Cells of the row currently being read
    $value = '';        // Characters of the cell currently being read
    $inQuotes = false;  // Is the current byte inside a quoted value?
    $pending = false;   // Has anything been read since the last completed row?

    for ($i = 0; $i < $length; $i++) {
        $c = $in[$i];
        $next = ($i + 1 < $length) ? $in[$i + 1] : ''; // Look ahead one byte for escaped quotes

        if ($inQuotes) {
            $pending = true;
            if (($c === '"' || $c === '\\') && $next === '"') {
                // Escaped quote ("" or \"): keep one literal quote and skip the second byte.
                $value .= '"';
                $i++;
            } elseif ($c === '"') {
                $inQuotes = false; // Closing quote
            } else {
                $value .= $c;
            }
            continue;
        }

        if ($c === "\n") {
            // End of the row: the first row becomes the header list when requested.
            $row[] = trim($value);
            if ($hasHeaders && $headers === NULL) {
                $headers = $row;
            } else {
                $rows[] = $row;
            }
            $row = array();
            $value = '';
            $pending = false;
        } elseif ($c === ',') {
            $row[] = trim($value);
            $value = '';
            $pending = true;
        } elseif ($c === '"') {
            $inQuotes = true; // Opening quote of a quoted value
            $pending = true;
        } else {
            $value .= $c; // Ordinary character, part of the value
            $pending = true;
        }
    }

    // Flush a final row that was not newline terminated (this also covers input
    // that ends inside an unterminated quoted value). Nothing is flushed when
    // the input already ended with a newline, so no phantom empty row appears.
    if ($pending) {
        $row[] = trim($value);
        if ($hasHeaders && $headers === NULL) {
            $headers = $row;
        } else {
            $rows[] = $row;
        }
    }

    // Width of the widest row; every row is padded to this width below.
    $width = 0;
    foreach ($rows as $cells) {
        $width = max($width, count($cells));
    }

    if ($hasHeaders) {
        if ($headers === NULL) {
            $headers = array(); // Empty input: there was no header row at all
        }
        $width = max($width, count($headers));
        // Columns beyond the last header are named after their numeric index.
        for ($k = count($headers); $k < $width; $k++) {
            $headers[$k] = $k;
        }
        $returnHeadersList = $headers;
    }

    // Build every row afresh instead of renaming keys in place, so a header that
    // happens to look like a column index (e.g. "0" or "1") can never overwrite
    // or delete a cell that has not been copied yet.
    foreach ($rows as $index => $cells) {
        $squared = array();
        for ($k = 0; $k < $width; $k++) {
            $key = $hasHeaders ? $headers[$k] : $k;
            $squared[$key] = isset($cells[$k]) ? $cells[$k] : NULL; // Missing cell, NULL
        }
        $rows[$index] = $squared;
    }

    return $rows;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment