Created
July 9, 2015 13:29
-
-
Save ethanpil/8d430fc8c8d7b82c11ab to your computer and use it in GitHub Desktop.
Useful CSV Parsing Function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Usage: read a CSV file into memory and parse it, treating the first row as column headers.
$myData = csvParse(
    file_get_contents('csv.file'),
    true
);
/**
 * Parse a comma-separated string into a rectangular two-dimensional array.
 *
 * Format rules implemented here:
 *  - Fields are separated by "," and rows by "\n". A "\r" before the "\n" is
 *    removed by the per-cell trim(), so CRLF input works as well.
 *  - A field may be wrapped in double quotes; commas and newlines inside the
 *    quotes are kept as data. A literal quote inside a quoted field may be
 *    written either as \" or as "" (doubled quote).
 *  - Every cell is passed through trim(), quoted cells included.
 *  - A newline at the very end of the input terminates the last row; it does
 *    not start an additional empty row. Blank lines elsewhere yield a row
 *    containing a single empty string.
 *  - Input that ends inside an unterminated quote keeps the text read so far
 *    as the last cell.
 *
 * All rows are padded with NULL up to the width of the widest row (or of the
 * header row, if that is wider).
 *
 * @param string     $in                 Raw CSV text.
 * @param bool       $hasHeaders         When true the first row is consumed as column
 *                                       names and every returned row is keyed by them.
 *                                       Columns without a header are keyed by their
 *                                       zero-based index; for duplicate header names
 *                                       the right-most column wins.
 * @param array|null $returnHeadersList  Receives the (padded) header list when
 *                                       $hasHeaders is true; left untouched otherwise.
 *
 * @return array List of rows. Empty input yields an empty array.
 */
function csvParse($in, $hasHeaders=false, &$returnHeadersList=NULL){
    $in = (string)$in;
    $length = strlen($in);

    $rows = array();      // Every parsed row, the header row (if any) included
    $row = array();       // Cells of the row currently being read
    $cell = '';           // Text of the cell currently being read
    $inQuotes = false;    // Is the current byte inside a quoted value?
    $rowPending = false;  // Has the current row consumed at least one byte?

    for ($i = 0; $i < $length; $i++) {
        $c = $in[$i];
        $next = ($i + 1 < $length) ? $in[$i + 1] : ''; // Look-ahead for escaped quotes

        if ($inQuotes) {
            if (($c === '\\' || $c === '"') && $next === '"') {
                // \" or "" inside quotes is a literal quote: keep it and skip the pair.
                $cell .= '"';
                $i++;
            } elseif ($c === '"') {
                $inQuotes = false; // Unescaped quote closes the quoted value
            } else {
                $cell .= $c;
            }
            continue;
        }

        if ($c === "\n") { // End of the row
            $row[] = trim($cell);
            $rows[] = $row;
            $row = array();
            $cell = '';
            $rowPending = false;
            continue;
        }

        $rowPending = true;
        if ($c === '"') {
            $inQuotes = true;
        } elseif ($c === ',') {
            $row[] = trim($cell);
            $cell = '';
        } else {
            $cell .= $c;
        }
    }

    // Flush a final row that was not terminated by a newline (this also covers
    // input that ends inside an unterminated quote).
    if ($rowPending) {
        $row[] = trim($cell);
        $rows[] = $row;
    }

    // Split off the header row, if requested.
    $headers = array();
    if ($hasHeaders && count($rows) > 0) {
        $headers = array_shift($rows);
    }

    // Determine the width of the table.
    $width = count($headers);
    foreach ($rows as $cells) {
        if (count($cells) > $width) {
            $width = count($cells);
        }
    }

    if ($hasHeaders) {
        // Columns beyond the header row are keyed by their numeric index.
        for ($k = count($headers); $k < $width; $k++) {
            $headers[$k] = $k;
        }
        $returnHeadersList = $headers;
    }

    // Squarify the data and apply the header keys. Each row is rebuilt into a
    // fresh array so that numeric header names can never overwrite a column
    // that has not been read yet, and key order always follows column order.
    foreach ($rows as $index => $cells) {
        $out = array();
        for ($k = 0; $k < $width; $k++) {
            $value = array_key_exists($k, $cells) ? $cells[$k] : NULL; // Missing cell, NULL
            if ($hasHeaders) {
                $out[$headers[$k]] = $value;
            } else {
                $out[$k] = $value;
            }
        }
        $rows[$index] = $out;
    }

    return $rows;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment