Skip to content

Instantly share code, notes, and snippets.

@DavidBruchmann
Created October 14, 2021 19:50
Show Gist options
  • Save DavidBruchmann/1215dc4fb9b7bd339253de5b6e304909 to your computer and use it in GitHub Desktop.
Save DavidBruchmann/1215dc4fb9b7bd339253de5b6e304909 to your computer and use it in GitHub Desktop.
CsvUtility to detect delimiters and other properties of CSV files
<?php
// Enable strict scalar type checking for calls made from this file.
declare(strict_types = 1);
// All symbols declared below live in the csv2content utility namespace.
namespace WDB\PHP\Csv2content\Utility;
// Abort execution unless the TYPO3_MODE constant is defined
// (presumably set by the TYPO3 bootstrap — verify against the targeted TYPO3 version).
defined('TYPO3_MODE') || die();
/**
 * Heuristics for guessing the dialect of a CSV file: delimiter, enclosure,
 * escape character and encoding.
 *
 * Only the delimiter is actually detected; enclosure, escape and encoding
 * detection are placeholders that return fixed defaults.
 */
class CsvUtility
{
    /**
     * Delimiters that are probed, in order of precedence. When several
     * delimiters collect the same number of votes for a whole file, the one
     * listed first wins.
     */
    private const CANDIDATE_DELIMITERS = [';', ',', "\t", '|'];

    /**
     * Detects all supported CSV parameters of a file.
     *
     * @param string $csvFilePath Path to the CSV file
     * @return array Map with the keys 'delimiter' (string|null), 'enclosure',
     *               'escape' and 'encoding' (strings)
     * @throws \RuntimeException If the file cannot be opened for reading
     */
    public static function detectParams(string $csvFilePath)
    {
        $delimiter = self::detectDelimiter($csvFilePath);

        return [
            'delimiter' => $delimiter,
            'enclosure' => self::detectEnclosure($csvFilePath, $delimiter),
            'escape' => self::detectEscape($csvFilePath, $delimiter),
            'encoding' => self::detectEncoding($csvFilePath, $delimiter),
        ];
    }

    /**
     * Placeholder: enclosure detection is not implemented yet.
     *
     * @param string $csvFile Path to the CSV file (currently unused)
     * @param string|null $delimiter Detected delimiter (currently unused)
     * @return string Always the double quote character
     */
    public static function detectEnclosure($csvFile, $delimiter)
    {
        // TODO
        return '"';
    }

    /**
     * Placeholder: escape character detection is not implemented yet.
     *
     * @param string $csvFile Path to the CSV file (currently unused)
     * @param string|null $delimiter Detected delimiter (currently unused)
     * @return string Always a single backslash
     */
    public static function detectEscape($csvFile, $delimiter)
    {
        // TODO
        return '\\';
    }

    /**
     * Placeholder: encoding detection is not implemented yet.
     *
     * @param string $csvFile Path to the CSV file (currently unused)
     * @param string|null $delimiter Detected delimiter (currently unused)
     * @return string Always 'UTF-8'
     */
    public static function detectEncoding($csvFile, $delimiter)
    {
        // TODO
        return 'UTF-8';
    }

    /**
     * Detects the delimiter of a CSV file by letting its first lines vote.
     *
     * Every non-blank line is analysed with detectDelimiterInLine(). Lines
     * without an unambiguous delimiter count towards $linesToCheck but do
     * not vote. The delimiter with the most votes is returned; on a tie the
     * candidate with the higher precedence wins.
     *
     * @param string $csvFile Path to the CSV file
     * @param int $linesToCheck Maximum number of non-blank lines to analyse
     * @return string|null The delimiter, or null if no line revealed one
     * @throws \RuntimeException If the file cannot be opened for reading
     */
    public static function detectDelimiter(string $csvFile, $linesToCheck = 100) : ?string
    {
        $handle = fopen($csvFile, 'r');
        if ($handle === false) {
            throw new \RuntimeException(sprintf('Unable to open CSV file "%s" for reading.', $csvFile));
        }

        $votes = array_fill_keys(self::CANDIDATE_DELIMITERS, 0);
        $checkedLines = 0;
        try {
            // Compare against false explicitly: a line consisting of "0" is falsy.
            while ($checkedLines < $linesToCheck && ($line = fgets($handle)) !== false) {
                // fgets() keeps the line ending, so strip it before testing for blank lines.
                $line = rtrim($line, "\r\n");
                if ($line === '') {
                    continue;
                }
                $checkedLines++;
                $delimiter = self::detectDelimiterInLine($line);
                if ($delimiter !== null) {
                    $votes[$delimiter]++;
                }
            }
        } finally {
            fclose($handle);
        }

        $topVotes = max($votes);
        if ($topVotes === 0) {
            return null;
        }

        // array_search() returns the first matching key, i.e. the candidate
        // with the highest precedence among equally voted delimiters.
        return (string)array_search($topVotes, $votes, true);
    }

    /**
     * Detects the delimiter used in a single CSV line.
     *
     * The line is parsed once per candidate delimiter; the candidate that
     * yields the most fields wins.
     *
     * Based on https://stackoverflow.com/a/59581170/1019850
     *
     * @param string $line One line of CSV data
     * @return string|null The delimiter, or null if no candidate splits the
     *                     line or several candidates split it equally well
     */
    public static function detectDelimiterInLine(string $line) : ?string
    {
        $fieldCounts = [];
        foreach (self::CANDIDATE_DELIMITERS as $candidate) {
            // Enclosure and escape are passed explicitly (PHP's defaults) so that
            // quoted delimiters are ignored and no deprecation is raised on PHP 8.4+.
            $fieldCounts[$candidate] = count(str_getcsv($line, $candidate, '"', '\\'));
        }

        $mostFields = max($fieldCounts);
        if ($mostFields < 2) {
            // str_getcsv() always returns at least one field, so a count of 1
            // means that the candidate does not occur as a delimiter.
            return null;
        }

        $winners = array_keys($fieldCounts, $mostFields, true);

        return count($winners) === 1 ? (string)$winners[0] : null;
    }
}
@doelmi
Copy link

doelmi commented Jun 8, 2022

Line 18 'encoding' => self::detectEncoding($csvFile, $delimiter),
should be
'encoding' => self::detectEncoding($csvFilePath, $delimiter),

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment