Skip to content

Instantly share code, notes, and snippets.

@divinity76
Created March 2, 2024 08:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save divinity76/3cab164c0c7a7be51fff4d77e1534bc0 to your computer and use it in GitHub Desktop.
Save divinity76/3cab164c0c7a7be51fff4d77e1534bc0 to your computer and use it in GitHub Desktop.
csv merger script
<?php
declare(strict_types=1);
function fix_encoding(string $str): string
{
//return $str;
return mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
}
function parse_csv(string $csv, string $separator = ","): array
{
$csv = strtr(
$csv,
[
"\xEF\xBB\xBF" => "", // remove UTF-8 byte order masks, if present
"\r\n" => "\n", // Windows CrLf=> Unix Lf
"\r" => "\n" // old-MacOS Cr => Unix Lf
// (both modern MacOS and Linux use Lf .. Windows is the only outlier)
]
);
$lines = explode("\n", $csv);
$keys = str_getcsv(array_shift($lines), $separator);
$ret = array();
foreach ($lines as $lineno => $line) {
if (strlen($line) < 1) {
// ... probably malformed csv, but we'll allow it
continue;
}
$parsed = str_getcsv($line, $separator);
if (count($parsed) !== count($keys)) {
throw new \RuntimeException("error on csv line #{$lineno}: count mismatch:" . count($parsed) . ' !== ' . count($keys) . ": " . var_export([
'error' => 'count mismatch',
'keys' => $keys,
'parsed' => $parsed,
'line' => $line
], true));
}
$ret[] = array_combine($keys, $parsed);
}
return $ret;
}
error_reporting(E_ALL);
ini_set('display_errors', '1');
set_error_handler(function ($errno, $errstr, $errfile, $errline) {
if (error_reporting() & $errno) {
throw new \ErrorException($errstr, 0, $errno, $errfile, $errline);
}
});
if ($argc !== 3) {
echo "Usage: php {$argv[0]} <file1.csv> <file2.csv>\n";
exit(1);
}
$file1 = $argv[1];
$file2 = $argv[2];
$raw1 = file_get_contents($file1);
$raw2 = file_get_contents($file2);
$raw1 = fix_encoding($raw1);
$raw2 = fix_encoding($raw2);
$csv1 = parse_csv($raw1);
$csv2 = parse_csv($raw2);
$merged = [];
// Assuming both CSVs have the same headers and the merge is based on these headers.
$headers = array_keys($csv1[0] + $csv2[0]); // Combine and get unique headers from both CSVs
foreach ($csv1 as $index => $row) {
foreach ($headers as $header) {
// If the current header exists in both csv1 and csv2 rows
if (isset($csv2[$index][$header], $row[$header])) {
// If csv1's value is empty and csv2's value is not, take csv2's value
if (empty($row[$header]) && !empty($csv2[$index][$header])) {
$merged[$index][$header] = $csv2[$index][$header];
} else {
// Otherwise, use csv1's value (even if it's empty, since both are equal priority in this case)
$merged[$index][$header] = $row[$header];
}
} elseif (isset($row[$header])) {
// If the header exists only in csv1, take csv1's value
$merged[$index][$header] = $row[$header];
} elseif (isset($csv2[$index][$header])) {
// If the header exists only in csv2, take csv2's value
$merged[$index][$header] = $csv2[$index][$header];
}
}
}
// Handle the case where csv2 is longer than csv1
if (count($csv2) > count($csv1)) {
for ($i = count($csv1); $i < count($csv2); $i++) {
foreach ($headers as $header) {
if (isset($csv2[$i][$header])) {
$merged[$i][$header] = $csv2[$i][$header];
}
}
}
}
function array_to_csv(array $data, string $delimiter = ",", string $enclosure = '"', string $escapeChar = "\\"): string {
$fh = fopen('php://temp', 'rw'); // Write to a temporary file
foreach ($data as $row) {
fputcsv($fh, $row, $delimiter, $enclosure, $escapeChar);
}
rewind($fh);
$csvString = stream_get_contents($fh);
fclose($fh);
return $csvString;
}
$mergedCsvString = array_to_csv($merged);
// For demonstration, let's just print the merged CSV string
echo mb_convert_encoding($mergedCsvString, 'ISO-8859-1', 'UTF-8');
//echo $mergedCsvString;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment