Last active
December 5, 2017 20:28
-
-
Save heathdutton/5f040d8dc6759e838b57d4c4c0eb3d00 to your computer and use it in GitHub Desktop.
Takes a CSV with JSON in it, and makes it more usable as a CSV with columns extracted.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Automatically extrapolate JSON keys/values from a CSV into their own columns. | |
* JSON is flattened, and null values/keys are ignored. | |
* | |
* Usage: | |
* | |
* php UnpackCSVJSON.php original.csv destination.csv | |
*/ | |
$file = $argv[1]; | |
$fileOutput = $argv[2] ?? str_ireplace('.csv', '', $file) . '.unpacked.csv'; | |
echo "Unpacking JSON values from: $file\n"; | |
echo "Creating: $fileOutput\n"; | |
// Get the key->value structure of all json. | |
$keys = []; | |
$columnCountMax = 0; | |
if (($handle = fopen($file, "r")) !== false) { | |
while (($row = fgetcsv($handle, 0, ',', '"', '"')) !== false) { | |
$columnCount = count($row); | |
$columnCountMax = max($columnCountMax, $columnCount); | |
for ($col = 0; $col < $columnCount; $col++) { | |
$array = json_decode_and_flatten_recursive($row[$col]); | |
if (is_array($array) && !empty($array)) { | |
foreach ($array as $key => $value) { | |
$keys[$key] = true; | |
} | |
} | |
} | |
} | |
fclose($handle); | |
} | |
echo "Unpacked JSON columns to be added to your CSV: " . count($keys) . "\n"; | |
echo "Total column count will be: " . (count($keys) + $columnCountMax) . "\n"; | |
$keys = array_keys($keys); | |
sort($keys); | |
// print_r($keys); | |
// exit; | |
// Step through the file again, this time filling the new CSV with contents. | |
if (($output = fopen($fileOutput, "w")) !== false) { | |
// Header column | |
$headers = []; | |
for ($i = 0; $i < $columnCountMax; $i++) { | |
$headers[] = '?'; | |
} | |
foreach ($keys as $key) { | |
$headers[] = $key; | |
} | |
fputcsv($output, $headers); | |
if (($handle = fopen($file, "r")) !== false) { | |
while (($row = fgetcsv($handle, 0, ',', '"', '"')) !== false) { | |
$columnCount = count($row); | |
$newRow = []; | |
$newRowJSON = []; | |
for ($col = 0; $col < $columnCount; $col++) { | |
$array = json_decode_and_flatten_recursive($row[$col]); | |
if (is_array($array) && !empty($array)) { | |
foreach ($keys as $key) { | |
$newRowJSON[] = $array[$key] ?? null; | |
} | |
$newRow[] = 'Contained JSON'; | |
} else { | |
$newRow[] = $row[$col]; | |
} | |
} | |
// Put the JSON fields at the end of the rows (because they could have been in varoius columns previously.) | |
foreach ($newRowJSON as $newRowJSONCol) { | |
$newRow[] = $newRowJSONCol; | |
} | |
fputcsv($output, $newRow); | |
} | |
fclose($handle); | |
} | |
fclose($output); | |
} | |
echo "done\n"; | |
/** | |
* Method to flatten a nested JSON string or array. | |
* Excludes empty values and anything lacking a key. | |
* | |
* @param mixed $input | |
* @return array | |
*/ | |
function json_decode_and_flatten_recursive($input) { | |
$return = []; | |
if (!is_array($input)) { | |
$input = json_decode($input, true); | |
} | |
if (is_array($input)) { | |
array_walk_recursive($input, function ($a, $b) use (&$return) { | |
if (!empty($b)){ | |
$arr = json_decode($a, true); | |
if (is_array($arr)) { | |
$return += json_decode_and_flatten_recursive($a); | |
$a = null; | |
} | |
} | |
if (!empty($a) && !empty($b)) { | |
$return[$b] = $a; | |
} | |
}); | |
} | |
return $return; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment