Skip to content

Instantly share code, notes, and snippets.

@heathdutton
Last active December 5, 2017 20:28
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save heathdutton/5f040d8dc6759e838b57d4c4c0eb3d00 to your computer and use it in GitHub Desktop.
Save heathdutton/5f040d8dc6759e838b57d4c4c0eb3d00 to your computer and use it in GitHub Desktop.
Takes a CSV with JSON in it, and makes it more usable as a CSV with columns extracted.
<?php
/**
* Automatically extrapolate JSON keys/values from a CSV into their own columns.
* JSON is flattened, and null values/keys are ignored.
*
* Usage:
*
* php UnpackCSVJSON.php original.csv destination.csv
*/
$file = $argv[1];
$fileOutput = $argv[2] ?? str_ireplace('.csv', '', $file) . '.unpacked.csv';
echo "Unpacking JSON values from: $file\n";
echo "Creating: $fileOutput\n";
// Get the key->value structure of all json.
$keys = [];
$columnCountMax = 0;
if (($handle = fopen($file, "r")) !== false) {
while (($row = fgetcsv($handle, 0, ',', '"', '"')) !== false) {
$columnCount = count($row);
$columnCountMax = max($columnCountMax, $columnCount);
for ($col = 0; $col < $columnCount; $col++) {
$array = json_decode_and_flatten_recursive($row[$col]);
if (is_array($array) && !empty($array)) {
foreach ($array as $key => $value) {
$keys[$key] = true;
}
}
}
}
fclose($handle);
}
echo "Unpacked JSON columns to be added to your CSV: " . count($keys) . "\n";
echo "Total column count will be: " . (count($keys) + $columnCountMax) . "\n";
$keys = array_keys($keys);
sort($keys);
// print_r($keys);
// exit;
// Step through the file again, this time filling the new CSV with contents.
if (($output = fopen($fileOutput, "w")) !== false) {
// Header column
$headers = [];
for ($i = 0; $i < $columnCountMax; $i++) {
$headers[] = '?';
}
foreach ($keys as $key) {
$headers[] = $key;
}
fputcsv($output, $headers);
if (($handle = fopen($file, "r")) !== false) {
while (($row = fgetcsv($handle, 0, ',', '"', '"')) !== false) {
$columnCount = count($row);
$newRow = [];
$newRowJSON = [];
for ($col = 0; $col < $columnCount; $col++) {
$array = json_decode_and_flatten_recursive($row[$col]);
if (is_array($array) && !empty($array)) {
foreach ($keys as $key) {
$newRowJSON[] = $array[$key] ?? null;
}
$newRow[] = 'Contained JSON';
} else {
$newRow[] = $row[$col];
}
}
// Put the JSON fields at the end of the rows (because they could have been in varoius columns previously.)
foreach ($newRowJSON as $newRowJSONCol) {
$newRow[] = $newRowJSONCol;
}
fputcsv($output, $newRow);
}
fclose($handle);
}
fclose($output);
}
echo "done\n";
/**
* Method to flatten a nested JSON string or array.
* Excludes empty values and anything lacking a key.
*
* @param mixed $input
* @return array
*/
function json_decode_and_flatten_recursive($input) {
$return = [];
if (!is_array($input)) {
$input = json_decode($input, true);
}
if (is_array($input)) {
array_walk_recursive($input, function ($a, $b) use (&$return) {
if (!empty($b)){
$arr = json_decode($a, true);
if (is_array($arr)) {
$return += json_decode_and_flatten_recursive($a);
$a = null;
}
}
if (!empty($a) && !empty($b)) {
$return[$b] = $a;
}
});
}
return $return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment