Skip to content

Instantly share code, notes, and snippets.

@jkaflik
Created September 18, 2013 09:24
Show Gist options
  • Save jkaflik/6606707 to your computer and use it in GitHub Desktop.
Save jkaflik/6606707 to your computer and use it in GitHub Desktop.
CSV files splitter <kofels@gmail.com>. Usage: csv_splitter file file1 file2. Options: -l=NUMBER_OF_ROWS number of rows per part (default 1000); -h print help; -o=DIR output directory (default: the same directory as the original file); -f files have headers (the first row is copied into each part).
#!/usr/bin/env php
<?php
/**
* CSV files splitter
* @author kofels@gmail.com
*/
// helper functions
/**
 * Writes a message to the standard error stream.
 *
 * The message is written as-is; callers append their own line ending.
 *
 * @param string $message text to emit
 */
$stderr = function ($message) {
    // open in append mode, mirroring a FILE_APPEND write to the same stream
    $stream = fopen('php://stderr', 'a');
    if ($stream === false) {
        return;
    }
    fwrite($stream, $message);
    fclose($stream);
};
/**
 * Joins any number of path segments with `/`.
 *
 * Segments that are exactly the empty string are dropped, and every run of
 * consecutive slashes in the joined result is collapsed into a single one.
 *
 * @return string the joined path
 */
$join_path = function () {
    $segments = array_filter(func_get_args(), function ($segment) {
        return $segment !== '';
    });

    return preg_replace('#/+#', '/', implode('/', $segments));
};
// main script code
// Command line parsing. After this section:
//   $options     - names of all options that were passed (without the leading dash)
//   $optionsArgs - option name => value, only for options written as -name=value
//   $files       - input files to split, in the order they were given
$options = array();
$optionsArgs = array();
$files = array();
array_shift($argv); // drop the script name
foreach ($argv as $arg)
{
    // The empty-string guard avoids reading offset 0 of an empty argument;
    // such an argument falls through to the file branch and is reported as not found.
    if ($arg !== '' && $arg[0] == '-') // the argument is an option, push it to the list
    {
        // Split on the first `=` only, so that a value may itself contain `=`.
        $bfr = explode('=', substr($arg, 1), 2);
        $options[] = $bfr[0];
        if (count($bfr) > 1)
        {
            $optionsArgs[$bfr[0]] = $bfr[1];
        }
    }
    else // otherwise it is an input file
    {
        // is_file() is false both for missing paths and for directories.
        if (!is_file($arg))
        {
            $stderr('File not found: ' . $arg . PHP_EOL);
            // A top-level `return` would end the script with exit status 0;
            // exit(1) lets the calling shell see the failure.
            exit(1);
        }
        $files[] = $arg;
    }
}
// Print the usage screen when help is requested explicitly or when there is
// nothing to split. Options are all optional (they have documented defaults),
// so their absence alone must not prevent the files from being processed.
if (in_array('h', $options, true) || empty($files))
{
    echo 'CSV files splitter <kofels@gmail.com>' . PHP_EOL;
    echo 'usage: csv_splitter file file1 file2' . PHP_EOL;
    echo 'options:' . PHP_EOL;
    echo '-l=NUMBER_OF_ROWS specify num of rows per part, defaults 1000' . PHP_EOL;
    echo '-h print help' . PHP_EOL;
    echo '-o=DIR output directory, by default the same as the original file' . PHP_EOL;
    echo '-f files has headers (first row goes to each part of file)' . PHP_EOL;
    exit;
}
// if (in_array('o', $options) && (!file_exists($optionsArgs['o']) || !is_dir($optionsArgs['o'])))
// {
// $stderr('Output directory `' . $optionsArgs['o'] . '` not found!' . PHP_EOL);
// return 1;
// }
// Split every input file into numbered parts named `<file name>.<N>`.
//
// Options are resolved once up front because they are the same for every file:
//   -f    the first row of each input is a header and is repeated in every part
//   -l=N  maximum number of rows per part, header row included (default 1000)
//   -o=D  directory the parts are written to (default: directory of the input file)
$hasHeader = in_array('f', $options, true);

$limit = 1000;
if (in_array('l', $options, true))
{
    // A missing or non-numeric value casts to 0, which would silently put
    // every single row into its own part, so reject it instead.
    $limit = isset($optionsArgs['l']) ? (int) $optionsArgs['l'] : 0;
    if ($limit < 1)
    {
        $stderr('Number of rows per part (-l) must be a positive integer' . PHP_EOL);
        exit(1);
    }
}

// `-o` given without a value falls back to the default directory instead of
// reading an undefined index (which resolved the output path to the root).
$customOutputDirectory = (in_array('o', $options, true) && isset($optionsArgs['o']))
    ? $optionsArgs['o']
    : null;

foreach ($files as $file)
{
    $h = @fopen($file, 'r');
    if (!$h)
    {
        $stderr('Cannot open ' . $file . ' file' . PHP_EOL);
        exit(1); // a top-level `return 1` would still exit with status 0
    }

    $fileName = basename($file);
    $outputDirectory = ($customOutputDirectory !== null) ? $customOutputDirectory : dirname($file);
    $header = $hasHeader ? fgetcsv($h) : null; // false when the input is empty

    $o = null; // handle of the part currently being written
    $n = 0;    // output file N postfix
    $i = 0;    // rows written to the current part so far

    // fgetcsv() returns false at end of file; a blank line comes back as an
    // array and is copied through like any other row.
    while (($row = fgetcsv($h)) !== false)
    {
        if (!$o || $i >= $limit)
        {
            // Close the finished part before starting the next one, otherwise
            // a large input keeps one open handle per part.
            if ($o)
            {
                fclose($o);
            }

            $n++;
            $outputPath = $join_path($outputDirectory, $fileName . '.' . $n);
            $o = @fopen($outputPath, 'w');
            if (!$o)
            {
                fclose($h);
                $stderr('Cannot open `' . $outputPath . '` file for write' . PHP_EOL);
                exit(1);
            }

            $i = 0;
            if (is_array($header))
            {
                fputcsv($o, $header);
                $i++; // the header counts towards the row limit
            }
        }

        fputcsv($o, $row);
        $i++;
    }

    if ($o)
    {
        fclose($o);
    }
    fclose($h);

    echo $file . ' splitted to ' . $n . ' part(s).' . PHP_EOL;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment