Skip to content

Instantly share code, notes, and snippets.

@nizaroni
Created April 19, 2013 21:48
Show Gist options
  • Save nizaroni/5423468 to your computer and use it in GitHub Desktop.
Save nizaroni/5423468 to your computer and use it in GitHub Desktop.
PHP script that parses a CSV data file and sums a chosen stat across rows that share a key. It receives the path to the CSV file, the number of columns expected on data rows and (optionally) the number of rows to parse and add. It ignores empty rows, rows that begin with `#` (comments) and rows that don't have the expected number of columns.
<?php
// Config
//------------------------------------------
// Zero-based CSV column index holding the grouping key (the country name).
const COUNTRY_KEY = 1;
// Zero-based CSV column index holding the value that is summed per country.
const ADD_KEY = 3;
// Functions
//------------------------------------------
/**
 * Reports a fatal error and terminates the script.
 *
 * The message is written to the standard error stream and the process exits
 * with status 1. Passing a string straight to exit() would print it on
 * standard output and leave the exit status at 0, so shell callers could not
 * tell a failure from a successful run.
 *
 * @param string $message Human-readable description of what went wrong.
 */
function error($message) {
    file_put_contents('php://stderr', sprintf("\nERROR: %s\n\n", $message));
    exit(1);
}
/**
 * Aborts the script with a "file could not be opened" error.
 *
 * @param string $filename Path that failed to open; quoted in the message.
 */
function file_error($filename) {
    $message = 'File `' . $filename . '` couldn’t be opened.';
    error($message);
}
/**
 * Decides whether a parsed CSV row should be ignored.
 *
 * @param array|false $line_data Result of fgetcsv() for one line.
 * @return bool TRUE when the row is blank, is a `#` comment, or (when
 *              ARG_COLUMNS is set) has a different number of columns.
 *              FALSE is returned for the FALSE end-of-input marker so the
 *              caller stops reading instead of skipping forever.
 */
function is_skip_line($line_data) {
    if ($line_data === false) {
        return false;
    }

    $column_count = count($line_data);

    // Empty line: a single column holding an empty value.
    if ($column_count === 1 && empty($line_data[0])) {
        return true;
    }

    // Commented line: first column starts with `#`.
    if (substr($line_data[0], 0, 1) === '#') {
        return true;
    }

    // Line doesn't have the right number of columns (only checked when the
    // expected column count was supplied).
    return (ARG_COLUMNS && $column_count != ARG_COLUMNS);
}
/**
 * Reads the next usable CSV row from a file handle.
 *
 * Rows that is_skip_line() rejects are discarded. This is done with a loop
 * rather than recursion so that a long run of skipped rows does not deepen
 * the call stack with every row.
 *
 * @param resource $handle Open, readable file handle.
 * @return array|false The next accepted row as returned by fgetcsv(), or
 *                     FALSE once fgetcsv() reports there is nothing more to
 *                     read.
 */
function fgetsmartcsv($handle) {
    do {
        $result = fgetcsv($handle);
        // is_skip_line() returns FALSE for the FALSE end marker, which ends
        // the loop and passes FALSE on to the caller.
    } while (is_skip_line($result));

    return $result;
}
/**
 * Adds one CSV row's value to the running per-country totals.
 *
 * The country is read from column COUNTRY_KEY and the value from column
 * ADD_KEY. An empty, missing or `(not set)` country is counted under
 * `Unknown`. A missing or non-numeric value (for example a header row)
 * contributes 0 instead of raising an undefined-offset or type error; the
 * country still gets an entry in the totals.
 *
 * @param array $line_data   One parsed CSV row.
 * @param array $stats_array Totals keyed by country name; updated in place.
 */
function add_line_to_array($line_data, &$stats_array) {
    $country = isset($line_data[COUNTRY_KEY]) ? $line_data[COUNTRY_KEY] : '';
    if (empty($country) OR $country === '(not set)') {
        $country = 'Unknown';
    }

    if (!array_key_exists($country, $stats_array)) {
        $stats_array[$country] = 0;
    }

    $value = isset($line_data[ADD_KEY]) ? $line_data[ADD_KEY] : NULL;
    if (is_numeric($value)) {
        // `+ 0` turns the numeric string into an int or float before adding.
        $stats_array[$country] += $value + 0;
    }
}
/**
 * Formats $part as a percentage of $total.
 *
 * @param int|float $part           Portion being measured.
 * @param int|float $total          Whole that $part belongs to.
 * @param int       $decimal_places Number of decimals to round to.
 * @return string Rounded percentage followed by `%`. When $total is 0 the
 *                result is `0%`, avoiding a division by zero.
 */
function format_percent($part, $total, $decimal_places = 0) {
    if ($total == 0) {
        return '0%';
    }

    return round(100 * $part / $total, $decimal_places) . '%';
}
/**
 * Right-aligns a label by left-padding it with spaces to PAD_LENGTH.
 *
 * The width is measured in characters rather than bytes, so labels that
 * contain multi-byte UTF-8 characters (e.g. accented country names) line up
 * with plain ASCII ones. Labels already at least PAD_LENGTH characters wide
 * are returned unchanged.
 *
 * @param string|int $key Label to pad; converted to a string first.
 * @return string The padded label.
 */
function format_key($key) {
    $key = (string) $key;

    // Prefer mbstring; without it, count every byte that is not a UTF-8
    // continuation byte, which equals the character count for valid UTF-8.
    $width = function_exists('mb_strlen')
        ? mb_strlen($key, 'UTF-8')
        : preg_match_all('/[^\x80-\xBF]/', $key);

    $padding = PAD_LENGTH - $width;

    return $padding > 0 ? str_repeat(' ', $padding) . $key : $key;
}
// Script start
//------------------------------------------
// Command-line arguments:
//   1. path to the CSV file (required)
//   2. expected number of columns per data row (optional)
//   3. maximum number of accepted rows to add up (optional)
if (!isset($argv[1])) {
    error('Send country data file’s name.');
}
define('ARG_FILENAME', $argv[1]);

// file_error() terminates the script, so $file is an open handle past here.
if (!file_exists(ARG_FILENAME) OR ($file = fopen(ARG_FILENAME, 'r')) === FALSE) {
    file_error(ARG_FILENAME);
}

// Both optional arguments are FALSE when not supplied, which disables the
// column-count check and the row limit respectively.
define('ARG_COLUMNS', isset($argv[2]) ? $argv[2] : FALSE);
define('ARG_LIMIT', isset($argv[3]) ? $argv[3] : FALSE);

// Accumulate the per-country totals, stopping once ARG_LIMIT accepted rows
// have been added. $count starts at 1, so it never equals a FALSE limit.
$stats = [];
$count = 1;
while (($data = fgetsmartcsv($file)) !== FALSE) {
    add_line_to_array($data, $stats);
    if ($count == ARG_LIMIT) {
        break;
    }
    $count++;
}
fclose($file);

// Largest totals first.
if (!arsort($stats, SORT_NUMERIC)) {
    error('Couldn’t sort results.');
}

$total = array_sum($stats);

// The padding width must also fit the `Total` label; otherwise the final
// row is misaligned whenever every country name is shorter than it.
$longest_key = strlen('Total');
foreach (array_keys($stats) as $key) {
    $longest_key = max($longest_key, strlen($key));
}
define('PAD_LENGTH', $longest_key);

// Report: one line per country (value and share of the total), then the total.
echo "\n";
foreach ($stats as $key => $item) {
    printf("%s: %s %s\n", format_key($key), $item, format_percent($item, $total, 2));
}
printf("\n%s: %s\n\n", format_key('Total'), $total);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment