Created
April 19, 2013 21:48
-
-
Save nizaroni/5423468 to your computer and use it in GitHub Desktop.
PHP script that parses a CSV data file and sums a chosen statistic across rows that share the same key. It receives the path to the CSV file, the number of columns expected in each data row and (optionally) the number of rows to parse and add. It ignores empty rows, rows that begin with `#` (comments) and rows that don't have the expected number of columns.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Config | |
//------------------------------------------ | |
// Zero-based index of the CSV column whose value is used as the grouping key
// (the country name).
define('COUNTRY_KEY', 1);
// Zero-based index of the CSV column whose value is summed for each country.
define('ADD_KEY', 3);
// Functions | |
//------------------------------------------ | |
/**
 * Reports a fatal error and terminates the script.
 *
 * The message goes to STDERR (so it never mixes with the report printed on
 * STDOUT) and the process ends with exit status 1 so that shells and
 * pipelines can detect the failure.
 *
 * @param string $message Human readable description of what went wrong.
 */
function error($message) {
    file_put_contents('php://stderr', sprintf("\nERROR: %s\n\n", $message));
    exit(1);
}
/**
 * Aborts the script with a "file couldn't be opened" error.
 *
 * @param string $filename Path that failed to open; echoed in the message.
 */
function file_error($filename) {
    $message = sprintf('File `%s` couldn’t be opened.', $filename);
    error($message);
}
/**
 * Tells whether a parsed CSV row should be ignored.
 *
 * A row is skipped when it is blank, when its first field starts with `#`
 * (a comment), or when ARG_COLUMNS holds a positive number and the row has a
 * different number of fields.
 *
 * @param array|false $line_data Row as returned by fgetcsv(), or FALSE.
 * @return bool TRUE when the row must be skipped. FALSE is also returned for
 *              a FALSE input so that callers can tell "no more rows" apart
 *              from "skip this row".
 */
function is_skip_line($line_data) {
    if ($line_data === FALSE) {
        return FALSE;
    }

    $first_field = isset($line_data[0]) ? (string) $line_data[0] : '';
    $field_count = count($line_data);

    // Blank line: fgetcsv() yields a single NULL field. Compared against ''
    // explicitly because empty() would also swallow a row holding just "0".
    if ($field_count === 1 AND $first_field === '') {
        return TRUE;
    }

    // Commented line
    if (substr($first_field, 0, 1) === '#') {
        return TRUE;
    }

    // Line doesn't have the right number of columns. The constant comes
    // straight from the command line, so it is normalised to an integer
    // before comparing; zero or a missing value disables the check.
    $expected_columns = defined('ARG_COLUMNS') ? (int) ARG_COLUMNS : 0;

    return $expected_columns > 0 AND $field_count !== $expected_columns;
}
/**
 * Reads the next meaningful CSV row from a stream.
 *
 * Works like fgetcsv() but transparently discards every row for which
 * is_skip_line() returns TRUE. Implemented as a loop rather than recursion
 * so that long runs of skipped rows do not deepen the call stack.
 *
 * @param resource $handle Open, readable file handle.
 * @return array|false The next row that is not skipped, or FALSE once
 *                     fgetcsv() reports that nothing more can be read.
 */
function fgetsmartcsv($handle) {
    do {
        // Delimiter, enclosure and escape are spelled out with their
        // historical default values: behaviour is unchanged, and newer PHP
        // versions no longer complain about the implicit escape character.
        $row = fgetcsv($handle, 0, ',', '"', '\\');
    } while (is_skip_line($row));

    return $row;
}
/**
 * Adds one CSV row's value to the running total of its country.
 *
 * The country is read from column COUNTRY_KEY and the amount from column
 * ADD_KEY. Rows whose country is empty, missing or `(not set)` are
 * accumulated under `Unknown`. A missing or non-numeric amount (for example
 * a header row) counts as 0 instead of raising an error.
 *
 * @param array $line_data   Row as returned by fgetcsv().
 * @param array $stats_array Totals keyed by country name; updated in place.
 */
function add_line_to_array($line_data, &$stats_array) {
    $country = isset($line_data[COUNTRY_KEY]) ? $line_data[COUNTRY_KEY] : '';

    if (empty($country) OR $country === '(not set)') {
        $country = 'Unknown';
    }

    // `+ 0` turns the numeric string into an int or a float, whichever fits.
    $raw_amount = isset($line_data[ADD_KEY]) ? trim($line_data[ADD_KEY]) : '';
    $amount = is_numeric($raw_amount) ? $raw_amount + 0 : 0;

    if (!array_key_exists($country, $stats_array)) {
        $stats_array[$country] = 0;
    }

    $stats_array[$country] += $amount;
}
/**
 * Formats the share that $part represents of $total as a percentage string.
 *
 * @param int|float $part           Portion of the total.
 * @param int|float $total          Whole that $part belongs to.
 * @param int       $decimal_places Digits kept after the decimal point.
 * @return string Rounded percentage followed by `%`, e.g. `33.33%`. When
 *                $total is zero there is no meaningful share, so `0%` is
 *                returned instead of dividing by zero.
 */
function format_percent($part, $total, $decimal_places = 0) {
    // Loose comparison on purpose: covers 0, 0.0 and "0" alike.
    if ($total == 0) {
        return '0%';
    }

    return round(100 * $part / $total, $decimal_places) . '%';
}
/**
 * Left-pads a label with spaces so that it is PAD_LENGTH characters wide.
 *
 * str_pad() measures bytes, so a label containing multi-byte UTF-8
 * characters would receive too little padding. The target width is therefore
 * widened by the number of extra bytes the label carries. When the mbstring
 * extension is unavailable the function falls back to plain byte padding.
 *
 * @param string $key Label to align (a country name or `Total`).
 * @return string The label, right-aligned; returned unchanged when it is
 *                already at least PAD_LENGTH wide.
 */
function format_key($key) {
    $key = (string) $key;

    $extra_bytes = function_exists('mb_strlen')
        ? strlen($key) - mb_strlen($key, 'UTF-8')
        : 0;

    return str_pad($key, PAD_LENGTH + $extra_bytes, ' ', STR_PAD_LEFT);
}
// Script start | |
//------------------------------------------ | |
// The first command line argument (path to the CSV file) is mandatory.
if (!array_key_exists(1, $argv)) {
    error('Send country data file’s name.');
}

define('ARG_FILENAME', $argv[1]);

// Reads an optional numeric command line argument. Returns FALSE when the
// argument was not given and aborts when it is not a whole number, so a typo
// cannot silently turn into "skip every row" or "no limit".
$read_count_argument = function ($position) use ($argv) {
    if (!array_key_exists($position, $argv)) {
        return FALSE;
    }

    if (!preg_match('/^\d+\z/', $argv[$position])) {
        error(sprintf('Argument %d must be a whole number, `%s` given.', $position, $argv[$position]));
    }

    return (int) $argv[$position];
};

// Expected number of columns per data row (FALSE or 0: don't check).
define('ARG_COLUMNS', $read_count_argument(2));
// Maximum number of data rows to add up (FALSE or 0: no limit).
define('ARG_LIMIT', $read_count_argument(3));

if (!file_exists(ARG_FILENAME) OR ($file = fopen(ARG_FILENAME, 'r')) === FALSE) {
    file_error(ARG_FILENAME);
}

// Accumulate the per-country totals, stopping early once ARG_LIMIT rows
// have been added.
$stats = [];
$count = 0;

while (($data = fgetsmartcsv($file)) !== FALSE) {
    add_line_to_array($data, $stats);
    $count++;

    if (ARG_LIMIT AND $count >= ARG_LIMIT) {
        break;
    }
}

fclose($file);

// Largest totals first; keys (country names) are preserved.
if (!arsort($stats, SORT_NUMERIC)) {
    error('Couldn’t sort results.');
}

// Grand total plus the width of the label column. The width starts at the
// length of the `Total` label so that the closing line stays aligned even
// when every country name is shorter than it.
$total = 0;
$longest_key = strlen('Total');

foreach ($stats as $key => $item) {
    $longest_key = max($longest_key, strlen($key));
    $total += $item;
}

define('PAD_LENGTH', $longest_key);

// Report: one `<country>: <sum> <share>` line per country, then the total.
echo "\n";

foreach ($stats as $key => $item) {
    printf("%1\$s: %2\$s %3\$s\n", format_key($key), $item, format_percent($item, $total, 2));
}

printf("\n%s: %s\n\n", format_key('Total'), $total);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment