Skip to content

Instantly share code, notes, and snippets.

@johann8384
Created December 11, 2012 08:52
Show Gist options
  • Save johann8384/4257122 to your computer and use it in GitHub Desktop.
Save johann8384/4257122 to your computer and use it in GitHub Desktop.
PHP Stats Library
/**
*
* Extend Stats class to include Histogram support
* borrows heavily from Jesus M Castagnetto's code published here: http://px.sklar.com/code.html?id=119
* @author jcreasy
*/
/*
* // Original Header
* This is a histogram class that accepts a unidimensional array of data
* Returns 2 arrays by using the getStats() and getBins() methods.
* Note: Tested only w/ PHP 3.0.7
* (c) Jesus M. Castagnetto, 1999.
* Gnu GPL'd code, see www.fsf.org for the details.
*/
class Histogram extends Stat
{
    /** @var array Map of bin lower edge (stringified float used as key) => number of values in the bin. */
    private $bins = array();

    /** @var string Not read anywhere in this class; the title is derived on demand by title(). */
    private $title = '';

    /**
     * Build the histogram title from the run mode, class and metric names.
     *
     * The three parts are private to Stat and are read through Stat::__get().
     *
     * @return string "<runmode>_<class>_<metric>"
     */
    public function title()
    {
        return $this->runmode . '_' . $this->class . '_' . $this->metric;
    }

    /**
     * Render the title line followed by the parent's summary statistics.
     *
     * @return string
     */
    private function print_stats()
    {
        $s = "Statistics for histogram: " . $this->title() . "\n";
        $s .= parent::to_string();
        return $s;
    }

    /**
     * Render one text row per bin: ordinal, lower edge, frequency and an ASCII bar.
     *
     * @return string
     */
    private function print_bins()
    {
        $s = sprintf("Number of bins: %s\n", count($this->bins));
        $s .= sprintf("BIN\tVAL\t\tFREQ\n");
        // max() on an empty array is an error on PHP 8; there is nothing to draw anyway.
        if (count($this->bins) === 0) {
            return $s;
        }
        $maxbin = max($this->bins);
        $row = 0;
        // foreach replaces the each()/list() idiom, which no longer exists in PHP 8.
        foreach ($this->bins as $key => $val) {
            $row++;
            $s .= sprintf("%d\t%-8.2f\t%-8d |%s\n", $row, $key, $val, $this->print_bar($val, $maxbin));
        }
        return $s;
    }

    /**
     * Choose the number of bins from the number of distinct data values.
     *
     * Thresholds follow the table referenced at
     * http://www.amazon.com/Jurans-Quality-Control-Handbook-Juran/dp/0070331766
     *
     * @return int 0 when there is no data, otherwise between 5 and 52
     */
    private function number_of_bins()
    {
        $count = count(array_unique($this->get_data()));
        if ($count < 1) {
            return 0;
        }
        if ($count < 20) {
            return 5;
        }
        if ($count <= 50) {
            return 6;
        }
        if ($count <= 100) {
            return 7;
        }
        if ($count <= 200) {
            return 8;
        }
        if ($count <= 500) {
            return 9;
        }
        if ($count <= 1000) {
            return 10;
        }
        if ($count <= 5000) {
            // Previously this value was computed and then thrown away, so every
            // data set above 1000 distinct values got 52 bins.
            return (int)floor($count / 100) + 1;
        }
        return 52;
    }

    /**
     * Ensure there is enough data to build a histogram.
     *
     * @throws Exception when there are fewer than two values or no bins can be derived
     * @return void
     */
    private function validate()
    {
        if ($this->count() <= 1) {
            throw new Exception("Not enough data, " . $this->count() . " values");
        }
        if ($this->number_of_bins() < 1) {
            throw new Exception("Insufficient number of bins.");
        }
        return;
    }

    /**
     * Build an ASCII bar for one bin, scaled so the tallest bin is about 40 characters.
     *
     * @param int|float $val    frequency of this bin
     * @param int|float $maxbin largest frequency across all bins
     * @return string a run of '*' characters (possibly empty)
     */
    private function print_bar($val, $maxbin)
    {
        $fact = ($maxbin > 40) ? 40 / $maxbin : 1;
        // Round up so that any non-empty bin shows at least one '*'; this matches
        // the iteration count of the loop this replaces.
        $length = (int)ceil((int)$val * $fact);
        return str_repeat('*', max(0, $length));
    }

    /**
     * Create the data set and, if values were supplied, build the histogram immediately.
     *
     * @param string     $runmode  forwarded to Stat
     * @param string     $class    forwarded to Stat
     * @param string     $metric   forwarded to Stat
     * @param int|string $interval forwarded to Stat
     * @param array|null $values   optional map of timestamp => list of numeric values
     * @throws Exception when values were supplied but validate() rejects them
     */
    public function __construct($runmode, $class, $metric, $interval = '1', $values = NULL)
    {
        parent::__construct($runmode, $class, $metric, $interval, $values);
        // count() is used rather than empty($this->data): the property is private to
        // Stat, and empty() on it is always true unless Stat defines __isset().
        if ($this->count() < 1) {
            return;
        }
        $this->validate();
        $this->histogram();
    }

    /**
     * (Re)build the bins from the current data.
     *
     * Each bin is identified by its lower edge; a value is counted in the highest
     * bin whose lower edge does not exceed it. Values below the first edge are not
     * counted in any bin.
     *
     * @param int|null   $number_of_bins number of bins; derived from the data when empty
     * @param float|null $first_bin      lower edge of the first bin; defaults to min()
     * @param float|null $bin_width      width of each bin; defaults to delta()
     * @return void
     */
    public function histogram($number_of_bins = NULL, $first_bin = NULL, $bin_width = NULL)
    {
        if (empty($number_of_bins)) {
            $number_of_bins = $this->number_of_bins();
        }
        if (empty($bin_width)) {
            $bin_width = $this->delta($number_of_bins);
        }
        // 0 is a legitimate first edge, so only fall back when no number was given.
        if (!is_numeric($first_bin)) {
            $first_bin = $this->min();
        }

        // Start from scratch so repeated calls do not accumulate earlier counts.
        $this->bins = array();
        $edges = array();
        for ($i = 0; $i < $number_of_bins; $i++) {
            $edges[$i] = (float)$first_bin + $bin_width * $i;
            $this->bins[(string)$edges[$i]] = 0;
        }

        /* calculate frequencies and populate bins array */
        $last = $number_of_bins - 1;
        foreach ($this->get_data() as $value) {
            for ($j = $last; $j >= 0; $j--) {
                if ($value >= $edges[$j]) {
                    $this->bins[(string)$edges[$j]]++;
                    break;
                }
            }
        }
    }

    /**
     * Width of a single bin: the data range divided by the number of bins.
     *
     * @param int|null $number_of_bins number of bins; derived from the data when empty
     * @return float 0.0 when no bins can be derived (no data)
     */
    public function delta($number_of_bins = NULL)
    {
        if (empty($number_of_bins)) {
            $number_of_bins = $this->number_of_bins();
        }
        // With no data there are no bins; avoid dividing by zero.
        if ($number_of_bins == 0) {
            return 0.0;
        }
        return (float)($this->max() - $this->min()) / $number_of_bins;
    }

    /**
     * Return the bins computed by the last histogram() call.
     *
     * @return array map of bin lower edge => frequency
     */
    public function get_bins()
    {
        return $this->bins;
    }

    /**
     * @return string same as to_string()
     */
    public function __toString()
    {
        return $this->to_string();
    }

    /**
     * Render the statistics block followed by the bin table.
     *
     * @return string
     */
    public function to_string()
    {
        $s = sprintf("%s\n%s\n", $this->print_stats(), $this->print_bins());
        return $s;
    }

    /**
     * Export the parent's array representation with the bins added under 'histogram'.
     *
     * @param bool $include_points forwarded to Stat::to_array()
     * @return array
     */
    public function to_array($include_points = false)
    {
        $ret = parent::to_array($include_points);
        $ret['histogram'] = $this->get_bins();
        return $ret;
    }
}
/**
* License for Holt-Winters Class:
*
* @url https://github.com/ianbarber/PHPIR
* Copyright (c) 2011, Ian Barber
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**/
class phpir
{
    /**
     * Smooth supplied timeline data 3 ways - overall, by trend and by season.
     *
     * The first two seasons of data are used to seed the trend and the seasonal
     * indices, so at least 2 * $season_length observations are required.
     *
     * @param array $data - 1d array of data (keys are ignored; values are used in order)
     * @param int $season_length - the number of entries that represent a 'season'
     * @param float $alpha - data smoothing factor
     * @param float $beta - trend smoothing factor
     * @param float $gamma - seasonality smoothing factor
     * @param float $dev_gamma - smoothing factor for deviations
     * @return array - array($smoothed, $deviations), each indexed 0..count($data)-1
     * @throws InvalidArgumentException when $data is not an array, $season_length is
     *         less than 1, or fewer than two full seasons of data are supplied
     */
    public static function holt_winters($data, $season_length = 7, $alpha = 0.2, $beta = 0.01, $gamma = 0.01, $dev_gamma = 0.1)
    {
        if (!is_array($data)) {
            throw new InvalidArgumentException('holt_winters expects an array of data');
        }
        if ($season_length < 1) {
            throw new InvalidArgumentException('season_length must be at least 1');
        }
        // Everything below indexes by position, so normalise to 0-based keys.
        $data = array_values($data);
        $n = count($data);
        if ($n < 2 * $season_length) {
            throw new InvalidArgumentException(
                'holt_winters needs at least ' . (2 * $season_length) . ' data points, got ' . $n
            );
        }

        // Calculate an initial trend level: difference between the means of the
        // first and second season, spread over one season.
        $trend1 = 0;
        for ($i = 0; $i < $season_length; $i++) {
            $trend1 += $data[$i];
        }
        $trend1 /= $season_length;

        $trend2 = 0;
        for ($i = $season_length; $i < 2 * $season_length; $i++) {
            $trend2 += $data[$i];
        }
        $trend2 /= $season_length;

        $initial_trend = ($trend2 - $trend1) / $season_length;

        // Take the first value as the initial level
        $initial_level = $data[0];

        // Build index: each observation relative to the initial linear projection
        $index = array();
        foreach ($data as $key => $val) {
            $index[$key] = $val / ($initial_level + ($key + 1) * $initial_trend);
        }

        // Build season buffer: average the index of the same slot in the first two seasons
        $season = array_fill(0, $n, 0);
        for ($i = 0; $i < $season_length; $i++) {
            $season[$i] = ($index[$i] + $index[$i + $season_length]) / 2;
        }

        // Normalise season so the seeded factors sum to $season_length
        $season_factor = $season_length / array_sum($season);
        foreach ($season as $key => $val) {
            $season[$key] *= $season_factor;
        }

        $holt_winters = array();
        $deviations = array();
        $alpha_level = $initial_level;
        $beta_trend = $initial_trend;
        foreach ($data as $key => $value) {
            $temp_level = $alpha_level;
            $temp_trend = $beta_trend;

            $alpha_level = $alpha * $value / $season[$key] + (1.0 - $alpha) * ($temp_level + $temp_trend);
            $beta_trend = $beta * ($alpha_level - $temp_level) + (1.0 - $beta) * $temp_trend;

            // The seasonal factor for the same slot one season ahead
            $season[$key + $season_length] = $gamma * $value / $alpha_level + (1.0 - $gamma) * $season[$key];

            $holt_winters[$key] = ($alpha_level + $beta_trend * ($key + 1)) * $season[$key];
            $deviations[$key] = $dev_gamma * abs($value - $holt_winters[$key]) + (1 - $dev_gamma)
                * (isset($deviations[$key - $season_length]) ? $deviations[$key - $season_length] : 0);
        }

        /* Could forecast a bit!
        for($i = 1; $i <= $season_length; $i++) {
        $holt_winters[$key + $i] = $alpha_level + $beta_trend * $season[$key + $i];
        }
        */
        return array($holt_winters, $deviations);
    }
}
/**
* Simple Stats Class
* Build a dataset and get simple statistics
* @author jcreasy
*/
class Stat
{
    /** @var array Flat list of every value added, in insertion order. */
    private $data = array();
    /** @var string Class label, reported under 'class' by to_array(). */
    private $class = '';
    /** @var string Metric label, reported under 'type' by to_array(). */
    private $metric = '';
    /** @var string Run mode label, reported under 'runmode' by to_array(). */
    private $runmode = '';
    /** @var int|string Default divisor used by rate(). */
    private $interval = 1;
    /** @var array Map of timestamp => list of values added for that timestamp. */
    private $points = array();

    /**
     * Read-only access to the private properties (used by subclasses and callers).
     *
     * @param string $name property name
     * @return mixed
     */
    public function __get($name)
    {
        return $this->$name;
    }

    /**
     * Companion to __get() so that isset()/empty() on the exposed properties
     * reflect their real content instead of always reporting "not set".
     *
     * @param string $name property name
     * @return bool
     */
    public function __isset($name)
    {
        return isset($this->$name);
    }

    /**
     * @param string     $runmode  run mode label
     * @param string     $class    class label
     * @param string     $metric   metric label
     * @param int|string $interval default divisor for rate()
     * @param array|null $values   optional map of timestamp => list of numeric values
     * @throws Exception when one of the supplied values is not numeric
     */
    public function __construct($runmode, $class, $metric, $interval = 1, $values = NULL)
    {
        $this->runmode = $runmode;
        $this->metric = $metric;
        $this->interval = $interval;
        $this->class = $class;
        // Reset storage BEFORE loading the initial values; doing it afterwards
        // silently discarded everything passed to the constructor.
        $this->reset_data();
        if (!empty($values)) {
            $this->add_values($values);
        }
    }

    /**
     * @return array map of timestamp => list of values
     */
    public function get_values()
    {
        return $this->points;
    }

    /**
     * @return array flat list of all values in insertion order
     */
    public function get_data()
    {
        return $this->data;
    }

    /**
     * Add many values at once.
     *
     * @param array $values map of timestamp => list of numeric values
     * @throws Exception when a value is not numeric
     * @return void
     */
    public function add_values($values)
    {
        foreach ($values as $timestamp => $timestamp_values) {
            foreach ($timestamp_values as $value) {
                $this->add_value($value, $timestamp);
            }
        }
    }

    /**
     * Add a single value recorded at the given timestamp.
     *
     * @param int|float|string $value     any numeric value (including 0, 0.0 and '0')
     * @param int|string       $timestamp key under which the value is grouped
     * @throws Exception when $value is not numeric
     * @return void
     */
    public function add_value($value, $timestamp)
    {
        // is_numeric() alone rejects null, '' and non-numeric strings while
        // accepting every form of zero.
        if (!is_numeric($value)) {
            $shown = (is_array($value) || is_object($value)) ? gettype($value) : $value;
            throw new Exception('invalid value ' . $shown);
        }
        $key = "$timestamp";
        if (!array_key_exists($key, $this->points)) {
            $this->points[$key] = array();
        }
        $this->points[$key][] = $value;
        $this->data[] = $value;
    }

    /**
     * Drop every stored value.
     *
     * @return void
     */
    public function reset_data()
    {
        $this->points = array();
        $this->data = array();
    }

    /**
     * @return int|float max() - min()
     */
    public function range()
    {
        return $this->max() - $this->min();
    }

    /**
     * Number of values per interval unit.
     *
     * @param int|float|null $interval divisor; the constructor's interval is used when empty
     * @return int|float
     */
    public function rate($interval = null)
    {
        if (empty($interval)) {
            $interval = $this->interval;
        }
        return $this->count() / $interval;
    }

    /**
     * @return int number of stored values
     */
    public function count()
    {
        return count($this->data);
    }

    /**
     * @return int|float the 50th percentile
     * @throws Exception when there is no data
     */
    public function median()
    {
        return $this->percentile(50);
    }

    /**
     * Return the timestamp keys whose value list compares equal to the smallest
     * value list in the data set (PHP array comparison rules).
     *
     * NOTE(review): the name suggests "earliest timestamp", but the code selects
     * by value list, not by key - confirm the intended meaning with callers.
     *
     * @return array list of matching timestamp keys; empty when there are no points
     */
    public function min_timestamp()
    {
        if (count($this->points) === 0) {
            return array();
        }
        return array_keys($this->points, min($this->points));
    }

    /**
     * @return int|float sum of all values
     */
    public function sum()
    {
        $sum = 0;
        foreach ($this->data as $value) {
            $sum += $value;
        }
        return $sum;
    }

    /**
     * @return int|float sum of the squares of all values
     */
    public function sum2()
    {
        $sum = 0;
        foreach ($this->data as $value) {
            $sum += (float)pow($value, 2);
        }
        return $sum;
    }

    /**
     * @return int|float arithmetic mean; 0 when there is no data or the sum is 0
     */
    public function avg()
    {
        if ($this->count() == 0 || $this->sum() == 0) {
            return 0;
        }
        return $this->sum() / $this->count();
    }

    /**
     * @return int|float smallest value as float, or 0 when there is no data
     */
    public function min()
    {
        if (count($this->data) < 1) {
            return 0;
        }
        return (float)min($this->data);
    }

    /**
     * @return int|float largest value as float, or 0 when there is no data
     */
    public function max()
    {
        if (count($this->data) < 1) {
            return 0;
        }
        return (float)max($this->data);
    }

    /**
     * Sample standard deviation (n - 1 in the denominator).
     *
     * @return float 0.0 when fewer than two values are stored
     */
    public function standard_deviation()
    {
        $n = $this->count();
        // With fewer than two samples the denominator would be zero (or negative).
        if ($n < 2) {
            return 0.0;
        }
        $variance = ($this->sum2() - $n * pow($this->avg(), 2)) / (float)($n - 1);
        // Rounding error can push a zero variance slightly negative; clamp before sqrt.
        return sqrt(max(0.0, $variance));
    }

    /**
     * @return string same as to_string()
     */
    public function __toString()
    {
        return $this->to_string();
    }

    /**
     * Render the summary statistics as two tab-separated text lines.
     *
     * @return string
     */
    public function to_string()
    {
        $s = '';
        $s .= sprintf("N = %8d\tRange = %-8.0f\tMin = %-8.4f\tMax = %-8.4f\tAvg = %-8.4f\n", $this->count(), $this->range(), $this->min(), $this->max(), $this->avg());
        $s .= sprintf("StDev = %-8.4f\tSum = %-8.4f\tSum^2 = %-8.4f\n", $this->standard_deviation(), $this->sum(), $this->sum2());
        return $s;
    }

    /**
     * Export the metric description and summary statistics.
     *
     * @param bool $include_points when exactly true, the raw points are included under 'points'
     * @return array with keys 'metric', 'stats' and optionally 'points'
     */
    public function to_array($include_points = false)
    {
        $ret = array();
        $ret['metric'] = array('runmode' => $this->runmode, 'class' => $this->class, 'type' => $this->metric, 'interval' => $this->interval);
        $ret['stats'] = array(
            'min' => $this->min(),
            'max' => $this->max(),
            'count' => $this->count(),
            'sum' => $this->sum(),
            'sum2' => $this->sum2(),
            'avg' => $this->avg(),
            'stdv' => $this->standard_deviation(),
        );
        if ($include_points === true) {
            $ret['points'] = $this->points;
        }
        return $ret;
    }

    /**
     * @param bool $include_points forwarded to to_array()
     * @return string JSON encoding of to_array()
     */
    public function to_json($include_points = false)
    {
        return json_encode($this->to_array($include_points));
    }

    /**
     * Percentile with linear interpolation between the two nearest ranks.
     *
     * @param int|float $percentile either a fraction strictly between 0 and 1, or a
     *                              percentage greater than 1 and up to 100; exactly 1
     *                              is rejected
     * @return int|float
     * @throws Exception when $percentile is out of range or there is no data
     */
    public function percentile($percentile)
    {
        if (empty($percentile) || !is_numeric($percentile)) {
            throw new Exception('invalid percentile ' . $percentile);
        }
        if (0 < $percentile && $percentile < 1) {
            $p = $percentile;
        } else if (1 < $percentile && $percentile <= 100) {
            $p = $percentile * .01;
        } else {
            throw new Exception('invalid percentile ' . $percentile);
        }
        if (empty($this->data) || !is_array($this->data)) {
            throw new Exception('invalid data');
        }

        // Sort a copy so that asking for a percentile does not reorder the stored data.
        $sorted = $this->data;
        sort($sorted);

        $count = count($sorted);
        $position = ($count - 1) * $p;
        $lower = intval($position);
        $fraction = $position - $lower;

        if ($count > $lower + 1) {
            return $fraction * ($sorted[$lower + 1] - $sorted[$lower]) + $sorted[$lower];
        }
        // Already at the last element; nothing to interpolate towards.
        return $sorted[$lower];
    }

    /**
     * @return array map of '25', '50', '75' => the corresponding percentile
     * @throws Exception when there is no data
     */
    public function quartiles()
    {
        return array(
            '25' => $this->percentile(25),
            '50' => $this->percentile(50),
            '75' => $this->percentile(75),
        );
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment