Skip to content

Instantly share code, notes, and snippets.

@fatihgune
Created December 6, 2020 16:05
Show Gist options
  • Save fatihgune/20430240e6346891032c6b5394dd230b to your computer and use it in GitHub Desktop.
Save fatihgune/20430240e6346891032c6b5394dd230b to your computer and use it in GitHub Desktop.
Confusion Equation (X Equation) with normal distribution (PHP - Laravel)
<?php
/*
 * Computes the area under the ROC curve (AUC), its standard error (SE), the
 * z statistic and the AR value for UK companies, using the stored normal
 * distribution values as the ranking score.
 *
 * For every percentage p from 1 to 100 the lowest p percent of each year's
 * records (sorted ascending by `value`) are taken, and the bankrupted and
 * healthy companies inside that slice are counted and summed over all years.
 * The per-percentage increments of those cumulative counts feed the AUC sum.
 *
 * Returns: ['auc' => float, 'se' => float, 'z' => float, 'AR' => float].
 * Throws:  \DivisionByZeroError when there are no bankrupted or no healthy
 *          records (AUC undefined) or when the standard error is zero.
 */

// Years to evaluate (2006 to 2019 according to the original author's note).
$years = config('years');

// Number of distinct bankrupted UK companies that have normal distribution records.
$allBankruptedCompaniesCount = NormalDistribution::join('companies AS c', 'company_id', '=', 'c.id')
    ->where('c.country', '=', 'UK')
    ->where('c.bankrupted', '=', 'yes')
    ->get()->unique('company_id')
    ->count();

// Load and sort every year's records ONCE. The query does not depend on the
// percentage, so running it inside the 1..100 loop only repeated identical work.
// Trade-off: all years are held in memory at the same time.
$normDistByYear = [];
foreach ($years as $year) {
    $normDistByYear[] = NormalDistribution::join('companies AS c', 'company_id', '=', 'c.id')
        ->where('c.country', '=', 'UK')
        ->where('year', '=', $year)->get()
        ->sortBy('value', SORT_NUMERIC);
}

// Initial values.
$row1All = [];
$row3All = [];
$rowResult = [];

// Cumulative counts of the previous percentage. They start at 0 so that the
// 1% step yields the plain counts without a special case.
$previousBankruptedCount = 0;
$previousHealthyCount = 0;

// Walk the percentages from 1% to 100%.
for ($i = 1; $i <= 100; $i++) {
    // Derive the fraction from the integer counter instead of adding 0.01
    // repeatedly, which would accumulate floating point error.
    $fraction = $i / 100;

    // Cumulative counts for this percentage, summed over all years.
    $countOfBankruptedCompaniesForPercentage = 0;
    $countOfHealthyCompaniesForPercentage = 0;

    foreach ($normDistByYear as $normDist) {
        // Take the lowest $i percent of the year's records (half rounds down).
        // round() returns a float, so cast it before using it as a slice size.
        $sliceSize = (int) round($normDist->count() * $fraction, 0, PHP_ROUND_HALF_DOWN);
        $normDistForThatPercentage = $normDist->take($sliceSize);

        // Count distinct bankrupted and healthy companies inside the slice.
        $countOfBankruptedCompaniesForPercentage += $normDistForThatPercentage
            ->where('bankrupted', 'yes')
            ->unique('company_id')
            ->count();
        $countOfHealthyCompaniesForPercentage += $normDistForThatPercentage
            ->where('bankrupted', 'no')
            ->unique('company_id')
            ->count();
    }

    // NOTE(review): these cache entries are no longer read by this script and the
    // application-wide \Cache::flush() that used to precede the loop was removed
    // (each key is overwritten here anyway). The writes are kept only in case other
    // code reads these keys - confirm and drop them if nothing does.
    \Cache::put($i.'bankrupted-percentage', $countOfBankruptedCompaniesForPercentage);
    \Cache::put($i.'non-bankrupted-percentage', $countOfHealthyCompaniesForPercentage);

    // Healthy companies added by this percentage step (increment, not cumulative).
    $row1 = $countOfHealthyCompaniesForPercentage - $previousHealthyCount;

    // Bankrupted companies not yet covered by the current percentage.
    // NOTE(review): the total is distinct across all years while the cumulative
    // count is summed per year, so this can become negative when a company has
    // records in several years - confirm this is intended.
    $row2 = $allBankruptedCompaniesCount - $countOfBankruptedCompaniesForPercentage;

    // Bankrupted companies added by this percentage step.
    $row3 = $countOfBankruptedCompaniesForPercentage - $previousBankruptedCount;

    $row1All[] = $row1;
    $row3All[] = $row3;

    // Contribution of this step: healthy companies in the step paired with the
    // bankrupted ones still outside the slice, plus half weight for the
    // bankrupted ones that entered in the same step.
    $rowResult[] = $row1 * $row2 + 0.5 * $row1 * $row3;

    $previousBankruptedCount = $countOfBankruptedCompaniesForPercentage;
    $previousHealthyCount = $countOfHealthyCompaniesForPercentage;
}

// Totals over all percentage steps.
$weightedSum = array_sum($rowResult);
$bankruptedTotal = array_sum($row3All);
$healthyTotal = array_sum($row1All);

// AUC is undefined without at least one bankrupted and one healthy record.
$pairCount = $bankruptedTotal * $healthyTotal;
if ($pairCount === 0) {
    throw new \DivisionByZeroError(
        'Cannot compute AUC: no bankrupted or no healthy records were found.'
    );
}

// Calculate AUC.
$auc = $weightedSum / $pairCount;
$aucSquared = $auc * $auc;

// Calculate q1 and q2, the two helper terms of the standard error below
// (this matches the Hanley & McNeil standard error formula for an AUC).
$q1 = $auc / (2 - $auc);
$q2 = 2 * $aucSquared / (1 + $auc);

// Calculate SE.
$se = sqrt(
    ($auc * (1 - $auc)
        + ($bankruptedTotal - 1) * ($q1 - $aucSquared)
        + ($healthyTotal - 1) * ($q2 - $aucSquared))
    / $pairCount
);

// A zero standard error (e.g. an AUC of exactly 1) leaves z undefined.
if ($se === 0.0) {
    throw new \DivisionByZeroError(
        'Cannot compute z: the standard error of the AUC is zero.'
    );
}

// Calculate z: distance of the AUC from 0.5 in standard errors.
$z = ($auc - 0.5) / $se;

// Calculate AR (presumably the accuracy ratio).
$ar = 2 * ($auc - 0.5);

// Return the results.
$results = [
    'auc' => $auc, 'se' => $se, 'z' => $z, 'AR' => $ar,
];
return $results;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment