Skip to content

Instantly share code, notes, and snippets.

@tomardern
Created March 27, 2015 10:58
Show Gist options
  • Save tomardern/bf7664f7d787898ca3f2 to your computer and use it in GitHub Desktop.
Save tomardern/bf7664f7d787898ca3f2 to your computer and use it in GitHub Desktop.
Old Script for Quick Weka Comparisons
<?php
// Turn off reporting of every PHP error level for the rest of the script,
// so notices and warnings raised further down are not reported.
error_reporting(0);
/**
 * Return the first number found on a line of classifier output that mentions
 * the given label.
 *
 * Lines are scanned top to bottom. On each line containing $text
 * (case-insensitive) the space-separated tokens are sanitised one by one;
 * the first token that still has characters left is returned. If a matching
 * line holds no such token, scanning continues with the next line that
 * mentions the label.
 *
 * @param string $lines Output text, lines separated by "\n".
 * @param string $text  Label to look for.
 * @param bool   $after When true, lines before the first one containing
 *                      "Stratified" are ignored (if no line contains it,
 *                      every line is searched).
 * @return string|null  The sanitised token (digits, "+" and "." only; every
 *                      "-" is stripped), or null when nothing was found.
 */
function search($lines, $text, $after){
    $textLines = explode("\n", (string) $lines);

    if ($after) {
        foreach ($textLines as $num => $line) {
            if (stripos($line, "Stratified") !== false) {
                $textLines = array_slice($textLines, $num);
                // Stop here: $num indexes the unsliced array, so slicing a
                // second time would drop lines that belong to the section.
                break;
            }
        }
    }

    foreach ($textLines as $line) {
        if (stripos($line, $text) === false) {
            continue;
        }
        foreach (explode(" ", $line) as $item) {
            // Kept in its own variable so the label in $text stays intact
            // for the lines that follow.
            $number = filter_var(
                str_replace("-", "", $item),
                FILTER_SANITIZE_NUMBER_FLOAT,
                FILTER_FLAG_ALLOW_FRACTION
            );
            if (strlen($number)) {
                return $number;
            }
        }
    }

    return null;
}
/**
 * Work out which classifier produced an output file from its name.
 *
 * @param string $filename File name or path.
 * @return string|null "j48", "knn" or "nb" for the first marker found
 *                     (tested in that order, case-insensitively), or null
 *                     when the name contains none of them.
 */
function findType($filename){
    // Marker searched for => value returned; order decides precedence.
    $markers = array(
        "j48_" => "j48",
        "knn_" => "knn",
        "nb_" => "nb",
    );

    foreach ($markers as $marker => $type) {
        if (stripos($filename, $marker) !== false) {
            return $type;
        }
    }

    return null;
}
/**
 * Extract the data set name from an output file path.
 *
 * Takes the part after the last backslash, removes every ".txt" from it and
 * returns whatever follows the last underscore.
 *
 * @param string $filename Backslash-separated path or bare file name.
 * @return string The trailing "_"-separated piece of the file name.
 */
function findDataSet($filename){
    $pathParts = explode("\\", $filename);
    $baseName = $pathParts[count($pathParts) - 1];

    $stem = str_replace(".txt", "", $baseName);

    $tokens = explode("_", $stem);
    return $tokens[count($tokens) - 1];
}
/**
 * Size of the .arff file that belongs to an output file.
 *
 * The .arff path is the global $dataFiles prefix followed by the data set
 * name (from findDataSet()) and ".arff".
 *
 * @param string $filename Path of the classifier output file.
 * @return int|false Whatever filesize() reports for that .arff path.
 */
function findFileSize($filename){
    global $dataFiles;

    $arffPath = $dataFiles . findDataSet($filename) . ".arff";

    return filesize($arffPath);
}
/**
 * Look up the log entry for an output file and return what is left of that
 * line once the file name has been removed.
 *
 * @param array|false $log      Log lines (as returned by file()); anything
 *                              that is not an array yields null.
 * @param string      $filename Backslash-separated path of the output file;
 *                              only the part after the last backslash is
 *                              searched for.
 * @return string|null The first line containing the file name
 *                     (case-insensitive), with the name and any CR/LF
 *                     characters removed, or null when no line matches.
 */
function findtimeTaken($log, $filename){
    if (!is_array($log)) {
        return null;
    }

    $parts = explode("\\", $filename);
    $name = end($parts);

    foreach ($log as $line) {
        if (stripos($line, $name) !== false) {
            // Case-insensitive removal to match the case-insensitive search;
            // "\r" is stripped as well so CRLF logs leave no stray character.
            return str_ireplace(array($name, "\r", "\n"), "", $line);
        }
    }

    return null;
}
/**
 * Count the attribute declarations in the .arff file behind an output file.
 *
 * The .arff path is the global $dataFiles prefix followed by the data set
 * name (from findDataSet()) and ".arff". Every occurrence of "@attribute"
 * in the file contents is counted, ignoring case.
 *
 * @param string $filename Path of the classifier output file.
 * @return int Number of "@attribute" occurrences.
 */
function findNumAttributes($filename){
    global $dataFiles;

    $arffPath = $dataFiles . findDataSet($filename) . ".arff";
    $upperContents = strtoupper(file_get_contents($arffPath));
    $upperKeyword = strtoupper("@attribute");

    return substr_count($upperContents, $upperKeyword);
}
/**
 * Pull the numeric part out of an output file name.
 *
 * Works on the part after the last backslash: "j48" and every "-" are
 * removed first, then everything except digits and "+" is discarded.
 *
 * @param string $filename Backslash-separated path or bare file name.
 * @return string The remaining characters; "" when the name has none.
 */
function findKFromFilename($filename){
    $segments = explode("\\", $filename);
    $baseName = $segments[count($segments) - 1];

    // Drop the classifier tag first so the "48" in it is not read as a number.
    $withoutTag = str_replace("j48", "", $baseName);
    $withoutDashes = str_replace("-", "", $withoutTag);

    return filter_var($withoutDashes, FILTER_SANITIZE_NUMBER_INT);
}
/**
 * Count the class labels in the header line of a confusion matrix.
 *
 * Looks for the first line containing "<-- classified as"
 * (case-insensitive) and counts the whitespace-separated labels that
 * precede the marker. Counting labels rather than characters keeps the
 * result right when a label is more than one character long.
 *
 * @param string $text Output text, lines separated by "\n".
 * @return int|null Number of labels before the marker, or null when no line
 *                  contains the marker.
 */
function findNumberOfClasses($text){
    $marker = "<-- classified as";

    foreach (explode("\n", (string) $text) as $line) {
        $markerPos = stripos($line, $marker);
        if ($markerPos === false) {
            continue;
        }

        // Everything left of the marker is the label header.
        $header = (string) substr($line, 0, $markerPos);
        $labels = preg_split('/\s+/', $header, -1, PREG_SPLIT_NO_EMPTY);

        return count($labels);
    }

    return null;
}
// Everything printed from here on is emitted inside a <pre> element.
echo "<pre>";
// ****************** SETTINGS ******************* //
// Prefix of the .arff data sets; read through `global` by findFileSize()
// and findNumAttributes().
$dataFiles = "C:\\Weka-3-6\\data\\";
// Folder scanned for classifier output (*.txt).
$location = "C:\\Weka-3-6\\dwm_output\\";
// Console log inside that folder; it is passed to findtimeTaken() and is
// excluded from the files that get parsed.
$log = $location."console_log.txt";
//******************** END SETTINGS ***************//
// glob() and file() return false on failure; normalise both to an empty
// array so the loops that consume them always receive something iterable.
$files = glob($location."*.txt");
if ($files === false) {
    $files = array();
}
$logger = file($log);
if ($logger === false) {
    $logger = array();
}
// Collected statistics, keyed by data set name.
$data = array();
/* $data = array(
"Attributes",
"Classes",
"Type",
"Dataset",
"Size",
"Generate Time",
"Instances",
"Model Time",
"Correct Identify",
"Incorrect Identify",
"J48 Leaves",
"J48 Size Tree",
"K"
); */
// Parse every output file and group the extracted figures per data set.
//
// Layout of $data[<dataset>]:
//   [0] data set name, [1] attribute count, [2] class count,
//   [3] .arff file size, [4] instance count,
//   "j48" / "nb" => array(log entry, build time, correct, incorrect, K,
//                         leaves, tree size),
//   "knn"        => array(K => array(log entry, build time, correct,
//                                    incorrect, K)).
foreach ($files as $file) {
    // The console log sits in the same folder and matches the *.txt glob.
    if ($file === $log) {
        continue;
    }

    // Files that carry none of the known classifier markers have nothing to
    // contribute and would otherwise create an empty row for a bogus data set.
    $type = findType($file);
    if ($type === null) {
        continue;
    }

    // The text is converted from UTF-16LE to UTF-8 before it is searched;
    // if the conversion fails the raw contents are used instead.
    $text = file_get_contents($file);
    $result = iconv('UTF-16LE', 'UTF-8', $text);
    if ($result === false) {
        $result = $text;
    }

    $dataset = findDataSet($file);
    $nearest = findKFromFilename($file);

    // Per-data-set figures are recorded once, from the first file seen.
    if (!isset($data[$dataset])) {
        $data[$dataset] = array(
            $dataset,
            findNumAttributes($file),
            findNumberOfClasses($result),
            findFileSize($file),
            search($result, "Number of Instances", false),
            "j48" => array(),
            "nb" => array(),
            "knn" => array(),
        );
    }

    // Figures common to every classifier run. The classification counts are
    // read from the part of the output that starts at the "Stratified" line.
    $run = array(
        findtimeTaken($logger, $file),
        search($result, "taken to build model", false),
        search($result, "Correctly Classified Instances", true),
        search($result, "Incorrectly Classified Instances", true),
        $nearest,
    );

    if ($type === "knn") {
        // One entry per K. Deciding on the classifier type (rather than on
        // whether the file name happens to contain digits) keeps J48/NB runs
        // for data sets with digits in their name out of this branch.
        $data[$dataset][$type][$nearest] = $run;
    } else {
        $run[] = search($result, "Number of Leaves", false);
        $run[] = search($result, "of the tree", false);
        $data[$dataset][$type] = $run;
    }
}
// Write one CSV row per data set to output_single.csv and echo each row.
//
// Row layout: "General" + the five per-data-set figures, "J48" + six
// figures, "NB" + four figures, then for every KNN run "KNN<K>", K and four
// figures. Figures that were never collected are written as empty cells.
$fp = fopen('output_single.csv', 'w');
foreach ($data as $dataset => $stats) {
    $row = array("General");
    foreach (array(0, 1, 2, 3, 4) as $idx) {
        $row[] = isset($stats[$idx]) ? $stats[$idx] : null;
    }

    // Index 4 of a J48 entry holds K, which is not part of the J48 columns;
    // 5 and 6 are the leaf count and tree size.
    $row[] = "J48";
    foreach (array(0, 1, 2, 3, 5, 6) as $idx) {
        $row[] = isset($stats["j48"][$idx]) ? $stats["j48"][$idx] : null;
    }

    $row[] = "NB";
    foreach (array(0, 1, 2, 3) as $idx) {
        $row[] = isset($stats["nb"][$idx]) ? $stats["nb"][$idx] : null;
    }

    foreach ($stats["knn"] as $knn) {
        $k = isset($knn[4]) ? $knn[4] : null;
        $row[] = "KNN".$k;
        $row[] = $k;
        foreach (array(0, 1, 2, 3) as $idx) {
            $row[] = isset($knn[$idx]) ? $knn[$idx] : null;
        }
    }

    // Skip the write when the file could not be opened; the row is still
    // echoed below.
    if ($fp) {
        fputcsv($fp, $row);
    }
    print_r($row);
}
if ($fp) {
    fclose($fp);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment