Last active
February 13, 2016 22:23
-
-
Save krishnanraman/5cd9746668fe9029fdb0 to your computer and use it in GitHub Desktop.
Image Detection using Statistical Moments + KMeans dominant color + Binary SVM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// sbt build definition for the CBIR Spark job.
// `sbt assembly` builds a fat jar named cbir.jar (requires the sbt-assembly plugin).

name := "cbir Job"

version := "1.0"

scalaVersion := "2.10.4"

assemblyJarName := "cbir.jar"

// Both Spark artifacts use %% so their Scala suffix follows scalaVersion
// instead of being hard-coded on one of them.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"  % "1.4.1",
  "org.apache.spark" %% "spark-mllib" % "1.4.1"
)

// Resolve duplicate entries while assembling the fat jar:
// drop everything under META-INF, otherwise keep the first copy found.
assemblyMergeStrategy in assembly := {
  case PathList("META-INF", xs @ _*) => MergeStrategy.discard
  case x => MergeStrategy.first
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.awt.image.{BufferedImage, WritableRaster} | |
import javax.imageio.ImageIO | |
import java.io.File | |
import scala.collection.JavaConversions._ | |
import org.apache.commons.math3.ml.clustering.{KMeansPlusPlusClusterer, DoublePoint} | |
import org.apache.commons.math3.ml.distance.EuclideanDistance | |
import org.apache.commons.math3.stat.descriptive.moment.{Mean, Variance, Skewness} | |
/* | |
Given a picture, | |
run kmeans to compute 8 dominant colors. | |
replace picture with simpler picture containing only 8 dominant colors | |
Compute a cbir feature ( Array[Double] of length 45 ) based primarily on statistical moments (mean,variance,skew) | |
of the simpler picture. | |
To run: scala -cp commonsmath.jar:. cbir <directory> <spatial> (spatial = number of local regions; 4 gives the 45-value feature) | |
*/ | |
/**
 * Computes an image signature for every `.jpg` file in a directory and writes
 * each signature next to the image as `<file>_sig.txt` (one value per line).
 *
 * Command-line arguments:
 *   args(0) - directory containing the images
 *   args(1) - `spatial`, the number of local regions per image
 *
 * A signature has `spatial * 9 + 9` values: mean, variance and skewness of each
 * of the three colour bands, first per local region and then for the whole image.
 */
object cbir extends App {

  /**
   * First three statistical moments of a sample.
   *
   * @param x the sample values
   * @return List(mean, variance, skewness) as computed by commons-math
   */
  def stats(x: Array[Double]): List[Double] =
    List(
      new Mean().evaluate(x, 0, x.length),
      new Variance().evaluate(x, 0, x.length),
      new Skewness().evaluate(x, 0, x.length))

  /**
   * Computes the signature of one image.
   *
   * The image is first reduced to 8 dominant colours (k-means with a fixed
   * seed), then moments are taken over the reduced image.
   *
   * @param imagefile path of the image to read
   * @return the local signatures (`spatial` regions x 9 values) followed by
   *         the 9-value global signature
   * @throws IllegalArgumentException if the file cannot be decoded as an image
   *         or `spatial` is not in 1..(number of pixels)
   */
  def compute(imagefile: String): List[Double] = {
    // ImageIO.read returns null (rather than throwing) when no reader can decode the file
    val img = Option(ImageIO.read(new File(imagefile))).getOrElse(
      throw new IllegalArgumentException("Cannot decode image: " + imagefile))
    val raster: WritableRaster = img.getRaster
    val (w, h) = (img.getWidth, img.getHeight)

    // every pixel coordinate, x outermost, so all passes see pixels in the same order
    val coords = for (x <- 0 until w; y <- 0 until h) yield (x, y)

    // copies the pixel's samples into a fresh 3-element array
    // (band order is whatever the raster stores; the original note said BGR - unverified)
    def readPixel(x: Int, y: Int): Array[Double] =
      raster.getPixel(x, y, new Array[Double](3))

    // extract all colors from raster
    val allColors = coords.map { case (x, y) => new DoublePoint(readPixel(x, y)) }

    // find 8 dominant colors of image via kmeans (seeded, so runs are repeatable)
    val k = 8
    val kmeans = new KMeansPlusPlusClusterer[DoublePoint](k, 1000)
    kmeans.getRandomGenerator().setSeed(1234567L)
    val centroids = kmeans.cluster(allColors.toIterable)
    val colors = centroids.map { x => x.getCenter.getPoint }
    val euclidean = new EuclideanDistance()

    // update raster with the 8 color palette: each pixel becomes its closest dominant color
    coords.foreach { case (x, y) =>
      val pixel = readPixel(x, y)
      val closestColor = colors.minBy(c => euclidean.compute(c, pixel))
      raster.setPixel(x, y, closestColor)
    }

    // extract all the colors again from the updated raster
    val data: Seq[Array[Double]] = coords.map { case (x, y) => readPixel(x, y) }

    // 3 bands x (mean, variance, skew) = 9 values for a set of pixels
    def signature(pixels: Seq[Array[Double]]): List[Double] =
      (0 until 3).toList.flatMap { band => stats(pixels.map(p => p(band)).toArray) }

    // global sig
    val global = signature(data)
    assert(global.size == 9)

    // local sig: split the pixel sequence into exactly `spatial` consecutive runs.
    // Boundaries are computed as i*n/spatial so a pixel count that is not a
    // multiple of `spatial` still yields `spatial` groups (grouped(n/spatial)
    // would produce an extra, shorter group and break the size check below).
    val spatial = args(1).toInt
    val n = data.size
    require(spatial > 0 && spatial <= n,
      "spatial must be between 1 and the number of pixels (" + n + "), got " + spatial)
    val local = (0 until spatial).toList.flatMap { i =>
      val from = (i.toLong * n / spatial).toInt
      val until = ((i + 1).toLong * n / spatial).toInt
      signature(data.slice(from, until))
    }
    assert(local.size == spatial * 9)

    local ++ global
  }

  // unused placeholder, kept so existing references still resolve
  def routine = {
  }

  // joins a directory and a file name, inserting the separator when `dir` lacks one
  private def join(dir: String, name: String): String =
    if (dir.isEmpty) name else new File(dir, name).getPath

  /**
   * Writes a signature to `<dir>/<file>_sig.txt`, one value per line.
   *
   * @param x    the signature values
   * @param dir  directory to write into
   * @param file name of the image the signature belongs to
   */
  def save(x: List[Double], dir: String, file: String) = {
    val path = join(dir, file + "_sig.txt")
    val pw = new java.io.PrintWriter(path)
    try {
      println("Saving " + path)
      x.foreach(pw.println)
    } finally {
      pw.close() // close (and flush) even when a write fails
    }
  }

  require(args.length >= 2, "usage: cbir <image directory> <spatial>")

  // find all image files in dir
  val dir = args(0)
  // File.list returns null when dir is not a readable directory
  val files = Option(new File(dir).list())
    .getOrElse(sys.error("Not a readable directory: " + dir))
    .filter { x => x.endsWith(".jpg") }

  files.foreach { file =>
    val path = join(dir, file)
    println("Processing " + path)
    save(cbir.compute(path), dir, file)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.marin.cv | |
// To build: sbt assembly | |
// To run: spark-submit --driver-memory 120g --executor-memory 120g --class com.marin.cv.cbirJob --master local[*] target/scala-2.10/cbir.jar "--dir /media/kraman/disk1/comp_vision/sig" | |
import org.apache.spark.SparkContext | |
import org.apache.spark.SparkContext._ | |
import org.apache.spark.SparkConf | |
import org.apache.spark.rdd._ | |
import org.apache.spark.rdd.PairRDDFunctions | |
import com.marin.util.Args | |
import java.io.PrintWriter | |
import org.apache.spark.mllib.classification._ | |
import org.apache.spark.mllib.regression.LabeledPoint | |
import org.apache.spark.mllib.linalg._ | |
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics | |
import collection.mutable.ListBuffer | |
/**
 * Trains one binary SVM per image class on the signature files found in
 * `--dir` and records the area under the PR and ROC curves for each class.
 *
 * Per-class counts and metrics are appended to the local file `log`; the
 * final (class, PR area, ROC area) triples are saved as a text file.
 *
 * NOTE(review): the Spark documentation advises defining `main()` instead of
 * extending `scala.App`; kept as-is here so the object's members stay available.
 */
object cbirJob extends App {
  val conf = new SparkConf()
    .setMaster("local[28]")
    .set("spark.driver.maxResultSize", "100g")
    .set("spark.local.dir", "/media/kraman/disk2/tmp")
    .set("spark.akka.threads", "256")
  val sc = new SparkContext(conf)
  val myargs = Args(args.mkString)
  val dir = myargs("dir")
  val mylog = new PrintWriter("log")

  // hundreds bucket of the image number -> class name
  val classNames: Map[Int, String] = Map(
    300 -> "bus",
    400 -> "dino",
    500 -> "elephant",
    600 -> "flower",
    700 -> "horse",
    800 -> "nature",
    900 -> "food")

  // read all the CBIR image signatures & convert to features, grouped by class name.
  // Every point is labelled 1.0 here; labels are flipped later for the negatives.
  val classFeatures: Map[String, ListBuffer[LabeledPoint]] = sc
    .wholeTextFiles(dir) // RDD[(String, String)]
    .map { case (filename, contents) =>
      val features: DenseVector = new DenseVector(contents.split("\n").map { _.toDouble })
      val lp: LabeledPoint = new LabeledPoint(1.0d, features)
      // the three characters before ".jpg_sig.txt" are read as the image number
      val idx = filename.indexOf(".jpg_sig.txt")
      require(idx >= 3, "Unexpected signature file name: " + filename)
      val key = (filename.slice(idx - 3, idx).toInt / 100) * 100 // eg. turn 368 into 300
      val name = classNames.getOrElse(key,
        throw new IllegalArgumentException("No class for image number in: " + filename))
      (name, lp)
    } // RDD[String, LabeledPoint]
    .aggregateByKey(collection.mutable.ListBuffer[LabeledPoint]())(
      (u, v) => v +=: u,     // prepend in place instead of copying the buffer per element
      (u1, u2) => u1 ++= u2
    ) // RDD[String, ListBuffer[LabeledPoint]]
    .collectAsMap.toMap

  // for each class, train a binary classifier: that class vs. all the others,
  // with the negatives down-sampled to about as many as the positive training points
  val results = classFeatures.map { case (className, features) =>
    val trainTestRatio = Array(0.8, 0.2)
    val Array(posTrain, posTest) = sc.makeRDD(features).randomSplit(trainTestRatio)

    // every point of every other class, relabelled 0
    val otherClasses: Seq[LabeledPoint] = (classFeatures - className).values.flatten.toSeq
    val negFeatures = sc.makeRDD(otherClasses)
      .map { x: LabeledPoint => new LabeledPoint(0, x.features) } // flip labels

    // want |negatives| == |positives|, but |negatives| > |positives| in dataset
    val fraction = posTrain.count.toDouble / negFeatures.count
    val negTrain = negFeatures.sample(false, fraction)
    val negTest = negFeatures.subtract(negTrain)
    mylog.println("Counts(NegTrain, NegTest, PosTrain, PosTest):" +
      negTrain.count + "," + negTest.count + "," + posTrain.count + "," + posTest.count)

    // SGD makes many passes over the training data, so keep it in memory
    val training = (posTrain ++ negTrain).cache()
    val model: SVMModel = SVMWithSGD.train(training, 1000)
    // Without this, predict() returns thresholded 0/1 labels;
    // BinaryClassificationMetrics needs the raw scores to trace the curves.
    model.clearThreshold()

    // score and label come from the same record, so they cannot get misaligned
    val testSet = posTest ++ negTest
    val scoresAndLabels = testSet.map { lp => (model.predict(lp.features), lp.label) }
    val metrics = new BinaryClassificationMetrics(scoresAndLabels)
    val (pr, roc) = (metrics.areaUnderPR, metrics.areaUnderROC)

    mylog.println(className + "," + pr + "," + roc)
    mylog.flush() // keep finished classes in the log even if a later one fails
    training.unpersist()
    (className, pr, roc)
  }.toList

  try {
    sc.makeRDD(results, 1).saveAsTextFile("svm classifier results" + System.currentTimeMillis)
  } finally {
    mylog.close()
    sc.stop()
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// THIS VERSION TRAINS MULTIPLE CLASSIFIERS PER CLASS | |
// Corel has 7 classes, each class has 100 images | |
// WANT 80 +ve, 80-ve PER CLASSIFIER PER CLASS. | |
// So 600 -ve per class = 80*7 + 40. So 7 classifiers PER CLASS | |
// TOTAL 7 classifiers per class * 7 classes = 49 classifiers | |
package com.marin.cv | |
// To build: sbt assembly | |
// To run: spark-submit --driver-memory 120g --executor-memory 120g --class com.marin.cv.cbirJob --master local[*] target/scala-2.10/cbir.jar "--dir /media/kraman/disk1/comp_vision/sig" | |
import org.apache.spark.SparkContext | |
import org.apache.spark.SparkContext._ | |
import org.apache.spark.SparkConf | |
import org.apache.spark.rdd._ | |
import org.apache.spark.rdd.PairRDDFunctions | |
import com.marin.util.Args | |
import java.io.PrintWriter | |
import org.apache.spark.mllib.classification._ | |
import org.apache.spark.mllib.regression.LabeledPoint | |
import org.apache.spark.mllib.linalg._ | |
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics | |
import collection.mutable.ListBuffer | |
/**
 * Ensemble version: for every image class, trains several binary SVMs that
 * share the positive training points but each use a different bucket of
 * negatives, then classifies the test set by majority vote.
 *
 * Per-classifier counts and per-class metrics are appended to the local file
 * `log`; the final (class, PR area, ROC area) triples are saved as a text file.
 *
 * NOTE(review): the Spark documentation advises defining `main()` instead of
 * extending `scala.App`; kept as-is here so the object's members stay available.
 */
object cbirJob extends App {
  val conf = new SparkConf()
    .setMaster("local[28]")
    .set("spark.driver.maxResultSize", "100g")
    .set("spark.local.dir", "/media/kraman/disk2/tmp")
    .set("spark.akka.threads", "256")
  val sc = new SparkContext(conf)
  val myargs = Args(args.mkString)
  val dir = myargs("dir")
  val mylog = new PrintWriter("log")

  // hundreds bucket of the image number -> class name
  val classNames: Map[Int, String] = Map(
    300 -> "bus",
    400 -> "dino",
    500 -> "elephant",
    600 -> "flower",
    700 -> "horse",
    800 -> "nature",
    900 -> "food")

  // read all the CBIR image signatures & convert to features, grouped by class name.
  // Every point is labelled 1.0 here; labels are flipped later for the negatives.
  val classFeatures: Map[String, ListBuffer[LabeledPoint]] = sc
    .wholeTextFiles(dir) // RDD[(String, String)]
    .map { case (filename, contents) =>
      val features: DenseVector = new DenseVector(contents.split("\n").map { _.toDouble })
      val lp: LabeledPoint = new LabeledPoint(1.0d, features)
      // the three characters before ".jpg_sig.txt" are read as the image number
      val idx = filename.indexOf(".jpg_sig.txt")
      require(idx >= 3, "Unexpected signature file name: " + filename)
      val key = (filename.slice(idx - 3, idx).toInt / 100) * 100 // eg. turn 368 into 300
      val name = classNames.getOrElse(key,
        throw new IllegalArgumentException("No class for image number in: " + filename))
      (name, lp)
    } // RDD[String, LabeledPoint]
    .aggregateByKey(collection.mutable.ListBuffer[LabeledPoint]())(
      (u, v) => v +=: u,     // prepend in place instead of copying the buffer per element
      (u1, u2) => u1 ++= u2
    ) // RDD[String, ListBuffer[LabeledPoint]]
    .collectAsMap.toMap

  // for each class, train the ensemble of binary classifiers: that class vs. all the others
  val results = classFeatures.map { case (className, features) =>
    val trainTestRatio = Array(0.8, 0.2)
    val Array(posTrain, posTest) = sc.makeRDD(features).randomSplit(trainTestRatio)

    // every point of every other class, relabelled 0
    val otherClasses: Seq[LabeledPoint] = (classFeatures - className).values.flatten.toSeq
    val negFeatures = sc.makeRDD(otherClasses)
      .map { x: LabeledPoint => new LabeledPoint(0, x.features) } // flip labels

    // want |negatives| == |positives|, but |negatives| > |positives| in dataset
    // so make as many sample buckets as you need to
    // sample bucket size must be equal to pos sample size = 80
    // 80 * 7 + 40 = 600
    // so need 7 training buckets, plus 1 test set bucket (the last one)
    val negRatio = Array(80, 80, 80, 80, 80, 80, 80, 40).map { _ / 600.0 }
    val negArray = negFeatures.randomSplit(negRatio)
    val negTest = negArray.last // SET TEST SET ASIDE
    val testSet = posTest ++ negTest

    // NOW BUILD 1 classifier PER NEG SAMPLE BUCKET
    val models: Seq[SVMModel] = negArray.init.toSeq.zipWithIndex.map { case (negTrain, idx) =>
      val model: SVMModel = SVMWithSGD.train(posTrain ++ negTrain, 1000)
      mylog.println("SVM: " + idx + " Counts(NegTrain, NegTest, PosTrain, PosTest):" +
        negTrain.count + "," + negTest.count + "," + posTrain.count + "," + posTest.count)
      model
    }

    // Each classifier predicts 0 or 1 for a test point. The consensus is 1 when
    // a strict majority of the classifiers say 1 (4 out of 7), otherwise 0.
    // All votes for a point are taken in one pass, so the consensus and its
    // label always come from the same record.
    val majority = models.size / 2 + 1
    val scoresAndLabels = testSet.map { lp =>
      val votes = models.count(m => m.predict(lp.features) == 1.0)
      (if (votes >= majority) 1.0 else 0.0, lp.label)
    }
    val metrics = new BinaryClassificationMetrics(scoresAndLabels)
    val (pr, roc) = (metrics.areaUnderPR, metrics.areaUnderROC)

    mylog.println(className + "," + pr + "," + roc)
    mylog.flush() // keep finished classes in the log even if a later one fails
    (className, pr, roc)
  }.toList

  try {
    sc.makeRDD(results, 1).saveAsTextFile("svm classifier results" + System.currentTimeMillis)
  } finally {
    mylog.close()
    sc.stop()
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Dataset: COREL subset ( 7 classes, 100 images per class => 7*100 = 700 jpgs ) | |
COREL: https://sites.google.com/site/dctresearch/Home/content-based-image-retrieval | |
Training Test Ratio: 80-20 | |
Equal number of true & false samples ie. train on 80 dinos & 80 random non-dinos out of 700-100 = 600 non-dinos. | |
So training sample size = 80 + 80 = 160 | |
Test sample = 20 dinos + 20 non-dinos | |
Train 1 SVM classifier per class => 7 SVM classifiers | |
Hyperparams: 1000 SGD iterations with step size (learning rate) 1.0 | |
(Name of Classifier, "AUC" = Area under the Precision-Recall Curve, "ROC" = Area under the ROC Curve) | |
(bus,0.7266666666666667,0.6215384615384616) | |
(dino,0.9289215686274509,0.9179566563467493) | |
(flower,0.9762845849802372,0.9545454545454546) | |
(horse,0.9277836134453781,0.8854166666666667) | |
(food,0.7559523809523809,0.6142857142857143) | |
(nature,0.843956043956044,0.7318181818181818) | |
(elephant,0.5694235588972432,0.5354691075514875) | |
Conclusions: | |
Easiest to classify = Flower, Dino | |
Hardest to classify = Elephant | |
wiki for AUC: http://fastml.com/what-you-wanted-to-know-about-auc/ ( we want 1.0, we get 0.56 to 0.97 ) | |
wiki for ROC:http://gim.unmc.edu/dxtests/roc3.htm ( we want 1.0, we get 0.53 to 0.95) | |
Run2: Test Set: 20 positives + 520 negatives ( negative test samples far overwhelm positives ) | |
(bus,0.3872202166064982,0.7410112359550562) | |
(dino,0.6296296296296297,0.9430740037950663) | |
(flower,0.5792057698992006,0.8983012559862654) | |
(horse,0.4008389605074688,0.6706503014642549) | |
(food,0.24998883499915142,0.6119496855345912) | |
(nature,0.11913195351266649,0.5286529060293319) | |
(elephant,0.5030071466512145,0.650146771037182) | |
Run3: Use a 1:9 ratio for training. 1 positive sample for 9 negative samples! | |
Justification: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4061540/ | |
Essentially, opposite of Run2. | |
Summary: (CLASSNAME, AUC, ROC) | |
(bus,0.6923076923076923,0.5) | |
(dino,0.9463950683462878,0.9024390243902439) | |
(flower,0.9299660441426146,0.881578947368421) | |
(horse,0.6826025459688826,0.5142857142857142) | |
(food,0.7222222222222222,0.5) | |
(nature,0.7056831267357584,0.7325162220620043) | |
(elephant,0.6941747572815534,0.5) | |
--------- | |
Detailed Counts | |
CLASSNAME, AUC, ROC | |
--------- | |
Counts(NegTrain, NegTest, PosTrain, PosTest):536,64,60,40 | |
bus,0.6923076923076923,0.5 | |
Counts(NegTrain, NegTest, PosTrain, PosTest):550,50,59,41 | |
dino,0.9463950683462878,0.9024390243902439 | |
Counts(NegTrain, NegTest, PosTrain, PosTest):545,55,62,38 | |
flower,0.9299660441426146,0.881578947368421 | |
Counts(NegTrain, NegTest, PosTrain, PosTest):534,66,65,35 | |
horse,0.6826025459688826,0.5142857142857142 | |
Counts(NegTrain, NegTest, PosTrain, PosTest):550,50,60,40 | |
food,0.7222222222222222,0.5 | |
Counts(NegTrain, NegTest, PosTrain, PosTest):527,73,62,38 | |
nature,0.7056831267357584,0.7325162220620043 | |
Counts(NegTrain, NegTest, PosTrain, PosTest):537,63,60,40 | |
elephant,0.6941747572815534,0.5 | |
Run 4. Use equal number of positive & negative training set. | |
But since number of negatives > number of positives, | |
use as many classifiers PER class as required to cover the entire negative sample training set. | |
// THIS VERSION TRAINS MULTIPLE CLASSIFIERS PER CLASS | |
// Corel has 7 classes, each class has 100 images | |
// WANT 80 +ve, 80-ve PER CLASSIFIER PER CLASS. | |
// So 600 -ve per class = 80*7 + 40. So 7 classifiers PER CLASS | |
// TOTAL 7 classifiers per class * 7 classes = 49 classifiers | |
Justification: | |
http://sci2s.ugr.es/keel/pdf/specific/congreso/akbani_svm_2004.pdf | |
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.309.904&rep=rep1&type=pdf | |
Summary: (CLASSNAME, AUC, ROC) | |
(bus,0.7278735632183908,0.7034313725490197) | |
(dino,0.9265931372549019,0.9295485636114911) | |
(flower,0.9380952380952381,0.9047619047619048) | |
(horse,0.7213669950738917,0.7648809523809524) | |
(food,0.7255244755244755,0.7071428571428572) | |
(nature,0.6842105263157895,0.6556390977443609) | |
(elephant,0.7199602780536245,0.6795665634674922) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment