Skip to content

Instantly share code, notes, and snippets.

@krishnanraman
Last active February 13, 2016 22:23
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krishnanraman/5cd9746668fe9029fdb0 to your computer and use it in GitHub Desktop.
Save krishnanraman/5cd9746668fe9029fdb0 to your computer and use it in GitHub Desktop.
Image Detection using Statistical Moments + KMeans dominant color + Binary SVM
// sbt build definition for the CBIR Spark job. `sbt assembly` builds the fat jar named below.
// Settings are separated by blank lines so that older sbt 0.13.x releases can parse the file.
name := "cbir Job"

version := "1.0"

scalaVersion := "2.10.4"

assemblyJarName := "cbir.jar"

libraryDependencies += "org.apache.spark" %% "spark-core" % "1.4.1"

// %% appends the Scala binary version (_2.10 here), consistent with spark-core above,
// instead of hard-coding the suffix in the artifact name.
libraryDependencies += "org.apache.spark" %% "spark-mllib" % "1.4.1"

// Conflict resolution while merging dependency jars: drop everything under META-INF,
// otherwise keep the first copy of a duplicated path.
mergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) =>
  {
    case PathList("META-INF", xs @ _*) => MergeStrategy.discard
    case x => MergeStrategy.first
  }
}
import java.awt.image.{BufferedImage, WritableRaster}
import javax.imageio.ImageIO
import java.io.File
import scala.collection.JavaConversions._
import org.apache.commons.math3.ml.clustering.{KMeansPlusPlusClusterer, DoublePoint}
import org.apache.commons.math3.ml.distance.EuclideanDistance
import org.apache.commons.math3.stat.descriptive.moment.{Mean, Variance, Skewness}
/*
Given a picture,
run kmeans to compute 8 dominant colors.
replace picture with simpler picture containing only 8 dominant colors
Compute a cbir feature ( Array[Double] of length 45 ) based primarily on statistical moments (mean,variance,skew)
of the simpler picture.
To run: scala -cp commonsmath.jar:. cbir directory spatial   (e.g. cbir /path/to/images/ 4 ; spatial = 4 local pieces gives 4*9 + 9 = 45 values)
*/
/**
 * Computes a colour-moment signature for every `.jpg` file in a directory and writes it
 * next to the image as `<image>_sig.txt`, one number per line.
 *
 * Usage: cbir <directory> [spatial]
 *   directory  folder containing the .jpg files
 *   spatial    number of local pieces the pixel sequence is cut into (default 4, which
 *              yields 4*9 local + 9 global = 45 values)
 */
object cbir extends App {

  /** Piece count used when no second command-line argument is supplied. */
  private def defaultSpatial: Int = 4

  /**
   * Piece count taken from the second command-line argument when present.
   * `args` is wrapped in Option because it is unset until the App's main has started.
   */
  private def spatialFromArgs: Int =
    Option(args).filter(_.length > 1).map(_(1).toInt).getOrElse(defaultSpatial)

  /**
   * First three statistical moments of a sample.
   *
   * @param x the sample values
   * @return List(mean, variance, skewness)
   */
  def stats(x:Array[Double]):List[Double] =
    List(new Mean().evaluate(x, 0, x.length),
      new Variance().evaluate(x, 0, x.length),
      new Skewness().evaluate(x, 0, x.length))

  /**
   * Reads every pixel of the raster, column by column (x outer, y inner), into its own
   * 3-element array.
   * NOTE(review): assumes a 3-band raster; other band counts are not handled - confirm inputs.
   */
  private def readPixels(raster: WritableRaster, w: Int, h: Int): IndexedSeq[Array[Double]] =
    for (x <- 0 until w; y <- 0 until h)
      yield raster.getPixel(x, y, new Array[Double](3))

  /** Moments of band 0, then band 1, then band 2 of the given pixels (9 values). */
  private def bandStats(pixels: Seq[Array[Double]]): List[Double] =
    (0 until 3).toList.flatMap { band => stats(pixels.map(_(band)).toArray) }

  /**
   * Builds the signature of one image:
   *  1. k-means (k = 8, fixed seed) finds 8 dominant colours,
   *  2. every pixel is replaced by its nearest dominant colour,
   *  3. mean/variance/skewness are computed per band over the whole image (9 values)
   *     and over each of `spatial` consecutive pieces of the pixel sequence (spatial*9 values).
   *
   * @param imagefile path of the image to read
   * @param spatial   number of local pieces; defaults to the second command-line
   *                  argument, or 4 when that is absent
   * @return local values followed by global values, `spatial*9 + 9` numbers in total
   * @throws IllegalArgumentException if the file cannot be decoded as an image, if
   *         `spatial` is not positive, or if the image has fewer pixels than `spatial`
   */
  def compute(imagefile:String, spatial:Int = spatialFromArgs):List[Double] = {
    require(spatial > 0, s"spatial must be positive, got $spatial")

    // ImageIO.read returns null when no registered reader understands the file
    val img = Option(ImageIO.read(new File(imagefile))).getOrElse(
      throw new IllegalArgumentException(s"Not a readable image: $imagefile"))
    val raster:WritableRaster = img.getRaster
    val (w,h) = (img.getWidth, img.getHeight)

    // extract all colors from raster
    val allColors = readPixels(raster, w, h).map { p => new DoublePoint(p) }

    // find 8 dominant colors of image via kmeans; the seed is fixed so runs are repeatable
    val k = 8
    val kmeans = new KMeansPlusPlusClusterer[DoublePoint](k, 1000)
    kmeans.getRandomGenerator().setSeed(1234567L)
    val centroids = kmeans.cluster(allColors.toIterable)
    val colors = centroids.map{ x=> x.getCenter.getPoint }.toIndexedSeq

    // update raster with the 8 color palette: each pixel becomes its closest dominant color
    val euclidean = new EuclideanDistance()
    for (x <- 0 until w; y <- 0 until h) {
      val pixel = raster.getPixel(x, y, new Array[Double](3))
      raster.setPixel(x, y, colors.minBy { c => euclidean.compute(c, pixel) })
    }

    // extract all the colors again from the updated raster
    val data = readPixels(raster, w, h)
    val n = data.size
    require(n >= spatial, s"$imagefile has only $n pixels, fewer than spatial=$spatial")

    // global signature: 3 moments for each of the 3 bands
    val global = bandStats(data)
    assert(global.size == 9)

    // Local signature. Pieces are cut at proportional boundaries so there are always exactly
    // `spatial` of them, even when the pixel count is not a multiple of `spatial`
    // (fixed-size grouping would then produce an extra, shorter piece).
    val bounds = (0 to spatial).map { i => (i.toLong * n / spatial).toInt }
    val local = bounds.zip(bounds.tail)
      .flatMap { case (from, until) => bandStats(data.slice(from, until)) }
      .toList
    assert(local.size == spatial * 9)

    local ++ global
  }

  // Placeholder kept from the original source; it has no body and nothing in this file calls it.
  def routine = {
  }

  /**
   * Writes a signature to `<dir>/<file>_sig.txt`, one value per line.
   *
   * @param x    the signature values
   * @param dir  output directory
   * @param file image file name the signature belongs to
   */
  def save(x:List[Double], dir:String, file:String) = {
    // File(parent, child) inserts the separator only when dir does not already end with one
    val path = new File(dir, file + "_sig.txt").getPath
    val pw = new java.io.PrintWriter(path)
    try {
      println("Saving " + path)
      x.foreach { v => pw.println(v) }
      pw.flush()
    } finally {
      pw.close() // release the file handle even if writing fails
    }
  }

  if (args.isEmpty) {
    Console.err.println("Usage: cbir <directory> [spatial]")
    sys.exit(1)
  }

  // find all image files in dir; File.list returns null when dir is not a readable directory
  val dir = args(0)
  val files = Option(new File(dir).list())
    .getOrElse(throw new IllegalArgumentException(s"Not a readable directory: $dir"))
    .filter{ x=> x.endsWith(".jpg")}

  files.foreach { file =>
    val path = new File(dir, file).getPath
    println("Processing " + path)
    save(compute(path), dir, file)
  }
}
package com.marin.cv
// To build: sbt assembly
// To run: spark-submit --driver-memory 120g --executor-memory 120g --class com.marin.cv.cbirJob --master local[*] target/scala-2.10/cbir.jar "--dir /media/kraman/disk1/comp_vision/sig"
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.rdd._
import org.apache.spark.rdd.PairRDDFunctions
import com.marin.util.Args
import java.io.PrintWriter
import org.apache.spark.mllib.classification._
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import collection.mutable.ListBuffer
/**
 * Trains and evaluates one binary SVM per image class from the signature files
 * (`*.jpg_sig.txt`) found in the directory given by the `dir` argument.
 *
 * For each class the job writes a line to the local file `log` and a tuple
 * (className, areaUnderPR, areaUnderROC) to a timestamped output directory.
 *
 * An explicit `main` is used instead of `extends App`: the Spark documentation warns that
 * subclasses of scala.App may not work correctly.
 */
object cbirJob {

  /** Class name for each hundreds bucket of the image number (e.g. 368 -> 300 -> "bus"). */
  val classNames:Map[Int, String] = Map(
    300->"bus",
    400->"dino",
    500->"elephant",
    600->"flower",
    700->"horse",
    800->"nature",
    900->"food")

  /**
   * Reads every signature file under `dir` and groups the resulting positive-labelled
   * points by class name.
   *
   * @param sc  the active Spark context
   * @param dir directory of signature files; file names must contain a three-digit image
   *            number directly before ".jpg_sig.txt"
   * @return class name -> all feature points of that class (label 1.0)
   */
  private def loadClassFeatures(sc: SparkContext, dir: String): Map[String, ListBuffer[LabeledPoint]] = {
    val names = classNames // local copy so the closure captures only the map
    sc
      .wholeTextFiles(dir) // RDD[(String, String)]
      .map { case (filename, contents) =>
        val features = new DenseVector(contents.split("\n").map{_.toDouble})
        val idx = filename.indexOf(".jpg_sig.txt")
        val key = (filename.slice(idx-3,idx).toInt/100)*100 // eg. turn 368 into 300
        (names(key), new LabeledPoint(1.0d, features))
      } // RDD[(String, LabeledPoint)]
      .aggregateByKey(ListBuffer.empty[LabeledPoint])(
        // append in place: the non-mutating `+:` copies the buffer for every element
        (acc, lp) => acc += lp,
        (left, right) => left ++= right
      ) // RDD[(String, ListBuffer[LabeledPoint])]
      .collectAsMap.toMap
  }

  /**
   * Entry point.
   *
   * @param args command-line arguments, parsed by `Args`; must provide `dir`
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[28]")
      .set("spark.driver.maxResultSize", "100g")
      .set("spark.local.dir", "/media/kraman/disk2/tmp")
      .set("spark.akka.threads", "256")
    val sc = new SparkContext(conf)
    try {
      val myargs = Args(args.mkString)
      val dir = myargs("dir")
      val mylog = new PrintWriter("log")
      try {
        // read all the CBIR image signatures & convert to features
        val classFeatures = loadClassFeatures(sc, dir)

        // for each class(bus, dino, elephant, flower, ...), train a binary classifier
        val results = classFeatures.map { case (className, features) =>
          val Array(posTrain, posTest) = sc.makeRDD(features).randomSplit(Array(0.8,0.2))

          // every point of every other class, relabelled as a negative example
          val negFeatures = sc.makeRDD((classFeatures - className).values.flatten.toSeq)
            .map{ x:LabeledPoint => new LabeledPoint(0, x.features)} // flip labels

          // want |negatives| == |positives| for training, but |negatives| > |positives| in
          // the dataset, so sample the negatives down; the unsampled rest is the negative test set
          val fraction = math.min(1.0, posTrain.count.toDouble/negFeatures.count)
          val negTrain = negFeatures.sample(false, fraction)
          val negTest = negFeatures.subtract(negTrain)
          mylog.println("Counts(NegTrain, NegTest, PosTrain, PosTest):" +
            negTrain.count + "," + negTest.count + "," + posTrain.count + "," + posTest.count)

          val model:SVMModel = SVMWithSGD.train(posTrain ++ negTrain, 1000)
          // Without this, predict returns thresholded 0/1 labels and the PR/ROC curves
          // collapse to a single operating point; the metrics need the raw scores.
          model.clearThreshold()

          val testSet = posTest ++ negTest
          val scores = model.predict( testSet.map{ lp => lp.features } )
          val labels = testSet.map{ lp => lp.label }
          val metrics = new BinaryClassificationMetrics(scores.zip(labels))
          mylog.println( className + "," + metrics.areaUnderPR + "," + metrics.areaUnderROC)
          (className, metrics.areaUnderPR, metrics.areaUnderROC)
        }.toList

        sc.makeRDD(results,1).saveAsTextFile("svm classifier results"+System.currentTimeMillis)
      } finally {
        mylog.close() // flush the log even when a stage fails
      }
    } finally {
      sc.stop()
    }
  }
}
// THIS VERSION TRAINS MULTIPLE CLASSIFIERS PER CLASS
// Corel has 7 classes, each class has 100 images
// WANT 80 +ve, 80-ve PER CLASSIFIER PER CLASS.
// So 600 -ve per class = 80*7 + 40. So 7 classifiers PER CLASS
// TOTAL 7 classifiers per class * 7 classes = 49 classifiers
package com.marin.cv
// To build: sbt assembly
// To run: spark-submit --driver-memory 120g --executor-memory 120g --class com.marin.cv.cbirJob --master local[*] target/scala-2.10/cbir.jar "--dir /media/kraman/disk1/comp_vision/sig"
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.rdd._
import org.apache.spark.rdd.PairRDDFunctions
import com.marin.util.Args
import java.io.PrintWriter
import org.apache.spark.mllib.classification._
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import collection.mutable.ListBuffer
/**
 * Ensemble variant: for every image class, trains several binary SVMs, each on the same
 * positive training set and a different bucket of negatives, then combines their 0/1
 * predictions by majority vote and evaluates the vote on a held-out test set.
 *
 * For each class the job writes lines to the local file `log` and a tuple
 * (className, areaUnderPR, areaUnderROC) to a timestamped output directory.
 *
 * An explicit `main` is used instead of `extends App`: the Spark documentation warns that
 * subclasses of scala.App may not work correctly.
 */
object cbirJob {

  /** Class name for each hundreds bucket of the image number (e.g. 368 -> 300 -> "bus"). */
  val classNames:Map[Int, String] = Map(
    300->"bus",
    400->"dino",
    500->"elephant",
    600->"flower",
    700->"horse",
    800->"nature",
    900->"food")

  /**
   * Reads every signature file under `dir` and groups the resulting positive-labelled
   * points by class name.
   *
   * @param sc  the active Spark context
   * @param dir directory of signature files; file names must contain a three-digit image
   *            number directly before ".jpg_sig.txt"
   * @return class name -> all feature points of that class (label 1.0)
   */
  private def loadClassFeatures(sc: SparkContext, dir: String): Map[String, ListBuffer[LabeledPoint]] = {
    val names = classNames // local copy so the closure captures only the map
    sc
      .wholeTextFiles(dir) // RDD[(String, String)]
      .map { case (filename, contents) =>
        val features = new DenseVector(contents.split("\n").map{_.toDouble})
        val idx = filename.indexOf(".jpg_sig.txt")
        val key = (filename.slice(idx-3,idx).toInt/100)*100 // eg. turn 368 into 300
        (names(key), new LabeledPoint(1.0d, features))
      } // RDD[(String, LabeledPoint)]
      .aggregateByKey(ListBuffer.empty[LabeledPoint])(
        // append in place: the non-mutating `+:` copies the buffer for every element
        (acc, lp) => acc += lp,
        (left, right) => left ++= right
      ) // RDD[(String, ListBuffer[LabeledPoint])]
      .collectAsMap.toMap
  }

  /**
   * Entry point.
   *
   * @param args command-line arguments, parsed by `Args`; must provide `dir`
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[28]")
      .set("spark.driver.maxResultSize", "100g")
      .set("spark.local.dir", "/media/kraman/disk2/tmp")
      .set("spark.akka.threads", "256")
    val sc = new SparkContext(conf)
    try {
      val myargs = Args(args.mkString)
      val dir = myargs("dir")
      val mylog = new PrintWriter("log")
      try {
        // read all the CBIR image signatures & convert to features
        val classFeatures = loadClassFeatures(sc, dir)

        // for each class(bus, dino, elephant, flower, ...), train the ensemble
        val results = classFeatures.map { case (className, features) =>
          val Array(posTrain, posTest) = sc.makeRDD(features).randomSplit(Array(0.8,0.2))

          // every point of every other class, relabelled as a negative example
          val negFeatures = sc.makeRDD((classFeatures - className).values.flatten.toSeq)
            .map{ x:LabeledPoint => new LabeledPoint(0, x.features)} // flip labels

          // want |negatives| == |positives|, but |negatives| > |positives| in dataset,
          // so make as many negative buckets as needed, each the size of the positive
          // training set (80): 80 * 7 + 40 = 600 => 7 training buckets plus 1 test bucket.
          val numClassifiers = 7
          val negRatio = (Array.fill(numClassifiers)(80) :+ 40).map{ _/600.0 }
          val negArray = negFeatures.randomSplit(negRatio)
          val negTest = negArray(numClassifiers) // SET TEST SET ASIDE (last bucket)
          val testSet = posTest ++ negTest
          val testSetFeatures = testSet.map{ lp => lp.features }

          // NOW BUILD 1 classifier PER NEG SAMPLE BUCKET
          val scores:Seq[RDD[Double]] = (0 until numClassifiers).map{ idx =>
            val negTrain = negArray(idx)
            val model:SVMModel = SVMWithSGD.train(posTrain ++ negTrain, 1000)
            mylog.println("SVM: " + idx + " Counts(NegTrain, NegTest, PosTrain, PosTest):" +
              negTrain.count + "," + negTest.count + "," + posTrain.count + "," + posTest.count)
            // threshold left in place on purpose: the vote below needs 0/1 predictions
            model.predict( testSetFeatures )
          }

          /* Each classifier yields one RDD[Double] of 0/1 predictions, i.e. one column per
             classifier:
               0 1 1 ...
               1 0 1
               0 1 1
             We need a single column holding 1 when the majority predicts 1, otherwise 0.
             Majority of 7 = 4, so sum the columns row-wise and compare the sum with 4.
             NOTE(review): the metrics below are computed from these hard votes, not from
             continuous scores - confirm that this is the intended evaluation.
          */
          val majority = numClassifiers / 2 + 1
          val consensus:RDD[Double] = scores
            .reduceLeft{ (a:RDD[Double], b:RDD[Double]) =>
              a.zip(b).map{ case (left, right) => left + right }
            }
            .map{ votes => if (votes >= majority) 1.0 else 0.0 }

          val labels = testSet.map{ lp => lp.label }
          val metrics = new BinaryClassificationMetrics(consensus.zip(labels))
          mylog.println( className + "," + metrics.areaUnderPR + "," + metrics.areaUnderROC)
          (className, metrics.areaUnderPR, metrics.areaUnderROC)
        }.toList

        sc.makeRDD(results,1).saveAsTextFile("svm classifier results"+System.currentTimeMillis)
      } finally {
        mylog.close() // flush the log even when a stage fails
      }
    } finally {
      sc.stop()
    }
  }
}
Dataset: COREL subset ( 7 classes, 100 images per class => 7*100 = 700 jpgs )
COREL: https://sites.google.com/site/dctresearch/Home/content-based-image-retrieval
Training Test Ratio: 80-20
Equal number of positive & negative samples, i.e. train on 80 dinos & 80 random non-dinos drawn from the 700-100 = 600 non-dinos.
So training sample size = 80 + 80 = 160
Test sample = 20 dinos + 20 non-dinos
Train 1 SVM classifier per class => 7 SVM classifiers
Hyperparam: 1000 iterations on the SGD with training rate 1.0
(Name of Classifier, AUC = Area under Precision Recall Curve, Area under ROC Curve)
(bus,0.7266666666666667,0.6215384615384616)
(dino,0.9289215686274509,0.9179566563467493)
(flower,0.9762845849802372,0.9545454545454546)
(horse,0.9277836134453781,0.8854166666666667)
(food,0.7559523809523809,0.6142857142857143)
(nature,0.843956043956044,0.7318181818181818)
(elephant,0.5694235588972432,0.5354691075514875)
Conclusions:
Easiest to classify = Flower, Dino
Hardest to classify = Elephant
wiki for AUC: http://fastml.com/what-you-wanted-to-know-about-auc/ ( we want 1.0, we get 0.56 to 0.97 )
wiki for ROC:http://gim.unmc.edu/dxtests/roc3.htm ( we want 1.0, we get 0.53 to 0.95)
Run2: Test Set: 20 positives + 520 negatives ( negative test samples far overwhelm positives )
(bus,0.3872202166064982,0.7410112359550562)
(dino,0.6296296296296297,0.9430740037950663)
(flower,0.5792057698992006,0.8983012559862654)
(horse,0.4008389605074688,0.6706503014642549)
(food,0.24998883499915142,0.6119496855345912)
(nature,0.11913195351266649,0.5286529060293319)
(elephant,0.5030071466512145,0.650146771037182)
Run3: Use a 1:9 ratio for training. 1 positive sample for 9 negative samples!
Justification: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4061540/
Essentially, opposite of Run2.
Summary: (CLASSNAME, AUC, ROC)
(bus,0.6923076923076923,0.5)
(dino,0.9463950683462878,0.9024390243902439)
(flower,0.9299660441426146,0.881578947368421)
(horse,0.6826025459688826,0.5142857142857142)
(food,0.7222222222222222,0.5)
(nature,0.7056831267357584,0.7325162220620043)
(elephant,0.6941747572815534,0.5)
---------
Detailed Counts
CLASSNAME, AUC, ROC
---------
Counts(NegTrain, NegTest, PosTrain, PosTest):536,64,60,40
bus,0.6923076923076923,0.5
Counts(NegTrain, NegTest, PosTrain, PosTest):550,50,59,41
dino,0.9463950683462878,0.9024390243902439
Counts(NegTrain, NegTest, PosTrain, PosTest):545,55,62,38
flower,0.9299660441426146,0.881578947368421
Counts(NegTrain, NegTest, PosTrain, PosTest):534,66,65,35
horse,0.6826025459688826,0.5142857142857142
Counts(NegTrain, NegTest, PosTrain, PosTest):550,50,60,40
food,0.7222222222222222,0.5
Counts(NegTrain, NegTest, PosTrain, PosTest):527,73,62,38
nature,0.7056831267357584,0.7325162220620043
Counts(NegTrain, NegTest, PosTrain, PosTest):537,63,60,40
elephant,0.6941747572815534,0.5
Run 4. Use an equal number of positive & negative training samples.
But since the number of negatives > the number of positives,
train as many classifiers PER class as are required to cover the entire negative training set.
// THIS VERSION TRAINS MULTIPLE CLASSIFIERS PER CLASS
// Corel has 7 classes, each class has 100 images
// WANT 80 +ve, 80-ve PER CLASSIFIER PER CLASS.
// So 600 -ve per class = 80*7 + 40. So 7 classifiers PER CLASS
// TOTAL 7 classifiers per class * 7 classes = 49 classifiers
Justification:
http://sci2s.ugr.es/keel/pdf/specific/congreso/akbani_svm_2004.pdf
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.309.904&rep=rep1&type=pdf
Summary: (CLASSNAME, AUC, ROC)
(bus,0.7278735632183908,0.7034313725490197)
(dino,0.9265931372549019,0.9295485636114911)
(flower,0.9380952380952381,0.9047619047619048)
(horse,0.7213669950738917,0.7648809523809524)
(food,0.7255244755244755,0.7071428571428572)
(nature,0.6842105263157895,0.6556390977443609)
(elephant,0.7199602780536245,0.6795665634674922)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment