Skip to content

Instantly share code, notes, and snippets.

@Stefan-Wagner
Created November 17, 2011 09:57
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Stefan-Wagner/1372818 to your computer and use it in GitHub Desktop.
Save Stefan-Wagner/1372818 to your computer and use it in GitHub Desktop.
/**
Benchcoat.scala
done:
2011-11-17 v.4: improvement in plot-code
x-axis and y-axis needs description (how many units measured, time in second)
2011-11-17 v.3: minor improvement in plot-code
2011-10-22 v.2: improvement in plot-code and renaming prettyPrint
extend Benchcoat by
providing a List of (
names,
combined with a functions, which takes some
I and returns some O
)
typical Types for I,O are List[Int] for both, or Seq[Double], but very long Strings would
make sense too, and I and O don't need to be of the same Kind (else they were I and I).
*/
abstract class Benchcoat [I, O]{
// define a name by overriding, to be used for output-files
val name : String
/**
abstract methods, need implementation in your class.
Most important: provide a List of names, combined to functions you like to compare.
*/
val list2bench: List[(String, I => O)]
// typical case: Generate a List of n random Values of type I
def iGenerator (n: Int) : I
/* typical case: list.take (n)
used to protocoll the increasing cost of increasing n
*/
def takePart (i: I, n: Int) : I
val r = util.Random
import collection.mutable.ListBuffer
var buf = ListBuffer [String] ()
def timed (xs: I) (f: I => O) = {
val a = System.currentTimeMillis
val res = f (xs)
val z = System.currentTimeMillis
val delta = z-a
val s = "" + (delta / 1000.0)
println (s)
buf += s
res
}
def main (args: Array [String]) : Unit = {
val n = args(0).toInt
val xs = iGenerator (n)
/*
With 1 Mio. as param, start it with 100 000, 200k, 300k, ... 1Mio. cases.
a) warmup
b) look, where the process gets linear to size
*/
list2bench.foreach (f => {
buf += f._1 // name
println (f._1)
(1 to 10) foreach (i => {
val res = timed (takePart (xs, n/10 * i)) (f._2)
compat.Platform.collectGarbage
});
println ()}
)
println (toPrettyString)
// Be aware, that in each run, existing files are silently overwritten.
// if uncommented in bench.plot, this will be true for benchplot.png as well.
string2File (toPrettyString, "bench.data")
string2File (gnuplotPrint (n), "bench.plot")
}
def string2File (s: String, filename: String) {
val f = new java.io.File (filename)
val out = new java.io.PrintWriter (f)
try { out.print (s) }
finally { out.close }
}
def toPrettyString : String = {
/* yes, fixed number of iterations to 10
+1 for the headlines=11 */
val width = buf.size / 11
val sb = new StringBuffer
for (row <- (0 to 10)) {
sb.append (row + " \t")
for (col <- (0 to width - 1))
sb.append (buf (col * 11 + row) + " \t")
sb.append ("\n")
}
sb.toString
}
def prettyPrint = {
/* yes, fixed number of iterations to 10
+1 for the headlines=11 */
val width = buf.size / 11
for (row <- (0 to 10)) {
print (row + " \t")
for (col <- (0 to width - 1))
print (buf (col * 11 + row) + " \t")
println ()
}
}
/**
usage: cat bench.plot | gnuplot
Be aware, that in each run, existing files are silently overwritten.
*/
def gnuplotPrint (problemsize: Int) = {
/**
transform the size to readable form:
1-999: i/10, i, ""
1000-1M: i/10k, i/1000, k
1M... i/10M, i/1M, M
for example: 4000000 => 400k 4000k
*/
var (start, size, unit) = if (problemsize > 9999999) (problemsize/(10*1000*1000), problemsize/(1000*1000), "M")
else if (problemsize > 9999) (problemsize/(10*1000), problemsize/1000, "k")
else (problemsize/10, problemsize, "")
val cmd = "# set terminal png transparent nocrop enhanced font arial 8 size 420,320\n" +
"set output '" + name + ".png'\n" +
"set terminal png\n" +
"set ylabel 'Time in s'\n" +
"set xlabel 'Problem size from " + start + unit + " to " + size + unit + "\n" +
"set style data linespoints\n" +
"set xtics border in scale 1,0.5 nomirror offset character 0, 0, 0\n" +
"set title 'performance comparision v0.3'\n" +
"plot '" + name + ".data' using 2:xtic(1) t 2\n"
val ext = for (i <- 2 to list2bench.size) yield "'' u " + (i+1) + " t " + (i+1)
cmd + ext.mkString (", ", ", ", " ") + "\n"
}
}
import annotation.tailrec
object UniqueSortedFilteredBench extends Benchcoat [Array[Int], List[Int]] {
val name = "usfBench"
def iGenerator (n: Int) = (for (x <- 1 to n) yield (r.nextInt (n))).toArray
def takePart (li: Array[Int], n: Int) = li.take (n)
// ----------
def danielCsUSF (someArray: Array[Int]): List[Int] = {
val a = scala.collection.immutable.SortedSet (someArray filter (0 !=): _*)
a.toList
}
def luigiPlingeUSF (someArray: Array[Int]): List[Int] =
collection.SortedSet (someArray.filter (_ > 0) :_*).toList
def matthewFarwellUSF (someArray: Array[Int]): List[Int] = {
val a = someArray.toSet.filter (_ > 0).toArray
java.util.Arrays.sort (a) // quicksort, mutable data structures bad :-)
a.toList
}
def rexKerrUSF (someArray: Array[Int]): List[Int] = {
java.util.Arrays.sort (someArray)
var overzero = 0
var ndiff = 0
var last = 0
var i = 0
while (i < someArray.length) {
if (someArray (i) <= 0) overzero += 1
else if (someArray (i) > last) {
last = someArray (i)
ndiff += 1
}
i += 1
}
var result = List [Int] ()
var j = 0
i = overzero
last = 0
while (i < someArray.length) {
if (someArray (i) > last) {
result = someArray (i) :: result
last = someArray (i)
j += 1
}
i += 1
}
result
}
def jedWesleySmithUSF (someArray: Array[Int]): List[Int] = {
util.Sorting.quickSort (someArray)
someArray.foldRight (List.empty [Int]) {
case (a, b) =>
if (!b.isEmpty && b(0) == a)
b
else
a :: b
}
}
def userUnknownUSF (someArray: Array[Int]): List[Int] =
someArray.toList.filter (_ > 0).sortWith (_ > _).distinct
// -----------------------
val list2bench: List [(String, Array[Int] => List[Int])] = List (
"danielCsUSF" -> danielCsUSF _
, "luigiPlingeUSF" -> luigiPlingeUSF _
, "matthewFarwellUSF"-> matthewFarwellUSF _
, "rexKerrUSF" -> rexKerrUSF _
, "jedWesleySmithUSF"-> jedWesleySmithUSF _
, "userUnknownUSF" -> userUnknownUSF _
)
}
@Stefan-Wagner
Copy link
Author

What it is - how to use?

The program is an utility to measure and compare code snippets, a microbenchmark.

You normally don't modify, but extend the abstract base class:

abstract class Benchcoat [I, O] {

Where i is the Input of your code to measure, and O the output.
Let's see an example:

object SampleBench extends Benchcoat [Array[Int], List[Int]] {

The name is arbitrary, but the I and O is important. Array of Int and List of Int.
There is a name, which is used for the file-names, which are produced:

val name = "usfBench"

The iGenerator generates i Elements (as Array of Int in this special case),
where i is the problem size:

def iGenerator (i: Int) = (for (x <- 1 to i) yield (r.nextInt (i))).toArray 

A common use case is to produce a big number of random values, and put them into a
collection. Another possibility is, to read parts from a big file as input; just returning
'i' itself can be appropriate too, for example if you have to find all primes below that
value with different algorithms to measure.

TakePart takes 10%, 20%, 30%, ... 100% of the elements, generated by the Generator. Again,
the type is the I-type, Array of Int:

def takePart (li: Array[Int], n: Int) = li.take (n) 

Now you write the code to compare. Again I and O are important:

def m1usf (someArray: Array[Int]): List[Int] = {
  val a = scala.collection.immutable.SortedSet (someArray filter (0 !=): _*)
  a.toList
}

def m2usf (someArray: Array[Int]): List[Int] =
  someArray.toList.filter (_ > 0).sortWith (_ > _).distinct

Then we need to fill our list2bench with names, which correspond with above methods,
so that we can report:

  val list2bench: List [(String, Array[Int] => List[Int])] = List (
        "m1USF" -> m1usf _
      , "m2usf" -> m2usf _
  ) 
}

After writing your implementation, you compile the scala code, and run it with an
appropriate size of elements.

scala SampleBench 400000 

Start with small values, to find out, how long it takes, to perform your tests.
Sample output:

m1usf
0.324
0.419
0.709
1.048
1.34
2.0
2.936
2.964
3.354
3.304

m2usf
0.13
0.235
0.361
0.743
0.81
0.868
0.94
1.024
1.524
1.597

0   m1usf   m2usf   
1   0.324   0.13    
2   0.419   0.235   
3   0.709   0.361   
4   1.048   0.743   
5   1.34    0.81    
6   2.0     0.868   
7   2.936   0.94    
8   2.964   1.024   
9   3.354   1.524   
10  3.304   1.597   

The first part is to observe the data generation, which may take some time.
The second part in table form is the same data, but can be put into a Spreadsheet, posted by
email and so on. Meanwhile 2 files are created, usfBench.data and usfBench.plot. The table is
identic with usfBench.data, which is read by the .plot-commands too.

You call

 cat usfBench.plot | gnuplot 

which performs some gnuplot-commands, and a third file is produced, usfBench.jpg

Here are the plot-commands:

# set terminal png transparent nocrop enhanced font arial 8 size 420,320
set output 'usfBench.png'
set terminal png
set ylabel 'Time in s'
set xlabel 'Problem size from 400k to 4000k
set style data linespoints
set xtics border in scale 1,0.5 nomirror offset character 0, 0, 0
set title 'performance comparision v0.3'
plot 'usfBench.data' using 2:xtic(1) t 2, '' u 3 t 3 

If we replace 'terminal png' with 'terminal dumb', we get this documentation
friendly result instead:

                        performance comparision v0.3
Time in s
3.5 ++-----------------------------------------------------------------++
  |                                          jedWesleySmithUSFA**A****A
  |                                                 rexKerrUSF ##B### |
3 ++                                           A*******A*            ++
  |                                          **                       |
2.5 ++                                        *                        ++
  |                                        *                          |
  |                                      **                           |
2 ++                                   *A                            ++
  |                                 ***                               |
1.5 ++                              **                         #B#######B
  |                            *A*                         ##         |
  |                        ****                          ##           |
1 ++                 ****A*                 ###B#######B#            ++
  |             *A***  ##B######B#######B###                          |
0.5 ++        ****   ####                                              ++
  A*******A*  ###B#                                                   |
  B#######B###   +       +      +       +      +       +      +       +
   0 ++------+------+-------+------+-------+------+-------+------+------++
  1       2      3       4      5       6      7       8      9      10
                     Problem size from 400k to 4000k

@Stefan-Wagner
Copy link
Author

import annotation.tailrec

object DigitsBench extends Benchcoat [List[Int], Seq[Seq[Int]]] {

type I=List[Int]
type O=Seq[Seq[Int]]

val name = "digitBench"
/**
We return a List of 4-digit random numbers here or, for bigger values, of Int.MaxValue.
*/
def iGenerator (n: Int) : I = (for (x <- 1 to n) yield random.nextInt (Int.MaxValue)).toList
// def iGenerator (n: Int) : I = (for (x <- 1 to n) yield random.nextInt (9999)).toList

def takePart (li: I, n: Int) : I = li.take (n)

// ----------
/*
Each algorithm is called by a mapping to the elementary method.
*/
def whileDigits (n: Int) = {
var l = List Int
var i = n
while (i != 0) {
l ::= i % 10
i /= 10 }
l
}
def digits1 (i: I) : O = i.map (whileDigits)

def toStringDigits (n: Int) = n.toString.map (_.asDigit).toList
def digits2 (i: I) : O = i.map (toStringDigits)

def matchDigits (n: Int): Seq [Int] = n match {
case 0 => List.empty [Int]
case _ => matchDigits (n / 10) :+ (n % 10)
}
def digits3 (i: I) : O = i.map (matchDigits)

def unfold [T1,T2](x: T1) (fn: T1 => Option [(T2, T1)]): Stream [T2] =
fn(x) match {
case None => Stream()
case Some ((result, next)) => result #:: unfold (next) (fn)
}
def unfoldDigits (n: Int) =
unfold (n) (n => if (n == 0) None else Some ((n % 10, n / 10))).toList.reverse
def digits4 (i: I) : O = i.map (unfoldDigits)

def split (n: Int) = if (n == 0) List(0) else {
(Stream.iterate (n) (/10) takeWhile ( != 0) map (_ % 10) toList) reverse
}
def digits5 (i: I) : O = i.map (split)

@tailrec
def carryDigits (n: Int, carry: List[Int]): List [Int] = if (n < 10) n :: carry else carryDigits (n/10, (n % 10) :: carry)
def digits6 (i: I) : O = i.map (n => carryDigits (n, List ()))

// -----------------------

val list2bench: List [(String, I => O)] = List (
"whileDigits" -> digits1 _
, "toStrDigits" -> digits2 _
, "matchDigits" -> digits3 _
, "unfoldDigits" -> digits4 _
, "iterateDigits" -> digits5 _
, "carryDigits" -> digits6 _
)
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment