Skip to content

Instantly share code, notes, and snippets.

@jpivarski
Last active November 4, 2016 17:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpivarski/e8b9da99152bccf70ba187cdab149563 to your computer and use it in GitHub Desktop.
Save jpivarski/e8b9da99152bccf70ba187cdab149563 to your computer and use it in GitHub Desktop.
Performance test comparison of C++ ROOT and Java ROOT
# 1. Get the Java ROOT reader (root4j), pinned to tag 0.1-pre2, and build it with Maven.
git clone https://github.com/diana-hep/root4j.git
cd root4j
git checkout tags/0.1-pre2
mvn package
cd ..
# 2. Get some data and make four independent (though identical) copies;
#    the tests below read the original and then each copy in turn.
wget http://opendata.cern.ch/eos/opendata/cms/Run2010B/Mu/AOD/Apr21ReReco-v1/0000/00459D48-EB70-E011-AF09-90E6BA19A252.root -O Mu_Run2010B-Apr21ReReco-v1_AOD.root
cp Mu_Run2010B-Apr21ReReco-v1_AOD.root copy2.root
cp Mu_Run2010B-Apr21ReReco-v1_AOD.root copy3.root
cp Mu_Run2010B-Apr21ReReco-v1_AOD.root copy4.root
cp Mu_Run2010B-Apr21ReReco-v1_AOD.root copy5.root
# 3. Compile and run the Java performance test.
#    The run-time classpath is every jar in root4j/target and root4j/target/lib,
#    joined with ':', followed by the current directory ('.').
#    The output is shown and also saved to java-results.log.
javac -cp root4j/target/classes/ ReadOneBranch.java
java -cp `ls root4j/target/{*.jar,lib/*.jar} | tr '\n' ':'`. ReadOneBranch | tee java-results.log
# 4. Compile and run the C++ performance test as a ROOT macro.
#    The output is shown and also saved to cxx-results.log.
root -l readOneBranch.cxx+ | tee cxx-results.log
# 5. Look at the two logs side by side (Java on the left, C++ on the right, 100 columns wide).
diff -y -W 100 java-results.log cxx-results.log
# 6. Gape, because there is something wrong with that discrepancy.
#include <time.h>
#include <cmath>
#include <iostream>
#include <vector>
#include "TBranch.h"
#include "TBranchElement.h"
#include "TClass.h"
#include "TCollection.h"
#include "TFile.h"
#include "TLeaf.h"
#include "TSystem.h"
#include "TTree.h"
void runfile(const char* fileName) {
std::cout << fileName << std::endl;
double total = 0.0;
TFile *tfile = new TFile(fileName);
TTree *ttree;
tfile->GetObject("Events", ttree);
std::cout << "number of TBaskets ???" << std::endl;
// get the branch with a full path
// it's a TBranchElement because recoTracks_generalTracks__RECO.obj is a structure
// flat ntuples don't have TBranchElements, but we only need TBranchElement::GetBranchCount for structures
TBranchElement *branch = (TBranchElement*)ttree->GetBranch("recoTracks_generalTracks__RECO.obj.chi2_");
// essential!!! MakeClass mode lets us view the structure one leaf at a time.
ttree->SetMakeClass(1);
// has to be reassigned for each new TFile (we only know the maximum for *this file*)
int bufferSize = ((TLeaf*)(branch->GetListOfLeaves()->First()))->GetLeafCount()->GetMaximum();
// allocate a buffer that's just big enough
Float_t *buffer = new Float_t[bufferSize];
// have the branch fill this buffer
ttree->SetBranchAddress("recoTracks_generalTracks__RECO.obj.chi2_", buffer);
// allocating a place to put the size is also essential
Int_t size = 0;
ttree->SetBranchAddress("recoTracks_generalTracks__RECO.obj",&size);
// clock stuff
clock_t lastTime = clock();
long items = 0L;
long itemsPerPrint = 100000L;
// the loop over events
Long64_t numEvents = ttree->GetEntries();
for (Long64_t i = 0; i < numEvents; i++) {
// essential! GetEntry from the branch, not the ttree
branch->GetEntry(i);
// get the number of elements (tracks in this case)
int numTracks = branch->GetBranchCount()->GetNdata();
// and loop over them
for (int j = 0; j < numTracks; ++j) {
// getting the data does not involve any function calls
total += buffer[j];
// clock stuff
items++;
if (items % itemsPerPrint == 0) {
clock_t now = clock();
std::cout << 1.0 * (now - lastTime) / itemsPerPrint / CLOCKS_PER_SEC * 1e9 << " ns/item" << std::endl;
lastTime = now;
}
}
}
// clock and checksum results
std::cout << "check total " << total << " == 1.55104e+07 (" << (abs(total - 1.55104e+07) < 1e-6*1.55104e+07 ? "true" : "false") << ")" << std::endl;
}
// Macro entry point: runs the read test on the original file and then on each of its
// four copies, in that order, and finally exits the ROOT session with status 0.
void readOneBranch() {
  const char* const inputFiles[] = {
    "Mu_Run2010B-Apr21ReReco-v1_AOD.root",
    "copy2.root",
    "copy3.root",
    "copy4.root",
    "copy5.root",
  };
  const int numInputFiles = sizeof(inputFiles) / sizeof(inputFiles[0]);

  for (int k = 0; k < numInputFiles; ++k) {
    runfile(inputFiles[k]);
  }

  gSystem->Exit(0);
}
// SetMakeClass(1)
// GetLenType() * GetLenStatic() * GetLeafCount()->GetMaximum()
import hep.io.root.core.RootInput;
import hep.io.root.*;
import hep.io.root.interfaces.*;
/**
 * Performance test: for each input file, reads every float of the branch
 * recoTracks_generalTracks__RECO. / obj / chi2_ in the "Events" tree, adds the values up
 * as a checksum, and prints the elapsed time per item after every
 * {@link #itemsPerPrint} items.
 */
public class ReadOneBranch {
    /** Number of items read between two timing printouts. */
    final static long itemsPerPrint = 100000L;
    /** System.nanoTime() value at the previous printout (or at the start of the current file). */
    static long lastTime = 0L;
    /** Number of items read so far from the current file. */
    static long items = 0;

    /** Prints the average elapsed nanoseconds per item since the previous printout and restarts the interval. */
    private static void printout() {
        long now = System.nanoTime();
        System.out.println(String.format("%g ns/item", 1.0 * (now - lastTime) / itemsPerPrint));
        lastTime = now;
    }

    /**
     * Runs the test on the original data file and its four copies, one after the other.
     *
     * @param args unused
     * @throws java.io.IOException declared for the file-reading calls made here
     * @throws RootClassNotFound declared for the ROOT object lookups made here
     */
    public static void main(String[] args) throws java.io.IOException, RootClassNotFound {
        String[] fileNames = {"Mu_Run2010B-Apr21ReReco-v1_AOD.root", "copy2.root", "copy3.root", "copy4.root", "copy5.root"};

        for (String fileName : fileNames) {
            System.out.println(fileName);

            double total = 0.0;

            RootFileReader reader = new RootFileReader(fileName);
            TTree tree = (TTree)reader.get("Events");
            TBranch branch = tree.getBranch("recoTracks_generalTracks__RECO.").getBranchForName("obj").getBranchForName("chi2_");
            TLeaf leaf = (TLeaf)branch.getLeaves().get(0);

            // consecutive elements are used below as the [first entry, end entry) range of one basket
            long[] startingEntries = branch.getBasketEntry();
            System.out.println(String.format("number of TBaskets %d", startingEntries.length - 1));

            lastTime = System.nanoTime();
            items = 0;

            for (int i = 0; i < startingEntries.length - 1; i++) {
                long endEntry = startingEntries[i + 1];

                // all but the last one: an entry's data ends where the next entry's data begins,
                // so position at entry + 1 first to learn where to stop
                for (long entry = startingEntries[i]; entry < endEntry - 1; entry++) {
                    RootInput in = branch.setPosition(leaf, entry + 1);
                    long endPosition = in.getPosition();
                    in = branch.setPosition(leaf, entry);
                    while (in.getPosition() < endPosition) {
                        total += in.readFloat();
                        items += 1;
                        if (items % itemsPerPrint == 0) printout();
                    }
                }

                // the last one: there is no next entry to position at here, so read up to in.getLast()
                RootInput in = branch.setPosition(leaf, endEntry - 1);
                long endPosition = in.getLast();
                while (in.getPosition() < endPosition) {
                    total += in.readFloat();
                    items += 1;
                    if (items % itemsPerPrint == 0) printout();
                }
            }

            // The checksum matches when the difference is SMALLER than the tolerance. The literal
            // has six significant digits, so the tolerance is 1e-6 (relative), the same as in
            // the C++ test.
            double expectedTotal = 1.55104e+07;
            boolean matches = Math.abs(total - expectedTotal) < 1e-6 * expectedTotal;
            System.out.println(String.format("check total %g == 1.55104e+07 (%s)", total, matches ? "true" : "false"));
        }
    }
}
>
> Processing readOneBranch.cxx+...
Mu_Run2010B-Apr21ReReco-v1_AOD.root Mu_Run2010B-Apr21ReReco-v1_AOD.root
number of TBaskets 41 | number of TBaskets ???
688.906 ns/item | 69.49 ns/item
320.936 ns/item | 51.83 ns/item
1796.14 ns/item | 50.64 ns/item
247.927 ns/item | 51.85 ns/item
269.145 ns/item | 64.4 ns/item
246.458 ns/item | 51.42 ns/item
238.634 ns/item | 51.86 ns/item
193.092 ns/item | 52.54 ns/item
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true)
copy2.root copy2.root
number of TBaskets 41 | number of TBaskets ???
636.489 ns/item | 65.28 ns/item
244.687 ns/item | 53.48 ns/item
265.210 ns/item | 50.44 ns/item
244.471 ns/item | 51.34 ns/item
593.997 ns/item | 63.25 ns/item
247.644 ns/item | 51.3 ns/item
245.790 ns/item | 51.23 ns/item
247.591 ns/item | 51.99 ns/item
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true)
copy3.root copy3.root
number of TBaskets 41 | number of TBaskets ???
348.744 ns/item | 64.75 ns/item
183.472 ns/item | 52.26 ns/item
183.461 ns/item | 50.96 ns/item
181.422 ns/item | 51.65 ns/item
194.688 ns/item | 62.68 ns/item
175.473 ns/item | 50.87 ns/item
185.986 ns/item | 51.31 ns/item
176.463 ns/item | 52.11 ns/item
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true)
copy4.root copy4.root
number of TBaskets 41 | number of TBaskets ???
499.383 ns/item | 64.82 ns/item
341.218 ns/item | 52.15 ns/item
346.253 ns/item | 50.94 ns/item
354.199 ns/item | 51.4 ns/item
236.469 ns/item | 61.88 ns/item
127.569 ns/item | 51.55 ns/item
127.590 ns/item | 51.54 ns/item
129.381 ns/item | 52.24 ns/item
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true)
copy5.root copy5.root
number of TBaskets 41 | number of TBaskets ???
413.388 ns/item | 64.8 ns/item
179.847 ns/item | 52.09 ns/item
185.854 ns/item | 51.96 ns/item
177.821 ns/item | 53.35 ns/item
192.151 ns/item | 62.77 ns/item
168.369 ns/item | 50.85 ns/item
128.449 ns/item | 51.09 ns/item
134.405 ns/item | 52.15 ns/item
check total 1.55104e+07 == 1.55104e+07 (true) check total 1.55104e+07 == 1.55104e+07 (true)
@jpivarski
Copy link
Author

jpivarski commented Nov 3, 2016

Ignoring the Java startup phase (training HotSpot optimizations), the Java time is 130-200 ns/item and C++ is 80 ns/item.

Compression on this branch is 1.09, so Java is 15-25 MB/second and C++ is 40 MB/second.

Java is 1.6 to 2 times slower than C++. That makes sense: it is typical of Java versus C++ performance in the Benchmarks Game.

@jpivarski
Copy link
Author

Updated with some more tweaks and now C++ is 50-60 ns/item, or 60-70 MB/second! I've heard that the theoretical limit (due to gzip) was 45 MB/second, so this is getting hard to explain. Nevertheless, I don't see any errors in my timing code.

@jpivarski
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment