agarciamontoro/0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch

## 0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch
From 16daa489ab290265379e5ff44c9ceb871367b233 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Garc=C3=ADa=20Montoro?=
 <alejandro.garciamontoro@gmail.com>
Date: Tue, 28 Mar 2017 01:53:45 +0200
Subject: [PATCH] Implements TMath::Gaus_v, vectorized version of TMath::Gaus
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------2.12.1"

This is a multi-part message in MIME format.
--------------2.12.1
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit

---
 math/mathcore/CMakeLists.txt             |  5 +-
 math/mathcore/inc/TMath.h                |  5 +-
 math/mathcore/inc/TMath_VecTest.h        | 43 +++++++++++++++
 math/mathcore/src/TMath.cxx              | 60 +++++++++++++++++++++
 math/mathcore/test/vectorizationTest.cxx | 90 ++++++++++++++++++++++++++++++++
 5 files changed, 197 insertions(+), 6 deletions(-)
 create mode 100644 math/mathcore/inc/TMath_VecTest.h
 create mode 100644 math/mathcore/test/vectorizationTest.cxx


--------------2.12.1
Content-Type: text/x-patch; name="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"

diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt
index 8bcbd1f760..df980d89af 100644
--- a/math/mathcore/CMakeLists.txt
+++ b/math/mathcore/CMakeLists.txt
@@ -17,6 +17,7 @@ set(MATHCORE_HEADERS TRandom.h
   Math/ChebyshevPol.h Math/KDTree.h Math/TDataPoint.h Math/TDataPointN.h Math/Delaunay2D.h
   Math/Random.h Math/TRandomEngine.h Math/RandomFunctions.h Math/StdEngine.h
   Math/MersenneTwisterEngine.h Math/MixMaxEngine.h   TRandomGen.h Math/LCGEngine.h
+  TMath_VecTest.h
 )

 ROOT_GENERATE_DICTIONARY(G__MathCore   TComplex.h TMath.h ${MATHCORE_HEADERS} Fit/*.h MODULE MathCore LINKDEF LinkDef.h OPTIONS "-writeEmptyRootPCM")
@@ -33,7 +34,3 @@ ROOT_LINKER_LIBRARY(MathCore *.cxx *.c G__MathCore.cxx LIBRARIES ${CMAKE_THREAD_
 ROOT_INSTALL_HEADERS()

 ROOT_ADD_TEST_SUBDIRECTORY(test)
-
-
-
-
diff --git a/math/mathcore/inc/TMath.h b/math/mathcore/inc/TMath.h
index 5a6a365504..b9aea8db83 100644
--- a/math/mathcore/inc/TMath.h
+++ b/math/mathcore/inc/TMath.h
@@ -35,6 +35,7 @@
 #include <limits>
 #include <cmath>

+
 namespace TMath {

    /* ************************* */
@@ -600,7 +601,7 @@ namespace detailsForFastMath {
    inline int IsNaN(double x)
    {
       UInt_t hx, lx;
-
+
       EXTRACT_WORDS(hx, lx, x);

       lx |= hx & 0xfffff;
@@ -888,7 +889,7 @@ Double_t TMath::RMS(Iterator first, Iterator last, WeightIterator w)
    while ( first != last ) {
       Double_t x = Double_t(*first);
       sumw += *w;
-      sumw2 += (*w) * (*w);
+      sumw2 += (*w) * (*w);
       tot += (*w) * (x - mean)*(x - mean);
       ++first;
       ++w;
diff --git a/math/mathcore/inc/TMath_VecTest.h b/math/mathcore/inc/TMath_VecTest.h
new file mode 100644
index 0000000000..dba0dc0109
--- /dev/null
+++ b/math/mathcore/inc/TMath_VecTest.h
@@ -0,0 +1,43 @@
+// @(#)root/mathcore:$Id$
+// Authors: Alejandro García Montoro
+
+/*************************************************************************
+ * Copyright (C) 2017, Alejandro García Montoro.                         *
+ * All rights reserved.                                                  *
+ *                                                                       *
+ * For the licensing terms see $ROOTSYS/LICENSE.                         *
+ * For the list of contributors see $ROOTSYS/README/CREDITS.             *
+ *************************************************************************/
+
+#ifndef ROOT_TMath_VecTest
+#define ROOT_TMath_VecTest
+
+
+//////////////////////////////////////////////////////////////////////////
+//                                                                      //
+// TMath                                                                //
+//                                                                      //
+// Encapsulate most frequently used Math functions.                     //
+// NB. The basic functions Min, Max, Abs and Sign are defined           //
+// in TMathBase.                                                        //
+//                                                                      //
+//////////////////////////////////////////////////////////////////////////
+#ifndef ROOT_Rtypes
+#include "Rtypes.h"
+#endif
+#ifndef ROOT_TMathBase
+#include "TMathBase.h"
+#endif
+
+#include "TError.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+
+#include "Math/Math_vectypes.hxx"
+
+namespace TMath{
+    Double_v Gaus_v(Double_v x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE);
+}
+
+#endif // ROOT_TMath_VecTest
diff --git a/math/mathcore/src/TMath.cxx b/math/mathcore/src/TMath.cxx
index 942bb55637..11312ca758 100644
--- a/math/mathcore/src/TMath.cxx
+++ b/math/mathcore/src/TMath.cxx
@@ -18,6 +18,7 @@
 //////////////////////////////////////////////////////////////////////////

 #include "TMath.h"
+#include "TMath_VecTest.h"
 #include "TError.h"
 #include <math.h>
 #include <string.h>
@@ -461,6 +462,65 @@ Double_t TMath::Gaus(Double_t x, Double_t mean, Double_t sigma, Bool_t norm)
 }

 ////////////////////////////////////////////////////////////////////////////////
+/// Calculate a gaussian function with mean and sigma.
+/// If norm=kTRUE (default is kFALSE) the result is divided
+/// by sqrt(2*Pi)*sigma.
+
+Double_v TMath::Gaus_v(Double_v x, Double_t mean, Double_t sigma, Bool_t norm)
+{
+   if (sigma == 0)
+      return 1.e30;
+
+   Double_v arg = (x-mean)/sigma;
+
+   // for |arg| > 39  result is zero in double precision
+   vecCore::Mask_v<Double_v> mask = !(arg < -39.0 || arg > 39.0);
+
+   // Initialize the result to 0.0
+   Double_v res(0.0);
+
+   // Compute the function only when the arg meets the criteria, using the mask computed before
+   vecCore::MaskedAssign<Double_v>(res, mask, vecCore::math::Exp<Double_v>(-0.5 * arg * arg));
+
+   if (!norm)
+      return res;
+
+   return res/(2.50662827463100024*sigma); //sqrt(2*Pi)=2.50662827463100024
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+/// Calculate a gaussian function with mean and sigma.
+/// If norm=kTRUE (default is kFALSE) the result is divided
+/// by sqrt(2*Pi)*sigma.
+
+// Double_v TMath::Gaus_v_v(Double_v x, Double_v mean, Double_v sigma, Mask<Double_v> norm)
+// {
+//    vecCore::Mask_v<Double_v> null_sigma = (sigma == 0);
+//
+//    Double_v res = 0.0;
+//    res(null_sigma) = 1.e30;
+//
+//    if(!null_sigma.all_of())
+//    {
+//       Double_v arg;
+//
+//       arg(!null_sigma) = (x-mean)/sigma;
+//
+//       // for |arg| > 39  result is zero in double precision
+//       vecCore::Mask_v<Double_v> good_arg = !(arg < -39.0 || arg > 39.0);
+//
+//       res( !null_sigma && good_arg ) = vecCore::math::Exp(-0.5 * arg * arg);
+//
+//       if (norm)
+//          res(!null_sigma) /= (2.50662827463100024*sigma); //sqrt(2*Pi)=2.50662827463100024
+//    }
+//
+//    return res;
+// }
+
+
+////////////////////////////////////////////////////////////////////////////////
 /// The LANDAU function.
 /// mu is a location parameter and correspond approximately to the most probable value
 /// and sigma is a scale parameter (not the sigma of the full distribution which is not defined)
diff --git a/math/mathcore/test/vectorizationTest.cxx b/math/mathcore/test/vectorizationTest.cxx
new file mode 100644
index 0000000000..e7d70b7771
--- /dev/null
+++ b/math/mathcore/test/vectorizationTest.cxx
@@ -0,0 +1,90 @@
+#include "Math/Random.h"
+
+#include "TRandom1.h"
+#include "TStopwatch.h"
+#include "TMath_VecTest.h"
+
+#include <iostream>
+#include <random>
+
+// using namespace ROOT::Math;
+using namespace ROOT;
+using namespace std;
+
+// TStopwatch w; w.S
+
+int main(){
+    // Test and vectorization size
+    const int numRepetitions = 1000;
+    const int inputSize = 100000;
+    const int vecSize = vecCore::VectorSize<Double_v>();
+
+    // Vectorized and linear input
+    Double_v vectorInput[inputSize];
+    Double_t linearInput[inputSize * vecSize];
+
+    // Vectorized and linear output
+    Double_v vectorOutput[inputSize];
+    Double_t linearOutput[inputSize * vecSize];
+
+    // Parameters vector
+    Double_t mean[inputSize];
+    Double_t sigma[inputSize];
+
+    // Randomize input data and parameters
+    TRandom1 rndmzr;
+    rndmzr.RndmArray(inputSize * vecSize, linearInput);
+    rndmzr.RndmArray(inputSize, mean);
+    rndmzr.RndmArray(inputSize, sigma);
+
+    // Set -100 < mean < 100 and 0 < sigma < 200
+    for (size_t i = 0; i < inputSize; i++) {
+        mean[i] = mean[i] * 200 - 100;
+        sigma[i] *= 200;
+    }
+
+    // Copy input linear data to the vectorized array
+    for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
+        for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
+            vectorInput[caseIdx][vecIdx] = linearInput[vecSize * caseIdx + vecIdx];
+        }
+    }
+
+    // Clocks to measure (l)inear and (v)ectorized performance
+    TStopwatch clock_l, clock_v;
+
+    // Vectorized computation
+    clock_v.Start();
+    for (size_t _ = 0; _ < numRepetitions; _++) {
+        for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
+            vectorOutput[caseIdx] = TMath::Gaus_v(vectorInput[caseIdx], mean[caseIdx], sigma[caseIdx]);
+        }
+    }
+    clock_v.Stop();
+
+    // Linear computation
+    int idx;
+    clock_l.Start();
+    for (size_t _ = 0; _ < numRepetitions; _++) {
+    for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
+            for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
+                idx = caseIdx * vecSize + vecIdx;
+                linearOutput[idx] = TMath::Gaus(linearInput[idx], mean[caseIdx], sigma[caseIdx]);
+            }
+        }
+    }
+    clock_l.Stop();
+
+    // Check
+    for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
+        for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
+            Double_t diff = TMath::Abs(linearOutput[caseIdx*vecSize + vecIdx] - vectorOutput[caseIdx][vecIdx]);
+            if(diff > 1e-15)
+                std::cout << diff << std::endl;
+        }
+    }
+
+    cout << "Linear:     "; clock_l.Print();
+    cout << "Vectorized: "; clock_v.Print();
+    cout << "SPEEDUP:    x" << clock_l.RealTime() / clock_v.RealTime() << endl;
+}

--------------2.12.1--
	From 16daa489ab290265379e5ff44c9ceb871367b233 Mon Sep 17 00:00:00 2001
	From: =?UTF-8?q?Alejandro=20Garc=C3=ADa=20Montoro?=
	<alejandro.garciamontoro@gmail.com>
	Date: Tue, 28 Mar 2017 01:53:45 +0200
	Subject: [PATCH] Implements TMath::Gaus_v, vectorized version of TMath::Gaus
	MIME-Version: 1.0
	Content-Type: multipart/mixed; boundary="------------2.12.1"

	This is a multi-part message in MIME format.
	--------------2.12.1
	Content-Type: text/plain; charset=UTF-8; format=fixed
	Content-Transfer-Encoding: 8bit

	---
	math/mathcore/CMakeLists.txt \| 5 +-
	math/mathcore/inc/TMath.h \| 5 +-
	math/mathcore/inc/TMath_VecTest.h \| 43 +++++++++++++++
	math/mathcore/src/TMath.cxx \| 60 +++++++++++++++++++++
	math/mathcore/test/vectorizationTest.cxx \| 90 ++++++++++++++++++++++++++++++++
	5 files changed, 197 insertions(+), 6 deletions(-)
	create mode 100644 math/mathcore/inc/TMath_VecTest.h
	create mode 100644 math/mathcore/test/vectorizationTest.cxx


	--------------2.12.1
	Content-Type: text/x-patch; name="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"
	Content-Transfer-Encoding: 8bit
	Content-Disposition: attachment; filename="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"

	diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt
	index 8bcbd1f760..df980d89af 100644
	--- a/math/mathcore/CMakeLists.txt
	+++ b/math/mathcore/CMakeLists.txt
	@@ -17,6 +17,7 @@ set(MATHCORE_HEADERS TRandom.h
	Math/ChebyshevPol.h Math/KDTree.h Math/TDataPoint.h Math/TDataPointN.h Math/Delaunay2D.h
	Math/Random.h Math/TRandomEngine.h Math/RandomFunctions.h Math/StdEngine.h
	Math/MersenneTwisterEngine.h Math/MixMaxEngine.h TRandomGen.h Math/LCGEngine.h
	+ TMath_VecTest.h
	)

	ROOT_GENERATE_DICTIONARY(G__MathCore TComplex.h TMath.h ${MATHCORE_HEADERS} Fit/*.h MODULE MathCore LINKDEF LinkDef.h OPTIONS "-writeEmptyRootPCM")
	@@ -33,7 +34,3 @@ ROOT_LINKER_LIBRARY(MathCore .cxx .c G__MathCore.cxx LIBRARIES ${CMAKE_THREAD_
	ROOT_INSTALL_HEADERS()

	ROOT_ADD_TEST_SUBDIRECTORY(test)
	-
	-
	-
	-
	diff --git a/math/mathcore/inc/TMath.h b/math/mathcore/inc/TMath.h
	index 5a6a365504..b9aea8db83 100644
	--- a/math/mathcore/inc/TMath.h
	+++ b/math/mathcore/inc/TMath.h
	@@ -35,6 +35,7 @@
	#include <limits>
	#include <cmath>

	+
	namespace TMath {

	/* ************************* */
	@@ -600,7 +601,7 @@ namespace detailsForFastMath {
	inline int IsNaN(double x)
	{
	UInt_t hx, lx;
	-
	+
	EXTRACT_WORDS(hx, lx, x);

	lx \|= hx & 0xfffff;
	@@ -888,7 +889,7 @@ Double_t TMath::RMS(Iterator first, Iterator last, WeightIterator w)
	while ( first != last ) {
	Double_t x = Double_t(*first);
	sumw += *w;
	- sumw2 += (w) (*w);
	+ sumw2 += (w) (*w);
	tot += (w) (x - mean)*(x - mean);
	++first;
	++w;
	diff --git a/math/mathcore/inc/TMath_VecTest.h b/math/mathcore/inc/TMath_VecTest.h
	new file mode 100644
	index 0000000000..dba0dc0109
	--- /dev/null
	+++ b/math/mathcore/inc/TMath_VecTest.h
	@@ -0,0 +1,43 @@
	+// @(#)root/mathcore:$Id$
	+// Authors: Alejandro García Montoro
	+
	+/*************************************************************************
	+ * Copyright (C) 2017, Alejandro García Montoro. *
	+ * All rights reserved. *
	+ * *
	+ * For the licensing terms see $ROOTSYS/LICENSE. *
	+ * For the list of contributors see $ROOTSYS/README/CREDITS. *
	+ *************************************************************************/
	+
	+#ifndef ROOT_TMath_VecTest
	+#define ROOT_TMath_VecTest
	+
	+
	+//////////////////////////////////////////////////////////////////////////
	+// //
	+// TMath //
	+// //
	+// Encapsulate most frequently used Math functions. //
	+// NB. The basic functions Min, Max, Abs and Sign are defined //
	+// in TMathBase. //
	+// //
	+//////////////////////////////////////////////////////////////////////////
	+#ifndef ROOT_Rtypes
	+#include "Rtypes.h"
	+#endif
	+#ifndef ROOT_TMathBase
	+#include "TMathBase.h"
	+#endif
	+
	+#include "TError.h"
	+#include <algorithm>
	+#include <limits>
	+#include <cmath>
	+
	+#include "Math/Math_vectypes.hxx"
	+
	+namespace TMath{
	+ Double_v Gaus_v(Double_v x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE);
	+}
	+
	+#endif // ROOT_TMath_VecTest
	diff --git a/math/mathcore/src/TMath.cxx b/math/mathcore/src/TMath.cxx
	index 942bb55637..11312ca758 100644
	--- a/math/mathcore/src/TMath.cxx
	+++ b/math/mathcore/src/TMath.cxx
	@@ -18,6 +18,7 @@
	//////////////////////////////////////////////////////////////////////////

	#include "TMath.h"
	+#include "TMath_VecTest.h"
	#include "TError.h"
	#include <math.h>
	#include <string.h>
	@@ -461,6 +462,65 @@ Double_t TMath::Gaus(Double_t x, Double_t mean, Double_t sigma, Bool_t norm)
	}

	////////////////////////////////////////////////////////////////////////////////
	+/// Calculate a gaussian function with mean and sigma.
	+/// If norm=kTRUE (default is kFALSE) the result is divided
	+/// by sqrt(2Pi)sigma.
	+
	+Double_v TMath::Gaus_v(Double_v x, Double_t mean, Double_t sigma, Bool_t norm)
	+{
	+ if (sigma == 0)
	+ return 1.e30;
	+
	+ Double_v arg = (x-mean)/sigma;
	+
	+ // for \|arg\| > 39 result is zero in double precision
	+ vecCore::Mask_v<Double_v> mask = !(arg < -39.0 \|\| arg > 39.0);
	+
	+ // Initialize the result to 0.0
	+ Double_v res(0.0);
	+
	+ // Compute the function only when the arg meets the criteria, using the mask computed before
	+ vecCore::MaskedAssign<Double_v>(res, mask, vecCore::math::Exp<Double_v>(-0.5 * arg * arg));
	+
	+ if (!norm)
	+ return res;
	+
	+ return res/(2.50662827463100024sigma); //sqrt(2Pi)=2.50662827463100024
	+}
	+
	+
	+////////////////////////////////////////////////////////////////////////////////
	+/// Calculate a gaussian function with mean and sigma.
	+/// If norm=kTRUE (default is kFALSE) the result is divided
	+/// by sqrt(2Pi)sigma.
	+
	+// Double_v TMath::Gaus_v_v(Double_v x, Double_v mean, Double_v sigma, Mask<Double_v> norm)
	+// {
	+// vecCore::Mask_v<Double_v> null_sigma = (sigma == 0);
	+//
	+// Double_v res = 0.0;
	+// res(null_sigma) = 1.e30;
	+//
	+// if(!null_sigma.all_of())
	+// {
	+// Double_v arg;
	+//
	+// arg(!null_sigma) = (x-mean)/sigma;
	+//
	+// // for \|arg\| > 39 result is zero in double precision
	+// vecCore::Mask_v<Double_v> good_arg = !(arg < -39.0 \|\| arg > 39.0);
	+//
	+// res( !null_sigma && good_arg ) = vecCore::math::Exp(-0.5 * arg * arg);
	+//
	+// if (norm)
	+// res(!null_sigma) /= (2.50662827463100024sigma); //sqrt(2Pi)=2.50662827463100024
	+// }
	+//
	+// return res;
	+// }
	+
	+
	+////////////////////////////////////////////////////////////////////////////////
	/// The LANDAU function.
	/// mu is a location parameter and correspond approximately to the most probable value
	/// and sigma is a scale parameter (not the sigma of the full distribution which is not defined)
	diff --git a/math/mathcore/test/vectorizationTest.cxx b/math/mathcore/test/vectorizationTest.cxx
	new file mode 100644
	index 0000000000..e7d70b7771
	--- /dev/null
	+++ b/math/mathcore/test/vectorizationTest.cxx
	@@ -0,0 +1,90 @@
	+#include "Math/Random.h"
	+
	+#include "TRandom1.h"
	+#include "TStopwatch.h"
	+#include "TMath_VecTest.h"
	+
	+#include <iostream>
	+#include <random>
	+
	+// using namespace ROOT::Math;
	+using namespace ROOT;
	+using namespace std;
	+
	+// TStopwatch w; w.S
	+
	+int main(){
	+ // Test and vectorization size
	+ const int numRepetitions = 1000;
	+ const int inputSize = 100000;
	+ const int vecSize = vecCore::VectorSize<Double_v>();
	+
	+ // Vectorized and linear input
	+ Double_v vectorInput[inputSize];
	+ Double_t linearInput[inputSize * vecSize];
	+
	+ // Vectorized and linear output
	+ Double_v vectorOutput[inputSize];
	+ Double_t linearOutput[inputSize * vecSize];
	+
	+ // Parameters vector
	+ Double_t mean[inputSize];
	+ Double_t sigma[inputSize];
	+
	+ // Randomize input data and parameters
	+ TRandom1 rndmzr;
	+ rndmzr.RndmArray(inputSize * vecSize, linearInput);
	+ rndmzr.RndmArray(inputSize, mean);
	+ rndmzr.RndmArray(inputSize, sigma);
	+
	+ // Set -100 < mean < 100 and 0 < sigma < 200
	+ for (size_t i = 0; i < inputSize; i++) {
	+ mean[i] = mean[i] * 200 - 100;
	+ sigma[i] *= 200;
	+ }
	+
	+ // Copy input linear data to the vectorized array
	+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
	+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
	+ vectorInput[caseIdx][vecIdx] = linearInput[vecSize * caseIdx + vecIdx];
	+ }
	+ }
	+
	+ // Clocks to measure (l)inear and (v)ectorized performance
	+ TStopwatch clock_l, clock_v;
	+
	+ // Vectorized computation
	+ clock_v.Start();
	+ for (size_t _ = 0; _ < numRepetitions; _++) {
	+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
	+ vectorOutput[caseIdx] = TMath::Gaus_v(vectorInput[caseIdx], mean[caseIdx], sigma[caseIdx]);
	+ }
	+ }
	+ clock_v.Stop();
	+
	+ // Linear computation
	+ int idx;
	+ clock_l.Start();
	+ for (size_t _ = 0; _ < numRepetitions; _++) {
	+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
	+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
	+ idx = caseIdx * vecSize + vecIdx;
	+ linearOutput[idx] = TMath::Gaus(linearInput[idx], mean[caseIdx], sigma[caseIdx]);
	+ }
	+ }
	+ }
	+ clock_l.Stop();
	+
	+ // Check
	+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
	+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
	+ Double_t diff = TMath::Abs(linearOutput[caseIdx*vecSize + vecIdx] - vectorOutput[caseIdx][vecIdx]);
	+ if(diff > 1e-15)
	+ std::cout << diff << std::endl;
	+ }
	+ }
	+
	+ cout << "Linear: "; clock_l.Print();
	+ cout << "Vectorized: "; clock_v.Print();
	+ cout << "SPEEDUP: x" << clock_l.RealTime() / clock_v.RealTime() << endl;
	+}

	--------------2.12.1--