Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save agarciamontoro/c5a302a9d2d17acf89946006be5aa73b to your computer and use it in GitHub Desktop.
Save agarciamontoro/c5a302a9d2d17acf89946006be5aa73b to your computer and use it in GitHub Desktop.
Implementation of a vectorized version of TMath::Gaus
From 16daa489ab290265379e5ff44c9ceb871367b233 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Garc=C3=ADa=20Montoro?=
<alejandro.garciamontoro@gmail.com>
Date: Tue, 28 Mar 2017 01:53:45 +0200
Subject: [PATCH] Implements TMath::Gaus_v, vectorized version of TMath::Gaus
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------2.12.1"
This is a multi-part message in MIME format.
--------------2.12.1
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit
---
math/mathcore/CMakeLists.txt | 5 +-
math/mathcore/inc/TMath.h | 5 +-
math/mathcore/inc/TMath_VecTest.h | 43 +++++++++++++++
math/mathcore/src/TMath.cxx | 60 +++++++++++++++++++++
math/mathcore/test/vectorizationTest.cxx | 90 ++++++++++++++++++++++++++++++++
5 files changed, 197 insertions(+), 6 deletions(-)
create mode 100644 math/mathcore/inc/TMath_VecTest.h
create mode 100644 math/mathcore/test/vectorizationTest.cxx
--------------2.12.1
Content-Type: text/x-patch; name="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"
Content-Transfer-Encoding: 8bit
Content-Disposition: attachment; filename="0001-Implements-TMath-Gaus_v-vectorized-version-of-TMath-.patch"
diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt
index 8bcbd1f760..df980d89af 100644
--- a/math/mathcore/CMakeLists.txt
+++ b/math/mathcore/CMakeLists.txt
@@ -17,6 +17,7 @@ set(MATHCORE_HEADERS TRandom.h
Math/ChebyshevPol.h Math/KDTree.h Math/TDataPoint.h Math/TDataPointN.h Math/Delaunay2D.h
Math/Random.h Math/TRandomEngine.h Math/RandomFunctions.h Math/StdEngine.h
Math/MersenneTwisterEngine.h Math/MixMaxEngine.h TRandomGen.h Math/LCGEngine.h
+ TMath_VecTest.h
)
ROOT_GENERATE_DICTIONARY(G__MathCore TComplex.h TMath.h ${MATHCORE_HEADERS} Fit/*.h MODULE MathCore LINKDEF LinkDef.h OPTIONS "-writeEmptyRootPCM")
@@ -33,7 +34,3 @@ ROOT_LINKER_LIBRARY(MathCore *.cxx *.c G__MathCore.cxx LIBRARIES ${CMAKE_THREAD_
ROOT_INSTALL_HEADERS()
ROOT_ADD_TEST_SUBDIRECTORY(test)
-
-
-
-
diff --git a/math/mathcore/inc/TMath.h b/math/mathcore/inc/TMath.h
index 5a6a365504..b9aea8db83 100644
--- a/math/mathcore/inc/TMath.h
+++ b/math/mathcore/inc/TMath.h
@@ -35,6 +35,7 @@
#include <limits>
#include <cmath>
+
namespace TMath {
/* ************************* */
@@ -600,7 +601,7 @@ namespace detailsForFastMath {
inline int IsNaN(double x)
{
UInt_t hx, lx;
-
+
EXTRACT_WORDS(hx, lx, x);
lx |= hx & 0xfffff;
@@ -888,7 +889,7 @@ Double_t TMath::RMS(Iterator first, Iterator last, WeightIterator w)
while ( first != last ) {
Double_t x = Double_t(*first);
sumw += *w;
- sumw2 += (*w) * (*w);
+ sumw2 += (*w) * (*w);
tot += (*w) * (x - mean)*(x - mean);
++first;
++w;
diff --git a/math/mathcore/inc/TMath_VecTest.h b/math/mathcore/inc/TMath_VecTest.h
new file mode 100644
index 0000000000..dba0dc0109
--- /dev/null
+++ b/math/mathcore/inc/TMath_VecTest.h
@@ -0,0 +1,43 @@
+// @(#)root/mathcore:$Id$
+// Authors: Alejandro García Montoro
+
+/*************************************************************************
+ * Copyright (C) 2017, Alejandro García Montoro. *
+ * All rights reserved. *
+ * *
+ * For the licensing terms see $ROOTSYS/LICENSE. *
+ * For the list of contributors see $ROOTSYS/README/CREDITS. *
+ *************************************************************************/
+
+#ifndef ROOT_TMath_VecTest
+#define ROOT_TMath_VecTest
+
+
+//////////////////////////////////////////////////////////////////////////
+// //
+// TMath //
+// //
+// Encapsulate most frequently used Math functions. //
+// NB. The basic functions Min, Max, Abs and Sign are defined //
+// in TMathBase. //
+// //
+//////////////////////////////////////////////////////////////////////////
+#ifndef ROOT_Rtypes
+#include "Rtypes.h"
+#endif
+#ifndef ROOT_TMathBase
+#include "TMathBase.h"
+#endif
+
+#include "TError.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+
+#include "Math/Math_vectypes.hxx"
+
+namespace TMath{
+ Double_v Gaus_v(Double_v x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE);
+}
+
+#endif // ROOT_TMath_VecTest
diff --git a/math/mathcore/src/TMath.cxx b/math/mathcore/src/TMath.cxx
index 942bb55637..11312ca758 100644
--- a/math/mathcore/src/TMath.cxx
+++ b/math/mathcore/src/TMath.cxx
@@ -18,6 +18,7 @@
//////////////////////////////////////////////////////////////////////////
#include "TMath.h"
+#include "TMath_VecTest.h"
#include "TError.h"
#include <math.h>
#include <string.h>
@@ -461,6 +462,65 @@ Double_t TMath::Gaus(Double_t x, Double_t mean, Double_t sigma, Bool_t norm)
}
////////////////////////////////////////////////////////////////////////////////
+/// Calculate a gaussian function with mean and sigma.
+/// If norm=kTRUE (default is kFALSE) the result is divided
+/// by sqrt(2*Pi)*sigma.
+
+Double_v TMath::Gaus_v(Double_v x, Double_t mean, Double_t sigma, Bool_t norm)
+{
+ if (sigma == 0)
+ return 1.e30;
+
+ Double_v arg = (x-mean)/sigma;
+
+ // for |arg| > 39 result is zero in double precision
+ vecCore::Mask_v<Double_v> mask = !(arg < -39.0 || arg > 39.0);
+
+ // Initialize the result to 0.0
+ Double_v res(0.0);
+
+ // Compute the function only when the arg meets the criteria, using the mask computed before
+ vecCore::MaskedAssign<Double_v>(res, mask, vecCore::math::Exp<Double_v>(-0.5 * arg * arg));
+
+ if (!norm)
+ return res;
+
+ return res/(2.50662827463100024*sigma); //sqrt(2*Pi)=2.50662827463100024
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+/// Calculate a gaussian function with mean and sigma.
+/// If norm=kTRUE (default is kFALSE) the result is divided
+/// by sqrt(2*Pi)*sigma.
+
+// Double_v TMath::Gaus_v_v(Double_v x, Double_v mean, Double_v sigma, Mask<Double_v> norm)
+// {
+// vecCore::Mask_v<Double_v> null_sigma = (sigma == 0);
+//
+// Double_v res = 0.0;
+// res(null_sigma) = 1.e30;
+//
+// if(!null_sigma.all_of())
+// {
+// Double_v arg;
+//
+// arg(!null_sigma) = (x-mean)/sigma;
+//
+// // for |arg| > 39 result is zero in double precision
+// vecCore::Mask_v<Double_v> good_arg = !(arg < -39.0 || arg > 39.0);
+//
+// res( !null_sigma && good_arg ) = vecCore::math::Exp(-0.5 * arg * arg);
+//
+// if (norm)
+// res(!null_sigma) /= (2.50662827463100024*sigma); //sqrt(2*Pi)=2.50662827463100024
+// }
+//
+// return res;
+// }
+
+
+////////////////////////////////////////////////////////////////////////////////
/// The LANDAU function.
/// mu is a location parameter and correspond approximately to the most probable value
/// and sigma is a scale parameter (not the sigma of the full distribution which is not defined)
diff --git a/math/mathcore/test/vectorizationTest.cxx b/math/mathcore/test/vectorizationTest.cxx
new file mode 100644
index 0000000000..e7d70b7771
--- /dev/null
+++ b/math/mathcore/test/vectorizationTest.cxx
@@ -0,0 +1,90 @@
+#include "Math/Random.h"
+
+#include "TRandom1.h"
+#include "TStopwatch.h"
+#include "TMath_VecTest.h"
+
+#include <iostream>
+#include <random>
+
+// using namespace ROOT::Math;
+using namespace ROOT;
+using namespace std;
+
+// TStopwatch w; w.S
+
+int main(){
+ // Test and vectorization size
+ const int numRepetitions = 1000;
+ const int inputSize = 100000;
+ const int vecSize = vecCore::VectorSize<Double_v>();
+
+ // Vectorized and linear input
+ Double_v vectorInput[inputSize];
+ Double_t linearInput[inputSize * vecSize];
+
+ // Vectorized and linear output
+ Double_v vectorOutput[inputSize];
+ Double_t linearOutput[inputSize * vecSize];
+
+ // Parameters vector
+ Double_t mean[inputSize];
+ Double_t sigma[inputSize];
+
+ // Randomize input data and parameters
+ TRandom1 rndmzr;
+ rndmzr.RndmArray(inputSize * vecSize, linearInput);
+ rndmzr.RndmArray(inputSize, mean);
+ rndmzr.RndmArray(inputSize, sigma);
+
+ // Set -100 < mean < 100 and 0 < sigma < 200
+ for (size_t i = 0; i < inputSize; i++) {
+ mean[i] = mean[i] * 200 - 100;
+ sigma[i] *= 200;
+ }
+
+ // Copy input linear data to the vectorized array
+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
+ vectorInput[caseIdx][vecIdx] = linearInput[vecSize * caseIdx + vecIdx];
+ }
+ }
+
+ // Clocks to measure (l)inear and (v)ectorized performance
+ TStopwatch clock_l, clock_v;
+
+ // Vectorized computation
+ clock_v.Start();
+ for (size_t _ = 0; _ < numRepetitions; _++) {
+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
+ vectorOutput[caseIdx] = TMath::Gaus_v(vectorInput[caseIdx], mean[caseIdx], sigma[caseIdx]);
+ }
+ }
+ clock_v.Stop();
+
+ // Linear computation
+ int idx;
+ clock_l.Start();
+ for (size_t _ = 0; _ < numRepetitions; _++) {
+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
+ idx = caseIdx * vecSize + vecIdx;
+ linearOutput[idx] = TMath::Gaus(linearInput[idx], mean[caseIdx], sigma[caseIdx]);
+ }
+ }
+ }
+ clock_l.Stop();
+
+ // Check
+ for (size_t caseIdx = 0; caseIdx < inputSize; caseIdx++) {
+ for (size_t vecIdx = 0; vecIdx < vecSize; vecIdx++) {
+ Double_t diff = TMath::Abs(linearOutput[caseIdx*vecSize + vecIdx] - vectorOutput[caseIdx][vecIdx]);
+ if(diff > 1e-15)
+ std::cout << diff << std::endl;
+ }
+ }
+
+ cout << "Linear: "; clock_l.Print();
+ cout << "Vectorized: "; clock_v.Print();
+ cout << "SPEEDUP: x" << clock_l.RealTime() / clock_v.RealTime() << endl;
+}
--------------2.12.1--
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment