Skip to content

Instantly share code, notes, and snippets.

@nesteruk
Last active June 7, 2020 15:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nesteruk/ebde71e9550f90ab725cb26dcab01698 to your computer and use it in GitHub Desktop.
Save nesteruk/ebde71e9550f90ab725cb26dcab01698 to your computer and use it in GitHub Desktop.
#define _USE_MATH_DEFINES
#include <iostream>
#include <cmath>
#include <array>
#include <chrono>
#include <immintrin.h>
#include <random>
// Standard normal probability density function
double norm_pdf(const double& x) {
const static auto one_over_root_two_pi = (1.0 / (sqrt(2 * M_PI)));
return one_over_root_two_pi * exp(-0.5 * x * x);
}
__m256d norm_pdf(const __m256d& x)
{
const static double mul = 1.0 / (sqrt(2 * M_PI));
const static __m256d mul_vector = {mul,mul,mul,mul};
const static __m256d minus_half = {-0.5,-0.5,-0.5,-0.5};
return _mm256_mul_pd(
mul_vector,
_mm256_exp_pd(
_mm256_mul_pd(minus_half,
_mm256_mul_pd(x, x)
)
)
);
}
// An approximation to the cumulative distribution function
// for the standard normal distribution
// Note: This is a recursive function
double norm_cdf(const double& x) {
double k = 1.0 / (1.0 + 0.2316419 * x);
double k_sum = k * (0.319381530 + k * (-0.356563782 + k * (1.781477937 + k * (-1.821255978 + 1.330274429 * k))));
if (x >= 0.0) {
return (1.0 - (1.0 / (sqrt(2 * M_PI))) * exp(-0.5 * x * x) * k_sum);
}
else {
return 1.0 - norm_cdf(-x);
}
}
// This calculates d_j, for j in {1,2}. This term appears in the closed
// form solution for the European call or put price
double d_j(const int& j, const double& S, const double& K, const double& r, const double& v, const double& T) {
return (log(S / K) + (r + (pow(-1, j - 1)) * 0.5 * v * v) * T) / (v * (sqrt(T)));
}
__m256d d1(const __m256d& s, const __m256d& k,
const __m256d& r, const __m256d& v, const __m256d& t)
{
const __m256d half = _mm256_set1_pd(0.5);
return _mm256_fmadd_pd(
_mm256_log_pd(_mm256_div_pd(s, k)), /* log(s/k) */
_mm256_mul_pd(t, _mm256_fmadd_pd(_mm256_mul_pd(v, v), half, r)), /* (r + 0.5*v*v) */
_mm256_invsqrt_pd(_mm256_mul_pd(_mm256_mul_pd(v, v), t)));
}
__m256d d2(const __m256d& s, const __m256d& k,
const __m256d& r, const __m256d& v, const __m256d& t)
{
return _mm256_sub_pd(
d1(s, k, r, v, t),
_mm256_mul_pd(v, _mm256_sqrt_pd(t)));
}
__m256d call_price(const __m256d& s, const __m256d& k,
const __m256d& r, const __m256d& v, const __m256d& t)
{
const auto D1 = d1(s, k, r, v, t);
const auto D2 = d2(s, k, r, v, t);
const auto s_n_d1 = _mm256_mul_pd(s, _mm256_cdfnorm_pd(D1));
const auto e_min_rt = _mm256_exp_pd(
_mm256_xor_pd(_mm256_mul_pd(r, t), _mm256_set1_pd(-0.0))
);
// fma it!
return _mm256_fnmadd_pd(_mm256_mul_pd(k, e_min_rt),_mm256_cdfnorm_pd(D2), s_n_d1);
}
__m256d put_price(const __m256d& s, const __m256d& k,
const __m256d& r, const __m256d& v, const __m256d& t)
{
const auto zero = _mm256_set1_pd(0.0);
auto e_min_rt = _mm256_exp_pd(
//_mm256_fnmadd_pd(r, t, zero)
_mm256_xor_pd(_mm256_mul_pd(r, t), _mm256_set1_pd(-0.0))
);
return
_mm256_add_pd(
_mm256_sub_pd(_mm256_mul_pd(k, e_min_rt), s),
call_price(s, k, r, v, t));
}
// Calculate the European vanilla call price based on
// underlying S, strike K, risk-free rate r, volatility of
// underlying sigma and time to maturity T
double call_price(const double& S, const double& K, const double& r, const double& v, const double& T) {
return S * norm_cdf(d_j(1, S, K, r, v, T)) - K * exp(-r * T) * norm_cdf(d_j(2, S, K, r, v, T));
}
// Calculate the European vanilla put price based on
// underlying S, strike K, risk-free rate r, volatility of
// underlying sigma and time to maturity T
double put_price(const double& S, const double& K, const double& r, const double& v, const double& T) {
return -S * norm_cdf(-d_j(1, S, K, r, v, T)) + K * exp(-r * T) * norm_cdf(-d_j(2, S, K, r, v, T));
}
int main(int argc, char** argv)
{
using namespace std;
using namespace chrono;
constexpr int count{1024};
random_device rd;
mt19937_64 gen(rd());
const uniform_int_distribution<> price_dist(50, 150);
const uniform_real_distribution<> rate_dist(0.01, 0.1);
const uniform_real_distribution<> vol_dist(0.1, 0.4);
const uniform_real<> time_dist(0.1, 1);
// scalars
array<double, count> s, k, r, v, t, c, p;
array<__m256d, count<<2> ss, kk, rr, vv, tt, cc, pp;
for (int i = 0; i < count; ++i)
{
s[i] = price_dist(gen);
k[i] = price_dist(gen);
r[i] = rate_dist(gen);
v[i] = vol_dist(gen);
t[i] = time_dist(gen);
ss[i >> 2].m256d_f64[i%4] = price_dist(gen);
kk[i >> 2].m256d_f64[i%4] = price_dist(gen);
rr[i >> 2].m256d_f64[i%4] = rate_dist(gen);
vv[i >> 2].m256d_f64[i%4] = vol_dist(gen);
tt[i >> 2].m256d_f64[i%4] = time_dist(gen);
}
constexpr int iterations{1024*16};
double scalar_total{0.0}, vector_total{0.0};
for (int i = 0; i < iterations; ++i)
{
auto scalar_start = high_resolution_clock::now();
for (int i = 0; i < count; ++i)
{
c[i] = call_price(s[i], k[i], r[i], v[i], t[i]);
p[i] = put_price(s[i], k[i], r[i], v[i], t[i]);
}
auto scalar_end = high_resolution_clock::now();
scalar_total += duration_cast<microseconds>(scalar_end-scalar_start).count();
}
cout << "scalar execution took " << (scalar_total / iterations) << " microseconds\n";
for (int i = 0; i < iterations; ++i)
{
auto vector_start = high_resolution_clock::now();
for (int i = 0; i < (count >> 2); ++i)
{
cc[i] = call_price(ss[i], kk[i], rr[i], vv[i], tt[i]);
pp[i] = put_price(ss[i], kk[i], rr[i], vv[i], tt[i]);
}
auto vector_end = high_resolution_clock::now();
vector_total += duration_cast<microseconds>(vector_end-vector_start).count();
}
cout << "vector execution took " << (vector_total / iterations) << " microseconds\n";
cout << "speedup factor is " << scalar_total / vector_total << "\n";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment