dc1394/vmc_xorshift.cpp

## vmc_xorshift.cpp
// 元のコード→ https://miyantarumi.hatenablog.com/entry/2022/04/08/080000
#include <array> // for std::array
#include <cmath> // for std::asin, std::cos, std::exp, std::fabs, std::hypot, std::log, std::sqrt, std::sin
#include <cstdint> // for std::int32_t
#include <iomanip> // for std::setprecision
#include <ios> // for std::ios::fixed, std::ios::floatfield
#include <iostream> // for std::cout, std::endl
#include <utility> // for std::make_pair, std::pair
#include <vector> // for std::vector
#define __AVX2_AVAILABLE__
#include "SIMDInstructionSet.h"
#include "Xoshiro256Plus.h"

namespace {
static auto constexpr ALPHA_0 = 0.8; // 変分を始める初期値
static auto constexpr H = 1.0E-3; // 数値微分の幅
static auto constexpr NSAMPLES = 30000;
static auto constexpr NSKIP = 4000;
static auto constexpr NWALKERS = 400;
static auto constexpr SEED = 20240102;
static auto constexpr SIGMA = 0.4; // metropoliswalkでの正規乱数の分散
static auto constexpr THRESHOLD = 1.0E-6;

class MyRand final {
    using Xoshiro256PlusAVX2 = SEFUtility::RNG::Xoshiro256Plus<SIMDInstructionSet::AVX2>;
    static auto constexpr ARRAYSIZE = 4;

public:
    MyRand(double mean, double sigma);
    MyRand() = delete;
    ~MyRand() = default;

    double normal_dist_rand();
    double rand();

private:
    void make_nor_rndarray();
    void make_rndarray();

    Xoshiro256PlusAVX2 avx_rng_;
    std::int32_t cnt_nor_rnd_ = 0;
    std::int32_t cnt_rnd_ = 0;
    double mean_;
    alignas(32) std::array<double, ARRAYSIZE> normal_dist_randarray_;
    __m256d randarray_;
    double sigma_;
};

struct Walker_pos {
    Walker_pos()
        : x_(NWALKERS, 0.0)
        , y_(NWALKERS, 0.0)
        , z_(NWALKERS, 0.0)
    {
    }
    std::vector<double> x_;
    std::vector<double> y_;
    std::vector<double> z_;
};

void initial_position(MyRand& mr, Walker_pos& wa_pos);
std::pair<double, double> metropolis_test(double alpha, MyRand& mr);
void run_vmc();
}

int main()
{
    run_vmc();

    return 0;
}

namespace {
inline MyRand::MyRand(double mean, double sigma)
    : avx_rng_(SEED)
    , mean_(mean)
    , sigma_(sigma)
{
    make_rndarray();
    make_nor_rndarray();
}

// inline void MyRand::make_nor_rndarray()
// {
//     static auto const pi = std::asin(1.0) * 2.0;

//     __m256d pi_vec = _mm256_set1_pd(pi);
//     __m256d sigma_vec = _mm256_set1_pd(sigma_);
//     __m256d two_vec = _mm256_set1_pd(2.0);
//     __m256d minus_two_vec = _mm256_set1_pd(-2.0);

//     // Generate 4 random numbers at once using a suitable AVX2-optimized random number generator
//     __m256d p_vec = avx_rng_.dnext4();
//     __m256d q_vec = avx_rng_.dnext4();

//     // Calculate intermediate values using AVX2 instructions
//     __m256d log_p_vec = _mm256_set_pd(std::log(p_vec[0]), std::log(p_vec[1]), std::log(p_vec[2]), std::log(p_vec[3]));
//     __m256d sqrt_minus_two_log_p_vec = _mm256_sqrt_pd(_mm256_mul_pd(minus_two_vec, log_p_vec));
//     auto const tmp1_vec = _mm256_mul_pd(two_vec, _mm256_mul_pd(pi_vec, q_vec));
//     __m256d cos_vec = _mm256_set_pd(std::cos(tmp1_vec[0]), std::cos(tmp1_vec[1]), std::cos(tmp1_vec[2]), std::cos(tmp1_vec[3]));
//     auto const tmp2_vec = _mm256_mul_pd(two_vec, _mm256_mul_pd(pi_vec, q_vec));
//     __m256d sin_vec = _mm256_set_pd(std::sin(tmp2_vec[0]), std::sin(tmp2_vec[1]), std::sin(tmp2_vec[2]), std::sin(tmp2_vec[3]));

//     // Combine results and store in the output array
//     _mm256_storeu_pd(normal_dist_randarray_.data(), _mm256_mul_pd(sigma_vec, _mm256_blend_pd(cos_vec, sin_vec, 0b1010)));
//     _mm256_storeu_pd(normal_dist_randarray_.data() + 4, _mm256_mul_pd(sigma_vec, _mm256_blend_pd(sin_vec, cos_vec, 0b1010)));
// }

inline void MyRand::make_nor_rndarray()
{
    auto const u = avx_rng_.dnext4();
    normal_dist_randarray_ = { sigma_ * std::sqrt(-2.0 * std::log(u[0])),
        sigma_ * std::sqrt(-2.0 * std::log(u[1])),
        sigma_ * std::sqrt(-2.0 * std::log(u[2])),
        sigma_ * std::sqrt(-2.0 * std::log(u[3])) };
}

inline void MyRand::make_rndarray()
{
    randarray_ = avx_rng_.dnext4();
}

inline double MyRand::normal_dist_rand()
{
    auto const val = normal_dist_randarray_[cnt_nor_rnd_];

    cnt_nor_rnd_++;
    if (cnt_nor_rnd_ >= ARRAYSIZE) {
        make_nor_rndarray();
        cnt_nor_rnd_ = 0;
    }

    return val;
}

inline double MyRand::rand()
{
    auto const val = randarray_[cnt_rnd_];

    cnt_rnd_++;
    if (cnt_rnd_ >= ARRAYSIZE) {
        make_rndarray();
        cnt_rnd_ = 0;
    }

    return val;
}

void initial_position(MyRand& mr, Walker_pos& wa_pos)
{
    for (auto i = 0; i < NWALKERS; i++) {
        wa_pos.x_[i] = (mr.rand() - 0.5) * 2.0 * 2.0;
        wa_pos.y_[i] = (mr.rand() - 0.5) * 2.0 * 2.0;
        wa_pos.z_[i] = (mr.rand() - 0.5) * 2.0 * 2.0;
    }
}

std::pair<double, double> metropolis_test(double alpha, MyRand& mr)
{
    std::vector<double> E_w(NWALKERS, 0.0);
    std::vector<double> Edlnpsi_w(NWALKERS, 0.0);
    std::vector<double> dlnpsi_w(NWALKERS, 0.0);

    Walker_pos wa_pos;

    initial_position(mr, wa_pos);

    for (auto i = 0; i < NSAMPLES; i++) {
        // Metropolis Testを行う

        for (auto j = 0; j < NWALKERS; j++) {
            auto const x = wa_pos.x_[j];
            auto const y = wa_pos.y_[j];
            auto const z = wa_pos.z_[j];

            auto const xp = x + mr.normal_dist_rand();
            auto const yp = y + mr.normal_dist_rand();
            auto const zp = z + mr.normal_dist_rand();

            auto const r = std::hypot(x, y, z);
            auto const rp = std::hypot(xp, yp, zp);

            if (mr.rand() < std::exp(2.0 * alpha * (-rp + r))) {
                wa_pos.x_[j] = xp;
                wa_pos.y_[j] = yp;
                wa_pos.z_[j] = zp;
            }

            // Metropolis Testの終了
            // 変数を更新し終えた

            // walkerについて<E>や<Edlnpsi>,<dlnpsi>を計算する.
            if (i >= NSKIP) {
                auto const rtemp = std::hypot(wa_pos.x_[j], wa_pos.y_[j], wa_pos.z_[j]);
                E_w[j] += -1.0 / rtemp - 0.5 * alpha * (alpha - 2.0 / rtemp);
                Edlnpsi_w[j] += -rtemp * (-1.0 / rtemp - 0.5 * alpha * (alpha - 2.0 / rtemp));
                dlnpsi_w[j] -= rtemp;
            }
        }
    }

    auto E = 0.0; // 変数の初期化
    auto Edlnpsi = 0.0;
    auto dlnpsi = 0.0;

    for (auto i = 0; i < NWALKERS; i++) {
        E_w[i] /= static_cast<double>(NSAMPLES - NSKIP);
        Edlnpsi_w[i] /= static_cast<double>(NSAMPLES - NSKIP);
        dlnpsi_w[i] /= static_cast<double>(NSAMPLES - NSKIP);
    }

    // walker全ての平均を取ることにする
    for (auto j = 0; j < NWALKERS; j++) {
        E += E_w[j];
        Edlnpsi += Edlnpsi_w[j];
        dlnpsi += dlnpsi_w[j];
    }

    E /= NWALKERS;
    Edlnpsi /= NWALKERS;
    dlnpsi /= NWALKERS; // dE/dalphaの計算をするための部品を計算した.

    auto const dE = 2.0 * (Edlnpsi - E * dlnpsi); // dE/dalphaの計算ができた.

    return std::make_pair(E, dE);
}

void run_vmc()
{
    MyRand mr(0.0, SIGMA);
    std::cout.setf(std::ios::fixed, std::ios::floatfield);

    double alpha_ini; // Newton法の始まり値

    auto alpha_fin = ALPHA_0; // Newton法の更新値

    do {
        alpha_ini = alpha_fin; // 前のループの結果を始まりの値に代入

        // **********************************************
        // alpha_iniでのエネルギーの微分dE/dalphaを計算する.
        // **********************************************

        auto const dE = metropolis_test(alpha_ini, mr).second;

        // ***************************
        // ddE/ddalphaの数値微分を行う.
        // **************************

        auto const alpha_plus = alpha_ini + H; // 微分のためにalphaを変化させる
        auto const alpha_minus = alpha_ini - H;

        // *********************************
        // *********************************
        // alpha_plusでのdE/dalphaを計算する.
        // *********************************
        // *********************************

        auto const dE_plus = metropolis_test(alpha_plus, mr).second;

        // *********************************
        // *********************************
        // alpha_minusでのdE/dalphaを計算する.
        // *********************************
        // *********************************

        auto const dE_minus = metropolis_test(alpha_minus, mr).second; // dE/dalpha_minusの計算ができた.

        // *************************************************
        // *************************************************
        // alphaをNewton法で求めるための部品をすべて計算し終えた
        // *************************************************
        // *************************************************

        // alphaの更新を行う

        auto const ddE = (dE_plus - dE_minus) / (2.0 * H); // dE/dalphaの数値微分ddEが計算できた.

        alpha_fin = alpha_ini - dE / ddE; // alphaを更新する.

        std::cout << std::setprecision(10)
                  << "alpha_fin = "
                  << alpha_fin
                  << ", alpha_ini = "
                  << alpha_ini
                  << '\n';

    } while (std::fabs(alpha_fin - alpha_ini) > THRESHOLD);

    auto const alpha_va = alpha_fin; // 変分で最適化した変分パラメータ

    std::cout << "エネルギーが最小になる変分パラメータはα = " << alpha_va << '\n';

    // ****************************************************
    // alpha_vaでの変分で求めた基底状態のエネルギーを計算しよう
    // ****************************************************

    std::cout << "基底状態のエネルギーは"
              << metropolis_test(alpha_va, mr).first
              << " (Hartree)"
              << std::endl;
}
}
	// 元のコード→ https://miyantarumi.hatenablog.com/entry/2022/04/08/080000
	#include <array> // for std::array
	#include <cmath> // for std::asin, std::cos, std::exp, std::fabs, std::hypot, std::log, std::sqrt, std::sin
	#include <cstdint> // for std::int32_t
	#include <iomanip> // for std::setprecision
	#include <ios> // for std::ios::fixed, std::ios::floatfield
	#include <iostream> // for std::cout, std::endl
	#include <utility> // for std::make_pair, std::pair
	#include <vector> // for std::vector
	#define __AVX2_AVAILABLE__
	#include "SIMDInstructionSet.h"
	#include "Xoshiro256Plus.h"

	namespace {
	static auto constexpr ALPHA_0 = 0.8; // 変分を始める初期値
	static auto constexpr H = 1.0E-3; // 数値微分の幅
	static auto constexpr NSAMPLES = 30000;
	static auto constexpr NSKIP = 4000;
	static auto constexpr NWALKERS = 400;
	static auto constexpr SEED = 20240102;
	static auto constexpr SIGMA = 0.4; // metropoliswalkでの正規乱数の分散
	static auto constexpr THRESHOLD = 1.0E-6;

	class MyRand final {
	using Xoshiro256PlusAVX2 = SEFUtility::RNG::Xoshiro256Plus<SIMDInstructionSet::AVX2>;
	static auto constexpr ARRAYSIZE = 4;

	public:
	MyRand(double mean, double sigma);
	MyRand() = delete;
	~MyRand() = default;

	double normal_dist_rand();
	double rand();

	private:
	void make_nor_rndarray();
	void make_rndarray();

	Xoshiro256PlusAVX2 avx_rng_;
	std::int32_t cnt_nor_rnd_ = 0;
	std::int32_t cnt_rnd_ = 0;
	double mean_;
	alignas(32) std::array<double, ARRAYSIZE> normal_dist_randarray_;
	__m256d randarray_;
	double sigma_;
	};

	struct Walker_pos {
	Walker_pos()
	: x_(NWALKERS, 0.0)
	, y_(NWALKERS, 0.0)
	, z_(NWALKERS, 0.0)
	{
	}
	std::vector<double> x_;
	std::vector<double> y_;
	std::vector<double> z_;
	};

	void initial_position(MyRand& mr, Walker_pos& wa_pos);
	std::pair<double, double> metropolis_test(double alpha, MyRand& mr);
	void run_vmc();
	}

	int main()
	{
	run_vmc();

	return 0;
	}

	namespace {
	inline MyRand::MyRand(double mean, double sigma)
	: avx_rng_(SEED)
	, mean_(mean)
	, sigma_(sigma)
	{
	make_rndarray();
	make_nor_rndarray();
	}

	// inline void MyRand::make_nor_rndarray()
	// {
	// static auto const pi = std::asin(1.0) * 2.0;

	// __m256d pi_vec = _mm256_set1_pd(pi);
	// __m256d sigma_vec = _mm256_set1_pd(sigma_);
	// __m256d two_vec = _mm256_set1_pd(2.0);
	// __m256d minus_two_vec = _mm256_set1_pd(-2.0);

	// // Generate 4 random numbers at once using a suitable AVX2-optimized random number generator
	// __m256d p_vec = avx_rng_.dnext4();
	// __m256d q_vec = avx_rng_.dnext4();

	// // Calculate intermediate values using AVX2 instructions
	// __m256d log_p_vec = _mm256_set_pd(std::log(p_vec[0]), std::log(p_vec[1]), std::log(p_vec[2]), std::log(p_vec[3]));
	// __m256d sqrt_minus_two_log_p_vec = _mm256_sqrt_pd(_mm256_mul_pd(minus_two_vec, log_p_vec));
	// auto const tmp1_vec = _mm256_mul_pd(two_vec, _mm256_mul_pd(pi_vec, q_vec));
	// __m256d cos_vec = _mm256_set_pd(std::cos(tmp1_vec[0]), std::cos(tmp1_vec[1]), std::cos(tmp1_vec[2]), std::cos(tmp1_vec[3]));
	// auto const tmp2_vec = _mm256_mul_pd(two_vec, _mm256_mul_pd(pi_vec, q_vec));
	// __m256d sin_vec = _mm256_set_pd(std::sin(tmp2_vec[0]), std::sin(tmp2_vec[1]), std::sin(tmp2_vec[2]), std::sin(tmp2_vec[3]));

	// // Combine results and store in the output array
	// _mm256_storeu_pd(normal_dist_randarray_.data(), _mm256_mul_pd(sigma_vec, _mm256_blend_pd(cos_vec, sin_vec, 0b1010)));
	// _mm256_storeu_pd(normal_dist_randarray_.data() + 4, _mm256_mul_pd(sigma_vec, _mm256_blend_pd(sin_vec, cos_vec, 0b1010)));
	// }

	inline void MyRand::make_nor_rndarray()
	{
	auto const u = avx_rng_.dnext4();
	normal_dist_randarray_ = { sigma_ * std::sqrt(-2.0 * std::log(u[0])),
	sigma_ * std::sqrt(-2.0 * std::log(u[1])),
	sigma_ * std::sqrt(-2.0 * std::log(u[2])),
	sigma_ * std::sqrt(-2.0 * std::log(u[3])) };
	}

	inline void MyRand::make_rndarray()
	{
	randarray_ = avx_rng_.dnext4();
	}

	inline double MyRand::normal_dist_rand()
	{
	auto const val = normal_dist_randarray_[cnt_nor_rnd_];

	cnt_nor_rnd_++;
	if (cnt_nor_rnd_ >= ARRAYSIZE) {
	make_nor_rndarray();
	cnt_nor_rnd_ = 0;
	}

	return val;
	}

	inline double MyRand::rand()
	{
	auto const val = randarray_[cnt_rnd_];

	cnt_rnd_++;
	if (cnt_rnd_ >= ARRAYSIZE) {
	make_rndarray();
	cnt_rnd_ = 0;
	}

	return val;
	}

	void initial_position(MyRand& mr, Walker_pos& wa_pos)
	{
	for (auto i = 0; i < NWALKERS; i++) {
	wa_pos.x_[i] = (mr.rand() - 0.5) * 2.0 * 2.0;
	wa_pos.y_[i] = (mr.rand() - 0.5) * 2.0 * 2.0;
	wa_pos.z_[i] = (mr.rand() - 0.5) * 2.0 * 2.0;
	}
	}

	std::pair<double, double> metropolis_test(double alpha, MyRand& mr)
	{
	std::vector<double> E_w(NWALKERS, 0.0);
	std::vector<double> Edlnpsi_w(NWALKERS, 0.0);
	std::vector<double> dlnpsi_w(NWALKERS, 0.0);

	Walker_pos wa_pos;

	initial_position(mr, wa_pos);

	for (auto i = 0; i < NSAMPLES; i++) {
	// Metropolis Testを行う

	for (auto j = 0; j < NWALKERS; j++) {
	auto const x = wa_pos.x_[j];
	auto const y = wa_pos.y_[j];
	auto const z = wa_pos.z_[j];

	auto const xp = x + mr.normal_dist_rand();
	auto const yp = y + mr.normal_dist_rand();
	auto const zp = z + mr.normal_dist_rand();

	auto const r = std::hypot(x, y, z);
	auto const rp = std::hypot(xp, yp, zp);

	if (mr.rand() < std::exp(2.0 * alpha * (-rp + r))) {
	wa_pos.x_[j] = xp;
	wa_pos.y_[j] = yp;
	wa_pos.z_[j] = zp;
	}

	// Metropolis Testの終了
	// 変数を更新し終えた

	// walkerについて<E>や<Edlnpsi>,<dlnpsi>を計算する.
	if (i >= NSKIP) {
	auto const rtemp = std::hypot(wa_pos.x_[j], wa_pos.y_[j], wa_pos.z_[j]);
	E_w[j] += -1.0 / rtemp - 0.5 * alpha * (alpha - 2.0 / rtemp);
	Edlnpsi_w[j] += -rtemp * (-1.0 / rtemp - 0.5 * alpha * (alpha - 2.0 / rtemp));
	dlnpsi_w[j] -= rtemp;
	}
	}
	}

	auto E = 0.0; // 変数の初期化
	auto Edlnpsi = 0.0;
	auto dlnpsi = 0.0;

	for (auto i = 0; i < NWALKERS; i++) {
	E_w[i] /= static_cast<double>(NSAMPLES - NSKIP);
	Edlnpsi_w[i] /= static_cast<double>(NSAMPLES - NSKIP);
	dlnpsi_w[i] /= static_cast<double>(NSAMPLES - NSKIP);
	}

	// walker全ての平均を取ることにする
	for (auto j = 0; j < NWALKERS; j++) {
	E += E_w[j];
	Edlnpsi += Edlnpsi_w[j];
	dlnpsi += dlnpsi_w[j];
	}

	E /= NWALKERS;
	Edlnpsi /= NWALKERS;
	dlnpsi /= NWALKERS; // dE/dalphaの計算をするための部品を計算した.

	auto const dE = 2.0 * (Edlnpsi - E * dlnpsi); // dE/dalphaの計算ができた.

	return std::make_pair(E, dE);
	}

	void run_vmc()
	{
	MyRand mr(0.0, SIGMA);
	std::cout.setf(std::ios::fixed, std::ios::floatfield);

	double alpha_ini; // Newton法の始まり値

	auto alpha_fin = ALPHA_0; // Newton法の更新値

	do {
	alpha_ini = alpha_fin; // 前のループの結果を始まりの値に代入

	// **********************************************
	// alpha_iniでのエネルギーの微分dE/dalphaを計算する.
	// **********************************************

	auto const dE = metropolis_test(alpha_ini, mr).second;

	// ***************************
	// ddE/ddalphaの数値微分を行う.
	// **************************

	auto const alpha_plus = alpha_ini + H; // 微分のためにalphaを変化させる
	auto const alpha_minus = alpha_ini - H;

	// *********************************
	// *********************************
	// alpha_plusでのdE/dalphaを計算する.
	// *********************************
	// *********************************

	auto const dE_plus = metropolis_test(alpha_plus, mr).second;

	// *********************************
	// *********************************
	// alpha_minusでのdE/dalphaを計算する.
	// *********************************
	// *********************************

	auto const dE_minus = metropolis_test(alpha_minus, mr).second; // dE/dalpha_minusの計算ができた.

	// *************************************************
	// *************************************************
	// alphaをNewton法で求めるための部品をすべて計算し終えた
	// *************************************************
	// *************************************************

	// alphaの更新を行う

	auto const ddE = (dE_plus - dE_minus) / (2.0 * H); // dE/dalphaの数値微分ddEが計算できた.

	alpha_fin = alpha_ini - dE / ddE; // alphaを更新する.

	std::cout << std::setprecision(10)
	<< "alpha_fin = "
	<< alpha_fin
	<< ", alpha_ini = "
	<< alpha_ini
	<< '\n';

	} while (std::fabs(alpha_fin - alpha_ini) > THRESHOLD);

	auto const alpha_va = alpha_fin; // 変分で最適化した変分パラメータ

	std::cout << "エネルギーが最小になる変分パラメータはα = " << alpha_va << '\n';

	// ****************************************************
	// alpha_vaでの変分で求めた基底状態のエネルギーを計算しよう
	// ****************************************************

	std::cout << "基底状態のエネルギーは"
	<< metropolis_test(alpha_va, mr).first
	<< " (Hartree)"
	<< std::endl;
	}
	}