realies/memory_speeds4.cpp

## memory_speeds4.cpp
#include <mach/mach_host.h>
#include <sys/sysctl.h>
#include <unistd.h>

#include <chrono>
#include <cstdint>
#include <iostream>
#include <memory>
#include <new>

// Number of full passes over the array made by each timed loop.
constexpr int64_t REPETITIONS{5};

/**
 * Issues a data synchronization barrier via the ARM `__builtin_arm_dsb`
 * builtin, passing option value 0xF (binary 1111).
 *
 * NOTE(review): despite the name, no cache clean/invalidate operation is
 * performed here; only the barrier is emitted. Confirm that a barrier alone
 * is what the callers need between benchmark passes.
 */
inline void flush_cache() {
    __builtin_arm_dsb(0xF);
}

/**
 * Times REPETITIONS sequential passes over `arr`, executing an exclusive
 * load / exclusive store pair (ldxr/stxr) on every element, and prints the
 * mean time per element in nanoseconds to std::cout.
 *
 * Each element is stored back with the value that was just loaded, so the
 * array contents are not changed by this function.
 *
 * NOTE(review): elements are visited in address order, so the reported
 * figure probably reflects streaming throughput more than random-access
 * latency. Confirm this is the intended measurement.
 *
 * @param arr  Array to walk. A null pointer is reported on std::cerr and
 *             nothing is measured.
 * @param size Number of elements in `arr`. A non-positive value is reported
 *             on std::cerr and nothing is measured.
 */
void measure_memory_latency(int64_t* arr, int64_t size) {
    if (arr == nullptr) {
        std::cerr << "Invalid pointer" << std::endl;
        return;
    }
    if (size <= 0) {
        // With no accesses the per-operation average below would divide by zero.
        std::cerr << "Invalid size" << std::endl;
        return;
    }

    const auto start = std::chrono::high_resolution_clock::now();

    for (int64_t pass = 0; pass < REPETITIONS; ++pass) {
        for (int64_t j = 0; j < size; ++j) {
            int64_t tmp;
            // Exclusive load, then exclusive store of the same value; the
            // store's status result goes to wzr, i.e. it is discarded.
            __asm__ __volatile__("ldxr %0, [%1]\n\t"
                                 "stxr wzr, %0, [%1]\n\t"
                                 : "=&r"(tmp)
                                 : "r"(&arr[j])
                                 : "memory");
        }
        flush_cache();
    }

    const auto end = std::chrono::high_resolution_clock::now();
    const std::chrono::duration<double> elapsed = end - start;

    // Count accesses in floating point so the product cannot overflow int64_t.
    const double total_accesses =
        static_cast<double>(size) * static_cast<double>(REPETITIONS);
    const double time_per_operation = (elapsed.count() / total_accesses) * 1e9;

    std::cout << "Memory latency: " << time_per_operation << " ns" << std::endl;
}


/**
 * Measures sequential read and write bandwidth over `arr` and prints both
 * figures in GB/s (1e9 bytes per second) to std::cout.
 *
 * Read phase: REPETITIONS passes that only load every element and fold it
 * into a checksum, so the reported figure is not mixed with store traffic.
 * Write phase: REPETITIONS passes that store the pass index into every
 * element. On return every element therefore holds REPETITIONS - 1.
 *
 * @param arr  Array to read and overwrite. A null pointer is reported on
 *             std::cerr and nothing is measured.
 * @param size Number of elements in `arr`. A non-positive value is reported
 *             on std::cerr and nothing is measured.
 */
void measure_read_write_speed(int64_t* arr, int64_t size) {
    if (arr == nullptr) {
        std::cerr << "Invalid pointer" << std::endl;
        return;
    }
    if (size <= 0) {
        std::cerr << "Invalid size" << std::endl;
        return;
    }

    // Bytes moved by one phase; computed in double to avoid integer overflow.
    const double total_bytes = static_cast<double>(sizeof(int64_t)) *
                               static_cast<double>(size) *
                               static_cast<double>(REPETITIONS);

    // ---- Read phase ----
    auto start = std::chrono::high_resolution_clock::now();

    // Unsigned accumulator: wrap-around is well defined, unlike signed overflow.
    uint64_t checksum = 0;
    for (int64_t pass = 0; pass < REPETITIONS; ++pass) {
        for (int64_t j = 0; j < size; ++j) {
            checksum += static_cast<uint64_t>(arr[j]);
        }
        // Empty asm that consumes the checksum and clobbers memory: keeps the
        // optimizer from dropping the loads or reusing them across passes.
        __asm__ __volatile__("" : "+r"(checksum) : : "memory");
        flush_cache();
    }

    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = end - start;
    const double read_speed = total_bytes / elapsed.count() / 1e9;

    // ---- Write phase ----
    start = std::chrono::high_resolution_clock::now();

    for (int64_t pass = 0; pass < REPETITIONS; ++pass) {
        for (int64_t j = 0; j < size; ++j) {
            arr[j] = pass;
        }
        flush_cache();
    }

    end = std::chrono::high_resolution_clock::now();
    elapsed = end - start;
    const double write_speed = total_bytes / elapsed.count() / 1e9;

    std::cout << "Read speed: " << read_speed << " GB/s" << std::endl;
    std::cout << "Write speed: " << write_speed << " GB/s" << std::endl;
}

int main() {
    vm_size_t page_size;
    mach_port_t mach_port;
    mach_msg_type_number_t count;
    vm_statistics_data_t vm_stats;

    mach_port = mach_host_self();
    count = sizeof(vm_stats) / sizeof(natural_t);

    if (host_page_size(mach_port, &page_size) != KERN_SUCCESS) {
        std::cerr << "Failed to get page size" << std::endl;
        return 1;
    }

    if (host_statistics(mach_port, HOST_VM_INFO, (host_info_t)&vm_stats, &count) != KERN_SUCCESS) {
        std::cerr << "Failed to get VM statistics" << std::endl;
        return 1;
    }

    int64_t free_mem_bytes = static_cast<int64_t>(vm_stats.free_count) * static_cast<int64_t>(page_size);

    int64_t array_size = free_mem_bytes / sizeof(int64_t);
    std::cout << "Allocating " << array_size << " integers (" << free_mem_bytes << " bytes)" << std::endl;

    int64_t* arr = new int64_t[array_size];

    // Check if allocation succeeded
    if (arr == nullptr) {
        std::cerr << "Failed to allocate memory" << std::endl;
        return 1;
    }

    // Initialize array
    for (int64_t i = 0; i < array_size; ++i) {
        arr[i] = i;
    }

    measure_memory_latency(arr, array_size);
    measure_read_write_speed(arr, array_size);

    delete[] arr;
    return 0;
}
	#include <iostream>
	#include <chrono>
	#include <cstdint>
	#include <unistd.h>
	#include <sys/sysctl.h>
	#include <mach/mach_host.h>

	// Number of full passes over the array made by each timed loop.
	constexpr int64_t REPETITIONS{5};

	/**
	 * Issues a data synchronization barrier via the ARM `__builtin_arm_dsb`
	 * builtin, passing option value 0xF (binary 1111).
	 *
	 * NOTE(review): despite the name, no cache clean/invalidate operation is
	 * performed here; only the barrier is emitted. Confirm that a barrier alone
	 * is what the callers need between benchmark passes.
	 */
	inline void flush_cache() {
	    __builtin_arm_dsb(0xF);
	}

	/**
	 * Times REPETITIONS sequential passes over `arr`, executing an exclusive
	 * load / exclusive store pair (ldxr/stxr) on every element, and prints the
	 * mean time per element in nanoseconds to std::cout.
	 *
	 * Each element is stored back with the value that was just loaded, so the
	 * array contents are not changed by this function.
	 *
	 * NOTE(review): elements are visited in address order, so the reported
	 * figure probably reflects streaming throughput more than random-access
	 * latency. Confirm this is the intended measurement.
	 *
	 * @param arr  Array to walk. A null pointer is reported on std::cerr and
	 *             nothing is measured.
	 * @param size Number of elements in `arr`. A non-positive value is reported
	 *             on std::cerr and nothing is measured.
	 */
	void measure_memory_latency(int64_t* arr, int64_t size) {
	    if (arr == nullptr) {
	        std::cerr << "Invalid pointer" << std::endl;
	        return;
	    }
	    if (size <= 0) {
	        // With no accesses the per-operation average below would divide by zero.
	        std::cerr << "Invalid size" << std::endl;
	        return;
	    }

	    const auto start = std::chrono::high_resolution_clock::now();

	    for (int64_t pass = 0; pass < REPETITIONS; ++pass) {
	        for (int64_t j = 0; j < size; ++j) {
	            int64_t tmp;
	            // Exclusive load, then exclusive store of the same value; the
	            // store's status result goes to wzr, i.e. it is discarded.
	            __asm__ __volatile__("ldxr %0, [%1]\n\t"
	                                 "stxr wzr, %0, [%1]\n\t"
	                                 : "=&r"(tmp)
	                                 : "r"(&arr[j])
	                                 : "memory");
	        }
	        flush_cache();
	    }

	    const auto end = std::chrono::high_resolution_clock::now();
	    const std::chrono::duration<double> elapsed = end - start;

	    // Count accesses in floating point so the product cannot overflow int64_t.
	    const double total_accesses =
	        static_cast<double>(size) * static_cast<double>(REPETITIONS);
	    const double time_per_operation = (elapsed.count() / total_accesses) * 1e9;

	    std::cout << "Memory latency: " << time_per_operation << " ns" << std::endl;
	}


	/**
	 * Measures sequential read and write bandwidth over `arr` and prints both
	 * figures in GB/s (1e9 bytes per second) to std::cout.
	 *
	 * Read phase: REPETITIONS passes that only load every element and fold it
	 * into a checksum, so the reported figure is not mixed with store traffic.
	 * Write phase: REPETITIONS passes that store the pass index into every
	 * element. On return every element therefore holds REPETITIONS - 1.
	 *
	 * @param arr  Array to read and overwrite. A null pointer is reported on
	 *             std::cerr and nothing is measured.
	 * @param size Number of elements in `arr`. A non-positive value is reported
	 *             on std::cerr and nothing is measured.
	 */
	void measure_read_write_speed(int64_t* arr, int64_t size) {
	    if (arr == nullptr) {
	        std::cerr << "Invalid pointer" << std::endl;
	        return;
	    }
	    if (size <= 0) {
	        std::cerr << "Invalid size" << std::endl;
	        return;
	    }

	    // Bytes moved by one phase; computed in double to avoid integer overflow.
	    const double total_bytes = static_cast<double>(sizeof(int64_t)) *
	                               static_cast<double>(size) *
	                               static_cast<double>(REPETITIONS);

	    // ---- Read phase ----
	    auto start = std::chrono::high_resolution_clock::now();

	    // Unsigned accumulator: wrap-around is well defined, unlike signed overflow.
	    uint64_t checksum = 0;
	    for (int64_t pass = 0; pass < REPETITIONS; ++pass) {
	        for (int64_t j = 0; j < size; ++j) {
	            checksum += static_cast<uint64_t>(arr[j]);
	        }
	        // Empty asm that consumes the checksum and clobbers memory: keeps the
	        // optimizer from dropping the loads or reusing them across passes.
	        __asm__ __volatile__("" : "+r"(checksum) : : "memory");
	        flush_cache();
	    }

	    auto end = std::chrono::high_resolution_clock::now();
	    std::chrono::duration<double> elapsed = end - start;
	    const double read_speed = total_bytes / elapsed.count() / 1e9;

	    // ---- Write phase ----
	    start = std::chrono::high_resolution_clock::now();

	    for (int64_t pass = 0; pass < REPETITIONS; ++pass) {
	        for (int64_t j = 0; j < size; ++j) {
	            arr[j] = pass;
	        }
	        flush_cache();
	    }

	    end = std::chrono::high_resolution_clock::now();
	    elapsed = end - start;
	    const double write_speed = total_bytes / elapsed.count() / 1e9;

	    std::cout << "Read speed: " << read_speed << " GB/s" << std::endl;
	    std::cout << "Write speed: " << write_speed << " GB/s" << std::endl;
	}

	int main() {
	vm_size_t page_size;
	mach_port_t mach_port;
	mach_msg_type_number_t count;
	vm_statistics_data_t vm_stats;

	mach_port = mach_host_self();
	count = sizeof(vm_stats) / sizeof(natural_t);

	if (host_page_size(mach_port, &page_size) != KERN_SUCCESS) {
	std::cerr << "Failed to get page size" << std::endl;
	return 1;
	}

	if (host_statistics(mach_port, HOST_VM_INFO, (host_info_t)&vm_stats, &count) != KERN_SUCCESS) {
	std::cerr << "Failed to get VM statistics" << std::endl;
	return 1;
	}

	int64_t free_mem_bytes = static_cast<int64_t>(vm_stats.free_count) * static_cast<int64_t>(page_size);

	int64_t array_size = free_mem_bytes / sizeof(int64_t);
	std::cout << "Allocating " << array_size << " integers (" << free_mem_bytes << " bytes)" << std::endl;

	int64_t* arr = new int64_t[array_size];

	// Check if allocation succeeded
	if (arr == nullptr) {
	std::cerr << "Failed to allocate memory" << std::endl;
	return 1;
	}

	// Initialize array
	for (int64_t i = 0; i < array_size; ++i) {
	arr[i] = i;
	}

	measure_memory_latency(arr, array_size);
	measure_read_write_speed(arr, array_size);

	delete[] arr;
	return 0;
	}