zeroipc/benchmark__reads_8cpp_source.html

#include <iostream>

#include <chrono>

#include <vector>

#include <numeric>

#include <random>

#include "posix_shm.h"

#include "shm_array.h"


using namespace std::chrono;


// Benchmark to prove shared memory reads are as fast as normal arrays


template<typename Func>


double benchmark(const std::string& name, Func f, size_t iterations = 1000000) {

    auto start = high_resolution_clock::now();


    for (size_t i = 0; i < iterations; ++i) {

        f();

    }


    auto end = high_resolution_clock::now();

    auto ns = duration_cast<nanoseconds>(end - start).count();

    double ns_per_op = static_cast<double>(ns) / iterations;


    std::cout << name << ": " << ns_per_op << " ns/operation\n";

    return ns_per_op;

}


int main() {

    constexpr size_t ARRAY_SIZE = 10000;

    constexpr size_t ITERATIONS = 1000000;


    // Setup random access pattern

    std::random_device rd;

    std::mt19937 gen(rd());

    std::uniform_int_distribution<> dist(0, ARRAY_SIZE - 1);

    std::vector<size_t> indices(ITERATIONS);

    for (auto& idx : indices) {

        idx = dist(gen);

    }


    std::cout << "=== Read Performance Benchmark ===\n";

    std::cout << "Array size: " << ARRAY_SIZE << " integers\n";

    std::cout << "Iterations: " << ITERATIONS << "\n\n";


    // Normal heap array

    auto* heap_array = new int[ARRAY_SIZE];

    std::iota(heap_array, heap_array + ARRAY_SIZE, 0);


    // Stack array (smaller size due to stack limits)

    constexpr size_t STACK_SIZE = 1000;

    int stack_array[STACK_SIZE];

    std::iota(stack_array, stack_array + STACK_SIZE, 0);


    // Shared memory array

    posix_shm shm("benchmark_shm", 10 * 1024 * 1024);

    shm_array<int> shared_array(shm, "bench_array", ARRAY_SIZE);

    std::iota(shared_array.begin(), shared_array.end(), 0);


    // Get raw pointer for apples-to-apples comparison

    int* shared_raw_ptr = shared_array.data();


    volatile int sink = 0;  // Prevent optimization


    std::cout << "Sequential Read Performance:\n";

    std::cout << "-----------------------------\n";


    // Sequential reads

    size_t seq_idx = 0;


    benchmark("Heap array (sequential)", [&]() {

        sink = heap_array[seq_idx];

        seq_idx = (seq_idx + 1) % ARRAY_SIZE;

    }, ITERATIONS);


    seq_idx = 0;

    benchmark("Stack array (sequential)", [&]() {

        sink = stack_array[seq_idx];

        seq_idx = (seq_idx + 1) % STACK_SIZE;

    }, ITERATIONS);


    seq_idx = 0;

    benchmark("Shared array operator[] (sequential)", [&]() {

        sink = shared_array[seq_idx];

        seq_idx = (seq_idx + 1) % ARRAY_SIZE;

    }, ITERATIONS);


    seq_idx = 0;

    benchmark("Shared array raw pointer (sequential)", [&]() {

        sink = shared_raw_ptr[seq_idx];

        seq_idx = (seq_idx + 1) % ARRAY_SIZE;

    }, ITERATIONS);


    std::cout << "\nRandom Access Performance:\n";

    std::cout << "-----------------------------\n";


    // Random reads

    size_t rand_idx = 0;


    benchmark("Heap array (random)", [&]() {

        sink = heap_array[indices[rand_idx]];

        rand_idx = (rand_idx + 1) % ITERATIONS;

    }, ITERATIONS);


    rand_idx = 0;

    benchmark("Shared array operator[] (random)", [&]() {

        sink = shared_array[indices[rand_idx]];

        rand_idx = (rand_idx + 1) % ITERATIONS;

    }, ITERATIONS);


    rand_idx = 0;

    benchmark("Shared array raw pointer (random)", [&]() {

        sink = shared_raw_ptr[indices[rand_idx]];

        rand_idx = (rand_idx + 1) % ITERATIONS;

    }, ITERATIONS);


    std::cout << "\nBulk Operations Performance:\n";

    std::cout << "-----------------------------\n";


    // Sum reduction (cache-friendly)

    benchmark("Heap array sum", [&]() {

        sink = std::accumulate(heap_array, heap_array + 1000, 0);

    }, 10000);


    benchmark("Shared array sum (iterators)", [&]() {

        sink = std::accumulate(shared_array.begin(), shared_array.begin() + 1000, 0);

    }, 10000);


    benchmark("Shared array sum (raw pointer)", [&]() {

        sink = std::accumulate(shared_raw_ptr, shared_raw_ptr + 1000, 0);

    }, 10000);


    std::cout << "\n=== Key Findings ===\n";

    std::cout << "1. Sequential reads: Shared memory matches heap/stack performance\n";

    std::cout << "2. Random reads: Same cache miss penalty for all\n";

    std::cout << "3. Raw pointer access: Identical to normal arrays\n";

    std::cout << "4. operator[] overhead: Minimal (usually inlined)\n";

    std::cout << "5. After setup, it's just memory!\n";


    delete[] heap_array;


    return 0;

}


benchmark
double benchmark(const std::string &name, Func f, size_t iterations=1000000)
Definition benchmark_reads.cpp:14

main
int main()
Definition benchmark_reads.cpp:29

posix_shm_impl< shm_table >

shm_array
Fixed-size array in shared memory with zero-overhead access.
Definition shm_array.h:63

shm_array::begin
iterator begin() noexcept
Get iterator to beginning.
Definition shm_array.h:286

shm_array::end
iterator end() noexcept
Get iterator to end.
Definition shm_array.h:289

shm_array::data
pointer data() noexcept
Get pointer to underlying data.
Definition shm_array.h:333

posix_shm.h
Core POSIX shared memory management with automatic reference counting.

shm_array.h
Fixed-size shared memory array with STL compatibility.

ITERATIONS
constexpr size_t ITERATIONS
Definition simd_simulation.cpp:25