POSIX Shared Memory Data Structures 1.0
High-performance lock-free data structures for inter-process communication
Loading...
Searching...
No Matches
simd_simulation.cpp
Go to the documentation of this file.
1
13#include <iostream>
14#include <chrono>
15#include <random>
16#include <iomanip>
17#include "posix_shm.h"
18#include "shm_array.h"
19#include "shm_simd_utils.h"
20
21using namespace std::chrono;
22
23// Number of particles in simulation
24constexpr size_t NUM_PARTICLES = 100000;
25constexpr size_t ITERATIONS = 100;
26
32 shm_array<float> x, y, z; // Position
33 shm_array<float> vx, vy, vz; // Velocity
34 shm_array<float> ax, ay, az; // Acceleration
36
37 ParticleSystemSoA(posix_shm& shm, size_t count)
38 : x(shm, "pos_x", count), y(shm, "pos_y", count), z(shm, "pos_z", count),
39 vx(shm, "vel_x", count), vy(shm, "vel_y", count), vz(shm, "vel_z", count),
40 ax(shm, "acc_x", count), ay(shm, "acc_y", count), az(shm, "acc_z", count),
41 mass(shm, "mass", count) {}
42};
43
44// Initialize particles with random values
46 std::random_device rd;
47 std::mt19937 gen(rd());
48 std::uniform_real_distribution<float> pos_dist(-100.0f, 100.0f);
49 std::uniform_real_distribution<float> vel_dist(-1.0f, 1.0f);
50 std::uniform_real_distribution<float> mass_dist(0.1f, 10.0f);
51
52 for (size_t i = 0; i < NUM_PARTICLES; ++i) {
53 particles.x[i] = pos_dist(gen);
54 particles.y[i] = pos_dist(gen);
55 particles.z[i] = pos_dist(gen);
56
57 particles.vx[i] = vel_dist(gen);
58 particles.vy[i] = vel_dist(gen);
59 particles.vz[i] = vel_dist(gen);
60
61 particles.ax[i] = 0.0f;
62 particles.ay[i] = 0.0f;
63 particles.az[i] = -9.81f; // Gravity
64
65 particles.mass[i] = mass_dist(gen);
66 }
67}
68
69// Scalar version of physics update
70void update_physics_scalar(ParticleSystemSoA& particles, float dt) {
71 for (size_t i = 0; i < NUM_PARTICLES; ++i) {
72 // Update velocity: v = v + a * dt
73 particles.vx[i] += particles.ax[i] * dt;
74 particles.vy[i] += particles.ay[i] * dt;
75 particles.vz[i] += particles.az[i] * dt;
76
77 // Update position: p = p + v * dt
78 particles.x[i] += particles.vx[i] * dt;
79 particles.y[i] += particles.vy[i] * dt;
80 particles.z[i] += particles.vz[i] * dt;
81
82 // Simple boundary collision (bounce)
83 if (particles.z[i] < 0.0f) {
84 particles.z[i] = 0.0f;
85 particles.vz[i] = -particles.vz[i] * 0.8f; // Energy loss
86 }
87 }
88}
89
90// SIMD version of physics update
91void update_physics_simd(ParticleSystemSoA& particles, float dt) {
92 // Prepare dt vectors
93 __m256 dt_vec = _mm256_set1_ps(dt);
94 __m256 zero_vec = _mm256_setzero_ps();
95 __m256 bounce_factor = _mm256_set1_ps(0.8f);
96
97 size_t simd_count = NUM_PARTICLES & ~7; // Process 8 at a time
98
99 for (size_t i = 0; i < simd_count; i += 8) {
100 // Load current values
101 __m256 vx = _mm256_loadu_ps(&particles.vx[i]);
102 __m256 vy = _mm256_loadu_ps(&particles.vy[i]);
103 __m256 vz = _mm256_loadu_ps(&particles.vz[i]);
104
105 __m256 ax = _mm256_loadu_ps(&particles.ax[i]);
106 __m256 ay = _mm256_loadu_ps(&particles.ay[i]);
107 __m256 az = _mm256_loadu_ps(&particles.az[i]);
108
109 __m256 px = _mm256_loadu_ps(&particles.x[i]);
110 __m256 py = _mm256_loadu_ps(&particles.y[i]);
111 __m256 pz = _mm256_loadu_ps(&particles.z[i]);
112
113 // Update velocity: v = v + a * dt (using FMA)
114 vx = _mm256_fmadd_ps(ax, dt_vec, vx);
115 vy = _mm256_fmadd_ps(ay, dt_vec, vy);
116 vz = _mm256_fmadd_ps(az, dt_vec, vz);
117
118 // Update position: p = p + v * dt
119 px = _mm256_fmadd_ps(vx, dt_vec, px);
120 py = _mm256_fmadd_ps(vy, dt_vec, py);
121 pz = _mm256_fmadd_ps(vz, dt_vec, pz);
122
123 // Boundary collision for z
124 __m256 below_ground = _mm256_cmp_ps(pz, zero_vec, _CMP_LT_OQ);
125 pz = _mm256_max_ps(pz, zero_vec); // Clamp to ground
126
127 // Reverse and dampen velocity for particles that hit ground
128 __m256 neg_vz = _mm256_mul_ps(vz, _mm256_set1_ps(-1.0f));
129 __m256 damped_vz = _mm256_mul_ps(neg_vz, bounce_factor);
130 vz = _mm256_blendv_ps(vz, damped_vz, below_ground);
131
132 // Store results
133 _mm256_storeu_ps(&particles.vx[i], vx);
134 _mm256_storeu_ps(&particles.vy[i], vy);
135 _mm256_storeu_ps(&particles.vz[i], vz);
136
137 _mm256_storeu_ps(&particles.x[i], px);
138 _mm256_storeu_ps(&particles.y[i], py);
139 _mm256_storeu_ps(&particles.z[i], pz);
140 }
141
142 // Handle remainder with scalar code
143 for (size_t i = simd_count; i < NUM_PARTICLES; ++i) {
144 particles.vx[i] += particles.ax[i] * dt;
145 particles.vy[i] += particles.ay[i] * dt;
146 particles.vz[i] += particles.az[i] * dt;
147
148 particles.x[i] += particles.vx[i] * dt;
149 particles.y[i] += particles.vy[i] * dt;
150 particles.z[i] += particles.vz[i] * dt;
151
152 if (particles.z[i] < 0.0f) {
153 particles.z[i] = 0.0f;
154 particles.vz[i] = -particles.vz[i] * 0.8f;
155 }
156 }
157}
158
159// Calculate total kinetic energy (demonstrates reduction)
161 float total_ke = 0.0f;
162
163 for (size_t i = 0; i < NUM_PARTICLES; ++i) {
164 float v_squared = particles.vx[i] * particles.vx[i] +
165 particles.vy[i] * particles.vy[i] +
166 particles.vz[i] * particles.vz[i];
167 total_ke += 0.5f * particles.mass[i] * v_squared;
168 }
169
170 return total_ke;
171}
172
173// SIMD version using helper functions
175 // Create temporary arrays for v²
176 shm_array<float> v_squared(particles.x.shm, "v_squared_temp", NUM_PARTICLES);
177
178 // Calculate v² = vx² + vy² + vz² using SIMD
179 size_t simd_count = NUM_PARTICLES & ~7;
180
181 for (size_t i = 0; i < simd_count; i += 8) {
182 __m256 vx = _mm256_loadu_ps(&particles.vx[i]);
183 __m256 vy = _mm256_loadu_ps(&particles.vy[i]);
184 __m256 vz = _mm256_loadu_ps(&particles.vz[i]);
185
186 __m256 vx2 = _mm256_mul_ps(vx, vx);
187 __m256 vy2 = _mm256_mul_ps(vy, vy);
188 __m256 vz2 = _mm256_mul_ps(vz, vz);
189
190 __m256 v2 = _mm256_add_ps(vx2, _mm256_add_ps(vy2, vz2));
191 _mm256_storeu_ps(&v_squared[i], v2);
192 }
193
194 // Handle remainder
195 for (size_t i = simd_count; i < NUM_PARTICLES; ++i) {
196 v_squared[i] = particles.vx[i] * particles.vx[i] +
197 particles.vy[i] * particles.vy[i] +
198 particles.vz[i] * particles.vz[i];
199 }
200
201 // Scale by 0.5 * mass
202 shm_simd::scale_floats(v_squared.data(), NUM_PARTICLES, 0.5f);
203
204 // Use SIMD dot product to sum (mass * v²/2)
205 return shm_simd::dot_product(particles.mass.data(), v_squared.data(), NUM_PARTICLES);
206}
207
208int main() {
209 try {
210 // Create shared memory segment
211 size_t shm_size = 20 * NUM_PARTICLES * sizeof(float); // Space for all arrays
212 posix_shm shm("simd_simulation", shm_size);
213
214 std::cout << "=== SIMD Particle Simulation ===" << std::endl;
215 std::cout << "Particles: " << NUM_PARTICLES << std::endl;
216 std::cout << "Iterations: " << ITERATIONS << std::endl;
217 std::cout << std::endl;
218
219 // Create particle system
220 ParticleSystemSoA particles(shm, NUM_PARTICLES);
221 initialize_particles(particles);
222
223 const float dt = 0.01f; // Time step
224
225 // Benchmark scalar version
226 auto start = high_resolution_clock::now();
227 for (size_t i = 0; i < ITERATIONS; ++i) {
228 update_physics_scalar(particles, dt);
229 }
230 float ke_scalar = calculate_kinetic_energy_scalar(particles);
231 auto end = high_resolution_clock::now();
232 auto scalar_time = duration_cast<microseconds>(end - start).count();
233
234 // Reset particles
235 initialize_particles(particles);
236
237 // Benchmark SIMD version
238 start = high_resolution_clock::now();
239 for (size_t i = 0; i < ITERATIONS; ++i) {
240 update_physics_simd(particles, dt);
241 }
242 float ke_simd = calculate_kinetic_energy_simd(particles);
243 end = high_resolution_clock::now();
244 auto simd_time = duration_cast<microseconds>(end - start).count();
245
246 // Results
247 std::cout << "Performance Results:" << std::endl;
248 std::cout << "-------------------" << std::endl;
249 std::cout << std::fixed << std::setprecision(2);
250 std::cout << "Scalar version: " << scalar_time << " µs" << std::endl;
251 std::cout << "SIMD version: " << simd_time << " µs" << std::endl;
252 std::cout << "Speedup: " << (float)scalar_time / simd_time << "x" << std::endl;
253 std::cout << std::endl;
254
255 std::cout << "Final kinetic energy:" << std::endl;
256 std::cout << "Scalar: " << ke_scalar << " J" << std::endl;
257 std::cout << "SIMD: " << ke_simd << " J" << std::endl;
258 std::cout << "Difference: " << std::abs(ke_scalar - ke_simd) << " J" << std::endl;
259 std::cout << std::endl;
260
261 // Demonstrate other SIMD operations
262 std::cout << "SIMD Helper Functions:" << std::endl;
263 std::cout << "---------------------" << std::endl;
264
265 shm_simd::SimdArray vx_simd(particles.vx);
266 std::cout << "Min velocity X: " << vx_simd.min() << " m/s" << std::endl;
267 std::cout << "Max velocity X: " << vx_simd.max() << " m/s" << std::endl;
268 std::cout << "Sum velocity X: " << vx_simd.sum() << " m/s" << std::endl;
269
270 // Check alignment
271 std::cout << std::endl;
272 std::cout << "Memory Alignment:" << std::endl;
273 std::cout << "----------------" << std::endl;
274 std::cout << "Position X aligned to 32: "
275 << shm_simd::is_aligned(particles.x.data(), 32) << std::endl;
276 std::cout << "Velocity X aligned to 32: "
277 << shm_simd::is_aligned(particles.vx.data(), 32) << std::endl;
278
279 return 0;
280
281 } catch (const std::exception& e) {
282 std::cerr << "Error: " << e.what() << std::endl;
283 return 1;
284 }
285}
Fixed-size array in shared memory with zero-overhead access.
Definition shm_array.h:63
pointer data() noexcept
Get pointer to underlying data.
Definition shm_array.h:333
Helper class for SIMD operations on shm_array.
float max() const noexcept
float min() const noexcept
float sum() const noexcept
ShmType & shm
Definition shm_span.h:13
bool is_aligned(const void *ptr, size_t alignment) noexcept
Check if pointer is aligned to boundary.
float dot_product(const float *a, const float *b, size_t count) noexcept
Vectorized dot product of two float arrays.
void scale_floats(float *data, size_t count, float scale) noexcept
Vectorized array scaling (multiply by scalar)
Core POSIX shared memory management with automatic reference counting.
Fixed-size shared memory array with STL compatibility.
SIMD-optimized utilities for simulation workloads.
void update_physics_scalar(ParticleSystemSoA &particles, float dt)
float calculate_kinetic_energy_simd(ParticleSystemSoA &particles)
constexpr size_t ITERATIONS
float calculate_kinetic_energy_scalar(ParticleSystemSoA &particles)
void initialize_particles(ParticleSystemSoA &particles)
constexpr size_t NUM_PARTICLES
void update_physics_simd(ParticleSystemSoA &particles, float dt)
int main()
shm_array< float > ax
ParticleSystemSoA(posix_shm &shm, size_t count)
shm_array< float > y
shm_array< float > ay
shm_array< float > vz
shm_array< float > mass
shm_array< float > x
shm_array< float > az
shm_array< float > vx
shm_array< float > vy
shm_array< float > z