diff --git a/Makefile b/Makefile
index 1f671fc..6bea4a1 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ WEB_HEAP_SIZE := 335544320
WEB_STACK_SIZE := 196608
WEB_SHELL := src/shell.html
-COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -g -flto
+COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -g -flto -msse4.2 -mavx
COMPILER_FLAGS += -DRPROF_IMPLEMENTATION
COMPILER_FLAGS += -DRAYGUI_IMPLEMENTATION
# COMPILER_FLAGS += -DRLGL_IMPLEMENTATION
@@ -75,6 +75,7 @@ ifeq ($(PLATFORM), web)
EMSCRIPTEN_PATH ?= $(EMSDK_PATH)/upstream/emscripten
COMPILER_FLAGS += -I$(EMSCRIPTEN_PATH)/cache/sysroot/include
COMPILER_FLAGS += -D_DEFAULT_SOURCE
+ COMPILER_FLAGS += -msimd128
LINKER_FLAGS += -s USE_GLFW=3
LINKER_FLAGS += -s FORCE_FILESYSTEM=1
LINKER_FLAGS += $(RAYLIB_RELEASE_PATH)/libraylib.a
@@ -83,7 +84,7 @@ ifeq ($(PLATFORM), web)
LINKER_FLAGS += -s STACK_SIZE=$(WEB_STACK_SIZE)
LIB_DEPENDENCIES += emsdk
else
- COMPILER_FLAGS += -march=native
+ COMPILER_FLAGS += -mavx2 -mfma -DSIMD256
endif
LINKER_FLAGS += -L$(RAYLIB_RELEASE_PATH)
diff --git a/src/main.cpp b/src/main.cpp
index 7c3008f..68d2922 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -19,10 +19,6 @@
#include "world.cpp"
#include "ui.cpp"
-//#define USE_TEST_MAIN
-
-// #include "raygui.h"
-
#define FRAMERATE 60
#define TIME_PER_FRAME (1.0/FRAMERATE)
@@ -37,8 +33,8 @@ void UpdateDrawFrame();
static void profiling_test();
int main() {
- // profiling_test();
- // return 0;
+ profiling_test();
+ return 0;
SetTraceLogLevel(LOG_TRACE);
@@ -89,7 +85,7 @@ static void profiling_test() {
SetRandomSeed(10);
float border = g_visuals.boid_edge_size;
- for (int i = 0; i < 45000; i++) {
+ for (int i = 0; i < 50000; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
@@ -105,8 +101,9 @@ static void profiling_test() {
rprof_end();
printf("interactions: %d\n", g_prof_interactions);
- if (g_prof_interactions != 33119854) { // 22 051 739
- printf("!!!!!! ITERACTIONS DONT MATCH, %d\n", g_prof_interactions - 33119854);
+ int expected_interactions = 40501984;
+ if (g_prof_interactions != expected_interactions) { // 22 051 739
+ printf("!!!!!! ITERACTIONS DONT MATCH, %d\n", g_prof_interactions - expected_interactions);
}
rprof_output(NULL);
diff --git a/src/shell.html b/src/shell.html
index 32fb4b5..e80b09f 100644
--- a/src/shell.html
+++ b/src/shell.html
@@ -15,9 +15,9 @@
-
-
-
+
+
+
diff --git a/src/simd.cpp b/src/simd.cpp
new file mode 100644
index 0000000..2eb9254
--- /dev/null
+++ b/src/simd.cpp
@@ -0,0 +1,46 @@
+#include
+#include
+
+#ifdef SIMD256
+#define SIMD_BITS 256
+typedef __m256 __simd;
+typedef __m256i __simdi;
+
+static inline bool mm_is_zero(__simdi x) {
+ return _mm256_testz_si256(x, x);
+}
+
+#define mm_set1_ps(x) _mm256_set1_ps(x)
+#define mm_sub_ps(a, b) _mm256_sub_ps(a, b)
+#define mm_mul_ps(a, b) _mm256_mul_ps(a, b)
+#define mm_and_ps(a, b) _mm256_and_ps(a, b)
+#define mm_store_ps(a, b) _mm256_store_ps(a, b)
+#define mm_fmadd_ps(a, b, c) _mm256_fmadd_ps(a, b, c)
+#define mm_cmp_ps(a, b, c) _mm256_cmp_ps(a, b, c)
+#define mm_rsqrt_ps(a) _mm256_rsqrt_ps(a)
+#define mm_blendv_ps(a, b, c) _mm256_blendv_ps(a, b, c)
+#define mm_cmpeq_epi32(a, b) _mm256_cmpeq_epi32(a, b)
+#else
+#define SIMD_BITS 128
+typedef __m128 __simd;
+typedef __m128i __simdi;
+
+static inline bool mm_is_zero(__simdi x) {
+ return _mm_test_all_zeros(x, _mm_set1_epi8(0xFF));
+}
+
+#define mm_set1_ps(x) _mm_set1_ps(x)
+#define mm_sub_ps(a, b) _mm_sub_ps(a, b)
+#define mm_mul_ps(a, b) _mm_mul_ps(a, b)
+#define mm_and_ps(a, b) _mm_and_ps(a, b)
+#define mm_store_ps(a, b) _mm_store_ps(a, b)
+#define mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps(a, b), c)
+#define mm_cmp_ps(a, b, c) _mm_cmp_ps(a, b, c)
+#define mm_rsqrt_ps(a) _mm_rsqrt_ps(a)
+#define mm_blendv_ps(a, b, c) _mm_blendv_ps(a, b, c)
+#define mm_cmpeq_epi32(a, b) _mm_cmpeq_epi32(a, b)
+#endif
+
+#define SIMD_32B_LANES (SIMD_BITS/32)
+
+#define mm_is_zero(x) mm_is_zero((__simdi)x)
diff --git a/src/world.cpp b/src/world.cpp
index f546c58..fb13773 100644
--- a/src/world.cpp
+++ b/src/world.cpp
@@ -3,7 +3,7 @@
#include "rprof.h"
#include "boid-list.hpp"
-#include
+#include "simd.cpp"
static float vector2_atan2(Vector2 a) {
return std::atan2(a.y, a.x);
@@ -175,198 +175,92 @@ static int nearest_multiple(int num, int divisor) {
return (num / divisor + (num % divisor > 0 ? 1 : 0)) * divisor;
}
-// b2l = boid to (list of boids) comparison
-static void assign_local_boids_b2l(World *world, BoidList *local_boids, uboid_t from_boid, uboid_t *to_boids, uboid_t to_boids_count) {
- Boid *boids = world->boids.data();
-
- // Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
- float dot_threshold = cosf(world->view_angle/2);
- float view_radius_sqr = world->view_radius * world->view_radius;
-
- for (int i = 0; i < to_boids_count; i++) {
- uint16_t to_boid = to_boids[i];
- assert(to_boid != from_boid);
-
- Vector2 offset = Vector2Subtract(boids[from_boid].pos, boids[to_boid].pos);
- float length_sqr = Vector2LengthSqr(offset);
- if (length_sqr > view_radius_sqr) continue;
-
- Vector2 normalized = offset;
- if (length_sqr != 0)
- {
- float ilength = 1.0f/sqrtf(length_sqr);
- normalized.x *= ilength;
- normalized.y *= ilength;
- }
-
- if (Vector2DotProduct(boids[from_boid].dir, Vector2Negate(normalized)) >= dot_threshold) {
- boid_list_append(&world->frame_arena, &local_boids[from_boid], to_boid);
- g_prof_interactions++;
- }
- if (Vector2DotProduct(boids[to_boid].dir, normalized) >= dot_threshold) {
- boid_list_append(&world->frame_arena, &local_boids[to_boid], from_boid);
- g_prof_interactions++;
- }
- }
-}
-
-#ifndef __EMSCRIPTEN__
-static void print_m256_f32(__m256 value) {
- float *value_f32 = (float*)&value;
- printf("%f", value_f32[0]);
- for (int i = 1; i < 8; i++) {
- printf(",%f", value_f32[i]);
- }
- printf("\n");
-}
-
-static inline bool mm256_is_zero(__m256i x) {
- return _mm256_testz_si256(x, x);
-}
-static inline bool mm256_is_zero(__m256 x) {
- return mm256_is_zero((__m256i)x);
-}
-
struct boid_pair {
uboid_t from;
uboid_t to;
};
-static void world_process_local_boid_pairs(World *world, BoidList *local_boids, boid_pair *b2b_cmps, int *b2b_cmps_count) {
+#ifdef SIMD256
+#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \
+ _mm256_set_ps( \
+ boids[b2b_cmps[8*i+7].SIDE].FIELD, \
+ boids[b2b_cmps[8*i+6].SIDE].FIELD, \
+ boids[b2b_cmps[8*i+5].SIDE].FIELD, \
+ boids[b2b_cmps[8*i+4].SIDE].FIELD, \
+ boids[b2b_cmps[8*i+3].SIDE].FIELD, \
+ boids[b2b_cmps[8*i+2].SIDE].FIELD, \
+ boids[b2b_cmps[8*i+1].SIDE].FIELD, \
+ boids[b2b_cmps[8*i+0].SIDE].FIELD \
+ )
+#else
+#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \
+ _mm_set_ps( \
+ boids[b2b_cmps[4*i+3].SIDE].FIELD, \
+ boids[b2b_cmps[4*i+2].SIDE].FIELD, \
+ boids[b2b_cmps[4*i+1].SIDE].FIELD, \
+ boids[b2b_cmps[4*i+0].SIDE].FIELD \
+ )
+#endif
+static void world_calc_distances_and_angles(World *world, BoidList *local_boids, boid_pair *b2b_cmps, int *b2b_cmps_count) {
+ RPROF_START("Calc dot products and distances");
// Simplified from:
// float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
// |
// v
// float dot_threshold = cosf(world->view_angle/2);
- __m256 dot_threshold = _mm256_set1_ps(cosf(world->view_angle/2));
+ __simd dot_threshold = mm_set1_ps(cosf(world->view_angle/2));
float view_radius_sqr = world->view_radius * world->view_radius;
- __m256 view_radius = _mm256_set1_ps(view_radius_sqr);
- __m256 zero = _mm256_set1_ps(0);
- __m256 negative_one = _mm256_set1_ps(-1);
+ __simd view_radius = mm_set1_ps(view_radius_sqr);
+ __simd zero = mm_set1_ps(0);
+ __simd negative_one = mm_set1_ps(-1);
Boid *boids = world->boids.data();
- int32_t *do_append_mask1_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*8, 32);
- int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*8, 32);
+ int32_t *do_append_mask1_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*SIMD_32B_LANES, 32);
+ int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*SIMD_32B_LANES, 32);
- int simd_iteration_count = nearest_multiple(*b2b_cmps_count, 8)/8;
+ int simd_iteration_count = nearest_multiple(*b2b_cmps_count, SIMD_32B_LANES)/SIMD_32B_LANES;
for (int i = 0; i < simd_iteration_count; i++) {
- __m256 from_pos_x = _mm256_set_ps(
- boids[b2b_cmps[8*i+7].from].pos.x,
- boids[b2b_cmps[8*i+6].from].pos.x,
- boids[b2b_cmps[8*i+5].from].pos.x,
- boids[b2b_cmps[8*i+4].from].pos.x,
- boids[b2b_cmps[8*i+3].from].pos.x,
- boids[b2b_cmps[8*i+2].from].pos.x,
- boids[b2b_cmps[8*i+1].from].pos.x,
- boids[b2b_cmps[8*i+0].from].pos.x
- );
+ __simd from_pos_x = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.x);
+ __simd from_pos_y = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.y);
- __m256 from_pos_y = _mm256_set_ps(
- boids[b2b_cmps[8*i+7].from].pos.y,
- boids[b2b_cmps[8*i+6].from].pos.y,
- boids[b2b_cmps[8*i+5].from].pos.y,
- boids[b2b_cmps[8*i+4].from].pos.y,
- boids[b2b_cmps[8*i+3].from].pos.y,
- boids[b2b_cmps[8*i+2].from].pos.y,
- boids[b2b_cmps[8*i+1].from].pos.y,
- boids[b2b_cmps[8*i+0].from].pos.y
- );
+ __simd to_pos_x = GET_F32_CHUNK_FROM_BOIDS(i, to, pos.x);
+ __simd to_pos_y = GET_F32_CHUNK_FROM_BOIDS(i, to, pos.y);
- __m256 to_pos_x = _mm256_set_ps(
- boids[b2b_cmps[8*i+7].to].pos.x,
- boids[b2b_cmps[8*i+6].to].pos.x,
- boids[b2b_cmps[8*i+5].to].pos.x,
- boids[b2b_cmps[8*i+4].to].pos.x,
- boids[b2b_cmps[8*i+3].to].pos.x,
- boids[b2b_cmps[8*i+2].to].pos.x,
- boids[b2b_cmps[8*i+1].to].pos.x,
- boids[b2b_cmps[8*i+0].to].pos.x
- );
+ __simd sub_x = mm_sub_ps(from_pos_x, to_pos_x);
+ __simd sub_y = mm_sub_ps(from_pos_y, to_pos_y);
- __m256 to_pos_y = _mm256_set_ps(
- boids[b2b_cmps[8*i+7].to].pos.y,
- boids[b2b_cmps[8*i+6].to].pos.y,
- boids[b2b_cmps[8*i+5].to].pos.y,
- boids[b2b_cmps[8*i+4].to].pos.y,
- boids[b2b_cmps[8*i+3].to].pos.y,
- boids[b2b_cmps[8*i+2].to].pos.y,
- boids[b2b_cmps[8*i+1].to].pos.y,
- boids[b2b_cmps[8*i+0].to].pos.y
- );
+ __simd length_sqr = mm_fmadd_ps(sub_y, sub_y, mm_mul_ps(sub_x, sub_x));
+ __simdi in_range_mask = (__simdi)mm_cmp_ps(length_sqr, view_radius, _CMP_LE_OQ);
+ if (mm_is_zero(in_range_mask)) continue;
- __m256 sub_x = _mm256_sub_ps(from_pos_x, to_pos_x);
- __m256 sub_y = _mm256_sub_ps(from_pos_y, to_pos_y);
+ __simd from_dir_x = GET_F32_CHUNK_FROM_BOIDS(i, from, dir.x);
+ __simd from_dir_y = GET_F32_CHUNK_FROM_BOIDS(i, from, dir.y);
- __m256 length_sqr = _mm256_fmadd_ps(sub_y, sub_y, _mm256_mul_ps(sub_x, sub_x));
- __m256i in_range_mask = (__m256i)_mm256_cmp_ps(length_sqr, view_radius, _CMP_LE_OQ);
- if (mm256_is_zero(in_range_mask)) continue;
+ __simd to_dir_x = GET_F32_CHUNK_FROM_BOIDS(i, to, dir.x);
+ __simd to_dir_y = GET_F32_CHUNK_FROM_BOIDS(i, to, dir.y);
- __m256 from_dir_x = _mm256_set_ps(
- boids[b2b_cmps[8*i+7].from].dir.x,
- boids[b2b_cmps[8*i+6].from].dir.x,
- boids[b2b_cmps[8*i+5].from].dir.x,
- boids[b2b_cmps[8*i+4].from].dir.x,
- boids[b2b_cmps[8*i+3].from].dir.x,
- boids[b2b_cmps[8*i+2].from].dir.x,
- boids[b2b_cmps[8*i+1].from].dir.x,
- boids[b2b_cmps[8*i+0].from].dir.x
- );
+ __simd is_length_zero = (__simd)mm_cmpeq_epi32((__simdi)length_sqr, (__simdi)zero);
+ __simd ilength = mm_blendv_ps(mm_rsqrt_ps(length_sqr), zero, is_length_zero);
- __m256 from_dir_y = _mm256_set_ps(
- boids[b2b_cmps[8*i+7].from].dir.y,
- boids[b2b_cmps[8*i+6].from].dir.y,
- boids[b2b_cmps[8*i+5].from].dir.y,
- boids[b2b_cmps[8*i+4].from].dir.y,
- boids[b2b_cmps[8*i+3].from].dir.y,
- boids[b2b_cmps[8*i+2].from].dir.y,
- boids[b2b_cmps[8*i+1].from].dir.y,
- boids[b2b_cmps[8*i+0].from].dir.y
- );
+ __simd x_norm = mm_mul_ps(sub_x, ilength);
+ __simd y_norm = mm_mul_ps(sub_y, ilength);
- __m256 to_dir_x = _mm256_set_ps(
- boids[b2b_cmps[8*i+7].to].dir.x,
- boids[b2b_cmps[8*i+6].to].dir.x,
- boids[b2b_cmps[8*i+5].to].dir.x,
- boids[b2b_cmps[8*i+4].to].dir.x,
- boids[b2b_cmps[8*i+3].to].dir.x,
- boids[b2b_cmps[8*i+2].to].dir.x,
- boids[b2b_cmps[8*i+1].to].dir.x,
- boids[b2b_cmps[8*i+0].to].dir.x
- );
+ __simd x_neg_norm = mm_mul_ps(x_norm, negative_one);
+ __simd y_neg_norm = mm_mul_ps(y_norm, negative_one);
- __m256 to_dir_y = _mm256_set_ps(
- boids[b2b_cmps[8*i+7].to].dir.y,
- boids[b2b_cmps[8*i+6].to].dir.y,
- boids[b2b_cmps[8*i+5].to].dir.y,
- boids[b2b_cmps[8*i+4].to].dir.y,
- boids[b2b_cmps[8*i+3].to].dir.y,
- boids[b2b_cmps[8*i+2].to].dir.y,
- boids[b2b_cmps[8*i+1].to].dir.y,
- boids[b2b_cmps[8*i+0].to].dir.y
- );
+ __simd dot_product1 = mm_fmadd_ps(from_dir_y, y_neg_norm, mm_mul_ps(from_dir_x, x_neg_norm));
+ __simd in_angle_mask1 = mm_cmp_ps(dot_product1, dot_threshold, _CMP_GE_OQ);
+ __simd do_append_mask1 = mm_and_ps(in_angle_mask1, (__simd)in_range_mask);
- __m256 is_length_zero = (__m256)_mm256_cmpeq_epi32((__m256i)length_sqr, (__m256i)zero);
- __m256 ilength = _mm256_blendv_ps(_mm256_rsqrt_ps(length_sqr), zero, is_length_zero);
+ __simd dot_product2 = mm_fmadd_ps(to_dir_y, y_norm, mm_mul_ps(to_dir_x, x_norm));
+ __simd in_angle_mask2 = mm_cmp_ps(dot_product2, dot_threshold, _CMP_GE_OQ);
+ __simd do_append_mask2 = mm_and_ps(in_angle_mask2, (__simd)in_range_mask);
- __m256 x_norm = _mm256_mul_ps(sub_x, ilength);
- __m256 y_norm = _mm256_mul_ps(sub_y, ilength);
-
- __m256 x_neg_norm = _mm256_mul_ps(x_norm, negative_one);
- __m256 y_neg_norm = _mm256_mul_ps(y_norm, negative_one);
-
- __m256 dot_product1 = _mm256_fmadd_ps(from_dir_y, y_neg_norm, _mm256_mul_ps(from_dir_x, x_neg_norm));
- __m256 in_angle_mask1 = _mm256_cmp_ps(dot_product1, dot_threshold, _CMP_GE_OQ);
- __m256 do_append_mask1 = _mm256_and_ps(in_angle_mask1, (__m256)in_range_mask);
-
- __m256 dot_product2 = _mm256_fmadd_ps(to_dir_y, y_norm, _mm256_mul_ps(to_dir_x, x_norm));
- __m256 in_angle_mask2 = _mm256_cmp_ps(dot_product2, dot_threshold, _CMP_GE_OQ);
- __m256 do_append_mask2 = _mm256_and_ps(in_angle_mask2, (__m256)in_range_mask);
-
- _mm256_store_ps((float*)do_append_mask1_f32, do_append_mask1);
- _mm256_store_ps((float*)do_append_mask2_f32, do_append_mask2);
- for (int j = 0; j < 8; j++) {
- uboid_t cmp_idx = 8*i + j;
+ mm_store_ps((float*)do_append_mask1_f32, do_append_mask1);
+ mm_store_ps((float*)do_append_mask2_f32, do_append_mask2);
+ for (int j = 0; j < SIMD_32B_LANES; j++) {
+ uboid_t cmp_idx = SIMD_32B_LANES*i + j;
if (cmp_idx >= *b2b_cmps_count) break;
uboid_t from_boid = b2b_cmps[cmp_idx].from;
@@ -383,9 +277,10 @@ static void world_process_local_boid_pairs(World *world, BoidList *local_boids,
}
*b2b_cmps_count = 0;
+ RPROF_STOP();
}
-static void world_compute_local_boids_simd(BoidList *local_boids, World *world, ChunkGrid *chunks) {
+static void world_compute_local_boids(BoidList *local_boids, World *world, ChunkGrid *chunks) {
Boid *boids = world->boids.data();
int boid_count = world->boids.size();
MemoryArena *arena = &world->frame_arena;
@@ -410,7 +305,7 @@ static void world_compute_local_boids_simd(BoidList *local_boids, World *world,
}
RPROF_STOP();
- RPROF_START("Calc dot products and ranges (simd)");
+ RPROF_START("Iterate over chunks");
for (int y = 0; y < chunks->height; y++) {
for (int x = 0; x < chunks->width; x++) {
size_t chunk_idx = chunkgrid_get_idx(chunks, x, y);
@@ -458,55 +353,13 @@ static void world_compute_local_boids_simd(BoidList *local_boids, World *world,
}
if (b2b_cmps_count > 2048*3) {
- world_process_local_boid_pairs(world, local_boids, b2b_cmps, &b2b_cmps_count);
+ world_calc_distances_and_angles(world, local_boids, b2b_cmps, &b2b_cmps_count);
}
}
}
if (b2b_cmps_count > 0) {
- world_process_local_boid_pairs(world, local_boids, b2b_cmps, &b2b_cmps_count);
- }
-
- RPROF_STOP();
-}
-#endif
-
-static void world_compute_local_boids_scalar(BoidList *local_boids, World *world, ChunkGrid *chunks) {
- RPROF_START("Calc dot products and ranges (scalar)");
- for (int y = 0; y < chunks->height; y++) {
- for (int x = 0; x < chunks->width; x++) {
- BoidList *chunk = chunkgrid_get(chunks, x, y);
- if (chunk->count == 0) continue;
-
- uboid_t chunk_boids[chunk->count];
- boid_list_to_array(chunk_boids, chunk);
- for (int i = 0; i < chunk->count-1; i++) {
- uboid_t from_boid = chunk_boids[i];
- uboid_t *to_boids = &chunk_boids[i+1];
- uboid_t to_boids_count = chunk->count-i-1;
- assign_local_boids_b2l(world, local_boids, from_boid, to_boids, to_boids_count);
- }
-
- Vector2 neighbours[] = { { 1, 0 }, { 0, 1 }, { 1, 1 }, { -1, 1 } };
- for (int i = 0; i < ARRAY_LEN(neighbours); i++) {
- int chunk_y = y + neighbours[i].y;
- int chunk_x = x + neighbours[i].x;
- if (chunk_y < 0 || chunk_y >= chunks->height) continue;
- if (chunk_x < 0 || chunk_x >= chunks->width) continue;
-
- BoidList *neighbour_chunk = chunkgrid_get(chunks, chunk_x, chunk_y);
- if (neighbour_chunk->count == 0) continue;
-
- uboid_t neighbour_ids[neighbour_chunk->count];
- boid_list_to_array(neighbour_ids, neighbour_chunk);
-
- uboid_t boid1;
- BoidsListNodeIterator it1 = boid_list_get_iterator(chunk);
- while (boid_list_iterator_next(&it1, &boid1)) {
- assign_local_boids_b2l(world, local_boids, boid1, neighbour_ids, neighbour_chunk->count);
- }
- }
- }
+ world_calc_distances_and_angles(world, local_boids, b2b_cmps, &b2b_cmps_count);
}
RPROF_STOP();
}
@@ -543,13 +396,7 @@ static BoidList* world_compute_local_boids(World *world) {
RPROF_STOP();
RPROF_START("world_compute_local_boids()");
-#ifdef __EMSCRIPTEN__
- // TODO: Rewrite simd version to only use SSE, not AVX2
- world_compute_local_boids_scalar(all_local_boids, world, &chunks);
-#else
- world_compute_local_boids_simd(all_local_boids, world, &chunks);
-#endif
-
+ world_compute_local_boids(all_local_boids, world, &chunks);
RPROF_STOP();
return all_local_boids;