From be2c56fb28d84d705b20108097024b880611c7e8 Mon Sep 17 00:00:00 2001 From: Rokas Puzonas Date: Wed, 2 Aug 2023 21:34:12 +0300 Subject: [PATCH] add define to switch between simd 128 and 256 --- Makefile | 5 +- src/main.cpp | 15 ++- src/shell.html | 6 +- src/simd.cpp | 46 ++++++++ src/world.cpp | 285 ++++++++++++------------------------------------- 5 files changed, 124 insertions(+), 233 deletions(-) create mode 100644 src/simd.cpp diff --git a/Makefile b/Makefile index 1f671fc..6bea4a1 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ WEB_HEAP_SIZE := 335544320 WEB_STACK_SIZE := 196608 WEB_SHELL := src/shell.html -COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -g -flto +COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -g -flto -msse4.2 -mavx COMPILER_FLAGS += -DRPROF_IMPLEMENTATION COMPILER_FLAGS += -DRAYGUI_IMPLEMENTATION # COMPILER_FLAGS += -DRLGL_IMPLEMENTATION @@ -75,6 +75,7 @@ ifeq ($(PLATFORM), web) EMSCRIPTEN_PATH ?= $(EMSDK_PATH)/upstream/emscripten COMPILER_FLAGS += -I$(EMSCRIPTEN_PATH)/cache/sysroot/include COMPILER_FLAGS += -D_DEFAULT_SOURCE + COMPILER_FLAGS += -msimd128 LINKER_FLAGS += -s USE_GLFW=3 LINKER_FLAGS += -s FORCE_FILESYSTEM=1 LINKER_FLAGS += $(RAYLIB_RELEASE_PATH)/libraylib.a @@ -83,7 +84,7 @@ ifeq ($(PLATFORM), web) LINKER_FLAGS += -s STACK_SIZE=$(WEB_STACK_SIZE) LIB_DEPENDENCIES += emsdk else - COMPILER_FLAGS += -march=native + COMPILER_FLAGS += -mavx2 -mfma -DSIMD256 endif LINKER_FLAGS += -L$(RAYLIB_RELEASE_PATH) diff --git a/src/main.cpp b/src/main.cpp index 7c3008f..68d2922 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -19,10 +19,6 @@ #include "world.cpp" #include "ui.cpp" -//#define USE_TEST_MAIN - -// #include "raygui.h" - #define FRAMERATE 60 #define TIME_PER_FRAME (1.0/FRAMERATE) @@ -37,8 +33,8 @@ void UpdateDrawFrame(); static void profiling_test(); int main() { - // profiling_test(); - // return 0; + profiling_test(); + return 0; SetTraceLogLevel(LOG_TRACE); @@ -89,7 +85,7 @@ static void profiling_test() { SetRandomSeed(10); float border = g_visuals.boid_edge_size; - for (int i = 0; i < 45000; i++) { + for (int i = 0; i < 50000; i++) { Boid boid; boid_rand_init(&g_world, &boid, border); g_world.boids.push_back(boid); @@ -105,8 +101,9 @@ static void profiling_test() { rprof_end(); printf("interactions: %d\n", g_prof_interactions); - if (g_prof_interactions != 33119854) { // 22 051 739 - printf("!!!!!! ITERACTIONS DONT MATCH, %d\n", g_prof_interactions - 33119854); + int expected_interactions = 40501984; + if (g_prof_interactions != expected_interactions) { // 22 051 739 + printf("!!!!!! ITERACTIONS DONT MATCH, %d\n", g_prof_interactions - expected_interactions); } rprof_output(NULL); diff --git a/src/shell.html b/src/shell.html index 32fb4b5..e80b09f 100644 --- a/src/shell.html +++ b/src/shell.html @@ -15,9 +15,9 @@ - - - + + + diff --git a/src/simd.cpp b/src/simd.cpp new file mode 100644 index 0000000..2eb9254 --- /dev/null +++ b/src/simd.cpp @@ -0,0 +1,46 @@ +#include +#include + +#ifdef SIMD256 +#define SIMD_BITS 256 +typedef __m256 __simd; +typedef __m256i __simdi; + +static inline bool mm_is_zero(__simdi x) { + return _mm256_testz_si256(x, x); +} + +#define mm_set1_ps(x) _mm256_set1_ps(x) +#define mm_sub_ps(a, b) _mm256_sub_ps(a, b) +#define mm_mul_ps(a, b) _mm256_mul_ps(a, b) +#define mm_and_ps(a, b) _mm256_and_ps(a, b) +#define mm_store_ps(a, b) _mm256_store_ps(a, b) +#define mm_fmadd_ps(a, b, c) _mm256_fmadd_ps(a, b, c) +#define mm_cmp_ps(a, b, c) _mm256_cmp_ps(a, b, c) +#define mm_rsqrt_ps(a) _mm256_rsqrt_ps(a) +#define mm_blendv_ps(a, b, c) _mm256_blendv_ps(a, b, c) +#define mm_cmpeq_epi32(a, b) _mm256_cmpeq_epi32(a, b) +#else +#define SIMD_BITS 128 +typedef __m128 __simd; +typedef __m128i __simdi; + +static inline bool mm_is_zero(__simdi x) { + return _mm_test_all_zeros(x, _mm_set1_epi8(0xFF)); +} + +#define mm_set1_ps(x) _mm_set1_ps(x) +#define mm_sub_ps(a, b) _mm_sub_ps(a, b) +#define mm_mul_ps(a, b) _mm_mul_ps(a, b) +#define mm_and_ps(a, b) _mm_and_ps(a, b) +#define mm_store_ps(a, b) _mm_store_ps(a, b) +#define mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps(a, b), c) +#define mm_cmp_ps(a, b, c) _mm_cmp_ps(a, b, c) +#define mm_rsqrt_ps(a) _mm_rsqrt_ps(a) +#define mm_blendv_ps(a, b, c) _mm_blendv_ps(a, b, c) +#define mm_cmpeq_epi32(a, b) _mm_cmpeq_epi32(a, b) +#endif + +#define SIMD_32B_LANES (SIMD_BITS/32) + +#define mm_is_zero(x) mm_is_zero((__simdi)x) diff --git a/src/world.cpp b/src/world.cpp index f546c58..fb13773 100644 --- a/src/world.cpp +++ b/src/world.cpp @@ -3,7 +3,7 @@ #include "rprof.h" #include "boid-list.hpp" -#include +#include "simd.cpp" static float vector2_atan2(Vector2 a) { return std::atan2(a.y, a.x); @@ -175,198 +175,92 @@ static int nearest_multiple(int num, int divisor) { return (num / divisor + (num % divisor > 0 ? 1 : 0)) * divisor; } -// b2l = boid to (list of boids) comparison -static void assign_local_boids_b2l(World *world, BoidList *local_boids, uboid_t from_boid, uboid_t *to_boids, uboid_t to_boids_count) { - Boid *boids = world->boids.data(); - - // Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2)); - float dot_threshold = cosf(world->view_angle/2); - float view_radius_sqr = world->view_radius * world->view_radius; - - for (int i = 0; i < to_boids_count; i++) { - uint16_t to_boid = to_boids[i]; - assert(to_boid != from_boid); - - Vector2 offset = Vector2Subtract(boids[from_boid].pos, boids[to_boid].pos); - float length_sqr = Vector2LengthSqr(offset); - if (length_sqr > view_radius_sqr) continue; - - Vector2 normalized = offset; - if (length_sqr != 0) - { - float ilength = 1.0f/sqrtf(length_sqr); - normalized.x *= ilength; - normalized.y *= ilength; - } - - if (Vector2DotProduct(boids[from_boid].dir, Vector2Negate(normalized)) >= dot_threshold) { - boid_list_append(&world->frame_arena, &local_boids[from_boid], to_boid); - g_prof_interactions++; - } - if (Vector2DotProduct(boids[to_boid].dir, normalized) >= dot_threshold) { - boid_list_append(&world->frame_arena, &local_boids[to_boid], from_boid); - g_prof_interactions++; - } - } -} - -#ifndef __EMSCRIPTEN__ -static void print_m256_f32(__m256 value) { - float *value_f32 = (float*)&value; - printf("%f", value_f32[0]); - for (int i = 1; i < 8; i++) { - printf(",%f", value_f32[i]); - } - printf("\n"); -} - -static inline bool mm256_is_zero(__m256i x) { - return _mm256_testz_si256(x, x); -} -static inline bool mm256_is_zero(__m256 x) { - return mm256_is_zero((__m256i)x); -} - struct boid_pair { uboid_t from; uboid_t to; }; -static void world_process_local_boid_pairs(World *world, BoidList *local_boids, boid_pair *b2b_cmps, int *b2b_cmps_count) { +#ifdef SIMD256 +#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \ + _mm256_set_ps( \ + boids[b2b_cmps[8*i+7].SIDE].FIELD, \ + boids[b2b_cmps[8*i+6].SIDE].FIELD, \ + boids[b2b_cmps[8*i+5].SIDE].FIELD, \ + boids[b2b_cmps[8*i+4].SIDE].FIELD, \ + boids[b2b_cmps[8*i+3].SIDE].FIELD, \ + boids[b2b_cmps[8*i+2].SIDE].FIELD, \ + boids[b2b_cmps[8*i+1].SIDE].FIELD, \ + boids[b2b_cmps[8*i+0].SIDE].FIELD \ + ) +#else +#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \ + _mm_set_ps( \ + boids[b2b_cmps[4*i+3].SIDE].FIELD, \ + boids[b2b_cmps[4*i+2].SIDE].FIELD, \ + boids[b2b_cmps[4*i+1].SIDE].FIELD, \ + boids[b2b_cmps[4*i+0].SIDE].FIELD \ + ) +#endif +static void world_calc_distances_and_angles(World *world, BoidList *local_boids, boid_pair *b2b_cmps, int *b2b_cmps_count) { + RPROF_START("Calc dot products and distances"); // Simplified from: // float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2)); // | // v // float dot_threshold = cosf(world->view_angle/2); - __m256 dot_threshold = _mm256_set1_ps(cosf(world->view_angle/2)); + __simd dot_threshold = mm_set1_ps(cosf(world->view_angle/2)); float view_radius_sqr = world->view_radius * world->view_radius; - __m256 view_radius = _mm256_set1_ps(view_radius_sqr); - __m256 zero = _mm256_set1_ps(0); - __m256 negative_one = _mm256_set1_ps(-1); + __simd view_radius = mm_set1_ps(view_radius_sqr); + __simd zero = mm_set1_ps(0); + __simd negative_one = mm_set1_ps(-1); Boid *boids = world->boids.data(); - int32_t *do_append_mask1_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*8, 32); - int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*8, 32); + int32_t *do_append_mask1_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*SIMD_32B_LANES, 32); + int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*SIMD_32B_LANES, 32); - int simd_iteration_count = nearest_multiple(*b2b_cmps_count, 8)/8; + int simd_iteration_count = nearest_multiple(*b2b_cmps_count, SIMD_32B_LANES)/SIMD_32B_LANES; for (int i = 0; i < simd_iteration_count; i++) { - __m256 from_pos_x = _mm256_set_ps( - boids[b2b_cmps[8*i+7].from].pos.x, - boids[b2b_cmps[8*i+6].from].pos.x, - boids[b2b_cmps[8*i+5].from].pos.x, - boids[b2b_cmps[8*i+4].from].pos.x, - boids[b2b_cmps[8*i+3].from].pos.x, - boids[b2b_cmps[8*i+2].from].pos.x, - boids[b2b_cmps[8*i+1].from].pos.x, - boids[b2b_cmps[8*i+0].from].pos.x - ); + __simd from_pos_x = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.x); + __simd from_pos_y = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.y); - __m256 from_pos_y = _mm256_set_ps( - boids[b2b_cmps[8*i+7].from].pos.y, - boids[b2b_cmps[8*i+6].from].pos.y, - boids[b2b_cmps[8*i+5].from].pos.y, - boids[b2b_cmps[8*i+4].from].pos.y, - boids[b2b_cmps[8*i+3].from].pos.y, - boids[b2b_cmps[8*i+2].from].pos.y, - boids[b2b_cmps[8*i+1].from].pos.y, - boids[b2b_cmps[8*i+0].from].pos.y - ); + __simd to_pos_x = GET_F32_CHUNK_FROM_BOIDS(i, to, pos.x); + __simd to_pos_y = GET_F32_CHUNK_FROM_BOIDS(i, to, pos.y); - __m256 to_pos_x = _mm256_set_ps( - boids[b2b_cmps[8*i+7].to].pos.x, - boids[b2b_cmps[8*i+6].to].pos.x, - boids[b2b_cmps[8*i+5].to].pos.x, - boids[b2b_cmps[8*i+4].to].pos.x, - boids[b2b_cmps[8*i+3].to].pos.x, - boids[b2b_cmps[8*i+2].to].pos.x, - boids[b2b_cmps[8*i+1].to].pos.x, - boids[b2b_cmps[8*i+0].to].pos.x - ); + __simd sub_x = mm_sub_ps(from_pos_x, to_pos_x); + __simd sub_y = mm_sub_ps(from_pos_y, to_pos_y); - __m256 to_pos_y = _mm256_set_ps( - boids[b2b_cmps[8*i+7].to].pos.y, - boids[b2b_cmps[8*i+6].to].pos.y, - boids[b2b_cmps[8*i+5].to].pos.y, - boids[b2b_cmps[8*i+4].to].pos.y, - boids[b2b_cmps[8*i+3].to].pos.y, - boids[b2b_cmps[8*i+2].to].pos.y, - boids[b2b_cmps[8*i+1].to].pos.y, - boids[b2b_cmps[8*i+0].to].pos.y - ); + __simd length_sqr = mm_fmadd_ps(sub_y, sub_y, mm_mul_ps(sub_x, sub_x)); + __simdi in_range_mask = (__simdi)mm_cmp_ps(length_sqr, view_radius, _CMP_LE_OQ); + if (mm_is_zero(in_range_mask)) continue; - __m256 sub_x = _mm256_sub_ps(from_pos_x, to_pos_x); - __m256 sub_y = _mm256_sub_ps(from_pos_y, to_pos_y); + __simd from_dir_x = GET_F32_CHUNK_FROM_BOIDS(i, from, dir.x); + __simd from_dir_y = GET_F32_CHUNK_FROM_BOIDS(i, from, dir.y); - __m256 length_sqr = _mm256_fmadd_ps(sub_y, sub_y, _mm256_mul_ps(sub_x, sub_x)); - __m256i in_range_mask = (__m256i)_mm256_cmp_ps(length_sqr, view_radius, _CMP_LE_OQ); - if (mm256_is_zero(in_range_mask)) continue; + __simd to_dir_x = GET_F32_CHUNK_FROM_BOIDS(i, to, dir.x); + __simd to_dir_y = GET_F32_CHUNK_FROM_BOIDS(i, to, dir.y); - __m256 from_dir_x = _mm256_set_ps( - boids[b2b_cmps[8*i+7].from].dir.x, - boids[b2b_cmps[8*i+6].from].dir.x, - boids[b2b_cmps[8*i+5].from].dir.x, - boids[b2b_cmps[8*i+4].from].dir.x, - boids[b2b_cmps[8*i+3].from].dir.x, - boids[b2b_cmps[8*i+2].from].dir.x, - boids[b2b_cmps[8*i+1].from].dir.x, - boids[b2b_cmps[8*i+0].from].dir.x - ); + __simd is_length_zero = (__simd)mm_cmpeq_epi32((__simdi)length_sqr, (__simdi)zero); + __simd ilength = mm_blendv_ps(mm_rsqrt_ps(length_sqr), zero, is_length_zero); - __m256 from_dir_y = _mm256_set_ps( - boids[b2b_cmps[8*i+7].from].dir.y, - boids[b2b_cmps[8*i+6].from].dir.y, - boids[b2b_cmps[8*i+5].from].dir.y, - boids[b2b_cmps[8*i+4].from].dir.y, - boids[b2b_cmps[8*i+3].from].dir.y, - boids[b2b_cmps[8*i+2].from].dir.y, - boids[b2b_cmps[8*i+1].from].dir.y, - boids[b2b_cmps[8*i+0].from].dir.y - ); + __simd x_norm = mm_mul_ps(sub_x, ilength); + __simd y_norm = mm_mul_ps(sub_y, ilength); - __m256 to_dir_x = _mm256_set_ps( - boids[b2b_cmps[8*i+7].to].dir.x, - boids[b2b_cmps[8*i+6].to].dir.x, - boids[b2b_cmps[8*i+5].to].dir.x, - boids[b2b_cmps[8*i+4].to].dir.x, - boids[b2b_cmps[8*i+3].to].dir.x, - boids[b2b_cmps[8*i+2].to].dir.x, - boids[b2b_cmps[8*i+1].to].dir.x, - boids[b2b_cmps[8*i+0].to].dir.x - ); + __simd x_neg_norm = mm_mul_ps(x_norm, negative_one); + __simd y_neg_norm = mm_mul_ps(y_norm, negative_one); - __m256 to_dir_y = _mm256_set_ps( - boids[b2b_cmps[8*i+7].to].dir.y, - boids[b2b_cmps[8*i+6].to].dir.y, - boids[b2b_cmps[8*i+5].to].dir.y, - boids[b2b_cmps[8*i+4].to].dir.y, - boids[b2b_cmps[8*i+3].to].dir.y, - boids[b2b_cmps[8*i+2].to].dir.y, - boids[b2b_cmps[8*i+1].to].dir.y, - boids[b2b_cmps[8*i+0].to].dir.y - ); + __simd dot_product1 = mm_fmadd_ps(from_dir_y, y_neg_norm, mm_mul_ps(from_dir_x, x_neg_norm)); + __simd in_angle_mask1 = mm_cmp_ps(dot_product1, dot_threshold, _CMP_GE_OQ); + __simd do_append_mask1 = mm_and_ps(in_angle_mask1, (__simd)in_range_mask); - __m256 is_length_zero = (__m256)_mm256_cmpeq_epi32((__m256i)length_sqr, (__m256i)zero); - __m256 ilength = _mm256_blendv_ps(_mm256_rsqrt_ps(length_sqr), zero, is_length_zero); + __simd dot_product2 = mm_fmadd_ps(to_dir_y, y_norm, mm_mul_ps(to_dir_x, x_norm)); + __simd in_angle_mask2 = mm_cmp_ps(dot_product2, dot_threshold, _CMP_GE_OQ); + __simd do_append_mask2 = mm_and_ps(in_angle_mask2, (__simd)in_range_mask); - __m256 x_norm = _mm256_mul_ps(sub_x, ilength); - __m256 y_norm = _mm256_mul_ps(sub_y, ilength); - - __m256 x_neg_norm = _mm256_mul_ps(x_norm, negative_one); - __m256 y_neg_norm = _mm256_mul_ps(y_norm, negative_one); - - __m256 dot_product1 = _mm256_fmadd_ps(from_dir_y, y_neg_norm, _mm256_mul_ps(from_dir_x, x_neg_norm)); - __m256 in_angle_mask1 = _mm256_cmp_ps(dot_product1, dot_threshold, _CMP_GE_OQ); - __m256 do_append_mask1 = _mm256_and_ps(in_angle_mask1, (__m256)in_range_mask); - - __m256 dot_product2 = _mm256_fmadd_ps(to_dir_y, y_norm, _mm256_mul_ps(to_dir_x, x_norm)); - __m256 in_angle_mask2 = _mm256_cmp_ps(dot_product2, dot_threshold, _CMP_GE_OQ); - __m256 do_append_mask2 = _mm256_and_ps(in_angle_mask2, (__m256)in_range_mask); - - _mm256_store_ps((float*)do_append_mask1_f32, do_append_mask1); - _mm256_store_ps((float*)do_append_mask2_f32, do_append_mask2); - for (int j = 0; j < 8; j++) { - uboid_t cmp_idx = 8*i + j; + mm_store_ps((float*)do_append_mask1_f32, do_append_mask1); + mm_store_ps((float*)do_append_mask2_f32, do_append_mask2); + for (int j = 0; j < SIMD_32B_LANES; j++) { + uboid_t cmp_idx = SIMD_32B_LANES*i + j; if (cmp_idx >= *b2b_cmps_count) break; uboid_t from_boid = b2b_cmps[cmp_idx].from; @@ -383,9 +277,10 @@ static void world_process_local_boid_pairs(World *world, BoidList *local_boids, } *b2b_cmps_count = 0; + RPROF_STOP(); } -static void world_compute_local_boids_simd(BoidList *local_boids, World *world, ChunkGrid *chunks) { +static void world_compute_local_boids(BoidList *local_boids, World *world, ChunkGrid *chunks) { Boid *boids = world->boids.data(); int boid_count = world->boids.size(); MemoryArena *arena = &world->frame_arena; @@ -410,7 +305,7 @@ static void world_compute_local_boids_simd(BoidList *local_boids, World *world, } RPROF_STOP(); - RPROF_START("Calc dot products and ranges (simd)"); + RPROF_START("Iterate over chunks"); for (int y = 0; y < chunks->height; y++) { for (int x = 0; x < chunks->width; x++) { size_t chunk_idx = chunkgrid_get_idx(chunks, x, y); @@ -458,55 +353,13 @@ static void world_compute_local_boids_simd(BoidList *local_boids, World *world, } if (b2b_cmps_count > 2048*3) { - world_process_local_boid_pairs(world, local_boids, b2b_cmps, &b2b_cmps_count); + world_calc_distances_and_angles(world, local_boids, b2b_cmps, &b2b_cmps_count); } } } if (b2b_cmps_count > 0) { - world_process_local_boid_pairs(world, local_boids, b2b_cmps, &b2b_cmps_count); - } - - RPROF_STOP(); -} -#endif - -static void world_compute_local_boids_scalar(BoidList *local_boids, World *world, ChunkGrid *chunks) { - RPROF_START("Calc dot products and ranges (scalar)"); - for (int y = 0; y < chunks->height; y++) { - for (int x = 0; x < chunks->width; x++) { - BoidList *chunk = chunkgrid_get(chunks, x, y); - if (chunk->count == 0) continue; - - uboid_t chunk_boids[chunk->count]; - boid_list_to_array(chunk_boids, chunk); - for (int i = 0; i < chunk->count-1; i++) { - uboid_t from_boid = chunk_boids[i]; - uboid_t *to_boids = &chunk_boids[i+1]; - uboid_t to_boids_count = chunk->count-i-1; - assign_local_boids_b2l(world, local_boids, from_boid, to_boids, to_boids_count); - } - - Vector2 neighbours[] = { { 1, 0 }, { 0, 1 }, { 1, 1 }, { -1, 1 } }; - for (int i = 0; i < ARRAY_LEN(neighbours); i++) { - int chunk_y = y + neighbours[i].y; - int chunk_x = x + neighbours[i].x; - if (chunk_y < 0 || chunk_y >= chunks->height) continue; - if (chunk_x < 0 || chunk_x >= chunks->width) continue; - - BoidList *neighbour_chunk = chunkgrid_get(chunks, chunk_x, chunk_y); - if (neighbour_chunk->count == 0) continue; - - uboid_t neighbour_ids[neighbour_chunk->count]; - boid_list_to_array(neighbour_ids, neighbour_chunk); - - uboid_t boid1; - BoidsListNodeIterator it1 = boid_list_get_iterator(chunk); - while (boid_list_iterator_next(&it1, &boid1)) { - assign_local_boids_b2l(world, local_boids, boid1, neighbour_ids, neighbour_chunk->count); - } - } - } + world_calc_distances_and_angles(world, local_boids, b2b_cmps, &b2b_cmps_count); } RPROF_STOP(); } @@ -543,13 +396,7 @@ static BoidList* world_compute_local_boids(World *world) { RPROF_STOP(); RPROF_START("world_compute_local_boids()"); -#ifdef __EMSCRIPTEN__ - // TODO: Rewrite simd version to only use SSE, not AVX2 - world_compute_local_boids_scalar(all_local_boids, world, &chunks); -#else - world_compute_local_boids_simd(all_local_boids, world, &chunks); -#endif - + world_compute_local_boids(all_local_boids, world, &chunks); RPROF_STOP(); return all_local_boids;