From dc34800facb734e951605b5c76e7678f5988bba3 Mon Sep 17 00:00:00 2001 From: Rokas Puzonas Date: Wed, 2 Aug 2023 23:31:36 +0300 Subject: [PATCH] tune boid to boid buffer size and threshold --- src/world.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/world.cpp b/src/world.cpp index ca56bd1..a9add17 100644 --- a/src/world.cpp +++ b/src/world.cpp @@ -225,6 +225,10 @@ static void world_calc_distances_and_angles(World *world, BoidList *local_boids, int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*SIMD_32B_LANES, 32); int simd_iteration_count = nearest_multiple(*b2b_cmps_count, SIMD_32B_LANES)/SIMD_32B_LANES; + for (int i = *b2b_cmps_count; i < simd_iteration_count*8; i++) { + b2b_cmps[i] = { 0 }; + } + for (int i = 0; i < simd_iteration_count; i++) { __simd from_pos_x = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.x); __simd from_pos_y = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.y); @@ -285,11 +289,11 @@ static void world_calc_distances_and_angles(World *world, BoidList *local_boids, RPROF_STOP(); } -#define B2B_CAPACITY 2048*10 -#define B2B_THRESHOLD B2B_CAPACITY * 0.5 +#define B2B_CAPACITY 1024*8 +#define B2B_THRESHOLD B2B_CAPACITY*0.25 static inline void append_b2b_cmp(World *world, BoidList *local_boids, boid_pair *b2b_cmps, int *b2b_cmps_count, boid_pair b2b_cmp) { - if (*b2b_cmps_count == B2B_CAPACITY) { + if ((*b2b_cmps_count) == B2B_CAPACITY) { world_calc_distances_and_angles(world, local_boids, b2b_cmps, b2b_cmps_count); } @@ -301,12 +305,8 @@ static void world_compute_local_boids(BoidList *local_boids, World *world, Chunk int boid_count = world->boids.size(); MemoryArena *arena = &world->frame_arena; - int b2b_padding = 8; - boid_pair b2b_cmps[B2B_CAPACITY + b2b_padding]; + boid_pair b2b_cmps[B2B_CAPACITY + SIMD_32B_LANES]; int b2b_cmps_count = 0; - for (int i = 0; i < b2b_padding; i++) { - memset(&b2b_cmps[B2B_CAPACITY + i], 0, sizeof(boid_pair)); - } RPROF_START("Move chunk data to static arrays"); uboid_t *static_chunks[chunks->width * chunks->height];