Compare commits

...

3 Commits

5 changed files with 48 additions and 22 deletions

View File

@ -14,10 +14,11 @@ WEB_HEAP_SIZE := 335544320
WEB_STACK_SIZE := 196608
WEB_SHELL := src/shell.html
COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -g -flto -msse4.2 -mavx
DEBUG_MODE := no
COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -flto -msse4.2 -mavx
COMPILER_FLAGS += -DRPROF_IMPLEMENTATION
COMPILER_FLAGS += -DRAYGUI_IMPLEMENTATION
# COMPILER_FLAGS += -DRLGL_IMPLEMENTATION
LINKER_FLAGS := -lraylib
# SOURCES := $(wildcard src/*.cpp)
@ -25,6 +26,12 @@ SOURCES := src/main.cpp
COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raygui/src/
ifeq ($(DEBUG_MODE), yes)
COMPILER_FLAGS += -DDEBUG -g
else
COMPILER_FLAGS += -DRPROF_STUB_OUT
endif
# ----------------- Prepare variables for targets ------------------
EXT :=

View File

@ -4,3 +4,5 @@
-DRLGL_IMPLEMENTATION
-DRPROF_IMPLEMENTATION
-DRAYGUI_IMPLEMENTATION
-DDEBUG
-DSIMD256

View File

@ -9,7 +9,12 @@
#define ARRAY_LEN(arr) (sizeof(arr)/sizeof(arr[0]))
#define LogTrace(...) TraceLog(LOG_TRACE, __VA_ARGS__)
// DEBUG_ASSERT(cond): forwards to assert() when DEBUG is defined.
// Otherwise it expands to a no-op *expression* (the same shape the standard
// assert takes under NDEBUG), so the macro stays valid wherever an expression
// is expected and its arguments are not evaluated.
#ifdef DEBUG
#define DEBUG_ASSERT(...) assert(__VA_ARGS__)
#else
#define DEBUG_ASSERT(...) ((void)0)
#endif
typedef uint16_t uboid_t;
#define MAX_BOIDS (1 << (sizeof(uboid_t)*8))

View File

@ -7,7 +7,6 @@
#include <emscripten/emscripten.h>
#endif
// #define RPROF_STUB_OUT
// #define RPROF_ONLY_TOTAL_TIME
#include "rprof.h"
@ -33,8 +32,8 @@ void UpdateDrawFrame();
static void profiling_test();
int main() {
profiling_test();
return 0;
// profiling_test();
// return 0;
SetTraceLogLevel(LOG_TRACE);

View File

@ -170,6 +170,11 @@ static void chunkgrid_init(MemoryArena *arena, ChunkGrid *grid, int width, int h
}
// Counter bumped through INCREMENT_INTERACTIONS(); left untouched by the macro
// when DEBUG is not defined.
static int g_prof_interactions = 0;
// Both variants expand to a single void expression with no trailing semicolon,
// so `INCREMENT_INTERACTIONS();` is exactly one statement in either build and
// stays safe inside an unbraced if/else.
#ifdef DEBUG
#define INCREMENT_INTERACTIONS() ((void)(g_prof_interactions++))
#else
#define INCREMENT_INTERACTIONS() ((void)0)
#endif
static int nearest_multiple(int num, int divisor) {
return (num / divisor + (num % divisor > 0 ? 1 : 0)) * divisor;
@ -181,7 +186,7 @@ struct boid_pair {
};
#ifdef SIMD256
#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \
#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \
_mm256_set_ps( \
boids[b2b_cmps[8*i+7].SIDE].FIELD, \
boids[b2b_cmps[8*i+6].SIDE].FIELD, \
@ -193,7 +198,7 @@ struct boid_pair {
boids[b2b_cmps[8*i+0].SIDE].FIELD \
)
#else
#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \
#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \
_mm_set_ps( \
boids[b2b_cmps[4*i+3].SIDE].FIELD, \
boids[b2b_cmps[4*i+2].SIDE].FIELD, \
@ -220,6 +225,10 @@ static void world_calc_distances_and_angles(World *world, BoidList *local_boids,
int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*SIMD_32B_LANES, 32);
int simd_iteration_count = nearest_multiple(*b2b_cmps_count, SIMD_32B_LANES)/SIMD_32B_LANES;
// Zero the unused tail of the last SIMD chunk so the lane gathers below read
// defined pairs. The bound must use SIMD_32B_LANES, the same width that
// simd_iteration_count was derived from: a literal 8 overshoots in the 4-lane
// build and can write past the end of b2b_cmps.
for (int i = *b2b_cmps_count; i < simd_iteration_count*SIMD_32B_LANES; i++) {
    b2b_cmps[i] = { 0 };
}
for (int i = 0; i < simd_iteration_count; i++) {
__simd from_pos_x = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.x);
__simd from_pos_y = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.y);
@ -267,11 +276,11 @@ static void world_calc_distances_and_angles(World *world, BoidList *local_boids,
uboid_t to_boid = b2b_cmps[cmp_idx].to;
if (do_append_mask1_f32[j]) {
boid_list_append(&world->frame_arena, &local_boids[from_boid], to_boid);
g_prof_interactions++;
INCREMENT_INTERACTIONS();
}
if (do_append_mask2_f32[j]) {
boid_list_append(&world->frame_arena, &local_boids[to_boid], from_boid);
g_prof_interactions++;
INCREMENT_INTERACTIONS();
}
}
}
@ -280,18 +289,24 @@ static void world_calc_distances_and_angles(World *world, BoidList *local_boids,
RPROF_STOP();
}
// Number of queued boid pairs at which append_b2b_cmp() forces a flush.
// Parenthesized so the expansion survives neighbouring operators (%, /, unary -).
#define B2B_CAPACITY (1024*10)
// Fill level (a quarter of the capacity) above which the queue is handed to
// world_calc_distances_and_angles(). Kept as an integer so the comparison
// against the int pair count does not go through double.
#define B2B_THRESHOLD (B2B_CAPACITY / 4)
// Queues one boid pair in b2b_cmps for later distance/angle evaluation.
//
// If the queue already holds B2B_CAPACITY pairs, it is first handed to
// world_calc_distances_and_angles().
// NOTE(review): this relies on that call resetting *b2b_cmps_count; the reset
// is not visible from here - confirm.
//
//   world, local_boids  forwarded unchanged to world_calc_distances_and_angles()
//   b2b_cmps            pair queue
//   b2b_cmps_count      in/out: number of pairs currently queued
//   b2b_cmp             pair to enqueue
static inline void append_b2b_cmp(World *world, BoidList *local_boids, boid_pair *b2b_cmps, int *b2b_cmps_count, boid_pair b2b_cmp) {
    // `>=` instead of `==`: if the count ever overshoots the capacity, the
    // queue must still be flushed rather than skipping the check for good.
    if (*b2b_cmps_count >= B2B_CAPACITY) {
        world_calc_distances_and_angles(world, local_boids, b2b_cmps, b2b_cmps_count);
    }
    b2b_cmps[(*b2b_cmps_count)++] = b2b_cmp;
}
static void world_compute_local_boids(BoidList *local_boids, World *world, ChunkGrid *chunks) {
Boid *boids = world->boids.data();
int boid_count = world->boids.size();
MemoryArena *arena = &world->frame_arena;
int b2b_padding = 8;
int b2b_capacity = 2048*10;
boid_pair b2b_cmps[b2b_capacity + b2b_padding];
boid_pair b2b_cmps[B2B_CAPACITY + SIMD_32B_LANES];
int b2b_cmps_count = 0;
for (int i = 0; i < b2b_padding; i++) {
memset(&b2b_cmps[b2b_capacity + i], 0, sizeof(boid_pair));
}
RPROF_START("Move chunk data to static arrays");
uboid_t *static_chunks[chunks->width * chunks->height];
@ -319,12 +334,11 @@ static void world_compute_local_boids(BoidList *local_boids, World *world, Chunk
uboid_t to_boids_count = chunk->count-i-1;
for (int j = 0; j < to_boids_count; j++) {
// TODO:
// DEBUG_ASSERT(b2b_cmps_count < b2b_capacity-1);
b2b_cmps[b2b_cmps_count++] = {
boid_pair b2b_cmp = {
.from = from_boid,
.to = to_boids[j]
};
append_b2b_cmp(world, local_boids, b2b_cmps, &b2b_cmps_count, b2b_cmp);
}
}
@ -342,17 +356,16 @@ static void world_compute_local_boids(BoidList *local_boids, World *world, Chunk
uboid_t *neighbour_boids = static_chunks[neighbour_idx];
for (int i = 0; i < chunk->count; i++) {
for (int j = 0; j < neighbour_chunk->count; j++) {
// TODO:
// DEBUG_ASSERT(b2b_cmps_count < b2b_capacity-1);
b2b_cmps[b2b_cmps_count++] = {
boid_pair b2b_cmp = {
.from = chunk_boids[i],
.to = neighbour_boids[j]
};
append_b2b_cmp(world, local_boids, b2b_cmps, &b2b_cmps_count, b2b_cmp);
}
}
}
if (b2b_cmps_count > 2048*3) {
if (b2b_cmps_count > B2B_THRESHOLD) {
world_calc_distances_and_angles(world, local_boids, b2b_cmps, &b2b_cmps_count);
}
}