Compare commits

...

3 Commits

5 changed files with 48 additions and 22 deletions

View File

@ -14,10 +14,11 @@ WEB_HEAP_SIZE := 335544320
WEB_STACK_SIZE := 196608 WEB_STACK_SIZE := 196608
WEB_SHELL := src/shell.html WEB_SHELL := src/shell.html
COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -g -flto -msse4.2 -mavx DEBUG_MODE := no
COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -flto -msse4.2 -mavx
COMPILER_FLAGS += -DRPROF_IMPLEMENTATION COMPILER_FLAGS += -DRPROF_IMPLEMENTATION
COMPILER_FLAGS += -DRAYGUI_IMPLEMENTATION COMPILER_FLAGS += -DRAYGUI_IMPLEMENTATION
# COMPILER_FLAGS += -DRLGL_IMPLEMENTATION
LINKER_FLAGS := -lraylib LINKER_FLAGS := -lraylib
# SOURCES := $(wildcard src/*.cpp) # SOURCES := $(wildcard src/*.cpp)
@ -25,6 +26,12 @@ SOURCES := src/main.cpp
COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raygui/src/ COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raygui/src/
ifeq ($(DEBUG_MODE), yes)
COMPILER_FLAGS += -DDEBUG -g
else
COMPILER_FLAGS += -DRPROF_STUB_OUT
endif
# ----------------- Prepare variables for targets ------------------ # ----------------- Prepare variables for targets ------------------
EXT := EXT :=

View File

@ -4,3 +4,5 @@
-DRLGL_IMPLEMENTATION -DRLGL_IMPLEMENTATION
-DRPROF_IMPLEMENTATION -DRPROF_IMPLEMENTATION
-DRAYGUI_IMPLEMENTATION -DRAYGUI_IMPLEMENTATION
-DDEBUG
-DSIMD256

View File

@ -9,7 +9,12 @@
#define ARRAY_LEN(arr) (sizeof(arr)/sizeof(arr[0])) #define ARRAY_LEN(arr) (sizeof(arr)/sizeof(arr[0]))
#define LogTrace(...) TraceLog(LOG_TRACE, __VA_ARGS__) #define LogTrace(...) TraceLog(LOG_TRACE, __VA_ARGS__)
#ifdef DEBUG
#define DEBUG_ASSERT(...) assert(__VA_ARGS__) #define DEBUG_ASSERT(...) assert(__VA_ARGS__)
#else
#define DEBUG_ASSERT(...)
#endif
typedef uint16_t uboid_t; typedef uint16_t uboid_t;
#define MAX_BOIDS (1 << (sizeof(uboid_t)*8)) #define MAX_BOIDS (1 << (sizeof(uboid_t)*8))

View File

@ -7,7 +7,6 @@
#include <emscripten/emscripten.h> #include <emscripten/emscripten.h>
#endif #endif
// #define RPROF_STUB_OUT
// #define RPROF_ONLY_TOTAL_TIME // #define RPROF_ONLY_TOTAL_TIME
#include "rprof.h" #include "rprof.h"
@ -33,8 +32,8 @@ void UpdateDrawFrame();
static void profiling_test(); static void profiling_test();
int main() { int main() {
profiling_test(); // profiling_test();
return 0; // return 0;
SetTraceLogLevel(LOG_TRACE); SetTraceLogLevel(LOG_TRACE);

View File

@ -170,6 +170,11 @@ static void chunkgrid_init(MemoryArena *arena, ChunkGrid *grid, int width, int h
} }
static int g_prof_interactions = 0; static int g_prof_interactions = 0;
#ifdef DEBUG
#define INCREMENT_INTERACTIONS() g_prof_interactions++;
#else
#define INCREMENT_INTERACTIONS()
#endif
static int nearest_multiple(int num, int divisor) { static int nearest_multiple(int num, int divisor) {
return (num / divisor + (num % divisor > 0 ? 1 : 0)) * divisor; return (num / divisor + (num % divisor > 0 ? 1 : 0)) * divisor;
@ -181,7 +186,7 @@ struct boid_pair {
}; };
#ifdef SIMD256 #ifdef SIMD256
#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \ #define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \
_mm256_set_ps( \ _mm256_set_ps( \
boids[b2b_cmps[8*i+7].SIDE].FIELD, \ boids[b2b_cmps[8*i+7].SIDE].FIELD, \
boids[b2b_cmps[8*i+6].SIDE].FIELD, \ boids[b2b_cmps[8*i+6].SIDE].FIELD, \
@ -193,7 +198,7 @@ struct boid_pair {
boids[b2b_cmps[8*i+0].SIDE].FIELD \ boids[b2b_cmps[8*i+0].SIDE].FIELD \
) )
#else #else
#define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \ #define GET_F32_CHUNK_FROM_BOIDS(i, SIDE, FIELD) \
_mm_set_ps( \ _mm_set_ps( \
boids[b2b_cmps[4*i+3].SIDE].FIELD, \ boids[b2b_cmps[4*i+3].SIDE].FIELD, \
boids[b2b_cmps[4*i+2].SIDE].FIELD, \ boids[b2b_cmps[4*i+2].SIDE].FIELD, \
@ -220,6 +225,10 @@ static void world_calc_distances_and_angles(World *world, BoidList *local_boids,
int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*SIMD_32B_LANES, 32); int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(&world->frame_arena, sizeof(int32_t)*SIMD_32B_LANES, 32);
int simd_iteration_count = nearest_multiple(*b2b_cmps_count, SIMD_32B_LANES)/SIMD_32B_LANES; int simd_iteration_count = nearest_multiple(*b2b_cmps_count, SIMD_32B_LANES)/SIMD_32B_LANES;
for (int i = *b2b_cmps_count; i < simd_iteration_count*8; i++) {
b2b_cmps[i] = { 0 };
}
for (int i = 0; i < simd_iteration_count; i++) { for (int i = 0; i < simd_iteration_count; i++) {
__simd from_pos_x = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.x); __simd from_pos_x = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.x);
__simd from_pos_y = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.y); __simd from_pos_y = GET_F32_CHUNK_FROM_BOIDS(i, from, pos.y);
@ -267,11 +276,11 @@ static void world_calc_distances_and_angles(World *world, BoidList *local_boids,
uboid_t to_boid = b2b_cmps[cmp_idx].to; uboid_t to_boid = b2b_cmps[cmp_idx].to;
if (do_append_mask1_f32[j]) { if (do_append_mask1_f32[j]) {
boid_list_append(&world->frame_arena, &local_boids[from_boid], to_boid); boid_list_append(&world->frame_arena, &local_boids[from_boid], to_boid);
g_prof_interactions++; INCREMENT_INTERACTIONS();
} }
if (do_append_mask2_f32[j]) { if (do_append_mask2_f32[j]) {
boid_list_append(&world->frame_arena, &local_boids[to_boid], from_boid); boid_list_append(&world->frame_arena, &local_boids[to_boid], from_boid);
g_prof_interactions++; INCREMENT_INTERACTIONS();
} }
} }
} }
@ -280,18 +289,24 @@ static void world_calc_distances_and_angles(World *world, BoidList *local_boids,
RPROF_STOP(); RPROF_STOP();
} }
#define B2B_CAPACITY 1024*10
#define B2B_THRESHOLD B2B_CAPACITY*0.25
static inline void append_b2b_cmp(World *world, BoidList *local_boids, boid_pair *b2b_cmps, int *b2b_cmps_count, boid_pair b2b_cmp) {
if ((*b2b_cmps_count) == B2B_CAPACITY) {
world_calc_distances_and_angles(world, local_boids, b2b_cmps, b2b_cmps_count);
}
b2b_cmps[(*b2b_cmps_count)++] = b2b_cmp;
}
static void world_compute_local_boids(BoidList *local_boids, World *world, ChunkGrid *chunks) { static void world_compute_local_boids(BoidList *local_boids, World *world, ChunkGrid *chunks) {
Boid *boids = world->boids.data(); Boid *boids = world->boids.data();
int boid_count = world->boids.size(); int boid_count = world->boids.size();
MemoryArena *arena = &world->frame_arena; MemoryArena *arena = &world->frame_arena;
int b2b_padding = 8; boid_pair b2b_cmps[B2B_CAPACITY + SIMD_32B_LANES];
int b2b_capacity = 2048*10;
boid_pair b2b_cmps[b2b_capacity + b2b_padding];
int b2b_cmps_count = 0; int b2b_cmps_count = 0;
for (int i = 0; i < b2b_padding; i++) {
memset(&b2b_cmps[b2b_capacity + i], 0, sizeof(boid_pair));
}
RPROF_START("Move chunk data to static arrays"); RPROF_START("Move chunk data to static arrays");
uboid_t *static_chunks[chunks->width * chunks->height]; uboid_t *static_chunks[chunks->width * chunks->height];
@ -319,12 +334,11 @@ static void world_compute_local_boids(BoidList *local_boids, World *world, Chunk
uboid_t to_boids_count = chunk->count-i-1; uboid_t to_boids_count = chunk->count-i-1;
for (int j = 0; j < to_boids_count; j++) { for (int j = 0; j < to_boids_count; j++) {
// TODO: boid_pair b2b_cmp = {
// DEBUG_ASSERT(b2b_cmps_count < b2b_capacity-1);
b2b_cmps[b2b_cmps_count++] = {
.from = from_boid, .from = from_boid,
.to = to_boids[j] .to = to_boids[j]
}; };
append_b2b_cmp(world, local_boids, b2b_cmps, &b2b_cmps_count, b2b_cmp);
} }
} }
@ -342,17 +356,16 @@ static void world_compute_local_boids(BoidList *local_boids, World *world, Chunk
uboid_t *neighbour_boids = static_chunks[neighbour_idx]; uboid_t *neighbour_boids = static_chunks[neighbour_idx];
for (int i = 0; i < chunk->count; i++) { for (int i = 0; i < chunk->count; i++) {
for (int j = 0; j < neighbour_chunk->count; j++) { for (int j = 0; j < neighbour_chunk->count; j++) {
// TODO: boid_pair b2b_cmp = {
// DEBUG_ASSERT(b2b_cmps_count < b2b_capacity-1);
b2b_cmps[b2b_cmps_count++] = {
.from = chunk_boids[i], .from = chunk_boids[i],
.to = neighbour_boids[j] .to = neighbour_boids[j]
}; };
append_b2b_cmp(world, local_boids, b2b_cmps, &b2b_cmps_count, b2b_cmp);
} }
} }
} }
if (b2b_cmps_count > 2048*3) { if (b2b_cmps_count > B2B_THRESHOLD) {
world_calc_distances_and_angles(world, local_boids, b2b_cmps, &b2b_cmps_count); world_calc_distances_and_angles(world, local_boids, b2b_cmps, &b2b_cmps_count);
} }
} }