improve performance to support around 2.5k boids

This commit is contained in:
Rokas Puzonas 2023-07-20 16:02:25 +03:00
parent ca52f12fda
commit c6fb55cbe6

View File

@ -11,6 +11,10 @@
#define RAYGUI_IMPLEMENTATION
#include "raygui.h"
#define RPROF_IMPLEMENTATION
// #define RPROF_ONLY_TOTAL_TIME
#include "rprof.h"
#include "boid-playground.hpp"
#include "raycast.cpp"
@ -194,18 +198,140 @@ static int get_boids_in_view_cone(Boid **boids_in_view, Boid *boid, float view_r
}
static void world_update(World *world, float dt) {
for (int i = 0; i < world->boids.size(); i++) {
Boid *boid = &world->boids[i];
Vector2 acc = { 1, 0 };
Boid *boids = world->boids.data();
int boid_count = world->boids.size();
Boid *local_boids[world->boids.size()];
int local_boids_count = get_boids_in_view_cone(local_boids, boid, world->view_radius, world->view_angle, world->boids.data(), world->boids.size());
// Neighbour gathering for world_update: builds, for every boid, the list of
// boids that fall inside its view cone. Candidates come from a uniform grid
// of chunks instead of an all-pairs scan.
//
// Step 1: give every boid a result buffer with room for boid_count pointers,
// plus a fill counter starting at zero.
// NOTE(review): both arrays are sized by a runtime value (variable-length
// arrays), which standard C++ does not provide - this relies on a compiler
// extension and places boid_count-sized arrays on the stack.
// NOTE(review): the malloc results are used without a null check.
RPROF_START("Create groups");
Boid **all_local_boids[boid_count];
int all_local_boid_counts[boid_count];
for (int i = 0; i < boid_count; i++) {
all_local_boids[i] = (Boid **)malloc(boid_count * sizeof(Boid*));
all_local_boid_counts[i] = 0;
}
RPROF_STOP();
// float chunk_size = world->size.x;
// Grid cell edge is half the view radius; the grid is rounded up so that it
// covers the whole world size.
// NOTE(review): the search below only visits the 3x3 block of chunks around
// a boid. With cells of view_radius/2, two boids whose chunks are more than
// one cell apart are never compared, even though the range test accepts
// distances up to view_radius - confirm this is intended.
float chunk_size = world->view_radius/2;
int chunks_wide = std::ceil(world->size.x / chunk_size);
int chunks_high = std::ceil(world->size.y / chunk_size);
// Step 2: every chunk gets an index buffer large enough to hold all boids,
// and a counter of how many indices it currently stores.
RPROF_START("Alloc chunks");
int *chunks[chunks_high][chunks_wide];
int chunk_boid_counts[chunks_high][chunks_wide];
for (int y = 0; y < chunks_high; y++) {
for (int x = 0; x < chunks_wide; x++) {
chunks[y][x] = (int*)malloc(boid_count * sizeof(int));
chunk_boid_counts[y][x] = 0;
}
}
RPROF_STOP();
// Step 3: bin each boid index into the chunk that contains its position.
// NOTE(review): chunk_x / chunk_y are not range-checked here. A position
// outside [0, size) - or exactly on the far edge when size is a multiple of
// chunk_size - would index outside the chunk arrays. Presumably positions
// are kept inside the world elsewhere; verify.
RPROF_START("Creating chunks");
for (int i = 0; i < boid_count; i++) {
Boid *boid = &boids[i];
int chunk_x = boid->pos.x / chunk_size;
int chunk_y = boid->pos.y / chunk_size;
int *boids_in_chunk = &chunk_boid_counts[chunk_y][chunk_x];
chunks[chunk_y][chunk_x][*boids_in_chunk] = i;
(*boids_in_chunk)++;
}
RPROF_STOP();
// Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
float dot_threshold = cosf(world->view_angle/2);
// Step 4: boid_count x boid_count table of zero-initialised flags recording
// which pairs have already been processed.
RPROF_START("alloc checked_local_boids");
bool *checked_local_boids[boid_count];
for (int i = 0; i < boid_count; i++) {
checked_local_boids[i] = (bool*)calloc(boid_count, sizeof(bool));
}
RPROF_STOP();
// Step 5: for each boid, walk the 3x3 block of chunks around its own chunk
// and test every boid stored there.
RPROF_START("Calc dot products and ranges (chunked)");
for (int i = 0; i < boid_count; i++) {
Boid *boid = &boids[i];
int boid_chunk_x = boid->pos.x / chunk_size;
int boid_chunk_y = boid->pos.y / chunk_size;
// Relative chunk coordinates of the 3x3 neighbourhood (stored as floats,
// converted back to int when added to the chunk index below).
Vector2 chunk_offset[] = {
{ -1, -1, }, { 0, -1, }, { 1, -1, },
{ -1, 0, }, { 0, 0, }, { 1, 0, },
{ -1, 1, }, { 0, 1, }, { 1, 1, },
};
for (int j = 0; j < 9; j++) {
int chunk_x = boid_chunk_x + chunk_offset[j].x;
int chunk_y = boid_chunk_y + chunk_offset[j].y;
// Skip neighbourhood cells that fall outside the grid.
if (chunk_x < 0 || chunk_y < 0 || chunk_x >= chunks_wide || chunk_y >= chunks_high) continue;
int *chunk = chunks[chunk_y][chunk_x];
int boids_in_chunk = chunk_boid_counts[chunk_y][chunk_x];
for (int k = 0; k < boids_in_chunk; k++) {
int other_boid = chunk[k];
if (i == other_boid) continue;
// Each unordered pair is handled once; both directions are filled in below.
if (checked_local_boids[i][other_boid]) continue;
// NOTE(review): the diagonal entry [other_boid][other_boid] is never set in
// this section (the writes below always have i != other_boid), so this
// condition is always false. [other_boid][i] was probably intended.
if (checked_local_boids[other_boid][other_boid]) continue;
// offset points from the other boid towards boid i.
Vector2 offset = Vector2Subtract(boids[i].pos, boids[other_boid].pos);
bool with_in_range = Vector2LengthSqr(offset) <= (world->view_radius * world->view_radius);
if (with_in_range) {
float dot = Vector2DotProduct(boids[i].dir, Vector2Normalize(offset));
// -dot is boid i's heading projected onto the direction from i to the
// other boid: passing the threshold puts the other boid in i's view cone.
if (-dot >= dot_threshold) {
int *count = &all_local_boid_counts[i];
all_local_boids[i][*count] = &boids[other_boid];
(*count)++;
}
// NOTE(review): this adds boid i to the OTHER boid's list, but dot was
// computed from boids[i].dir, not boids[other_boid].dir. Looks like the
// other boid's heading should be used here - confirm.
if (dot >= dot_threshold) {
int *count = &all_local_boid_counts[other_boid];
all_local_boids[other_boid][*count] = &boids[i];
(*count)++;
}
}
// Mark the pair as processed in both directions.
checked_local_boids[i][other_boid] = true;
checked_local_boids[other_boid][i] = true;
}
}
}
RPROF_STOP();
// Step 6: release the per-chunk index buffers and the pair table. The
// per-boid result buffers (all_local_boids) are not freed here.
RPROF_START("Free chunks");
for (int y = 0; y < chunks_high; y++) {
for (int x = 0; x < chunks_wide; x++) {
free(chunks[y][x]);
}
}
RPROF_STOP();
RPROF_START("free checked_local_boids");
for (int i = 0; i < boid_count; i++) {
free(checked_local_boids[i]);
}
RPROF_STOP();
RPROF_START("Apply forces");
for (int i = 0; i < boid_count; i++) {
Boid *boid = &world->boids[i];
Vector2 acc = { 0, 0 };
// Boid **local_boids = NULL;
// int local_boids_count = 0;
Boid **local_boids = all_local_boids[i];
int local_boids_count = all_local_boid_counts[i];
// Boid *local_boids[world->boids.size()];
// int local_boids_count = get_boids_in_view_cone(local_boids, boid, world->view_radius, world->view_angle, world->boids.data(), world->boids.size());
if (local_boids_count > 0) {
// LogTrace("i:%d", i);
// LogTrace("%d", local_boids_count);
Vector2 separation_force = { 0, 0 };
Vector2 flock_center = { 0, 0 };
Vector2 flock_heading = { 0, 0 };
for (int j = 0; j < local_boids_count; j++) {
// LogTrace("%d", local_boids);
flock_heading = Vector2Add(flock_heading, local_boids[j]->dir);
flock_center = Vector2Add(flock_center , local_boids[j]->pos);
@ -271,6 +397,13 @@ static void world_update(World *world, float dt) {
}
}
}
RPROF_STOP();
RPROF_START("Free groups");
for (int i = 0; i < boid_count; i++) {
free(all_local_boids[i]);
}
RPROF_STOP();
}
static void world_draw(World *world, Visuals *visuals) {
@ -398,6 +531,7 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) {
{
VerticalLayout layout = { .x = 230, .y = 65, .gap = 8 };
GuiCheckBox(next_in_layout(&layout, 15, 15), "Looping walls", &world->looping_walls);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 10, 200);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 10, 400);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View angle", &world->view_angle, 0, 2*PI);
@ -419,11 +553,15 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) {
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Separation strength", &world->separation_strength, 0, 100, &ui->separation_strength_edit);
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Collision avoidance strength", &world->collision_avoidance_strength, 0, 100, &ui->collision_avoidance_strength_edit);
}
float window_width = GetScreenWidth();
DrawFPS(window_width - 90, 10);
}
void UpdateDrawFrame() {
float dt = GetFrameTime();
RPROF_START("Update");
#ifdef PLATFORM_WEB
// If user goes to another tab and comes back, the time that the user was gone needs to be ignored.
// So boids wouldn't tunnel through walls and do other shenanigans.
@ -433,18 +571,96 @@ void UpdateDrawFrame() {
#else
world_update(&g_world, dt);
#endif
RPROF_STOP();
// Draw
BeginDrawing();
RPROF_START("Draw");
ClearBackground(g_visuals.bg_color);
world_draw(&g_world, &g_visuals);
ui_draw(&g_world, &g_visuals, &g_ui);
RPROF_STOP();
EndDrawing();
}
// Searches for the number of boids whose world_update() call takes about
// TIME_PER_FRAME seconds.
//
// Each round replaces the contents of world->boids with boid_count freshly
// initialised boids, times 10 calls to world_update() and keeps the fastest.
// The count is doubled while the update is under budget, halved while it is
// over, and bisected against the previous count once the budget has been
// crossed. The search stops when two consecutive counts differ by 10 or less.
//
// world: the world to benchmark with; its boid list is overwritten.
// Returns the last boid count produced by the search.
int estimate_maximum_boid_count(World *world) {
	int boid_count = 1000;
	int prev_boid_count = 0;

	uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);
	do {
		// Rebuild the population on the world that was passed in. The previous
		// version cleared `world` but filled and updated the global g_world.
		world->boids.clear();
		for (int i = 0; i < boid_count; i++) {
			Boid boid;
			boid_rand_init(world, &boid, 0);
			world->boids.push_back(boid);
		}

		// Keep the fastest of several runs to reduce timing noise.
		uint64_t best_duration = UINT64_MAX;
		for (int i = 0; i < 10; i++) {
			uint64_t start = rprof_read_cpu_timer();
			world_update(world, TIME_PER_FRAME);
			uint64_t end = rprof_read_cpu_timer();
			best_duration = std::min(best_duration, (end - start));
		}
		float duration_secs = (float)best_duration/cpu_hz;
		// printf("duration: %f, err:%f", duration_secs, TIME_PER_FRAME - duration_secs);

		// Defaults to the current count so that an exact budget hit (diff == 0)
		// ends the search instead of reading an uninitialised value.
		int new_boid_count = boid_count;
		float diff = TIME_PER_FRAME - duration_secs;
		if (diff < 0) {
			// Over budget: bisect downwards if we just grew, otherwise halve.
			if (prev_boid_count < boid_count) {
				new_boid_count = (prev_boid_count + boid_count)/2;
			} else {
				new_boid_count = boid_count/2;
			}
		} else if (diff > 0) {
			// Under budget: bisect upwards if we just shrank, otherwise double.
			if (prev_boid_count > boid_count) {
				new_boid_count = (prev_boid_count + boid_count)/2;
			} else {
				new_boid_count = boid_count*2;
			}
		}

		prev_boid_count = boid_count;
		boid_count = new_boid_count;
	} while (std::abs(boid_count - prev_boid_count) > 10);

	return boid_count;
}
int main() {
// printf("%d\n", estimate_maximum_boid_count(&g_world));
int screen_width = 1280;
int screen_height = 720;
g_world.size = { (float)screen_width, (float)screen_height };
float border = g_visuals.boid_edge_size;
for (int i = 0; i < 5000; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
}
rprof_init();
for (int i = 0; i < 5; i++) {
world_update(&g_world, TIME_PER_FRAME);
}
rprof_end();
rprof_output(NULL);
return 0;
}
int test_main() {
SetTraceLogLevel(LOG_TRACE);
int screen_width = 1280;
@ -456,15 +672,20 @@ int main() {
g_world.size = { (float)screen_width, (float)screen_height };
float border = g_visuals.boid_edge_size;
for (int i = 0; i < 50; i++) {
float border = g_world.collision_avoidance_distance;
for (int i = 0; i < 3000; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
}
// g_world.boids.push_back({ .pos = { 100, 100 }, .dir = { 1, 0 }, .speed = 10 });
// g_world.boids.push_back({ .pos = { 100, 500 }, .dir = { 1, 0 }, .speed = 10 });
GuiLoadStyleDefault();
rprof_init();
#ifdef PLATFORM_WEB
emscripten_set_main_loop(UpdateDrawFrame, 0, 1);
#else
@ -476,5 +697,9 @@ int main() {
window.Close();
rprof_end();
rprof_output(NULL);
return 0;
}