From 5af509f4fd895d6e990fe683a234b160e4dd25bc Mon Sep 17 00:00:00 2001 From: Rokas Puzonas Date: Mon, 31 Jul 2023 00:10:49 +0300 Subject: [PATCH] use simd for finding neigbours between boids --- Makefile | 4 +- src/boid-list.cpp | 49 ++- src/boid-list.hpp | 30 +- src/boid-playground.hpp | 16 +- src/main.cpp | 629 ++++------------------------------- src/raycast.cpp | 16 +- src/raycast.hpp | 16 + src/rprof.h | 4 +- src/ui.cpp | 105 ++++++ src/world.cpp | 712 ++++++++++++++++++++++++++++++++++++++++ 10 files changed, 963 insertions(+), 618 deletions(-) create mode 100644 src/raycast.hpp create mode 100644 src/ui.cpp create mode 100644 src/world.cpp diff --git a/Makefile b/Makefile index a0ebeb8..457edfa 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ WEB_HEAP_SIZE := 335544320 WEB_STACK_SIZE := 196608 WEB_SHELL := src/shell.html -COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O2 +COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -g LINKER_FLAGS := -lraylib # SOURCES := $(wildcard src/*.cpp) @@ -79,6 +79,8 @@ ifeq ($(PLATFORM), web) LINKER_FLAGS += -s TOTAL_MEMORY=$(WEB_HEAP_SIZE) LINKER_FLAGS += -s STACK_SIZE=$(WEB_STACK_SIZE) LIB_DEPENDENCIES += emsdk +else + COMPILER_FLAGS += -march=native endif LINKER_FLAGS += -L$(RAYLIB_RELEASE_PATH) diff --git a/src/boid-list.cpp b/src/boid-list.cpp index 27ecde0..77641e4 100644 --- a/src/boid-list.cpp +++ b/src/boid-list.cpp @@ -2,11 +2,17 @@ #include "boid-list.hpp" -static BoidsListNodeIterator boid_list_get_iterator(BoidsListNode *node, uint16_t count) { - return { .count = count, .i = 0, .node = node }; +static void boid_list_init(BoidList *list) +{ + list->node.next = NULL; + list->count = 0; } -static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uint16_t *value) { +static BoidsListNodeIterator boid_list_get_iterator(BoidList *list) { + return { .count = list->count, .i = 0, .node = &list->node }; +} + +static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uboid_t *value) { if 
(iterator->count == 0) { return false; } @@ -22,10 +28,10 @@ static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uint16_t *v return true; } -static void boid_list_append(MemoryArena *arena, BoidsListNode *node, uint16_t *count, uint16_t new_boid) { - int left_count = *count; - BoidsListNode *prev = node; - BoidsListNode *curr = node; +static void boid_list_append(MemoryArena *arena, BoidList *list, uboid_t new_boid) { + int left_count = list->count; + BoidListNode *prev = &list->node; + BoidListNode *curr = &list->node; while (left_count >= BOIDS_PER_NODE && curr) { prev = curr; curr = curr->next; @@ -33,19 +39,19 @@ static void boid_list_append(MemoryArena *arena, BoidsListNode *node, uint16_t * } if (curr == NULL) { - curr = (BoidsListNode*)arena_malloc(arena, sizeof(BoidsListNode)); + curr = (BoidListNode*)arena_malloc(arena, sizeof(BoidListNode)); curr->next = NULL; prev->next = curr; } curr->boid_ids[left_count] = new_boid; - (*count)++; + list->count++; } -static void boid_list_append_unique(MemoryArena *arena, BoidsListNode *local_boids, uint16_t *count, uint16_t new_boid) { - int left_count = *count; - BoidsListNode *last = local_boids; - BoidsListNode *curr = local_boids; +static void boid_list_append_unique(MemoryArena *arena, BoidList *list, uboid_t new_boid) { + int left_count = list->count; + BoidListNode *last = &list->node; + BoidListNode *curr = &list->node; while (left_count > 0 && curr) { for (int i = 0; i < std::min(left_count, BOIDS_PER_NODE); i++) { if (curr->boid_ids[i] == new_boid) return; @@ -56,12 +62,23 @@ static void boid_list_append_unique(MemoryArena *arena, BoidsListNode *local_boi left_count -= BOIDS_PER_NODE; } - int idx = (*count) % BOIDS_PER_NODE; + int idx = list->count % BOIDS_PER_NODE; if (idx == BOIDS_PER_NODE-1) { - last->next = (BoidsListNode*)arena_malloc(arena, sizeof(BoidsListNode)); + last->next = (BoidListNode*)arena_malloc(arena, sizeof(BoidListNode)); last->next->next = NULL; } last->boid_ids[idx] = 
new_boid; - (*count)++; + list->count++; +} + +static void boid_list_to_array(uboid_t *result, BoidList *list) +{ + int i = 0; + uboid_t boid; + BoidsListNodeIterator it = boid_list_get_iterator(list); + while (boid_list_iterator_next(&it, &boid)) { + result[i] = boid; + i++; + } } diff --git a/src/boid-list.hpp b/src/boid-list.hpp index 27c274e..60b07f3 100644 --- a/src/boid-list.hpp +++ b/src/boid-list.hpp @@ -1,22 +1,32 @@ +#pragma once #include +#include "boid-playground.hpp" #include "memory-arena.hpp" -#define BOIDS_PER_NODE 128 +#define BOIDS_PER_NODE 64 -struct BoidsListNode { - BoidsListNode *next; - uint16_t boid_ids[BOIDS_PER_NODE]; +struct BoidListNode { + BoidListNode *next; + uboid_t boid_ids[BOIDS_PER_NODE]; +}; + +struct BoidList { + BoidListNode node; + uboid_t count; }; struct BoidsListNodeIterator { - uint16_t count; + uboid_t count; int i; - BoidsListNode *node; + BoidListNode *node; }; -static BoidsListNodeIterator boid_list_get_iterator(BoidsListNode *node, uint16_t count); -static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uint16_t *value); +static BoidsListNodeIterator boid_list_get_iterator(BoidList *list); +static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uboid_t *value); -static void boid_list_append(MemoryArena *arena, BoidsListNode *node, uint16_t *count, uint16_t new_boid); -static void boid_list_append_unique(MemoryArena *arena, BoidsListNode *local_boids, uint16_t *count, uint16_t new_boid); +static void boid_list_init(BoidList *list); +static void boid_list_append(MemoryArena *arena, BoidList *list, uboid_t new_boid); +static void boid_list_append_unique(MemoryArena *arena, BoidList *list, uboid_t new_boid); + +static void boid_list_to_array(uboid_t *result, BoidList *list); diff --git a/src/boid-playground.hpp b/src/boid-playground.hpp index 62a943d..59a05b1 100644 --- a/src/boid-playground.hpp +++ b/src/boid-playground.hpp @@ -3,14 +3,16 @@ #include #include #include +#include "rlgl.h" 
#include "memory-arena.hpp" #define ARRAY_LEN(arr) (sizeof(arr)/sizeof(arr[0])) #define LogTrace(...) TraceLog(LOG_TRACE, __VA_ARGS__) -#define ASSERT(...) assert(__VA_ARGS__) +#define DEBUG_ASSERT(...) assert(__VA_ARGS__) -#define MAX_BOIDS 65536 // 65536 = 2^16 +typedef uint16_t uboid_t; +#define MAX_BOIDS 1 << (sizeof(uboid_t)*8) struct Boid { Vector2 pos; @@ -23,14 +25,6 @@ struct Obstacle { std::vector points; }; -struct RayHitResult { - float hit = -1; - - // TODO: `line1` and `line2` are not used, maybe remove them? - Vector2 line1; - Vector2 line2; -}; - struct World { Vector2 size; std::vector boids; @@ -55,6 +49,8 @@ struct World { // TODO: Function `get_boids_in_view_cone` doesn't work as expected with looping walls bool looping_walls = true; + + bool freeze = false; }; struct Visuals { diff --git a/src/main.cpp b/src/main.cpp index ca97b9b..9988847 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,5 @@ #include "raylib.h" #include "raymath.h" -#include "rlgl.h" #include #include @@ -8,17 +7,23 @@ #include #endif -#define RAYGUI_IMPLEMENTATION -#include "raygui.h" - #define RPROF_IMPLEMENTATION // #define RPROF_STUB_OUT +// #define RPROF_ONLY_TOTAL_TIME #include "rprof.h" #include "boid-playground.hpp" + #include "raycast.cpp" #include "memory-arena.cpp" #include "boid-list.cpp" +#include "world.cpp" +#include "ui.cpp" + +//#define USE_TEST_MAIN + +#define RAYGUI_IMPLEMENTATION +#include "raygui.h" #define FRAMERATE 60 #define TIME_PER_FRAME (1.0/FRAMERATE) @@ -27,517 +32,82 @@ static World g_world; static Visuals g_visuals; static UI g_ui; -static float vector2_atan2(Vector2 a) { - return std::atan2(a.y, a.x); -} +void UpdateDrawFrame(); +static void profiling_test(); -static Vector2 vector2_mul_value(Vector2 v, float value) { - return { v.x * value, v.y * value }; -} +int main() { + // profiling_test(); + // return 0; -static Vector2 vector2_div_value(Vector2 v, float value) { - return { v.x / value, v.y / value }; -} + 
SetTraceLogLevel(LOG_TRACE); -static Vector2 vector2_from_angle(float angle) { - return { std::cos(angle), std::sin(angle) }; -} + int screen_width = 1280; + int screen_height = 720; -static void boid_rand_init(World *world, Boid *boid, float border) { - float world_width = world->size.x; - float world_height = world->size.y; - boid->pos.x = GetRandomValue(border, world_width-border); - boid->pos.y = GetRandomValue(border, world_height-border); + raylib::Window window(screen_width, screen_height, "Boid Playground"); + window.SetState(FLAG_VSYNC_HINT); - float facing = GetRandomValue(0, 2*PI); - boid->dir = Vector2Rotate({ 1, 0 }, facing); - boid->speed = GetRandomValue(world->min_speed, world->max_speed); -} + GuiLoadStyleDefault(); -static void world_init(World *world, float width, float height) { - arena_init(&world->frame_arena, 1024 * 1024 * 256); - g_world.size = { width, height }; -} + rprof_init(); -static void world_free(World *world) { - arena_free(&world->frame_arena); -} + world_init(&g_world, screen_width, screen_height); -static Vector2 get_center_point(std::vector &points) { - Vector2 center = { 0, 0 }; - for (int i = 0; i < points.size(); i++) { - center.x += points[i].x; - center.y += points[i].y; + float border = g_world.collision_avoidance_distance; + for (int i = 0; i < 10000; i++) { + Boid boid; + boid_rand_init(&g_world, &boid, border); + g_world.boids.push_back(boid); } - center.x /= points.size(); - center.y /= points.size(); - return center; + +#ifdef __EMSCRIPTEN__ + emscripten_set_main_loop(UpdateDrawFrame, 0, 1); +#else + SetTargetFPS(FRAMERATE); + while (!window.ShouldClose()) { + UpdateDrawFrame(); + } +#endif + + window.Close(); + world_free(&g_world); + + rprof_end(); + + rprof_output(NULL); + + return 0; } -static void draw_obstacle(Obstacle *obstacle, Color color) { - std::vector *points = &obstacle->points; - int point_count = points->size(); - - rlBegin(RL_TRIANGLES); +static void profiling_test() { + rprof_init(); { - 
rlColor4ub(color.r, color.g, color.b, color.a); - for (int j = 0; j < point_count-1; j++) { - Vector2 *point1 = &(*points)[j]; - Vector2 *point2 = &(*points)[j+1]; - rlVertex2f(point1->x, point1->y); - rlVertex2f(obstacle->center.x, obstacle->center.y); - rlVertex2f(point2->x, point2->y); + world_init(&g_world, 1280, 720); + + SetRandomSeed(10); + + float border = g_visuals.boid_edge_size; + for (int i = 0; i < 45000; i++) { + Boid boid; + boid_rand_init(&g_world, &boid, border); + g_world.boids.push_back(boid); } - rlVertex2f((*points)[point_count-1].x, (*points)[point_count-1].y); - rlVertex2f(obstacle->center.x, obstacle->center.y); - rlVertex2f((*points)[0].x, (*points)[0].y); - } - rlEnd(); -} - -static void draw_obstacle_avoidance_rays(Visuals *visuals, World *world, Boid *boid) { - Vector2 pos = boid->pos; - - int ray_count = world->collision_avoidance_ray_count * 2 + 1; - float ray_angles[ray_count]; - fill_avoidance_ray_angles(ray_angles, ray_count, world->collision_avoidance_ray_angle); - - float facing = std::atan2(boid->dir.y, boid->dir.x); - for (int i = 0; i < ray_count; i++) { - Vector2 ray_dir = { - std::cos(facing + ray_angles[i]), - std::sin(facing + ray_angles[i]) - }; - - RayHitResult hit_result; - get_intersect_with_world(&hit_result, pos, ray_dir, world); - bool hit_obstacle = (hit_result.hit != -1 && hit_result.hit <= world->collision_avoidance_distance); - - Color ray_color = GREEN; - float ray_length = world->collision_avoidance_distance; - if (hit_obstacle) { - ray_length = hit_result.hit; - ray_color = BLUE; + for (int i = 0; i < FRAMERATE; i++) { + world_update(&g_world, TIME_PER_FRAME); } - Vector2 hit_pos = Vector2Add(pos, Vector2Multiply(ray_dir, { ray_length, ray_length })); - DrawLine(pos.x, pos.y, hit_pos.x, hit_pos.y, ray_color); - if (hit_obstacle) { - DrawCircle(hit_pos.x, hit_pos.y, visuals->boid_edge_size * 0.05, ray_color); - } + printf("arena: %ld (%.03fMiB)\n", g_world.frame_arena.offset, (float)g_world.frame_arena.offset / 
1024 / 1024); + world_free(&g_world); } -} + rprof_end(); -static Vector2 get_collision_avoidance_dir(World *world, Boid *boid) { - int ray_count = world->collision_avoidance_ray_count * 2 + 1; - float ray_angles[ray_count]; - fill_avoidance_ray_angles(ray_angles, ray_count, world->collision_avoidance_ray_angle); - - int best_avoidance = -1; - Vector2 avoidance_dir = { 0, 0 }; - float facing = std::atan2(boid->dir.y, boid->dir.x); - bool got_hit = false; - RayHitResult hit_results[ray_count]; - - for (int i = 0; i < ray_count; i++) { - Vector2 ray_dir = vector2_from_angle(facing + ray_angles[i]); - get_intersect_with_world(&hit_results[i], boid->pos, ray_dir, world); - if (hit_results[i].hit != -1 && hit_results[i].hit <= world->collision_avoidance_distance) { - got_hit = true; - } - - if (hit_results[i].hit > hit_results[best_avoidance].hit || best_avoidance == -1) { - avoidance_dir = ray_dir; - best_avoidance = i; - } + printf("interactions: %d\n", interactions); + if (interactions != 33119854) { // 22 051 739 + printf("!!!!!! 
ITERACTIONS DONT MATCH, %d\n", interactions - 33119854); } - if (got_hit) { - return avoidance_dir; - } else { - return { 0, 0 }; - } -} - -static int count_out_of_bounds_boids(World *world) { - int count = 0; - for (int i = 0; i < world->boids.size(); i++) { - Vector2 *pos = &world->boids[i].pos; - - bool x_out_of_bounds = (pos->x <= 0 || pos->x >= world->size.x); - bool y_out_of_bounds = (pos->y <= 0 || pos->y >= world->size.y); - if (x_out_of_bounds || y_out_of_bounds) { - count++; - } - } - return count; -} - -static void draw_circle_sector(Vector2 center, float radius, float start_angle, float end_angle, int segments, Color color) { - rlBegin(RL_TRIANGLES); - float angle_step = (end_angle - start_angle) / segments; - for (int i = 0; i < segments; i++) - { - rlColor4ub(color.r, color.g, color.b, color.a); - float angle = start_angle + i * angle_step; - float nextAngle = start_angle + (i+1) * angle_step; - - rlVertex2f(center.x, center.y); - rlVertex2f(center.x + cosf(nextAngle)*radius, center.y + sinf(nextAngle)*radius); - rlVertex2f(center.x + cosf(angle) *radius, center.y + sinf(angle) *radius); - } - rlEnd(); -} - -static void assign_local_boids(World *world, BoidsListNode *local_boids, uint16_t *local_boid_counts, Boid *boids, uint16_t boid1, uint16_t boid2) { - // Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2)); - float dot_threshold = cosf(world->view_angle/2); - - Vector2 offset = Vector2Subtract(boids[boid2].pos, boids[boid1].pos); - - bool with_in_range = Vector2LengthSqr(offset) <= (world->view_radius * world->view_radius); - if (with_in_range) { - float dot = Vector2DotProduct(boids[boid1].dir, Vector2Normalize(offset)); - if (dot >= dot_threshold) { - boid_list_append(&world->frame_arena, &local_boids[boid1], &local_boid_counts[boid1], boid2); - } - } -} - -static void world_update(World *world, float dt) { - arena_clear(&world->frame_arena); - - Boid *boids = world->boids.data(); - int 
boid_count = world->boids.size(); - - assert(boid_count <= MAX_BOIDS); - - RPROF_START("Alloc groups"); - BoidsListNode *all_local_boids = (BoidsListNode*)arena_malloc(&world->frame_arena, boid_count * sizeof(BoidsListNode)); - uint16_t all_local_boid_counts[boid_count]; - for (int i = 0; i < boid_count; i++) { - all_local_boids[i].next = NULL; - all_local_boid_counts[i] = 0; - } - RPROF_STOP(); - - size_t alloc_chunks = world->frame_arena.offset; - float chunk_size = std::max(world->view_radius, 15.0f); - int chunks_wide = std::ceil(world->size.x / chunk_size) + 1; - int chunks_high = std::ceil(world->size.y / chunk_size) + 1; - RPROF_START("Alloc chunks"); - BoidsListNode *chunks[chunks_high][chunks_wide]; - uint16_t chunk_boid_counts[chunks_high][chunks_wide]; - for (int y = 0; y < chunks_high; y++) { - for (int x = 0; x < chunks_wide; x++) { - chunks[y][x] = (BoidsListNode*)arena_malloc(&world->frame_arena, sizeof(BoidsListNode)); - chunks[y][x]->next = NULL; - chunk_boid_counts[y][x] = 0; - } - } - RPROF_STOP(); - - RPROF_START("Creating chunks"); - for (int i = 0; i < boid_count; i++) { - Boid *boid = &boids[i]; - int chunk_x = boid->pos.x / chunk_size; - int chunk_y = boid->pos.y / chunk_size; - - BoidsListNode *node = chunks[chunk_y][chunk_x]; - uint16_t *count = &chunk_boid_counts[chunk_y][chunk_x]; - boid_list_append(&world->frame_arena, node, count, i); - } - RPROF_STOP(); - - RPROF_START("Calc dot products and ranges (chunked)"); - for (int y = 0; y < chunks_high; y++) { - for (int x = 0; x < chunks_wide; x++) { - BoidsListNode *chunk = chunks[y][x]; - size_t chunk_boid_count = chunk_boid_counts[y][x]; - if (chunk_boid_count == 0) continue; - - for (int oy = -1; oy <= 1; oy++) { - int chunk_y = y + oy; - if (chunk_y < 0 || chunk_y >= chunks_high) continue; - - for (int ox = -1; ox <= 1; ox++) { - int chunk_x = x + ox; - if (chunk_x < 0 || chunk_x >= chunks_wide) continue; - - BoidsListNode *neighbour_chunk = chunks[chunk_y][chunk_x]; - size_t 
neighbour_chunk_boid_count = chunk_boid_counts[chunk_y][chunk_x]; - if (neighbour_chunk_boid_count == 0) continue; - - - uint16_t boid1; - BoidsListNodeIterator it1 = boid_list_get_iterator(chunk, chunk_boid_count); - while (boid_list_iterator_next(&it1, &boid1)) { - uint16_t boid2; - BoidsListNodeIterator it2 = boid_list_get_iterator(neighbour_chunk, neighbour_chunk_boid_count); - while (boid_list_iterator_next(&it2, &boid2)) { - if (boid1 == boid2) continue; - - assign_local_boids(world, all_local_boids, all_local_boid_counts, boids, boid1, boid2); - } - } - } - } - } - } - RPROF_STOP(); - - - RPROF_START("Apply forces"); - for (int i = 0; i < boid_count; i++) { - Boid *boid = &world->boids[i]; - Vector2 acc = { 0, 0 }; - - BoidsListNode *local_boids = &all_local_boids[i]; - int local_boids_count = all_local_boid_counts[i]; - - if (local_boids_count > 0) { - Vector2 separation_force = { 0, 0 }; - Vector2 flock_center = { 0, 0 }; - Vector2 flock_heading = { 0, 0 }; - - uint16_t local_boid_id; - BoidsListNodeIterator it = boid_list_get_iterator(local_boids, local_boids_count); - while (boid_list_iterator_next(&it, &local_boid_id)) { - Boid *local_boid = &boids[local_boid_id]; - flock_heading = Vector2Add(flock_heading, local_boid->dir); - flock_center = Vector2Add(flock_center , local_boid->pos); - - Vector2 pos_diff = Vector2Subtract(boid->pos, local_boid->pos); - float dist_sqr = Vector2LengthSqr(pos_diff); - if (dist_sqr <= world->separation_radius * world->separation_radius) { - separation_force = Vector2Add(separation_force, vector2_div_value(pos_diff, dist_sqr)); - } - } - flock_center = vector2_div_value(flock_center, local_boids_count); - - Vector2 alignment_force = Vector2Normalize(flock_heading); - acc = Vector2Add(acc, vector2_mul_value(alignment_force, world->alignment_strength)); - - Vector2 cohesion_force = Vector2Normalize(Vector2Subtract(flock_center, boid->pos)); - acc = Vector2Add(acc, vector2_mul_value(cohesion_force, world->cohesion_strength)); 
- - separation_force = Vector2Normalize(separation_force); - acc = Vector2Add(acc, vector2_mul_value(separation_force, world->separation_strength)); - } - - // Apply obstacle avoidance to accelaration - Vector2 collision_avoidance = get_collision_avoidance_dir(world, boid); - acc = Vector2Add(acc, vector2_mul_value(collision_avoidance, world->collision_avoidance_strength)); - - acc = vector2_mul_value(acc, world->max_speed); - - // Clamp accelaration - Vector2 clamped_acc = acc; - float acc_size = Vector2Length(acc); - if (acc_size > world->max_steer_speed) { - clamped_acc = vector2_mul_value(Vector2Normalize(acc), world->max_steer_speed); - } - - // Apply accelaration - Vector2 velocity = Vector2Multiply(boid->dir, { boid->speed, boid->speed }); - velocity = Vector2Add(velocity, vector2_mul_value(clamped_acc, dt)); - - boid->dir = Vector2Normalize(velocity); - boid->speed = Vector2Length(velocity); - - boid->speed = Clamp(boid->speed, world->min_speed, world->max_speed); - Vector2 step = vector2_mul_value(boid->dir, boid->speed * dt); - Vector2 target_pos = Vector2Add(boid->pos, step); - - // Check collisions - RayHitResult hit_result; - get_intersect_with_world(&hit_result, target_pos, step, world); - if (hit_result.hit == -1 || hit_result.hit > 2) { - boid->pos = target_pos; - } - - if (world->looping_walls) { - if (boid->pos.x >= world->size.x) { - boid->pos.x -= world->size.x; - } else if (boid->pos.x < 0) { - boid->pos.x += world->size.x; - } - if (boid->pos.y >= world->size.y) { - boid->pos.y -= world->size.y; - } else if (boid->pos.y < 0) { - boid->pos.y += world->size.y; - } - } else { - if (boid->pos.x >= world->size.x) { - boid->pos.x = world->size.x-1; - } else if (boid->pos.x < 0) { - boid->pos.x = 0; - } - if (boid->pos.y >= world->size.y) { - boid->pos.y = world->size.y-1; - } else if (boid->pos.y < 0) { - boid->pos.y = 0; - } - } - } - RPROF_STOP(); -} - -static void world_draw(World *world, Visuals *visuals) { - for (int i = 0; i < 
world->obstacles.size(); i++) { - draw_obstacle(&world->obstacles[i], GRAY); - } - - if (visuals->draw_view_cone) { - Color view_cone_color = Fade(GRAY, 0.4); - for (int i = 0; i < world->boids.size(); i++) { - Boid *boid = &world->boids[i]; - Vector2 pos = boid->pos; - float facing = std::atan2(boid->dir.y, boid->dir.x); - - float view_angle = world->view_angle; - float segments = 16; - - draw_circle_sector(pos, world->view_radius, facing - view_angle/2, facing + view_angle/2, segments, view_cone_color); - } - } - - float boid_length = visuals->boid_edge_size * std::sqrt(3)/2; - float boid_width = visuals->boid_edge_size * 0.6; - for (int i = 0; i < world->boids.size(); i++) { - Boid *boid = &world->boids[i]; - - if (visuals->draw_collision_avoidance_rays) { - draw_obstacle_avoidance_rays(visuals, world, boid); - } - - if (visuals->draw_separation_radius) { - DrawCircleLines(boid->pos.x, boid->pos.y, world->separation_radius, MAGENTA); - } - - Vector2 triangle[] = { - { boid_length*2/3.0f, 0 }, - { -boid_length*1/3.0f, -boid_width/2 }, - { -boid_length*1/3.0f, boid_width/2 }, - }; - - float facing = std::atan2(boid->dir.y, boid->dir.x); - for (int i = 0; i < 3; i++) { - triangle[i] = Vector2Add(boid->pos, Vector2Rotate(triangle[i], facing)); - } - - DrawTriangle(triangle[0], triangle[1], triangle[2], visuals->boid_color); - - if (visuals->draw_boid_direction) { - DrawCircle(boid->pos.x, boid->pos.y, visuals->boid_edge_size * 0.05, RED); - Vector2 look_pos = Vector2Add(boid->pos, vector2_mul_value(boid->dir, visuals->boid_edge_size*1.5)); - DrawLine(boid->pos.x, boid->pos.y, look_pos.x, look_pos.y, RED); - } - } -} - -static Rectangle rect_with_offset(Rectangle rect, Vector2 offset) -{ - return { rect.x + offset.x, rect.y + offset.y, rect.width, rect.height }; -} - -static Rectangle rect_with_offset(Rectangle rect, float x, float y) -{ - return { rect.x + x, rect.y + y, rect.width, rect.height }; -} - -static int gui_valuebox_float(Rectangle rect, const char *text, 
float *value, float min_value, float max_value, bool edit_mode) { - int int_value = *value; - int result = GuiValueBox(rect, text, &int_value, min_value, max_value, edit_mode); - *value = int_value; - return result; -} - -static void gui_valuebox_float(Rectangle rect, const char *text, float *value, float min_value, float max_value, bool *edit_mode) { - if (gui_valuebox_float(rect, text, value, min_value, max_value, *edit_mode)) { - *edit_mode = !*edit_mode; - } -} - -struct VerticalLayout { - float x, y; - float gap; -}; - -static Rectangle next_in_layout(VerticalLayout *layout, float width, float height, float offset_x = 0) { - Rectangle rect = { layout->x + offset_x, layout->y, width, height }; - layout->y += height + layout->gap; - return rect; -} - -static void ui_draw(World *world, Visuals *visuals, UI *ui) { - if (!visuals->show_control_panel) { - visuals->show_control_panel = GuiButton({ 20, 20, 30, 30 }, GuiIconText(ICON_PENCIL_BIG, "")); - return; - } - - float panel_height = 310; - visuals->show_control_panel = !GuiWindowBox({ 20, 20, 660, panel_height }, "Control panel"); - - float group_height = panel_height - 45; - - GuiGroupBox({ 30, 55, 180, group_height }, "Visuals"); - { - VerticalLayout layout = { .x = 40, .y = 65, .gap = 8 }; - - GuiCheckBox(next_in_layout(&layout, 15, 15), "Show direction", &visuals->draw_boid_direction); - GuiCheckBox(next_in_layout(&layout, 15, 15), "Show view cone", &visuals->draw_view_cone); - GuiCheckBox(next_in_layout(&layout, 15, 15), "Show separation radius", &visuals->draw_separation_radius); - GuiCheckBox(next_in_layout(&layout, 15, 15), "Show collision rays", &visuals->draw_collision_avoidance_rays); - GuiCheckBox(next_in_layout(&layout, 15, 15), "Show pulling forces", &visuals->draw_pulling_forces); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Boid size", &visuals->boid_edge_size, 2.5, 50); - - Rectangle boid_color_rect = next_in_layout(&layout, 50, 50); - GuiColorPicker(boid_color_rect, NULL, 
&visuals->boid_color); - GuiLabel(rect_with_offset({ 80, 10, 80, 16 }, boid_color_rect.x, boid_color_rect.y), "Boid color"); - - Rectangle bg_color_rect = next_in_layout(&layout, 50, 50); - GuiColorPicker(bg_color_rect, NULL, &visuals->bg_color); - GuiLabel(rect_with_offset({ 80, 10, 80, 16 }, bg_color_rect.x, bg_color_rect.y), "BG color"); - - // TODO: Add show FPS - // TODO: Add showing out of bounds boids - } - - GuiGroupBox({ 220, 55, 220, group_height }, "Boid"); - { - VerticalLayout layout = { .x = 230, .y = 65, .gap = 8 }; - - GuiCheckBox(next_in_layout(&layout, 15, 15), "Looping walls", &world->looping_walls); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 2.5, 150); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 2.5, 150); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View angle", &world->view_angle, 0, 2*PI); - - gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Min speed", &world->min_speed, 0, 1000, &ui->min_speed_edit); - gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Max speed", &world->max_speed, 0, 1000, &ui->max_speed_edit); - gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Steer speed", &world->max_steer_speed, 0, 1000, &ui->steer_speed_edit); - - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision distance", &world->collision_avoidance_distance, 5, 100); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision ray angle", &world->collision_avoidance_ray_angle, 0, PI); - GuiSpinner(next_in_layout(&layout, 100, 15, 95), "Collision ray count", &world->collision_avoidance_ray_count, 1, 10, false); - } - - GuiGroupBox({ 450, 55, 220, group_height }, "Flock"); - { - VerticalLayout layout = { .x = 605, .y = 65, .gap = 8 }; - gui_valuebox_float(next_in_layout(&layout, 50, 15), "Alignment strength", &world->alignment_strength, 0, 100, &ui->alignment_strength_edit); - gui_valuebox_float(next_in_layout(&layout, 50, 
15), "Cohesion strength", &world->cohesion_strength, 0, 100, &ui->cohesion_strength_edit); - gui_valuebox_float(next_in_layout(&layout, 50, 15), "Separation strength", &world->separation_strength, 0, 100, &ui->separation_strength_edit); - gui_valuebox_float(next_in_layout(&layout, 50, 15), "Collision avoidance strength", &world->collision_avoidance_strength, 0, 100, &ui->collision_avoidance_strength_edit); - } - - float window_width = GetScreenWidth(); - DrawFPS(window_width - 90, 10); - - char boid_label[128] = { 0 }; - snprintf(boid_label, sizeof(boid_label), "%lu boids", world->boids.size()); - DrawText(boid_label, window_width - 125, 35, 20, GREEN); + rprof_output(NULL); } void UpdateDrawFrame() { @@ -566,72 +136,3 @@ void UpdateDrawFrame() { EndDrawing(); } - -void stress_test() { - rprof_init(); - { - world_init(&g_world, 1280, 720); - - float border = g_visuals.boid_edge_size; - for (int i = 0; i < MAX_BOIDS; i++) { - Boid boid; - boid_rand_init(&g_world, &boid, border); - g_world.boids.push_back(boid); - } - - for (int i = 0; i < 1; i++) { - world_update(&g_world, TIME_PER_FRAME); - } - - printf("arena: %ld (%.03fMiB)\n", g_world.frame_arena.offset, (float)g_world.frame_arena.offset / 1024 / 1024); - world_free(&g_world); - } - rprof_end(); - - rprof_output(NULL); -} - -int main() { - SetTraceLogLevel(LOG_TRACE); - - int screen_width = 1280; - int screen_height = 720; - - raylib::Color text_color(LIGHTGRAY); - raylib::Window window(screen_width, screen_height, "Boid Playground"); - window.SetState(FLAG_VSYNC_HINT); - - GuiLoadStyleDefault(); - - rprof_init(); - - world_init(&g_world, screen_width, screen_height); - - float border = g_world.collision_avoidance_distance; - for (int i = 0; i < 30000; i++) { - Boid boid; - boid_rand_init(&g_world, &boid, border); - g_world.boids.push_back(boid); - } - - // g_world.boids.push_back({ .pos = { 800, 105 }}); - // g_world.boids.push_back({ .pos = { 800, 110 }}); - -#ifdef __EMSCRIPTEN__ - 
emscripten_set_main_loop(UpdateDrawFrame, 0, 1); -#else - SetTargetFPS(FRAMERATE); - while (!window.ShouldClose()) { - UpdateDrawFrame(); - } -#endif - - window.Close(); - world_free(&g_world); - - rprof_end(); - - rprof_output(NULL); - - return 0; -} diff --git a/src/raycast.cpp b/src/raycast.cpp index d06483a..abe6166 100644 --- a/src/raycast.cpp +++ b/src/raycast.cpp @@ -1,4 +1,4 @@ -#include "boid-playground.hpp" +#include "raycast.hpp" static float get_intersect_point(Vector2 ray_origin, Vector2 ray_dir, Vector2 line1, Vector2 line2) { Vector2 line_dir = Vector2Subtract(line2, line1); @@ -61,17 +61,3 @@ static void get_intersect_with_world(RayHitResult *result, Vector2 ray_origin, V get_intersect_with_polygon(result, ray_origin, ray_dir, lines, 4); } } - -static void fill_avoidance_ray_angles(float *rays, int ray_count, float ray_angle) { - ASSERT(ray_count >= 1 && "Ray count must be at least 1"); - ASSERT(((ray_count - 1) % 2 == 0) && "Ray count must be a multiple of 2n+1"); - - rays[0] = 0; - - int side_ray_count = ((ray_count-1)/2); - float ray_angle_step = ray_angle / side_ray_count; - for (int i = 0; i < side_ray_count; i++) { - rays[2*i+0 + 1] = ray_angle_step * (i+1); - rays[2*i+1 + 1] = -ray_angle_step * (i+1); - } -} diff --git a/src/raycast.hpp b/src/raycast.hpp new file mode 100644 index 0000000..f815502 --- /dev/null +++ b/src/raycast.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "boid-playground.hpp" + +struct RayHitResult { + float hit = -1; + + // TODO: `line1` and `line2` are not used, maybe remove them? 
+ Vector2 line1; + Vector2 line2; +}; + +static float get_intersect_point(Vector2 ray_origin, Vector2 ray_dir, Vector2 line1, Vector2 line2); +static void set_nearest_hit(RayHitResult *nearest_hit, float hit, Vector2 line1, Vector2 line2); +static void get_intersect_with_polygon(RayHitResult *result, Vector2 ray_origin, Vector2 ray_dir, Vector2 *points, int point_count); +static void get_intersect_with_obstacles(RayHitResult *result, Vector2 ray_origin, Vector2 ray_dir, std::vector *obstacles); +static void get_intersect_with_world(RayHitResult *result, Vector2 ray_origin, Vector2 ray_dir, World *world); diff --git a/src/rprof.h b/src/rprof.h index 644770d..1a0c562 100644 --- a/src/rprof.h +++ b/src/rprof.h @@ -81,7 +81,7 @@ void rprof_output(prof_sort_cmp_cb sort_cb); #define RPROF_START(label) rprof_start(__COUNTER__, label) #define RPROF_STOP() rprof_stop() -#define RPROF_IMPLEMENTATION +#define RPROF_IMPLEMENTATION // TODO: Remove this #define #ifdef RPROF_IMPLEMENTATION // ------------------------ CPU Timing ------------------------- @@ -219,7 +219,7 @@ static uint64_t rprof_get_cpu_timer_hz(uint64_t measure_time_ms) qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb); } - printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time); + printf("\nTotal time taken: %.3fms (%lu) (CPU: ~%.3fGHz)\n", (float)total_time*1000/cpu_hz, total_time, (float)cpu_hz/1000000000); uint32_t duration_max_width = 0; uint32_t percent_max_width = 0; diff --git a/src/ui.cpp b/src/ui.cpp new file mode 100644 index 0000000..7cb10cb --- /dev/null +++ b/src/ui.cpp @@ -0,0 +1,105 @@ +#include "boid-playground.hpp" +#include "raygui.h" + +struct VerticalLayout { + float x, y; + float gap; +}; + +static Rectangle rect_with_offset(Rectangle rect, Vector2 offset) +{ + return { rect.x + offset.x, rect.y + offset.y, rect.width, rect.height }; +} + +static Rectangle rect_with_offset(Rectangle rect, float x, float y) +{ + return { rect.x + x, rect.y + y, 
rect.width, rect.height };
+}
+// Edits a float through GuiValueBox, which works on an int: the value is truncated to int before the call and the int is written back afterwards, so any fractional part is lost. Returns GuiValueBox's result.
+static int gui_valuebox_float(Rectangle rect, const char *text, float *value, float min_value, float max_value, bool edit_mode) {
+    int int_value = *value;
+    int result = GuiValueBox(rect, text, &int_value, min_value, max_value, edit_mode);
+    *value = int_value;
+    return result;
+}
+// Convenience overload that owns the edit-mode toggle: flips *edit_mode whenever the value box returns non-zero.
+static void gui_valuebox_float(Rectangle rect, const char *text, float *value, float min_value, float max_value, bool *edit_mode) {
+    if (gui_valuebox_float(rect, text, value, min_value, max_value, *edit_mode)) {
+        *edit_mode = !*edit_mode;
+    }
+}
+// Returns the rectangle for the next widget at (layout->x + offset_x, layout->y) and moves layout->y down by height + gap.
+static Rectangle next_in_layout(VerticalLayout *layout, float width, float height, float offset_x = 0) {
+    Rectangle rect = { layout->x + offset_x, layout->y, width, height };
+    layout->y += height + layout->gap;
+    return rect;
+}
+// Draws the control panel (or only the button that reopens it) and writes widget changes straight into `world` and `visuals`.
+static void ui_draw(World *world, Visuals *visuals, UI *ui) {
+    if (!visuals->show_control_panel) {
+        visuals->show_control_panel = GuiButton({ 20, 20, 30, 30 }, GuiIconText(ICON_PENCIL_BIG, ""));
+        return;
+    }
+
+    float panel_height = 310;
+    visuals->show_control_panel = !GuiWindowBox({ 20, 20, 660, panel_height }, "Control panel"); // the negated result hides the panel once the window box returns non-zero
+
+    float group_height = panel_height - 45;
+
+    GuiGroupBox({ 30, 55, 180, group_height }, "Visuals");
+    {
+        VerticalLayout layout = { .x = 40, .y = 65, .gap = 8 };
+
+        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show direction", &visuals->draw_boid_direction);
+        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show view cone", &visuals->draw_view_cone);
+        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show separation radius", &visuals->draw_separation_radius);
+        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show collision rays", &visuals->draw_collision_avoidance_rays);
+        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show pulling forces", &visuals->draw_pulling_forces);
+        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Boid size", &visuals->boid_edge_size, 2.5, 50);
+
+        Rectangle boid_color_rect = next_in_layout(&layout, 50, 50);
+
GuiColorPicker(boid_color_rect, NULL, &visuals->boid_color);
+        GuiLabel(rect_with_offset({ 80, 10, 80, 16 }, boid_color_rect.x, boid_color_rect.y), "Boid color"); // label rectangle is positioned relative to the picker's top-left corner
+
+        Rectangle bg_color_rect = next_in_layout(&layout, 50, 50);
+        GuiColorPicker(bg_color_rect, NULL, &visuals->bg_color);
+        GuiLabel(rect_with_offset({ 80, 10, 80, 16 }, bg_color_rect.x, bg_color_rect.y), "BG color");
+
+        // TODO: Add show FPS
+        // TODO: Add showing out of bounds boids
+    }
+    // Second column: per-boid movement and perception settings.
+    GuiGroupBox({ 220, 55, 220, group_height }, "Boid");
+    {
+        VerticalLayout layout = { .x = 230, .y = 65, .gap = 8 };
+
+        GuiCheckBox(next_in_layout(&layout, 15, 15), "Looping walls", &world->looping_walls);
+        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 2.5, 150);
+        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 2.5, 150);
+        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View angle", &world->view_angle, 0, 2*PI);
+        // The speed fields go through the int-backed value box, so they only take whole numbers.
+        gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Min speed", &world->min_speed, 0, 1000, &ui->min_speed_edit);
+        gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Max speed", &world->max_speed, 0, 1000, &ui->max_speed_edit);
+        gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Steer speed", &world->max_steer_speed, 0, 1000, &ui->steer_speed_edit);
+
+        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision distance", &world->collision_avoidance_distance, 5, 100);
+        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision ray angle", &world->collision_avoidance_ray_angle, 0, PI);
+        GuiSpinner(next_in_layout(&layout, 100, 15, 95), "Collision ray count", &world->collision_avoidance_ray_count, 1, 10, false);
+    }
+    // Third column: weights of the flocking forces.
+    GuiGroupBox({ 450, 55, 220, group_height }, "Flock");
+    {
+        VerticalLayout layout = { .x = 605, .y = 65, .gap = 8 };
+        gui_valuebox_float(next_in_layout(&layout, 50, 15), "Alignment strength", &world->alignment_strength, 0, 100, &ui->alignment_strength_edit);
+
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Cohesion strength", &world->cohesion_strength, 0, 100, &ui->cohesion_strength_edit); + gui_valuebox_float(next_in_layout(&layout, 50, 15), "Separation strength", &world->separation_strength, 0, 100, &ui->separation_strength_edit); + gui_valuebox_float(next_in_layout(&layout, 50, 15), "Collision avoidance strength", &world->collision_avoidance_strength, 0, 100, &ui->collision_avoidance_strength_edit); + } + + float window_width = GetScreenWidth(); + DrawFPS(window_width - 90, 10); + + char boid_label[128] = { 0 }; + snprintf(boid_label, sizeof(boid_label), "%lu boids", world->boids.size()); + DrawText(boid_label, window_width - 125, 35, 20, GREEN); +} diff --git a/src/world.cpp b/src/world.cpp new file mode 100644 index 0000000..cc2b48a --- /dev/null +++ b/src/world.cpp @@ -0,0 +1,712 @@ +#include "boid-playground.hpp" +#include "raycast.hpp" +#include "rprof.h" +#include "boid-list.hpp" + +#include + +static float vector2_atan2(Vector2 a) { + return std::atan2(a.y, a.x); +} + +static Vector2 vector2_mul_value(Vector2 v, float value) { + return { v.x * value, v.y * value }; +} + +static Vector2 vector2_div_value(Vector2 v, float value) { + return { v.x / value, v.y / value }; +} + +static Vector2 vector2_from_angle(float angle) { + return { std::cos(angle), std::sin(angle) }; +} + +static Vector2 get_center_point(std::vector &points) { + Vector2 center = { 0, 0 }; + for (int i = 0; i < points.size(); i++) { + center.x += points[i].x; + center.y += points[i].y; + } + center.x /= points.size(); + center.y /= points.size(); + return center; +} + +static void fill_avoidance_ray_angles(float *rays, int ray_count, float ray_angle) { + DEBUG_ASSERT(ray_count >= 1 && "Ray count must be at least 1"); + DEBUG_ASSERT(((ray_count - 1) % 2 == 0) && "Ray count must be a multiple of 2n+1"); + + rays[0] = 0; + + int side_ray_count = ((ray_count-1)/2); + float ray_angle_step = ray_angle / side_ray_count; + for (int i = 0; i < 
side_ray_count; i++) {
+        rays[2*i+0 + 1] = ray_angle_step * (i+1);
+        rays[2*i+1 + 1] = -ray_angle_step * (i+1);
+    }
+}
+// Casts a fan of rays around the boid's heading. Returns the direction of the ray with the largest hit distance if any ray hit something within collision_avoidance_distance, otherwise { 0, 0 }.
+static Vector2 get_collision_avoidance_dir(World *world, Boid *boid) {
+    int ray_count = world->collision_avoidance_ray_count * 2 + 1;
+    float ray_angles[ray_count]; // variable-length array (compiler extension)
+    fill_avoidance_ray_angles(ray_angles, ray_count, world->collision_avoidance_ray_angle);
+
+    int best_avoidance = -1;
+    Vector2 avoidance_dir = { 0, 0 };
+    float facing = std::atan2(boid->dir.y, boid->dir.x);
+    bool got_hit = false;
+    RayHitResult hit_results[ray_count];
+
+    for (int i = 0; i < ray_count; i++) {
+        Vector2 ray_dir = vector2_from_angle(facing + ray_angles[i]);
+        get_intersect_with_world(&hit_results[i], boid->pos, ray_dir, world);
+        if (hit_results[i].hit != -1 && hit_results[i].hit <= world->collision_avoidance_distance) {
+            got_hit = true;
+        }
+        // NOTE(review): a ray that hits nothing reports -1, which sorts below every real distance here, so a clear ray never replaces a blocked one - confirm this is intended.
+        if (best_avoidance == -1 || hit_results[i].hit > hit_results[best_avoidance].hit) { // test for "no best yet" first so hit_results[-1] is never read
+            avoidance_dir = ray_dir;
+            best_avoidance = i;
+        }
+    }
+
+    if (got_hit) {
+        return avoidance_dir;
+    } else {
+        return { 0, 0 };
+    }
+}
+// Counts boids whose position lies on or outside the world rectangle (coordinates <= 0 or >= size).
+static int count_out_of_bounds_boids(World *world) {
+    int count = 0;
+    for (int i = 0; i < world->boids.size(); i++) {
+        Vector2 *pos = &world->boids[i].pos;
+
+        bool x_out_of_bounds = (pos->x <= 0 || pos->x >= world->size.x);
+        bool y_out_of_bounds = (pos->y <= 0 || pos->y >= world->size.y);
+        if (x_out_of_bounds || y_out_of_bounds) {
+            count++;
+        }
+    }
+    return count;
+}
+// Debug helper: prints the eight float lanes of `value` on one line, comma separated.
+static void print_m256_f32(__m256 value)
+{
+    float *value_f32 = (float*)&value;
+    printf("%f", value_f32[0]);
+    for (int i = 1; i < 8; i++) {
+        printf(",%f", value_f32[i]);
+    }
+    printf("\n");
+}
+
+// -------------------- Init/Cleanup ------------------------
+// Gives `boid` a random position at least `border` away from the world edges, plus a random heading and speed.
+static void boid_rand_init(World *world, Boid *boid, float border) {
+    float world_width = world->size.x;
+    float world_height = world->size.y;
+    boid->pos.x = GetRandomValue(border, world_width-border);
+    boid->pos.y = GetRandomValue(border, world_height-border);
+
+
float facing = GetRandomValue(0, 2*PI); + boid->dir = Vector2Rotate({ 1, 0 }, facing); + boid->speed = GetRandomValue(world->min_speed, world->max_speed); +} + +static void world_init(World *world, float width, float height) { + arena_init(&world->frame_arena, 1024 * 1024 * 256); + world->size = { width, height }; +} + +static void world_free(World *world) { + arena_free(&world->frame_arena); +} + +// --------------------- Update ----------------------- + +static int interactions = 0; + +static int nearest_multiple(int num, int divisor) +{ + return (num / divisor + (num % divisor > 0 ? 1 : 0)) * divisor; +} + +// b2b = boid to boid comparison +static void assign_local_boids_b2b(World *world, BoidList *local_boids, uboid_t from_boid, uboid_t to_boid, Vector2 offset, float length_sqr) +{ + assert(to_boid != from_boid); + + // Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2)); + float dot_threshold = cosf(world->view_angle/2); + + bool with_in_range = length_sqr <= (world->view_radius * world->view_radius); + if (with_in_range) { + interactions++; + + Vector2 normalized = offset; + if (length_sqr != 0) + { + float ilength = 1.0f/sqrtf(length_sqr); + normalized.x *= ilength; + normalized.y *= ilength; + } + + // printf("----\n"); + // printf("boid:%d->%d, lengths_sqr:%f, offset:(%f,%f), look:(%f,%f)\n", from_boid, to_boid, lengths_sqr, offset.x, offset.y, boids[from_boid].dir.x, boids[from_boid].dir.y); + + Boid *boids = world->boids.data(); + if (Vector2DotProduct(boids[from_boid].dir, Vector2Negate(normalized)) >= dot_threshold) { + boid_list_append(&world->frame_arena, &local_boids[from_boid], to_boid); + } + if (Vector2DotProduct(boids[to_boid].dir, normalized) >= dot_threshold) { + boid_list_append(&world->frame_arena, &local_boids[to_boid], from_boid); + } + } +} + +// b2l = boid to (list of boids) comparison +static void assign_local_boids_b2l(World *world, BoidList *local_boids, uboid_t from_boid, uboid_t 
*to_boids, uboid_t to_boids_count) +{ + Boid *boids = world->boids.data(); + int to_boids_count_8 = nearest_multiple(to_boids_count, 8); + + Vector2 to_positions[to_boids_count_8]; + for (int i = 0; i < to_boids_count; i++) { + to_positions[i] = boids[to_boids[i]].pos; + } + + // Vector2 offsets[to_boids_count_8]; + // vector2_sub_simd8(offsets, boids[from_boid].pos, to_positions, to_boids_count_8); + + // float lengths_sqrs[to_boids_count_8]; + // vector2_length_sqr_simd8(lengths_sqrs, offsets, to_boids_count_8); + + for (int i = 0; i < to_boids_count; i++) { + uint16_t to_boid = to_boids[i]; + + // Vector2 offset = offsets[i]; + // float lengths_sqr = lengths_sqrs[i]; + + Vector2 offset = Vector2Subtract(boids[from_boid].pos, boids[to_boid].pos); + float lengths_sqr = Vector2LengthSqr(offset); + + assign_local_boids_b2b(world, local_boids, from_boid, to_boid, offset, lengths_sqr); + } +} + +static void vector2_list_to_simd8(Vector2 *vecs, int vec_count, __m256 *vecs_x, __m256 *vecs_y) +{ + assert(vec_count % 8 == 0 && "Vector2 count must be divisible by 8"); + + for (int i = 0; i < vec_count/8; i++) { + vecs_x[i] = _mm256_set_ps( + vecs[8*i+7].x, + vecs[8*i+6].x, + vecs[8*i+5].x, + vecs[8*i+4].x, + vecs[8*i+3].x, + vecs[8*i+2].x, + vecs[8*i+1].x, + vecs[8*i+0].x + ); + + vecs_y[i] = _mm256_set_ps( + vecs[8*i+7].y, + vecs[8*i+6].y, + vecs[8*i+5].y, + vecs[8*i+4].y, + vecs[8*i+3].y, + vecs[8*i+2].y, + vecs[8*i+1].y, + vecs[8*i+0].y + ); + + } +} + +static void world_update(World *world, float dt) { + if (world->freeze) return; + + MemoryArena *arena = &world->frame_arena; + arena_clear(arena); + + Boid *boids = world->boids.data(); + int boid_count = world->boids.size(); + + assert(boid_count <= MAX_BOIDS); + + RPROF_START("Alloc groups"); + BoidList *all_local_boids = (BoidList*)arena_malloc(arena, boid_count * sizeof(BoidList)); + for (int i = 0; i < boid_count; i++) { + boid_list_init(&all_local_boids[i]); + } + RPROF_STOP(); + + size_t alloc_chunks = 
world->frame_arena.offset; + float chunk_size = std::max(world->view_radius, 15.0f); + int chunks_wide = std::ceil(world->size.x / chunk_size) + 1; + int chunks_high = std::ceil(world->size.y / chunk_size) + 1; + RPROF_START("Alloc chunks"); + BoidList *chunks[chunks_high][chunks_wide]; + for (int y = 0; y < chunks_high; y++) { + for (int x = 0; x < chunks_wide; x++) { + chunks[y][x] = (BoidList*)arena_malloc(arena, sizeof(BoidList)); + boid_list_init(chunks[y][x]); + } + } + RPROF_STOP(); + + RPROF_START("Creating chunks"); + for (int i = 0; i < boid_count; i++) { + Boid *boid = &boids[i]; + int chunk_x = boid->pos.x / chunk_size; + int chunk_y = boid->pos.y / chunk_size; + + boid_list_append(arena, chunks[chunk_y][chunk_x], i); + } + RPROF_STOP(); + + RPROF_START("Extracting boid positions"); + Vector2 *boid_dirs = (Vector2*)arena_malloc(arena, sizeof(Vector2)*boid_count); + Vector2 *boid_positions = (Vector2*)arena_malloc(arena, sizeof(Vector2)*boid_count); + for (int i = 0; i < boid_count; i++) { + boid_positions[i] = boids[i].pos; + boid_dirs[i] = boids[i].dir; + } + RPROF_STOP(); + + + int chunk_cmps = 0; + RPROF_START("Calc dot products and ranges (chunked)"); + // TODO: Use temp memory arena inside this profile block + // int32_t *in_range_mask_f32 = (int32_t*)arena_malloc(arena, sizeof(int32_t)*8, 32); + int32_t *do_append_mask1_f32 = (int32_t*)arena_malloc(arena, sizeof(int32_t)*8, 32); + int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(arena, sizeof(int32_t)*8, 32); + + for (int y = 0; y < chunks_high; y++) { + + Vector2 neighbours[] = { { 1, 0 }, { 0, 1 }, { 1, 1 }, { -1, 1 } }; + struct b2l_cmp { + uboid_t from; + uboid_t *to_list; + uboid_t to_list_count; + + __m256 *to_list_pos_x; + __m256 *to_list_pos_y; + __m256 *to_list_dir_x; + __m256 *to_list_dir_y; + int to_list_pos_count; + }; + + for (int x = 0; x < chunks_wide; x++) { + BoidList *chunk = chunks[y][x]; + if (chunk->count == 0) continue; + + std::vector b2l_cmps; // TODO: remove usage of 
std::vec, it is kinda slow
+            b2l_cmps.reserve(64);
+            // Copy this chunk's ids, positions and directions into flat arrays; the +8 leaves room to read a full register past the last boid.
+            uboid_t chunk_boids[chunk->count];
+            Vector2 chunk_boids_pos[chunk->count + 8];
+            Vector2 chunk_boids_dir[chunk->count + 8]; // NOTE(review): unlike the positions, the padding of this array is not zeroed
+            memset(chunk_boids_pos, 0, sizeof(Vector2) * (chunk->count + 8));
+            boid_list_to_array(chunk_boids, chunk);
+            for (int i = 0; i < chunk->count; i++) {
+                uboid_t boid = chunk_boids[i];
+                chunk_boids_pos[i] = boid_positions[boid];
+                chunk_boids_dir[i] = boid_dirs[boid];
+            }
+            // Pairs inside the chunk: boid i is compared against boids i+1..count-1 only.
+            for (int i = 0; i < chunk->count-1; i++) {
+                uboid_t from_boid = chunk_boids[i];
+                uboid_t *to_boids = &chunk_boids[i+1];
+                uboid_t to_boids_count = chunk->count-i-1;
+                Vector2 *to_chunk_boids_pos = &chunk_boids_pos[i+1];
+                Vector2 *to_chunk_boids_dir = &chunk_boids_dir[i+1];
+
+                b2l_cmp cmp = {};
+                cmp.from = from_boid;
+                cmp.to_list = to_boids;
+                cmp.to_list_count = to_boids_count;
+
+                int to_boids_count_8 = nearest_multiple(to_boids_count, 8);
+                cmp.to_list_pos_count = to_boids_count_8/8;
+                cmp.to_list_pos_x = (__m256*)arena_malloc(arena, sizeof(__m256) * cmp.to_list_pos_count, sizeof(__m256));
+                cmp.to_list_pos_y = (__m256*)arena_malloc(arena, sizeof(__m256) * cmp.to_list_pos_count, sizeof(__m256));
+                cmp.to_list_dir_x = (__m256*)arena_malloc(arena, sizeof(__m256) * cmp.to_list_pos_count, sizeof(__m256));
+                cmp.to_list_dir_y = (__m256*)arena_malloc(arena, sizeof(__m256) * cmp.to_list_pos_count, sizeof(__m256));
+                vector2_list_to_simd8(to_chunk_boids_pos, to_boids_count_8, cmp.to_list_pos_x, cmp.to_list_pos_y);
+                vector2_list_to_simd8(to_chunk_boids_dir, to_boids_count_8, cmp.to_list_dir_x, cmp.to_list_dir_y);
+                b2l_cmps.push_back(cmp);
+            }
+            // Pairs across chunks: every boid of this chunk against each non-empty listed neighbour chunk.
+            for (int i = 0; i < ARRAY_LEN(neighbours); i++) {
+                int chunk_y = y + neighbours[i].y;
+                int chunk_x = x + neighbours[i].x;
+                if (chunk_y < 0 || chunk_y >= chunks_high) continue;
+                if (chunk_x < 0 || chunk_x >= chunks_wide) continue;
+
+                BoidList *neighbour_chunk = chunks[chunk_y][chunk_x];
+                if (neighbour_chunk->count == 0) continue;
+
+                // TODO: alloc 'neighbour_ids' 
into scratch arena
+                uboid_t *neighbour_ids = (uboid_t*)arena_malloc(arena, sizeof(uboid_t)*neighbour_chunk->count);
+                boid_list_to_array(neighbour_ids, neighbour_chunk);
+                // Flat copies of the neighbour chunk, padded by 8 so a whole register can be read past the last boid.
+                Vector2 neighbour_boids_pos[neighbour_chunk->count + 8];
+                Vector2 neighbour_boids_dir[neighbour_chunk->count + 8];
+                memset(neighbour_boids_pos, 0, sizeof(Vector2) * (neighbour_chunk->count + 8));
+                for (int i = 0; i < neighbour_chunk->count; i++) { // this `i` shadows the neighbour index of the enclosing loop
+                    neighbour_boids_pos[i] = boid_positions[neighbour_ids[i]];
+                    neighbour_boids_dir[i] = boid_dirs[neighbour_ids[i]];
+                }
+                int to_boids_count_8 = nearest_multiple(neighbour_chunk->count, 8);
+                __m256 *to_list_pos_x = (__m256*)arena_malloc(arena, sizeof(__m256) * to_boids_count_8/8, sizeof(__m256));
+                __m256 *to_list_pos_y = (__m256*)arena_malloc(arena, sizeof(__m256) * to_boids_count_8/8, sizeof(__m256));
+                __m256 *to_list_dir_x = (__m256*)arena_malloc(arena, sizeof(__m256) * to_boids_count_8/8, sizeof(__m256));
+                __m256 *to_list_dir_y = (__m256*)arena_malloc(arena, sizeof(__m256) * to_boids_count_8/8, sizeof(__m256));
+                vector2_list_to_simd8(neighbour_boids_pos, to_boids_count_8, to_list_pos_x, to_list_pos_y);
+                vector2_list_to_simd8(neighbour_boids_dir, to_boids_count_8, to_list_dir_x, to_list_dir_y);
+                // The packed registers are built once per neighbour chunk and shared by every comparison queued below.
+                uboid_t boid1;
+                BoidsListNodeIterator it1 = boid_list_get_iterator(chunk);
+                while (boid_list_iterator_next(&it1, &boid1)) {
+                    b2l_cmp cmp = {};
+                    cmp.from = boid1;
+                    cmp.to_list = neighbour_ids;
+                    cmp.to_list_count = neighbour_chunk->count;
+                    cmp.to_list_pos_x = to_list_pos_x;
+                    cmp.to_list_pos_y = to_list_pos_y;
+                    cmp.to_list_dir_x = to_list_dir_x;
+                    cmp.to_list_dir_y = to_list_dir_y;
+                    cmp.to_list_pos_count = to_boids_count_8/8;
+                    b2l_cmps.push_back(cmp);
+                }
+            }
+            // Run every queued comparison, eight target boids per iteration.
+            for (int i = 0; i < b2l_cmps.size(); i++) {
+                b2l_cmp *cmp = &b2l_cmps[i];
+                uboid_t from_boid = cmp->from;
+
+                Vector2 from_pos = boid_positions[from_boid];
+                Vector2 from_dir = boid_dirs[from_boid];
+
+                float view_radius_sqr = world->view_radius * world->view_radius;
+
+                // 
Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
+                float dot_threshold_single = cosf(world->view_angle/2);
+                __m256 dot_threshold = _mm256_set1_ps(dot_threshold_single);
+                // Broadcast the per-comparison constants to all eight lanes.
+                __m256 view_radius = _mm256_set1_ps(view_radius_sqr); // holds the squared radius, despite the name
+                __m256 from_pos_x = _mm256_set1_ps(from_pos.x);
+                __m256 from_pos_y = _mm256_set1_ps(from_pos.y);
+                __m256 from_dir_x = _mm256_set1_ps(from_dir.x);
+                __m256 from_dir_y = _mm256_set1_ps(from_dir.y);
+                __m256 zero = _mm256_set1_ps(0);
+                __m256 negative_one = _mm256_set1_ps(-1);
+                __m256i to_list_count = _mm256_set1_epi32(cmp->to_list_count); // not referenced again in the code visible here
+
+                for (int j = 0; j < cmp->to_list_pos_count; j++) {
+                    __m256 to_pos_x = cmp->to_list_pos_x[j];
+                    __m256 to_pos_y = cmp->to_list_pos_y[j];
+                    __m256 to_dir_x = cmp->to_list_dir_x[j];
+                    __m256 to_dir_y = cmp->to_list_dir_y[j];
+                    // Offset from each target boid to from_boid, per lane.
+                    __m256 sub_x = _mm256_sub_ps(from_pos_x, to_pos_x);
+                    __m256 sub_y = _mm256_sub_ps(from_pos_y, cmp->to_list_pos_y[j]);
+                    // length_sqr = sub_y*sub_y + sub_x*sub_x, then a lane mask for "within view radius".
+                    __m256 x_sqr = _mm256_mul_ps(sub_x, sub_x);
+                    __m256 length_sqr = _mm256_fmadd_ps(sub_y, sub_y, x_sqr);
+                    __m256i in_range_mask = (__m256i)_mm256_cmp_ps(length_sqr, view_radius, _CMP_LE_OQ);
+                    // Inverse length from the approximate reciprocal square root; lanes with zero length get 0 instead.
+                    __m256 is_length_zero = _mm256_cmp_ps(length_sqr, zero, _CMP_EQ_OQ);
+                    __m256 ilength = _mm256_blendv_ps(_mm256_rsqrt_ps(length_sqr), zero, is_length_zero);
+
+                    __m256 x_norm = _mm256_mul_ps(sub_x, ilength);
+                    __m256 y_norm = _mm256_mul_ps(sub_y, ilength);
+                    // Negated direction: points from from_boid towards the target.
+                    __m256 x_neg_norm = _mm256_mul_ps(x_norm, negative_one);
+                    __m256 y_neg_norm = _mm256_mul_ps(y_norm, negative_one);
+                    // Mask 1: the target is inside from_boid's view cone and within range.
+                    __m256 dot_product1 = _mm256_fmadd_ps(from_dir_y, y_neg_norm, _mm256_mul_ps(from_dir_x, x_neg_norm));
+                    __m256 in_angle_mask1 = _mm256_cmp_ps(dot_product1, dot_threshold, _CMP_GE_OQ);
+                    __m256 do_append_mask1 = _mm256_and_ps(in_angle_mask1, (__m256)in_range_mask);
+                    // Mask 2: from_boid is inside the target's view cone and within range.
+                    __m256 dot_product2 = _mm256_fmadd_ps(to_dir_y, y_norm, _mm256_mul_ps(to_dir_x, x_norm));
+                    __m256 in_angle_mask2 = _mm256_cmp_ps(dot_product2, dot_threshold, _CMP_GE_OQ);
+
__m256 do_append_mask2 = _mm256_and_ps(in_angle_mask2, (__m256)in_range_mask);
+                    // Spill both masks to memory so the lanes can be tested one by one.
+                    _mm256_store_ps((float*)do_append_mask1_f32, do_append_mask1);
+                    _mm256_store_ps((float*)do_append_mask2_f32, do_append_mask2);
+                    for (int k = 0; k < 8; k++) {
+                        uboid_t to_boid_idx = 8*j + k;
+                        if (to_boid_idx >= cmp->to_list_count) break; // the remaining lanes are padding, not real boids
+
+                        uboid_t to_boid = cmp->to_list[to_boid_idx];
+                        if (do_append_mask1_f32[k]) {
+                            boid_list_append(&world->frame_arena, &all_local_boids[from_boid], to_boid);
+                            interactions++;
+                        }
+                        if (do_append_mask2_f32[k]) {
+                            boid_list_append(&world->frame_arena, &all_local_boids[to_boid], from_boid);
+                            interactions++;
+                        }
+                    }
+                }
+            }
+
+            /*
+            uboid_t chunk_boids[chunk->count];
+            boid_list_to_array(chunk_boids, chunk);
+            for (int i = 0; i < chunk->count-1; i++) {
+                uboid_t from_boid = chunk_boids[i];
+                uboid_t *to_boids = &chunk_boids[i+1];
+                uboid_t to_boids_count = chunk->count-i-1;
+                assign_local_boids_b2l(world, all_local_boids, from_boid, to_boids, to_boids_count);
+            }
+
+            for (int i = 0; i < ARRAY_LEN(neighbours); i++) {
+                int chunk_y = y + neighbours[i].y;
+                int chunk_x = x + neighbours[i].x;
+                if (chunk_y < 0 || chunk_y >= chunks_high) continue;
+                if (chunk_x < 0 || chunk_x >= chunks_wide) continue;
+
+                BoidList *neighbour_chunk = chunks[chunk_y][chunk_x];
+                if (neighbour_chunk->count == 0) continue;
+
+                uboid_t neighbour_ids[neighbour_chunk->count];
+                boid_list_to_array(neighbour_ids, neighbour_chunk);
+
+                uboid_t boid1;
+                BoidsListNodeIterator it1 = boid_list_get_iterator(chunk);
+                while (boid_list_iterator_next(&it1, &boid1)) {
+                    assign_local_boids_b2l(world, all_local_boids, boid1, neighbour_ids, neighbour_chunk->count);
+                }
+            }
+            */
+        }
+
+
+    }
+    RPROF_STOP();
+    // Second phase: turn each boid's neighbour list into steering forces and move the boid.
+    RPROF_START("Apply forces");
+    for (int i = 0; i < boid_count; i++) {
+        Boid *boid = &world->boids[i];
+        Vector2 acc = { 0, 0 };
+
+        BoidList *local_boids = &all_local_boids[i];
+
+        if (local_boids->count > 0) {
+            Vector2 separation_force = { 0, 0 };
+            Vector2 flock_center = { 0, 0 };
+ Vector2 flock_heading = { 0, 0 }; + + uboid_t local_boid_id; + BoidsListNodeIterator it = boid_list_get_iterator(local_boids); + while (boid_list_iterator_next(&it, &local_boid_id)) { + Boid *local_boid = &boids[local_boid_id]; + flock_heading = Vector2Add(flock_heading, local_boid->dir); + flock_center = Vector2Add(flock_center , local_boid->pos); + + Vector2 pos_diff = Vector2Subtract(boid->pos, local_boid->pos); + float dist_sqr = Vector2LengthSqr(pos_diff); + if (dist_sqr <= world->separation_radius * world->separation_radius) { + separation_force = Vector2Add(separation_force, vector2_div_value(pos_diff, dist_sqr)); + } + } + flock_center = vector2_div_value(flock_center, local_boids->count); + + Vector2 alignment_force = Vector2Normalize(flock_heading); + acc = Vector2Add(acc, vector2_mul_value(alignment_force, world->alignment_strength)); + + Vector2 cohesion_force = Vector2Normalize(Vector2Subtract(flock_center, boid->pos)); + acc = Vector2Add(acc, vector2_mul_value(cohesion_force, world->cohesion_strength)); + + separation_force = Vector2Normalize(separation_force); + acc = Vector2Add(acc, vector2_mul_value(separation_force, world->separation_strength)); + } + + // Apply obstacle avoidance to accelaration + Vector2 collision_avoidance = get_collision_avoidance_dir(world, boid); + acc = Vector2Add(acc, vector2_mul_value(collision_avoidance, world->collision_avoidance_strength)); + + acc = vector2_mul_value(acc, world->max_speed); + + // Clamp accelaration + Vector2 clamped_acc = acc; + float acc_size = Vector2Length(acc); + if (acc_size > world->max_steer_speed) { + clamped_acc = vector2_mul_value(Vector2Normalize(acc), world->max_steer_speed); + } + + // Apply accelaration + Vector2 velocity = Vector2Multiply(boid->dir, { boid->speed, boid->speed }); + velocity = Vector2Add(velocity, vector2_mul_value(clamped_acc, dt)); + + boid->dir = Vector2Normalize(velocity); + boid->speed = Vector2Length(velocity); + + boid->speed = Clamp(boid->speed, 
world->min_speed, world->max_speed); + Vector2 step = vector2_mul_value(boid->dir, boid->speed * dt); + Vector2 target_pos = Vector2Add(boid->pos, step); + + // Check collisions + RayHitResult hit_result; + get_intersect_with_world(&hit_result, target_pos, step, world); + if (hit_result.hit == -1 || hit_result.hit > 2) { + boid->pos = target_pos; + } + + if (world->looping_walls) { + if (boid->pos.x >= world->size.x) { + boid->pos.x -= world->size.x; + } else if (boid->pos.x < 0) { + boid->pos.x += world->size.x; + } + if (boid->pos.y >= world->size.y) { + boid->pos.y -= world->size.y; + } else if (boid->pos.y < 0) { + boid->pos.y += world->size.y; + } + } else { + if (boid->pos.x >= world->size.x) { + boid->pos.x = world->size.x-1; + } else if (boid->pos.x < 0) { + boid->pos.x = 0; + } + if (boid->pos.y >= world->size.y) { + boid->pos.y = world->size.y-1; + } else if (boid->pos.y < 0) { + boid->pos.y = 0; + } + } + } + RPROF_STOP(); +} + +// --------------------- Draw ------------------------ + +static void draw_obstacle(Obstacle *obstacle, Color color) { + std::vector *points = &obstacle->points; + int point_count = points->size(); + + rlBegin(RL_TRIANGLES); + { + rlColor4ub(color.r, color.g, color.b, color.a); + for (int j = 0; j < point_count-1; j++) { + Vector2 *point1 = &(*points)[j]; + Vector2 *point2 = &(*points)[j+1]; + rlVertex2f(point1->x, point1->y); + rlVertex2f(obstacle->center.x, obstacle->center.y); + rlVertex2f(point2->x, point2->y); + } + + rlVertex2f((*points)[point_count-1].x, (*points)[point_count-1].y); + rlVertex2f(obstacle->center.x, obstacle->center.y); + rlVertex2f((*points)[0].x, (*points)[0].y); + } + rlEnd(); +} + +static void draw_obstacle_avoidance_rays(Visuals *visuals, World *world, Boid *boid) { + Vector2 pos = boid->pos; + + int ray_count = world->collision_avoidance_ray_count * 2 + 1; + float ray_angles[ray_count]; + fill_avoidance_ray_angles(ray_angles, ray_count, world->collision_avoidance_ray_angle); + + float facing = 
std::atan2(boid->dir.y, boid->dir.x);
+    for (int i = 0; i < ray_count; i++) {
+        Vector2 ray_dir = {
+            std::cos(facing + ray_angles[i]),
+            std::sin(facing + ray_angles[i])
+        };
+
+        RayHitResult hit_result;
+        get_intersect_with_world(&hit_result, pos, ray_dir, world);
+        bool hit_obstacle = (hit_result.hit != -1 && hit_result.hit <= world->collision_avoidance_distance);
+        // A clear ray is drawn at full avoidance distance; a blocked one stops at the hit.
+        Color ray_color = GREEN;
+        float ray_length = world->collision_avoidance_distance;
+        if (hit_obstacle) {
+            ray_length = hit_result.hit;
+            ray_color = BLUE;
+        }
+
+        Vector2 hit_pos = Vector2Add(pos, Vector2Multiply(ray_dir, { ray_length, ray_length }));
+        DrawLine(pos.x, pos.y, hit_pos.x, hit_pos.y, ray_color);
+        if (hit_obstacle) {
+            DrawCircle(hit_pos.x, hit_pos.y, visuals->boid_edge_size * 0.05, ray_color);
+        }
+    }
+}
+// Fills the sector of a circle between start_angle and end_angle (radians) as `segments` triangles that share `center`.
+static void draw_circle_sector(Vector2 center, float radius, float start_angle, float end_angle, int segments, Color color) {
+    rlBegin(RL_TRIANGLES);
+    float angle_step = (end_angle - start_angle) / segments;
+    for (int i = 0; i < segments; i++)
+    {
+        rlColor4ub(color.r, color.g, color.b, color.a);
+        float angle = start_angle + i * angle_step;
+        float nextAngle = start_angle + (i+1) * angle_step;
+
+        rlVertex2f(center.x, center.y);
+        rlVertex2f(center.x + cosf(nextAngle)*radius, center.y + sinf(nextAngle)*radius);
+        rlVertex2f(center.x + cosf(angle) *radius, center.y + sinf(angle) *radius);
+    }
+    rlEnd();
+}
+// Draws obstacles, then the optional view cones, then every boid with its optional debug overlays.
+static void world_draw(World *world, Visuals *visuals) {
+    for (int i = 0; i < world->obstacles.size(); i++) {
+        draw_obstacle(&world->obstacles[i], GRAY);
+    }
+    // View cones are drawn before the boids so the boids end up on top.
+    if (visuals->draw_view_cone) {
+        Color view_cone_color = Fade(GRAY, 0.4);
+        for (int i = 0; i < world->boids.size(); i++) {
+            Boid *boid = &world->boids[i];
+            Vector2 pos = boid->pos;
+            float facing = std::atan2(boid->dir.y, boid->dir.x);
+
+            float view_angle = world->view_angle;
+            float segments = 16; // converted to int when passed to draw_circle_sector
+
+            draw_circle_sector(pos, world->view_radius, facing - view_angle/2, facing + view_angle/2, segments, 
view_cone_color);
+        }
+    }
+    // Boid body: length is edge * sqrt(3)/2, width is 0.6 * edge.
+    float boid_length = visuals->boid_edge_size * std::sqrt(3)/2;
+    float boid_width = visuals->boid_edge_size * 0.6;
+    for (int i = 0; i < world->boids.size(); i++) {
+        Boid *boid = &world->boids[i];
+
+        if (visuals->draw_collision_avoidance_rays) {
+            draw_obstacle_avoidance_rays(visuals, world, boid);
+        }
+
+        if (visuals->draw_separation_radius) {
+            DrawCircleLines(boid->pos.x, boid->pos.y, world->separation_radius, MAGENTA);
+        }
+        // Triangle in local space with the tip on +x; the 2/3 and 1/3 split puts the local origin a third of the length from the base.
+        Vector2 triangle[] = {
+            { boid_length*2/3.0f, 0 },
+            { -boid_length*1/3.0f, -boid_width/2 },
+            { -boid_length*1/3.0f, boid_width/2 },
+        };
+        // Rotate to the boid's heading and move to its position.
+        float facing = std::atan2(boid->dir.y, boid->dir.x);
+        for (int i = 0; i < 3; i++) { // this `i` shadows the boid index of the enclosing loop
+            triangle[i] = Vector2Add(boid->pos, Vector2Rotate(triangle[i], facing));
+        }
+
+        DrawTriangle(triangle[0], triangle[1], triangle[2], visuals->boid_color);
+
+        if (visuals->draw_boid_direction) {
+            DrawCircle(boid->pos.x, boid->pos.y, visuals->boid_edge_size * 0.05, RED);
+            Vector2 look_pos = Vector2Add(boid->pos, vector2_mul_value(boid->dir, visuals->boid_edge_size*1.5));
+            DrawLine(boid->pos.x, boid->pos.y, look_pos.x, look_pos.y, RED);
+        }
+    }
+}