use simd for finding neighbours between boids

This commit is contained in:
Rokas Puzonas 2023-07-31 00:10:49 +03:00
parent 9b2e3cafeb
commit 5af509f4fd
10 changed files with 963 additions and 618 deletions

View File

@ -14,7 +14,7 @@ WEB_HEAP_SIZE := 335544320
WEB_STACK_SIZE := 196608
WEB_SHELL := src/shell.html
COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O2
COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -O3 -g
LINKER_FLAGS := -lraylib
# SOURCES := $(wildcard src/*.cpp)
@ -79,6 +79,8 @@ ifeq ($(PLATFORM), web)
LINKER_FLAGS += -s TOTAL_MEMORY=$(WEB_HEAP_SIZE)
LINKER_FLAGS += -s STACK_SIZE=$(WEB_STACK_SIZE)
LIB_DEPENDENCIES += emsdk
else
COMPILER_FLAGS += -march=native
endif
LINKER_FLAGS += -L$(RAYLIB_RELEASE_PATH)

View File

@ -2,11 +2,17 @@
#include "boid-list.hpp"
static BoidsListNodeIterator boid_list_get_iterator(BoidsListNode *node, uint16_t count) {
return { .count = count, .i = 0, .node = node };
// Resets `list` to the empty state: zero stored ids and no node chained
// after the first node that is embedded in the list itself.
static void boid_list_init(BoidList *list)
{
	list->count = 0;
	list->node.next = NULL;
}
static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uint16_t *value) {
// Builds an iterator positioned at the start of `list`: it begins at the
// list's embedded first node, at index 0, with `list->count` ids to visit.
static BoidsListNodeIterator boid_list_get_iterator(BoidList *list) {
	BoidsListNodeIterator iterator = { .count = list->count, .i = 0, .node = &list->node };
	return iterator;
}
static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uboid_t *value) {
if (iterator->count == 0) {
return false;
}
@ -22,10 +28,10 @@ static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uint16_t *v
return true;
}
static void boid_list_append(MemoryArena *arena, BoidsListNode *node, uint16_t *count, uint16_t new_boid) {
int left_count = *count;
BoidsListNode *prev = node;
BoidsListNode *curr = node;
static void boid_list_append(MemoryArena *arena, BoidList *list, uboid_t new_boid) {
int left_count = list->count;
BoidListNode *prev = &list->node;
BoidListNode *curr = &list->node;
while (left_count >= BOIDS_PER_NODE && curr) {
prev = curr;
curr = curr->next;
@ -33,19 +39,19 @@ static void boid_list_append(MemoryArena *arena, BoidsListNode *node, uint16_t *
}
if (curr == NULL) {
curr = (BoidsListNode*)arena_malloc(arena, sizeof(BoidsListNode));
curr = (BoidListNode*)arena_malloc(arena, sizeof(BoidListNode));
curr->next = NULL;
prev->next = curr;
}
curr->boid_ids[left_count] = new_boid;
(*count)++;
list->count++;
}
static void boid_list_append_unique(MemoryArena *arena, BoidsListNode *local_boids, uint16_t *count, uint16_t new_boid) {
int left_count = *count;
BoidsListNode *last = local_boids;
BoidsListNode *curr = local_boids;
static void boid_list_append_unique(MemoryArena *arena, BoidList *list, uboid_t new_boid) {
int left_count = list->count;
BoidListNode *last = &list->node;
BoidListNode *curr = &list->node;
while (left_count > 0 && curr) {
for (int i = 0; i < std::min(left_count, BOIDS_PER_NODE); i++) {
if (curr->boid_ids[i] == new_boid) return;
@ -56,12 +62,23 @@ static void boid_list_append_unique(MemoryArena *arena, BoidsListNode *local_boi
left_count -= BOIDS_PER_NODE;
}
int idx = (*count) % BOIDS_PER_NODE;
int idx = list->count % BOIDS_PER_NODE;
if (idx == BOIDS_PER_NODE-1) {
last->next = (BoidsListNode*)arena_malloc(arena, sizeof(BoidsListNode));
last->next = (BoidListNode*)arena_malloc(arena, sizeof(BoidListNode));
last->next->next = NULL;
}
last->boid_ids[idx] = new_boid;
(*count)++;
list->count++;
}
// Copies every boid id held in `list` into `result`, in iteration order.
// `result` must have room for at least `list->count` elements; nothing is
// written past the last id produced by the iterator.
static void boid_list_to_array(uboid_t *result, BoidList *list)
{
	BoidsListNodeIterator cursor = boid_list_get_iterator(list);
	uboid_t id;
	for (int out = 0; boid_list_iterator_next(&cursor, &id); out++) {
		result[out] = id;
	}
}

View File

@ -1,22 +1,32 @@
#pragma once
#include <inttypes.h>
#include "boid-playground.hpp"
#include "memory-arena.hpp"
#define BOIDS_PER_NODE 128
#define BOIDS_PER_NODE 64
struct BoidsListNode {
BoidsListNode *next;
uint16_t boid_ids[BOIDS_PER_NODE];
struct BoidListNode {
BoidListNode *next;
uboid_t boid_ids[BOIDS_PER_NODE];
};
// A list of boid ids stored in a chain of fixed-size nodes.
struct BoidList {
// First node of the chain, embedded in the list itself; additional nodes
// are reached through `node.next`.
BoidListNode node;
// Number of ids currently stored in the list.
// NOTE(review): `uboid_t` is 16 bits wide here, so this presumably cannot
// represent MAX_BOIDS ids in a single list - confirm against callers.
uboid_t count;
};
struct BoidsListNodeIterator {
uint16_t count;
uboid_t count;
int i;
BoidsListNode *node;
BoidListNode *node;
};
static BoidsListNodeIterator boid_list_get_iterator(BoidsListNode *node, uint16_t count);
static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uint16_t *value);
static BoidsListNodeIterator boid_list_get_iterator(BoidList *list);
static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uboid_t *value);
static void boid_list_append(MemoryArena *arena, BoidsListNode *node, uint16_t *count, uint16_t new_boid);
static void boid_list_append_unique(MemoryArena *arena, BoidsListNode *local_boids, uint16_t *count, uint16_t new_boid);
static void boid_list_init(BoidList *list);
static void boid_list_append(MemoryArena *arena, BoidList *list, uboid_t new_boid);
static void boid_list_append_unique(MemoryArena *arena, BoidList *list, uboid_t new_boid);
static void boid_list_to_array(uboid_t *result, BoidList *list);

View File

@ -3,14 +3,16 @@
#include <vector>
#include <assert.h>
#include <raylib-cpp.hpp>
#include "rlgl.h"
#include "memory-arena.hpp"
#define ARRAY_LEN(arr) (sizeof(arr)/sizeof(arr[0]))
#define LogTrace(...) TraceLog(LOG_TRACE, __VA_ARGS__)
#define ASSERT(...) assert(__VA_ARGS__)
#define DEBUG_ASSERT(...) assert(__VA_ARGS__)
#define MAX_BOIDS 65536 // 65536 = 2^16
// Integer type used to store a boid id.
typedef uint16_t uboid_t;
// Number of distinct ids representable by `uboid_t` (2^16 for uint16_t).
// The expansion is fully parenthesized so it stays a single operand inside
// larger expressions: without the outer parentheses `MAX_BOIDS - 1` would
// expand to `1 << (16 - 1)`.
// The shift is done on an `int`, so this is only valid while `uboid_t` is
// narrower than `int`.
#define MAX_BOIDS (1 << (sizeof(uboid_t)*8))
struct Boid {
Vector2 pos;
@ -23,14 +25,6 @@ struct Obstacle {
std::vector<Vector2> points;
};
struct RayHitResult {
float hit = -1;
// TODO: `line1` and `line2` are not used, maybe remove them?
Vector2 line1;
Vector2 line2;
};
struct World {
Vector2 size;
std::vector<Boid> boids;
@ -55,6 +49,8 @@ struct World {
// TODO: Function `get_boids_in_view_cone` doesn't work as expected with looping walls
bool looping_walls = true;
bool freeze = false;
};
struct Visuals {

View File

@ -1,6 +1,5 @@
#include "raylib.h"
#include "raymath.h"
#include "rlgl.h"
#include <cmath>
#include <optional>
@ -8,17 +7,23 @@
#include <emscripten/emscripten.h>
#endif
#define RAYGUI_IMPLEMENTATION
#include "raygui.h"
#define RPROF_IMPLEMENTATION
// #define RPROF_STUB_OUT
// #define RPROF_ONLY_TOTAL_TIME
#include "rprof.h"
#include "boid-playground.hpp"
#include "raycast.cpp"
#include "memory-arena.cpp"
#include "boid-list.cpp"
#include "world.cpp"
#include "ui.cpp"
//#define USE_TEST_MAIN
#define RAYGUI_IMPLEMENTATION
#include "raygui.h"
#define FRAMERATE 60
#define TIME_PER_FRAME (1.0/FRAMERATE)
@ -27,517 +32,82 @@ static World g_world;
static Visuals g_visuals;
static UI g_ui;
static float vector2_atan2(Vector2 a) {
return std::atan2(a.y, a.x);
}
void UpdateDrawFrame();
static void profiling_test();
static Vector2 vector2_mul_value(Vector2 v, float value) {
return { v.x * value, v.y * value };
}
int main() {
// profiling_test();
// return 0;
static Vector2 vector2_div_value(Vector2 v, float value) {
return { v.x / value, v.y / value };
}
SetTraceLogLevel(LOG_TRACE);
static Vector2 vector2_from_angle(float angle) {
return { std::cos(angle), std::sin(angle) };
}
int screen_width = 1280;
int screen_height = 720;
static void boid_rand_init(World *world, Boid *boid, float border) {
float world_width = world->size.x;
float world_height = world->size.y;
boid->pos.x = GetRandomValue(border, world_width-border);
boid->pos.y = GetRandomValue(border, world_height-border);
raylib::Window window(screen_width, screen_height, "Boid Playground");
window.SetState(FLAG_VSYNC_HINT);
float facing = GetRandomValue(0, 2*PI);
boid->dir = Vector2Rotate({ 1, 0 }, facing);
boid->speed = GetRandomValue(world->min_speed, world->max_speed);
}
GuiLoadStyleDefault();
static void world_init(World *world, float width, float height) {
arena_init(&world->frame_arena, 1024 * 1024 * 256);
g_world.size = { width, height };
}
rprof_init();
static void world_free(World *world) {
arena_free(&world->frame_arena);
}
world_init(&g_world, screen_width, screen_height);
static Vector2 get_center_point(std::vector<Vector2> &points) {
Vector2 center = { 0, 0 };
for (int i = 0; i < points.size(); i++) {
center.x += points[i].x;
center.y += points[i].y;
float border = g_world.collision_avoidance_distance;
for (int i = 0; i < 10000; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
}
center.x /= points.size();
center.y /= points.size();
return center;
#ifdef __EMSCRIPTEN__
emscripten_set_main_loop(UpdateDrawFrame, 0, 1);
#else
SetTargetFPS(FRAMERATE);
while (!window.ShouldClose()) {
UpdateDrawFrame();
}
#endif
window.Close();
world_free(&g_world);
rprof_end();
rprof_output(NULL);
return 0;
}
static void draw_obstacle(Obstacle *obstacle, Color color) {
std::vector<Vector2> *points = &obstacle->points;
int point_count = points->size();
rlBegin(RL_TRIANGLES);
static void profiling_test() {
rprof_init();
{
rlColor4ub(color.r, color.g, color.b, color.a);
for (int j = 0; j < point_count-1; j++) {
Vector2 *point1 = &(*points)[j];
Vector2 *point2 = &(*points)[j+1];
rlVertex2f(point1->x, point1->y);
rlVertex2f(obstacle->center.x, obstacle->center.y);
rlVertex2f(point2->x, point2->y);
world_init(&g_world, 1280, 720);
SetRandomSeed(10);
float border = g_visuals.boid_edge_size;
for (int i = 0; i < 45000; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
}
rlVertex2f((*points)[point_count-1].x, (*points)[point_count-1].y);
rlVertex2f(obstacle->center.x, obstacle->center.y);
rlVertex2f((*points)[0].x, (*points)[0].y);
}
rlEnd();
}
static void draw_obstacle_avoidance_rays(Visuals *visuals, World *world, Boid *boid) {
Vector2 pos = boid->pos;
int ray_count = world->collision_avoidance_ray_count * 2 + 1;
float ray_angles[ray_count];
fill_avoidance_ray_angles(ray_angles, ray_count, world->collision_avoidance_ray_angle);
float facing = std::atan2(boid->dir.y, boid->dir.x);
for (int i = 0; i < ray_count; i++) {
Vector2 ray_dir = {
std::cos(facing + ray_angles[i]),
std::sin(facing + ray_angles[i])
};
RayHitResult hit_result;
get_intersect_with_world(&hit_result, pos, ray_dir, world);
bool hit_obstacle = (hit_result.hit != -1 && hit_result.hit <= world->collision_avoidance_distance);
Color ray_color = GREEN;
float ray_length = world->collision_avoidance_distance;
if (hit_obstacle) {
ray_length = hit_result.hit;
ray_color = BLUE;
for (int i = 0; i < FRAMERATE; i++) {
world_update(&g_world, TIME_PER_FRAME);
}
Vector2 hit_pos = Vector2Add(pos, Vector2Multiply(ray_dir, { ray_length, ray_length }));
DrawLine(pos.x, pos.y, hit_pos.x, hit_pos.y, ray_color);
if (hit_obstacle) {
DrawCircle(hit_pos.x, hit_pos.y, visuals->boid_edge_size * 0.05, ray_color);
}
printf("arena: %ld (%.03fMiB)\n", g_world.frame_arena.offset, (float)g_world.frame_arena.offset / 1024 / 1024);
world_free(&g_world);
}
}
rprof_end();
static Vector2 get_collision_avoidance_dir(World *world, Boid *boid) {
int ray_count = world->collision_avoidance_ray_count * 2 + 1;
float ray_angles[ray_count];
fill_avoidance_ray_angles(ray_angles, ray_count, world->collision_avoidance_ray_angle);
int best_avoidance = -1;
Vector2 avoidance_dir = { 0, 0 };
float facing = std::atan2(boid->dir.y, boid->dir.x);
bool got_hit = false;
RayHitResult hit_results[ray_count];
for (int i = 0; i < ray_count; i++) {
Vector2 ray_dir = vector2_from_angle(facing + ray_angles[i]);
get_intersect_with_world(&hit_results[i], boid->pos, ray_dir, world);
if (hit_results[i].hit != -1 && hit_results[i].hit <= world->collision_avoidance_distance) {
got_hit = true;
}
if (hit_results[i].hit > hit_results[best_avoidance].hit || best_avoidance == -1) {
avoidance_dir = ray_dir;
best_avoidance = i;
}
printf("interactions: %d\n", interactions);
if (interactions != 33119854) { // 22 051 739
printf("!!!!!! ITERACTIONS DONT MATCH, %d\n", interactions - 33119854);
}
if (got_hit) {
return avoidance_dir;
} else {
return { 0, 0 };
}
}
static int count_out_of_bounds_boids(World *world) {
int count = 0;
for (int i = 0; i < world->boids.size(); i++) {
Vector2 *pos = &world->boids[i].pos;
bool x_out_of_bounds = (pos->x <= 0 || pos->x >= world->size.x);
bool y_out_of_bounds = (pos->y <= 0 || pos->y >= world->size.y);
if (x_out_of_bounds || y_out_of_bounds) {
count++;
}
}
return count;
}
static void draw_circle_sector(Vector2 center, float radius, float start_angle, float end_angle, int segments, Color color) {
rlBegin(RL_TRIANGLES);
float angle_step = (end_angle - start_angle) / segments;
for (int i = 0; i < segments; i++)
{
rlColor4ub(color.r, color.g, color.b, color.a);
float angle = start_angle + i * angle_step;
float nextAngle = start_angle + (i+1) * angle_step;
rlVertex2f(center.x, center.y);
rlVertex2f(center.x + cosf(nextAngle)*radius, center.y + sinf(nextAngle)*radius);
rlVertex2f(center.x + cosf(angle) *radius, center.y + sinf(angle) *radius);
}
rlEnd();
}
static void assign_local_boids(World *world, BoidsListNode *local_boids, uint16_t *local_boid_counts, Boid *boids, uint16_t boid1, uint16_t boid2) {
// Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
float dot_threshold = cosf(world->view_angle/2);
Vector2 offset = Vector2Subtract(boids[boid2].pos, boids[boid1].pos);
bool with_in_range = Vector2LengthSqr(offset) <= (world->view_radius * world->view_radius);
if (with_in_range) {
float dot = Vector2DotProduct(boids[boid1].dir, Vector2Normalize(offset));
if (dot >= dot_threshold) {
boid_list_append(&world->frame_arena, &local_boids[boid1], &local_boid_counts[boid1], boid2);
}
}
}
static void world_update(World *world, float dt) {
arena_clear(&world->frame_arena);
Boid *boids = world->boids.data();
int boid_count = world->boids.size();
assert(boid_count <= MAX_BOIDS);
RPROF_START("Alloc groups");
BoidsListNode *all_local_boids = (BoidsListNode*)arena_malloc(&world->frame_arena, boid_count * sizeof(BoidsListNode));
uint16_t all_local_boid_counts[boid_count];
for (int i = 0; i < boid_count; i++) {
all_local_boids[i].next = NULL;
all_local_boid_counts[i] = 0;
}
RPROF_STOP();
size_t alloc_chunks = world->frame_arena.offset;
float chunk_size = std::max(world->view_radius, 15.0f);
int chunks_wide = std::ceil(world->size.x / chunk_size) + 1;
int chunks_high = std::ceil(world->size.y / chunk_size) + 1;
RPROF_START("Alloc chunks");
BoidsListNode *chunks[chunks_high][chunks_wide];
uint16_t chunk_boid_counts[chunks_high][chunks_wide];
for (int y = 0; y < chunks_high; y++) {
for (int x = 0; x < chunks_wide; x++) {
chunks[y][x] = (BoidsListNode*)arena_malloc(&world->frame_arena, sizeof(BoidsListNode));
chunks[y][x]->next = NULL;
chunk_boid_counts[y][x] = 0;
}
}
RPROF_STOP();
RPROF_START("Creating chunks");
for (int i = 0; i < boid_count; i++) {
Boid *boid = &boids[i];
int chunk_x = boid->pos.x / chunk_size;
int chunk_y = boid->pos.y / chunk_size;
BoidsListNode *node = chunks[chunk_y][chunk_x];
uint16_t *count = &chunk_boid_counts[chunk_y][chunk_x];
boid_list_append(&world->frame_arena, node, count, i);
}
RPROF_STOP();
RPROF_START("Calc dot products and ranges (chunked)");
for (int y = 0; y < chunks_high; y++) {
for (int x = 0; x < chunks_wide; x++) {
BoidsListNode *chunk = chunks[y][x];
size_t chunk_boid_count = chunk_boid_counts[y][x];
if (chunk_boid_count == 0) continue;
for (int oy = -1; oy <= 1; oy++) {
int chunk_y = y + oy;
if (chunk_y < 0 || chunk_y >= chunks_high) continue;
for (int ox = -1; ox <= 1; ox++) {
int chunk_x = x + ox;
if (chunk_x < 0 || chunk_x >= chunks_wide) continue;
BoidsListNode *neighbour_chunk = chunks[chunk_y][chunk_x];
size_t neighbour_chunk_boid_count = chunk_boid_counts[chunk_y][chunk_x];
if (neighbour_chunk_boid_count == 0) continue;
uint16_t boid1;
BoidsListNodeIterator it1 = boid_list_get_iterator(chunk, chunk_boid_count);
while (boid_list_iterator_next(&it1, &boid1)) {
uint16_t boid2;
BoidsListNodeIterator it2 = boid_list_get_iterator(neighbour_chunk, neighbour_chunk_boid_count);
while (boid_list_iterator_next(&it2, &boid2)) {
if (boid1 == boid2) continue;
assign_local_boids(world, all_local_boids, all_local_boid_counts, boids, boid1, boid2);
}
}
}
}
}
}
RPROF_STOP();
RPROF_START("Apply forces");
for (int i = 0; i < boid_count; i++) {
Boid *boid = &world->boids[i];
Vector2 acc = { 0, 0 };
BoidsListNode *local_boids = &all_local_boids[i];
int local_boids_count = all_local_boid_counts[i];
if (local_boids_count > 0) {
Vector2 separation_force = { 0, 0 };
Vector2 flock_center = { 0, 0 };
Vector2 flock_heading = { 0, 0 };
uint16_t local_boid_id;
BoidsListNodeIterator it = boid_list_get_iterator(local_boids, local_boids_count);
while (boid_list_iterator_next(&it, &local_boid_id)) {
Boid *local_boid = &boids[local_boid_id];
flock_heading = Vector2Add(flock_heading, local_boid->dir);
flock_center = Vector2Add(flock_center , local_boid->pos);
Vector2 pos_diff = Vector2Subtract(boid->pos, local_boid->pos);
float dist_sqr = Vector2LengthSqr(pos_diff);
if (dist_sqr <= world->separation_radius * world->separation_radius) {
separation_force = Vector2Add(separation_force, vector2_div_value(pos_diff, dist_sqr));
}
}
flock_center = vector2_div_value(flock_center, local_boids_count);
Vector2 alignment_force = Vector2Normalize(flock_heading);
acc = Vector2Add(acc, vector2_mul_value(alignment_force, world->alignment_strength));
Vector2 cohesion_force = Vector2Normalize(Vector2Subtract(flock_center, boid->pos));
acc = Vector2Add(acc, vector2_mul_value(cohesion_force, world->cohesion_strength));
separation_force = Vector2Normalize(separation_force);
acc = Vector2Add(acc, vector2_mul_value(separation_force, world->separation_strength));
}
// Apply obstacle avoidance to accelaration
Vector2 collision_avoidance = get_collision_avoidance_dir(world, boid);
acc = Vector2Add(acc, vector2_mul_value(collision_avoidance, world->collision_avoidance_strength));
acc = vector2_mul_value(acc, world->max_speed);
// Clamp accelaration
Vector2 clamped_acc = acc;
float acc_size = Vector2Length(acc);
if (acc_size > world->max_steer_speed) {
clamped_acc = vector2_mul_value(Vector2Normalize(acc), world->max_steer_speed);
}
// Apply accelaration
Vector2 velocity = Vector2Multiply(boid->dir, { boid->speed, boid->speed });
velocity = Vector2Add(velocity, vector2_mul_value(clamped_acc, dt));
boid->dir = Vector2Normalize(velocity);
boid->speed = Vector2Length(velocity);
boid->speed = Clamp(boid->speed, world->min_speed, world->max_speed);
Vector2 step = vector2_mul_value(boid->dir, boid->speed * dt);
Vector2 target_pos = Vector2Add(boid->pos, step);
// Check collisions
RayHitResult hit_result;
get_intersect_with_world(&hit_result, target_pos, step, world);
if (hit_result.hit == -1 || hit_result.hit > 2) {
boid->pos = target_pos;
}
if (world->looping_walls) {
if (boid->pos.x >= world->size.x) {
boid->pos.x -= world->size.x;
} else if (boid->pos.x < 0) {
boid->pos.x += world->size.x;
}
if (boid->pos.y >= world->size.y) {
boid->pos.y -= world->size.y;
} else if (boid->pos.y < 0) {
boid->pos.y += world->size.y;
}
} else {
if (boid->pos.x >= world->size.x) {
boid->pos.x = world->size.x-1;
} else if (boid->pos.x < 0) {
boid->pos.x = 0;
}
if (boid->pos.y >= world->size.y) {
boid->pos.y = world->size.y-1;
} else if (boid->pos.y < 0) {
boid->pos.y = 0;
}
}
}
RPROF_STOP();
}
static void world_draw(World *world, Visuals *visuals) {
for (int i = 0; i < world->obstacles.size(); i++) {
draw_obstacle(&world->obstacles[i], GRAY);
}
if (visuals->draw_view_cone) {
Color view_cone_color = Fade(GRAY, 0.4);
for (int i = 0; i < world->boids.size(); i++) {
Boid *boid = &world->boids[i];
Vector2 pos = boid->pos;
float facing = std::atan2(boid->dir.y, boid->dir.x);
float view_angle = world->view_angle;
float segments = 16;
draw_circle_sector(pos, world->view_radius, facing - view_angle/2, facing + view_angle/2, segments, view_cone_color);
}
}
float boid_length = visuals->boid_edge_size * std::sqrt(3)/2;
float boid_width = visuals->boid_edge_size * 0.6;
for (int i = 0; i < world->boids.size(); i++) {
Boid *boid = &world->boids[i];
if (visuals->draw_collision_avoidance_rays) {
draw_obstacle_avoidance_rays(visuals, world, boid);
}
if (visuals->draw_separation_radius) {
DrawCircleLines(boid->pos.x, boid->pos.y, world->separation_radius, MAGENTA);
}
Vector2 triangle[] = {
{ boid_length*2/3.0f, 0 },
{ -boid_length*1/3.0f, -boid_width/2 },
{ -boid_length*1/3.0f, boid_width/2 },
};
float facing = std::atan2(boid->dir.y, boid->dir.x);
for (int i = 0; i < 3; i++) {
triangle[i] = Vector2Add(boid->pos, Vector2Rotate(triangle[i], facing));
}
DrawTriangle(triangle[0], triangle[1], triangle[2], visuals->boid_color);
if (visuals->draw_boid_direction) {
DrawCircle(boid->pos.x, boid->pos.y, visuals->boid_edge_size * 0.05, RED);
Vector2 look_pos = Vector2Add(boid->pos, vector2_mul_value(boid->dir, visuals->boid_edge_size*1.5));
DrawLine(boid->pos.x, boid->pos.y, look_pos.x, look_pos.y, RED);
}
}
}
static Rectangle rect_with_offset(Rectangle rect, Vector2 offset)
{
return { rect.x + offset.x, rect.y + offset.y, rect.width, rect.height };
}
static Rectangle rect_with_offset(Rectangle rect, float x, float y)
{
return { rect.x + x, rect.y + y, rect.width, rect.height };
}
static int gui_valuebox_float(Rectangle rect, const char *text, float *value, float min_value, float max_value, bool edit_mode) {
int int_value = *value;
int result = GuiValueBox(rect, text, &int_value, min_value, max_value, edit_mode);
*value = int_value;
return result;
}
static void gui_valuebox_float(Rectangle rect, const char *text, float *value, float min_value, float max_value, bool *edit_mode) {
if (gui_valuebox_float(rect, text, value, min_value, max_value, *edit_mode)) {
*edit_mode = !*edit_mode;
}
}
struct VerticalLayout {
float x, y;
float gap;
};
static Rectangle next_in_layout(VerticalLayout *layout, float width, float height, float offset_x = 0) {
Rectangle rect = { layout->x + offset_x, layout->y, width, height };
layout->y += height + layout->gap;
return rect;
}
static void ui_draw(World *world, Visuals *visuals, UI *ui) {
if (!visuals->show_control_panel) {
visuals->show_control_panel = GuiButton({ 20, 20, 30, 30 }, GuiIconText(ICON_PENCIL_BIG, ""));
return;
}
float panel_height = 310;
visuals->show_control_panel = !GuiWindowBox({ 20, 20, 660, panel_height }, "Control panel");
float group_height = panel_height - 45;
GuiGroupBox({ 30, 55, 180, group_height }, "Visuals");
{
VerticalLayout layout = { .x = 40, .y = 65, .gap = 8 };
GuiCheckBox(next_in_layout(&layout, 15, 15), "Show direction", &visuals->draw_boid_direction);
GuiCheckBox(next_in_layout(&layout, 15, 15), "Show view cone", &visuals->draw_view_cone);
GuiCheckBox(next_in_layout(&layout, 15, 15), "Show separation radius", &visuals->draw_separation_radius);
GuiCheckBox(next_in_layout(&layout, 15, 15), "Show collision rays", &visuals->draw_collision_avoidance_rays);
GuiCheckBox(next_in_layout(&layout, 15, 15), "Show pulling forces", &visuals->draw_pulling_forces);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Boid size", &visuals->boid_edge_size, 2.5, 50);
Rectangle boid_color_rect = next_in_layout(&layout, 50, 50);
GuiColorPicker(boid_color_rect, NULL, &visuals->boid_color);
GuiLabel(rect_with_offset({ 80, 10, 80, 16 }, boid_color_rect.x, boid_color_rect.y), "Boid color");
Rectangle bg_color_rect = next_in_layout(&layout, 50, 50);
GuiColorPicker(bg_color_rect, NULL, &visuals->bg_color);
GuiLabel(rect_with_offset({ 80, 10, 80, 16 }, bg_color_rect.x, bg_color_rect.y), "BG color");
// TODO: Add show FPS
// TODO: Add showing out of bounds boids
}
GuiGroupBox({ 220, 55, 220, group_height }, "Boid");
{
VerticalLayout layout = { .x = 230, .y = 65, .gap = 8 };
GuiCheckBox(next_in_layout(&layout, 15, 15), "Looping walls", &world->looping_walls);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 2.5, 150);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 2.5, 150);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View angle", &world->view_angle, 0, 2*PI);
gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Min speed", &world->min_speed, 0, 1000, &ui->min_speed_edit);
gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Max speed", &world->max_speed, 0, 1000, &ui->max_speed_edit);
gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Steer speed", &world->max_steer_speed, 0, 1000, &ui->steer_speed_edit);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision distance", &world->collision_avoidance_distance, 5, 100);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision ray angle", &world->collision_avoidance_ray_angle, 0, PI);
GuiSpinner(next_in_layout(&layout, 100, 15, 95), "Collision ray count", &world->collision_avoidance_ray_count, 1, 10, false);
}
GuiGroupBox({ 450, 55, 220, group_height }, "Flock");
{
VerticalLayout layout = { .x = 605, .y = 65, .gap = 8 };
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Alignment strength", &world->alignment_strength, 0, 100, &ui->alignment_strength_edit);
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Cohesion strength", &world->cohesion_strength, 0, 100, &ui->cohesion_strength_edit);
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Separation strength", &world->separation_strength, 0, 100, &ui->separation_strength_edit);
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Collision avoidance strength", &world->collision_avoidance_strength, 0, 100, &ui->collision_avoidance_strength_edit);
}
float window_width = GetScreenWidth();
DrawFPS(window_width - 90, 10);
char boid_label[128] = { 0 };
snprintf(boid_label, sizeof(boid_label), "%lu boids", world->boids.size());
DrawText(boid_label, window_width - 125, 35, 20, GREEN);
rprof_output(NULL);
}
void UpdateDrawFrame() {
@ -566,72 +136,3 @@ void UpdateDrawFrame() {
EndDrawing();
}
void stress_test() {
rprof_init();
{
world_init(&g_world, 1280, 720);
float border = g_visuals.boid_edge_size;
for (int i = 0; i < MAX_BOIDS; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
}
for (int i = 0; i < 1; i++) {
world_update(&g_world, TIME_PER_FRAME);
}
printf("arena: %ld (%.03fMiB)\n", g_world.frame_arena.offset, (float)g_world.frame_arena.offset / 1024 / 1024);
world_free(&g_world);
}
rprof_end();
rprof_output(NULL);
}
int main() {
SetTraceLogLevel(LOG_TRACE);
int screen_width = 1280;
int screen_height = 720;
raylib::Color text_color(LIGHTGRAY);
raylib::Window window(screen_width, screen_height, "Boid Playground");
window.SetState(FLAG_VSYNC_HINT);
GuiLoadStyleDefault();
rprof_init();
world_init(&g_world, screen_width, screen_height);
float border = g_world.collision_avoidance_distance;
for (int i = 0; i < 30000; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
}
// g_world.boids.push_back({ .pos = { 800, 105 }});
// g_world.boids.push_back({ .pos = { 800, 110 }});
#ifdef __EMSCRIPTEN__
emscripten_set_main_loop(UpdateDrawFrame, 0, 1);
#else
SetTargetFPS(FRAMERATE);
while (!window.ShouldClose()) {
UpdateDrawFrame();
}
#endif
window.Close();
world_free(&g_world);
rprof_end();
rprof_output(NULL);
return 0;
}

View File

@ -1,4 +1,4 @@
#include "boid-playground.hpp"
#include "raycast.hpp"
static float get_intersect_point(Vector2 ray_origin, Vector2 ray_dir, Vector2 line1, Vector2 line2) {
Vector2 line_dir = Vector2Subtract(line2, line1);
@ -61,17 +61,3 @@ static void get_intersect_with_world(RayHitResult *result, Vector2 ray_origin, V
get_intersect_with_polygon(result, ray_origin, ray_dir, lines, 4);
}
}
static void fill_avoidance_ray_angles(float *rays, int ray_count, float ray_angle) {
ASSERT(ray_count >= 1 && "Ray count must be at least 1");
ASSERT(((ray_count - 1) % 2 == 0) && "Ray count must be a multiple of 2n+1");
rays[0] = 0;
int side_ray_count = ((ray_count-1)/2);
float ray_angle_step = ray_angle / side_ray_count;
for (int i = 0; i < side_ray_count; i++) {
rays[2*i+0 + 1] = ray_angle_step * (i+1);
rays[2*i+1 + 1] = -ray_angle_step * (i+1);
}
}

16
src/raycast.hpp Normal file
View File

@ -0,0 +1,16 @@
#pragma once
#include "boid-playground.hpp"
// Result of a ray intersection query (filled in by the get_intersect_with_*
// functions declared below).
struct RayHitResult {
// Defaults to -1.
// NOTE(review): callers appear to treat -1 as "nothing hit" and any other
// value as a length along the ray - confirm against raycast.cpp.
float hit = -1;
// TODO: `line1` and `line2` are not used, maybe remove them?
Vector2 line1;
Vector2 line2;
};
static float get_intersect_point(Vector2 ray_origin, Vector2 ray_dir, Vector2 line1, Vector2 line2);
static void set_nearest_hit(RayHitResult *nearest_hit, float hit, Vector2 line1, Vector2 line2);
static void get_intersect_with_polygon(RayHitResult *result, Vector2 ray_origin, Vector2 ray_dir, Vector2 *points, int point_count);
static void get_intersect_with_obstacles(RayHitResult *result, Vector2 ray_origin, Vector2 ray_dir, std::vector<Obstacle> *obstacles);
static void get_intersect_with_world(RayHitResult *result, Vector2 ray_origin, Vector2 ray_dir, World *world);

View File

@ -81,7 +81,7 @@ void rprof_output(prof_sort_cmp_cb sort_cb);
#define RPROF_START(label) rprof_start(__COUNTER__, label)
#define RPROF_STOP() rprof_stop()
#define RPROF_IMPLEMENTATION
#define RPROF_IMPLEMENTATION // TODO: Remove this #define
#ifdef RPROF_IMPLEMENTATION
// ------------------------ CPU Timing -------------------------
@ -219,7 +219,7 @@ static uint64_t rprof_get_cpu_timer_hz(uint64_t measure_time_ms)
qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb);
}
printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time);
printf("\nTotal time taken: %.3fms (%lu) (CPU: ~%.3fGHz)\n", (float)total_time*1000/cpu_hz, total_time, (float)cpu_hz/1000000000);
uint32_t duration_max_width = 0;
uint32_t percent_max_width = 0;

105
src/ui.cpp Normal file
View File

@ -0,0 +1,105 @@
#include "boid-playground.hpp"
#include "raygui.h"
// Cursor for stacking UI widgets top-to-bottom (consumed by `next_in_layout`).
struct VerticalLayout {
// Position where the next widget will be placed; `y` is advanced after
// every widget, `x` stays fixed.
float x, y;
// Extra vertical space added below each widget.
float gap;
};
// Returns a copy of `rect` translated by `offset`; width and height are kept.
static Rectangle rect_with_offset(Rectangle rect, Vector2 offset)
{
	Rectangle moved = rect;
	moved.x += offset.x;
	moved.y += offset.y;
	return moved;
}
// Returns a copy of `rect` translated by (`x`, `y`); width and height are kept.
static Rectangle rect_with_offset(Rectangle rect, float x, float y)
{
	Rectangle moved = rect;
	moved.x += x;
	moved.y += y;
	return moved;
}
// Draws a raygui value box that edits a float through an integer view.
//
// raygui's GuiValueBox only works on `int`, so the float is truncated for
// display/editing. The float is written back ONLY when the integer actually
// changed (user input or clamping by the widget); otherwise merely drawing
// the box would silently discard the fractional part of `*value` each frame.
//
// rect      - widget bounds
// text      - label passed through to GuiValueBox
// value     - float to show and edit; updated only when the widget changes it
// min_value - lower bound, converted to int for GuiValueBox
// max_value - upper bound, converted to int for GuiValueBox
// edit_mode - whether the box is currently being edited
//
// Returns GuiValueBox's result unchanged.
static int gui_valuebox_float(Rectangle rect, const char *text, float *value, float min_value, float max_value, bool edit_mode) {
	int shown_value = (int)*value;
	int edited_value = shown_value;
	int result = GuiValueBox(rect, text, &edited_value, (int)min_value, (int)max_value, edit_mode);
	if (edited_value != shown_value) {
		*value = (float)edited_value;
	}
	return result;
}
// Convenience overload that owns the edit-mode toggle: draws the value box
// with the current `*edit_mode` and flips the flag whenever the underlying
// value box returns a non-zero result.
static void gui_valuebox_float(Rectangle rect, const char *text, float *value, float min_value, float max_value, bool *edit_mode) {
	bool toggle_requested = gui_valuebox_float(rect, text, value, min_value, max_value, *edit_mode) != 0;
	if (!toggle_requested) {
		return;
	}
	*edit_mode = !*edit_mode;
}
// Reserves the next slot in a vertical layout.
//
// Returns a rectangle of the requested size at the layout's current cursor
// (shifted right by `offset_x`), then moves the cursor down by the slot's
// height plus the layout gap so the following slot lands below it.
static Rectangle next_in_layout(VerticalLayout *layout, float width, float height, float offset_x = 0) {
	Rectangle slot;
	slot.x = layout->x + offset_x;
	slot.y = layout->y;
	slot.width = width;
	slot.height = height;

	layout->y += height + layout->gap;
	return slot;
}
// Draws the control panel (or, when it is hidden, just the button that opens
// it) and wires every widget directly to the matching `World` / `Visuals`
// field. While the panel is open, the FPS counter and the boid count are drawn
// in the top-right corner.
//
// world   - simulation parameters edited by the "Boid" and "Flock" groups
// visuals - rendering options edited by the "Visuals" group
// ui      - per-value-box edit-mode flags that must persist between frames
static void ui_draw(World *world, Visuals *visuals, UI *ui) {
    // Collapsed state: only the pencil button is shown, nothing else is drawn.
    if (!visuals->show_control_panel) {
        visuals->show_control_panel = GuiButton({ 20, 20, 30, 30 }, GuiIconText(ICON_PENCIL_BIG, ""));
        return;
    }

    float panel_height = 310;
    // The window box result is negated, so a true return hides the panel.
    visuals->show_control_panel = !GuiWindowBox({ 20, 20, 660, panel_height }, "Control panel");

    float group_height = panel_height - 45;

    GuiGroupBox({ 30, 55, 180, group_height }, "Visuals");
    {
        VerticalLayout layout = { .x = 40, .y = 65, .gap = 8 };
        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show direction", &visuals->draw_boid_direction);
        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show view cone", &visuals->draw_view_cone);
        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show separation radius", &visuals->draw_separation_radius);
        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show collision rays", &visuals->draw_collision_avoidance_rays);
        GuiCheckBox(next_in_layout(&layout, 15, 15), "Show pulling forces", &visuals->draw_pulling_forces);
        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Boid size", &visuals->boid_edge_size, 2.5, 50);

        // Each colour picker gets its label placed to the right of it.
        Rectangle boid_color_rect = next_in_layout(&layout, 50, 50);
        GuiColorPicker(boid_color_rect, NULL, &visuals->boid_color);
        GuiLabel(rect_with_offset({ 80, 10, 80, 16 }, boid_color_rect.x, boid_color_rect.y), "Boid color");

        Rectangle bg_color_rect = next_in_layout(&layout, 50, 50);
        GuiColorPicker(bg_color_rect, NULL, &visuals->bg_color);
        GuiLabel(rect_with_offset({ 80, 10, 80, 16 }, bg_color_rect.x, bg_color_rect.y), "BG color");

        // TODO: Add show FPS
        // TODO: Add showing out of bounds boids
    }

    GuiGroupBox({ 220, 55, 220, group_height }, "Boid");
    {
        VerticalLayout layout = { .x = 230, .y = 65, .gap = 8 };
        GuiCheckBox(next_in_layout(&layout, 15, 15), "Looping walls", &world->looping_walls);
        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 2.5, 150);
        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 2.5, 150);
        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View angle", &world->view_angle, 0, 2*PI);
        gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Min speed", &world->min_speed, 0, 1000, &ui->min_speed_edit);
        gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Max speed", &world->max_speed, 0, 1000, &ui->max_speed_edit);
        gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Steer speed", &world->max_steer_speed, 0, 1000, &ui->steer_speed_edit);
        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision distance", &world->collision_avoidance_distance, 5, 100);
        GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision ray angle", &world->collision_avoidance_ray_angle, 0, PI);
        GuiSpinner(next_in_layout(&layout, 100, 15, 95), "Collision ray count", &world->collision_avoidance_ray_count, 1, 10, false);
    }

    GuiGroupBox({ 450, 55, 220, group_height }, "Flock");
    {
        VerticalLayout layout = { .x = 605, .y = 65, .gap = 8 };
        gui_valuebox_float(next_in_layout(&layout, 50, 15), "Alignment strength", &world->alignment_strength, 0, 100, &ui->alignment_strength_edit);
        gui_valuebox_float(next_in_layout(&layout, 50, 15), "Cohesion strength", &world->cohesion_strength, 0, 100, &ui->cohesion_strength_edit);
        gui_valuebox_float(next_in_layout(&layout, 50, 15), "Separation strength", &world->separation_strength, 0, 100, &ui->separation_strength_edit);
        gui_valuebox_float(next_in_layout(&layout, 50, 15), "Collision avoidance strength", &world->collision_avoidance_strength, 0, 100, &ui->collision_avoidance_strength_edit);
    }

    float window_width = GetScreenWidth();
    DrawFPS(window_width - 90, 10);

    char boid_label[128] = { 0 };
    // size() yields a size_t, so it must be printed with %zu; %lu is only
    // correct on platforms where size_t happens to be unsigned long.
    snprintf(boid_label, sizeof(boid_label), "%zu boids", world->boids.size());
    DrawText(boid_label, window_width - 125, 35, 20, GREEN);
}

712
src/world.cpp Normal file
View File

@ -0,0 +1,712 @@
#include "boid-playground.hpp"
#include "raycast.hpp"
#include "rprof.h"
#include "boid-list.hpp"
#include <immintrin.h>
// Angle of vector `a` as computed by std::atan2(a.y, a.x).
static float vector2_atan2(Vector2 a) {
    const float angle = std::atan2(a.y, a.x);
    return angle;
}
// Multiplies both components of `v` by the scalar `value`.
static Vector2 vector2_mul_value(Vector2 v, float value) {
    Vector2 scaled;
    scaled.x = v.x * value;
    scaled.y = v.y * value;
    return scaled;
}
// Divides both components of `v` by the scalar `value`.
// No zero check is performed; a zero `value` follows float division rules.
static Vector2 vector2_div_value(Vector2 v, float value) {
    Vector2 quotient;
    quotient.x = v.x / value;
    quotient.y = v.y / value;
    return quotient;
}
// Builds the unit vector (cos(angle), sin(angle)) for `angle` in radians.
static Vector2 vector2_from_angle(float angle) {
    Vector2 direction;
    direction.x = std::cos(angle);
    direction.y = std::sin(angle);
    return direction;
}
// Returns the arithmetic mean of `points`.
// An empty list yields { 0, 0 } instead of dividing 0 by 0 (which produced NaN).
static Vector2 get_center_point(std::vector<Vector2> &points) {
    Vector2 center = { 0, 0 };
    if (points.empty()) {
        return center;
    }

    for (const Vector2 &point : points) {
        center.x += point.x;
        center.y += point.y;
    }
    center.x /= points.size();
    center.y /= points.size();

    return center;
}
// Fills `rays` with `ray_count` angle offsets spread symmetrically around 0:
//   rays[0] = 0, then alternating +k*step, -k*step for k = 1..(ray_count-1)/2,
// where step = ray_angle / ((ray_count-1)/2). `ray_count` must be odd and >= 1.
static void fill_avoidance_ray_angles(float *rays, int ray_count, float ray_angle) {
    DEBUG_ASSERT(ray_count >= 1 && "Ray count must be at least 1");
    DEBUG_ASSERT(((ray_count - 1) % 2 == 0) && "Ray count must be a multiple of 2n+1");

    const int rays_per_side = (ray_count - 1) / 2;
    // With a single ray this divides by zero, but the value is never used
    // because the loop below does not run.
    const float step = ray_angle / rays_per_side;

    rays[0] = 0;

    float *pair = &rays[1];
    for (int k = 1; k <= rays_per_side; k++) {
        pair[0] = step * k;
        pair[1] = -step * k;
        pair += 2;
    }
}
// Casts 2*collision_avoidance_ray_count + 1 rays fanned around the boid's
// heading and returns the direction of the ray with the largest `hit` value,
// but only if at least one ray hit something within
// `collision_avoidance_distance`. Otherwise returns { 0, 0 }.
static Vector2 get_collision_avoidance_dir(World *world, Boid *boid) {
    int ray_count = world->collision_avoidance_ray_count * 2 + 1;
    float ray_angles[ray_count];
    fill_avoidance_ray_angles(ray_angles, ray_count, world->collision_avoidance_ray_angle);

    int best_avoidance = -1;
    Vector2 avoidance_dir = { 0, 0 };

    float facing = std::atan2(boid->dir.y, boid->dir.x);
    bool got_hit = false;
    RayHitResult hit_results[ray_count];
    for (int i = 0; i < ray_count; i++) {
        Vector2 ray_dir = vector2_from_angle(facing + ray_angles[i]);
        get_intersect_with_world(&hit_results[i], boid->pos, ray_dir, world);

        // A `hit` of -1 is treated as "nothing hit" here.
        if (hit_results[i].hit != -1 && hit_results[i].hit <= world->collision_avoidance_distance) {
            got_hit = true;
        }

        // The "no best yet" check must come first: evaluating
        // hit_results[best_avoidance] with best_avoidance == -1 reads before
        // the start of the array.
        // NOTE(review): if -1 means "no hit", a completely clear ray compares
        // lower than any blocked ray and is never preferred — confirm intent.
        if (best_avoidance == -1 || hit_results[i].hit > hit_results[best_avoidance].hit) {
            avoidance_dir = ray_dir;
            best_avoidance = i;
        }
    }

    if (got_hit) {
        return avoidance_dir;
    } else {
        return { 0, 0 };
    }
}
// Counts boids whose position lies on or outside the world rectangle
// (x <= 0, x >= size.x, y <= 0 or y >= size.y).
static int count_out_of_bounds_boids(World *world) {
    int out_of_bounds = 0;
    for (const Boid &boid : world->boids) {
        const bool outside_x = (boid.pos.x <= 0 || boid.pos.x >= world->size.x);
        const bool outside_y = (boid.pos.y <= 0 || boid.pos.y >= world->size.y);
        if (outside_x || outside_y) {
            out_of_bounds += 1;
        }
    }
    return out_of_bounds;
}
// Debug helper: prints the 8 float lanes of `value` on one line, separated by
// commas, lowest lane first.
static void print_m256_f32(__m256 value)
{
    const float *lane = (const float*)&value;
    const float *lanes_end = lane + 8;

    printf("%f", *lane);
    for (lane++; lane != lanes_end; lane++) {
        printf(",%f", *lane);
    }
    printf("\n");
}
// -------------------- Init/Cleanup ------------------------
// Gives `boid` a random position at least `border` away from the world edges,
// a random heading and a random speed between the world's min and max speed.
static void boid_rand_init(World *world, Boid *boid, float border) {
    // raylib's GetRandomValue() takes and returns ints. Passing float limits
    // truncates them (GetRandomValue(0, 2*PI) can only return 0..6), so
    // continuous values are built from a random fraction in [0, 1] instead.
    // The step count stays below the smallest RAND_MAX allowed by the standard.
    const int fraction_steps = 10000;

    float world_width = world->size.x;
    float world_height = world->size.y;

    // Positions are intentionally left on whole coordinates.
    boid->pos.x = GetRandomValue(border, world_width-border);
    boid->pos.y = GetRandomValue(border, world_height-border);

    float facing = 2*PI * GetRandomValue(0, fraction_steps) / (float)fraction_steps;
    boid->dir = Vector2Rotate({ 1, 0 }, facing);

    float speed_fraction = GetRandomValue(0, fraction_steps) / (float)fraction_steps;
    boid->speed = world->min_speed + (world->max_speed - world->min_speed) * speed_fraction;
}
// Prepares `world` for use: sets up the per-frame arena and records the world
// dimensions.
static void world_init(World *world, float width, float height) {
    // 1024 * 1024 * 256 = 268,435,456 (presumably bytes, i.e. 256 MiB).
    const int frame_arena_capacity = 1024 * 1024 * 256;

    arena_init(&world->frame_arena, frame_arena_capacity);
    world->size = { width, height };
}
// Releases the per-frame arena owned by `world`.
static void world_free(World *world) {
    MemoryArena *frame_arena = &world->frame_arena;
    arena_free(frame_arena);
}
// --------------------- Update -----------------------
// Debug counter of boid-to-boid interactions. It is incremented by the
// neighbour search (once per in-range pair in assign_local_boids_b2b, once per
// neighbour-list append in world_update) and is never reset in the code
// visible here.
static int interactions = 0;
// Rounds `num` up to a multiple of `divisor`: values that are already a
// multiple are returned unchanged, and any positive remainder bumps the result
// to the next multiple. Because integer division truncates toward zero,
// negative `num` values also end up rounded toward zero.
static int nearest_multiple(int num, int divisor)
{
    int whole_groups = num / divisor;
    if (num % divisor > 0) {
        whole_groups += 1;
    }
    return whole_groups * divisor;
}
// b2b = boid to boid comparison
// Scalar neighbour test for one pair of distinct boids.
//
// `offset` is the position difference between the two boids and `length_sqr`
// its squared length (the visible caller passes from.pos - to.pos). If the
// pair is within `view_radius`, each boid is appended to the other's local
// list when the other boid lies inside its view cone.
static void assign_local_boids_b2b(World *world, BoidList *local_boids, uboid_t from_boid, uboid_t to_boid, Vector2 offset, float length_sqr)
{
    assert(to_boid != from_boid);

    // Written as a negated "<=" so that a NaN length is rejected too.
    const float view_radius_sqr = world->view_radius * world->view_radius;
    if (!(length_sqr <= view_radius_sqr)) {
        return;
    }

    interactions++;

    // Normalise the offset; a zero-length offset is left as (0, 0).
    Vector2 unit_offset = offset;
    if (length_sqr != 0) {
        const float inv_length = 1.0f/sqrtf(length_sqr);
        unit_offset.x *= inv_length;
        unit_offset.y *= inv_length;
    }

    // cos(view_angle/2) is the dot product between a heading and the edge of
    // its view cone; anything with a larger dot product is inside the cone.
    const float dot_threshold = cosf(world->view_angle/2);
    Boid *boids = world->boids.data();

    const float from_facing = Vector2DotProduct(boids[from_boid].dir, Vector2Negate(unit_offset));
    if (from_facing >= dot_threshold) {
        boid_list_append(&world->frame_arena, &local_boids[from_boid], to_boid);
    }

    const float to_facing = Vector2DotProduct(boids[to_boid].dir, unit_offset);
    if (to_facing >= dot_threshold) {
        boid_list_append(&world->frame_arena, &local_boids[to_boid], from_boid);
    }
}
// b2l = boid to (list of boids) comparison
// Scalar neighbour test of one boid against a list of boids: runs
// assign_local_boids_b2b for `from_boid` paired with each of the
// `to_boids_count` ids in `to_boids`.
static void assign_local_boids_b2l(World *world, BoidList *local_boids, uboid_t from_boid, uboid_t *to_boids, uboid_t to_boids_count)
{
    Boid *boids = world->boids.data();
    Vector2 from_pos = boids[from_boid].pos;

    for (int i = 0; i < to_boids_count; i++) {
        // Ids are uboid_t everywhere else; a narrower type here could
        // truncate them.
        uboid_t to_boid = to_boids[i];

        Vector2 offset = Vector2Subtract(from_pos, boids[to_boid].pos);
        float length_sqr = Vector2LengthSqr(offset);
        assign_local_boids_b2b(world, local_boids, from_boid, to_boid, offset, length_sqr);
    }
}
// Converts an array of Vector2 (x,y interleaved) into separate x and y SIMD
// registers: group g of 8 consecutive vectors becomes vecs_x[g] / vecs_y[g],
// with vector 8*g + k stored in lane k. `vec_count` must be a multiple of 8
// and both outputs must have room for vec_count/8 registers.
static void vector2_list_to_simd8(Vector2 *vecs, int vec_count, __m256 *vecs_x, __m256 *vecs_y)
{
    assert(vec_count % 8 == 0 && "Vector2 count must be divisible by 8");

    const int group_count = vec_count/8;
    for (int group = 0; group < group_count; group++) {
        const Vector2 *v = &vecs[8*group];

        // _mm256_setr_ps takes its arguments in lane (memory) order.
        vecs_x[group] = _mm256_setr_ps(
            v[0].x, v[1].x, v[2].x, v[3].x,
            v[4].x, v[5].x, v[6].x, v[7].x
        );
        vecs_y[group] = _mm256_setr_ps(
            v[0].y, v[1].y, v[2].y, v[3].y,
            v[4].y, v[5].y, v[6].y, v[7].y
        );
    }
}
// Advances the simulation by `dt`. Does nothing while `world->freeze` is set.
//
// Phases (all scratch memory comes from the per-frame arena, which is cleared
// on entry):
//   1. Create one empty neighbour list per boid.
//   2. Bucket boids into a grid of square chunks whose side is at least the
//      view radius.
//   3. For every chunk, compare its boids with each other and with the boids
//      of 4 of its 8 surrounding chunks (right, below, below-right,
//      below-left), 8 candidates at a time with AVX. Each comparison fills the
//      neighbour lists in both directions, so the other 4 surrounding chunks
//      are covered when they are the ones being processed.
//   4. Turn every boid's neighbour list into alignment / cohesion / separation
//      forces, add obstacle avoidance, integrate, and handle world borders.
//
// The scalar equivalent of phase 3 is assign_local_boids_b2l().
static void world_update(World *world, float dt) {
    if (world->freeze) return;

    MemoryArena *arena = &world->frame_arena;
    arena_clear(arena);

    Boid *boids = world->boids.data();
    int boid_count = world->boids.size();
    assert(boid_count <= MAX_BOIDS);

    RPROF_START("Alloc groups");
    BoidList *all_local_boids = (BoidList*)arena_malloc(arena, boid_count * sizeof(BoidList));
    for (int i = 0; i < boid_count; i++) {
        boid_list_init(&all_local_boids[i]);
    }
    RPROF_STOP();

    float chunk_size = std::max(world->view_radius, 15.0f);
    int chunks_wide = std::ceil(world->size.x / chunk_size) + 1;
    int chunks_high = std::ceil(world->size.y / chunk_size) + 1;

    RPROF_START("Alloc chunks");
    BoidList *chunks[chunks_high][chunks_wide];
    for (int y = 0; y < chunks_high; y++) {
        for (int x = 0; x < chunks_wide; x++) {
            chunks[y][x] = (BoidList*)arena_malloc(arena, sizeof(BoidList));
            boid_list_init(chunks[y][x]);
        }
    }
    RPROF_STOP();

    RPROF_START("Creating chunks");
    for (int i = 0; i < boid_count; i++) {
        Boid *boid = &boids[i];

        // Clamp while still in float space so that out-of-world, huge or NaN
        // positions land in a border chunk instead of indexing outside the
        // grid. In-bounds positions map to the same chunk as a plain
        // truncation would give.
        float chunk_xf = boid->pos.x / chunk_size;
        float chunk_yf = boid->pos.y / chunk_size;
        int chunk_x = (chunk_xf > 0) ? (int)std::min(chunk_xf, (float)(chunks_wide - 1)) : 0;
        int chunk_y = (chunk_yf > 0) ? (int)std::min(chunk_yf, (float)(chunks_high - 1)) : 0;

        boid_list_append(arena, chunks[chunk_y][chunk_x], i);
    }
    RPROF_STOP();

    RPROF_START("Extracting boid positions");
    Vector2 *boid_dirs      = (Vector2*)arena_malloc(arena, sizeof(Vector2)*boid_count);
    Vector2 *boid_positions = (Vector2*)arena_malloc(arena, sizeof(Vector2)*boid_count);
    for (int i = 0; i < boid_count; i++) {
        boid_positions[i] = boids[i].pos;
        boid_dirs[i] = boids[i].dir;
    }
    RPROF_STOP();

    RPROF_START("Calc dot products and ranges (chunked)");
    // TODO: Use temp memory arena inside this profile block

    // 32-byte aligned scratch buffers the SIMD masks are stored into so they
    // can be inspected lane by lane.
    int32_t *do_append_mask1_f32 = (int32_t*)arena_malloc(arena, sizeof(int32_t)*8, 32);
    int32_t *do_append_mask2_f32 = (int32_t*)arena_malloc(arena, sizeof(int32_t)*8, 32);

    // Constants shared by every comparison of this frame.
    const float view_radius_sqr_single = world->view_radius * world->view_radius;
    // Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
    const float dot_threshold_single = cosf(world->view_angle/2);
    const __m256 dot_threshold   = _mm256_set1_ps(dot_threshold_single);
    const __m256 view_radius_sqr = _mm256_set1_ps(view_radius_sqr_single);
    const __m256 zero            = _mm256_set1_ps(0);
    const __m256 negative_one    = _mm256_set1_ps(-1);

    for (int y = 0; y < chunks_high; y++) {
        Vector2 neighbours[] = { { 1, 0 }, { 0, 1 }, { 1, 1 }, { -1, 1 } };

        // One "boid to list" comparison: `from` against `to_list`, with the
        // list's positions and directions pre-packed 8 per register.
        struct b2l_cmp {
            uboid_t from;
            uboid_t *to_list;
            uboid_t to_list_count;

            __m256 *to_list_pos_x;
            __m256 *to_list_pos_y;
            __m256 *to_list_dir_x;
            __m256 *to_list_dir_y;
            int to_list_pos_count;
        };

        for (int x = 0; x < chunks_wide; x++) {
            BoidList *chunk = chunks[y][x];
            if (chunk->count == 0) continue;

            std::vector<b2l_cmp> b2l_cmps; // TODO: remove usage of std::vec<T>, it is kinda slow
            b2l_cmps.reserve(64);

            // The staging arrays carry 8 spare entries so that a group of 8
            // starting at any boid can be read without leaving the array.
            // Both are zeroed so the padding lanes hold defined values.
            uboid_t chunk_boids[chunk->count];
            Vector2 chunk_boids_pos[chunk->count + 8];
            Vector2 chunk_boids_dir[chunk->count + 8];
            memset(chunk_boids_pos, 0, sizeof(Vector2) * (chunk->count + 8));
            memset(chunk_boids_dir, 0, sizeof(Vector2) * (chunk->count + 8));
            boid_list_to_array(chunk_boids, chunk);
            for (int i = 0; i < chunk->count; i++) {
                uboid_t boid = chunk_boids[i];
                chunk_boids_pos[i] = boid_positions[boid];
                chunk_boids_dir[i] = boid_dirs[boid];
            }

            // Pairs inside this chunk: boid i against every boid after it.
            for (int i = 0; i < chunk->count-1; i++) {
                uboid_t from_boid = chunk_boids[i];
                uboid_t *to_boids = &chunk_boids[i+1];
                uboid_t to_boids_count = chunk->count-i-1;

                Vector2 *to_chunk_boids_pos = &chunk_boids_pos[i+1];
                Vector2 *to_chunk_boids_dir = &chunk_boids_dir[i+1];

                b2l_cmp cmp = {};
                cmp.from = from_boid;
                cmp.to_list = to_boids;
                cmp.to_list_count = to_boids_count;

                int to_boids_count_8 = nearest_multiple(to_boids_count, 8);
                cmp.to_list_pos_count = to_boids_count_8/8;
                cmp.to_list_pos_x = (__m256*)arena_malloc(arena, sizeof(__m256) * cmp.to_list_pos_count, sizeof(__m256));
                cmp.to_list_pos_y = (__m256*)arena_malloc(arena, sizeof(__m256) * cmp.to_list_pos_count, sizeof(__m256));
                cmp.to_list_dir_x = (__m256*)arena_malloc(arena, sizeof(__m256) * cmp.to_list_pos_count, sizeof(__m256));
                cmp.to_list_dir_y = (__m256*)arena_malloc(arena, sizeof(__m256) * cmp.to_list_pos_count, sizeof(__m256));
                vector2_list_to_simd8(to_chunk_boids_pos, to_boids_count_8, cmp.to_list_pos_x, cmp.to_list_pos_y);
                vector2_list_to_simd8(to_chunk_boids_dir, to_boids_count_8, cmp.to_list_dir_x, cmp.to_list_dir_y);

                b2l_cmps.push_back(cmp);
            }

            // Pairs between this chunk and each forward neighbour chunk.
            for (int n = 0; n < ARRAY_LEN(neighbours); n++) {
                int chunk_y = y + neighbours[n].y;
                int chunk_x = x + neighbours[n].x;
                if (chunk_y < 0 || chunk_y >= chunks_high) continue;
                if (chunk_x < 0 || chunk_x >= chunks_wide) continue;

                BoidList *neighbour_chunk = chunks[chunk_y][chunk_x];
                if (neighbour_chunk->count == 0) continue;

                // TODO: alloc 'neighbour_ids' into scratch arena
                uboid_t *neighbour_ids = (uboid_t*)arena_malloc(arena, sizeof(uboid_t)*neighbour_chunk->count);
                boid_list_to_array(neighbour_ids, neighbour_chunk);

                Vector2 neighbour_boids_pos[neighbour_chunk->count + 8];
                Vector2 neighbour_boids_dir[neighbour_chunk->count + 8];
                memset(neighbour_boids_pos, 0, sizeof(Vector2) * (neighbour_chunk->count + 8));
                memset(neighbour_boids_dir, 0, sizeof(Vector2) * (neighbour_chunk->count + 8));
                for (int i = 0; i < neighbour_chunk->count; i++) {
                    neighbour_boids_pos[i] = boid_positions[neighbour_ids[i]];
                    neighbour_boids_dir[i] = boid_dirs[neighbour_ids[i]];
                }

                // The packed neighbour data is shared by every boid of `chunk`.
                int to_boids_count_8 = nearest_multiple(neighbour_chunk->count, 8);
                __m256 *to_list_pos_x = (__m256*)arena_malloc(arena, sizeof(__m256) * to_boids_count_8/8, sizeof(__m256));
                __m256 *to_list_pos_y = (__m256*)arena_malloc(arena, sizeof(__m256) * to_boids_count_8/8, sizeof(__m256));
                __m256 *to_list_dir_x = (__m256*)arena_malloc(arena, sizeof(__m256) * to_boids_count_8/8, sizeof(__m256));
                __m256 *to_list_dir_y = (__m256*)arena_malloc(arena, sizeof(__m256) * to_boids_count_8/8, sizeof(__m256));
                vector2_list_to_simd8(neighbour_boids_pos, to_boids_count_8, to_list_pos_x, to_list_pos_y);
                vector2_list_to_simd8(neighbour_boids_dir, to_boids_count_8, to_list_dir_x, to_list_dir_y);

                uboid_t boid1;
                BoidsListNodeIterator it1 = boid_list_get_iterator(chunk);
                while (boid_list_iterator_next(&it1, &boid1)) {
                    b2l_cmp cmp = {};
                    cmp.from = boid1;
                    cmp.to_list = neighbour_ids;
                    cmp.to_list_count = neighbour_chunk->count;
                    cmp.to_list_pos_x = to_list_pos_x;
                    cmp.to_list_pos_y = to_list_pos_y;
                    cmp.to_list_dir_x = to_list_dir_x;
                    cmp.to_list_dir_y = to_list_dir_y;
                    cmp.to_list_pos_count = to_boids_count_8/8;
                    b2l_cmps.push_back(cmp);
                }
            }

            // Run every queued comparison, 8 candidates per iteration.
            for (int i = 0; i < b2l_cmps.size(); i++) {
                b2l_cmp *cmp = &b2l_cmps[i];

                uboid_t from_boid = cmp->from;
                Vector2 from_pos = boid_positions[from_boid];
                Vector2 from_dir = boid_dirs[from_boid];

                __m256 from_pos_x = _mm256_set1_ps(from_pos.x);
                __m256 from_pos_y = _mm256_set1_ps(from_pos.y);
                __m256 from_dir_x = _mm256_set1_ps(from_dir.x);
                __m256 from_dir_y = _mm256_set1_ps(from_dir.y);

                for (int j = 0; j < cmp->to_list_pos_count; j++) {
                    __m256 to_pos_x = cmp->to_list_pos_x[j];
                    __m256 to_pos_y = cmp->to_list_pos_y[j];
                    __m256 to_dir_x = cmp->to_list_dir_x[j];
                    __m256 to_dir_y = cmp->to_list_dir_y[j];

                    // Offset from each candidate to `from_boid` and its squared length.
                    __m256 sub_x = _mm256_sub_ps(from_pos_x, to_pos_x);
                    __m256 sub_y = _mm256_sub_ps(from_pos_y, to_pos_y);
                    __m256 x_sqr = _mm256_mul_ps(sub_x, sub_x);
                    __m256 length_sqr = _mm256_fmadd_ps(sub_y, sub_y, x_sqr);
                    __m256 in_range_mask = _mm256_cmp_ps(length_sqr, view_radius_sqr, _CMP_LE_OQ);

                    // Approximate 1/length; forced to 0 where the length is 0
                    // so those lanes normalise to (0, 0) instead of NaN.
                    __m256 is_length_zero = _mm256_cmp_ps(length_sqr, zero, _CMP_EQ_OQ);
                    __m256 ilength = _mm256_blendv_ps(_mm256_rsqrt_ps(length_sqr), zero, is_length_zero);
                    __m256 x_norm = _mm256_mul_ps(sub_x, ilength);
                    __m256 y_norm = _mm256_mul_ps(sub_y, ilength);
                    __m256 x_neg_norm = _mm256_mul_ps(x_norm, negative_one);
                    __m256 y_neg_norm = _mm256_mul_ps(y_norm, negative_one);

                    // mask1: candidate is inside from_boid's view cone.
                    __m256 dot_product1 = _mm256_fmadd_ps(from_dir_y, y_neg_norm, _mm256_mul_ps(from_dir_x, x_neg_norm));
                    __m256 in_angle_mask1 = _mm256_cmp_ps(dot_product1, dot_threshold, _CMP_GE_OQ);
                    __m256 do_append_mask1 = _mm256_and_ps(in_angle_mask1, in_range_mask);

                    // mask2: from_boid is inside the candidate's view cone.
                    __m256 dot_product2 = _mm256_fmadd_ps(to_dir_y, y_norm, _mm256_mul_ps(to_dir_x, x_norm));
                    __m256 in_angle_mask2 = _mm256_cmp_ps(dot_product2, dot_threshold, _CMP_GE_OQ);
                    __m256 do_append_mask2 = _mm256_and_ps(in_angle_mask2, in_range_mask);

                    _mm256_store_ps((float*)do_append_mask1_f32, do_append_mask1);
                    _mm256_store_ps((float*)do_append_mask2_f32, do_append_mask2);

                    for (int k = 0; k < 8; k++) {
                        // int rather than uboid_t: the index may run past the
                        // list into the padding lanes and must not wrap.
                        int to_boid_idx = 8*j + k;
                        if (to_boid_idx >= cmp->to_list_count) break;

                        uboid_t to_boid = cmp->to_list[to_boid_idx];
                        if (do_append_mask1_f32[k]) {
                            boid_list_append(&world->frame_arena, &all_local_boids[from_boid], to_boid);
                            interactions++;
                        }
                        if (do_append_mask2_f32[k]) {
                            boid_list_append(&world->frame_arena, &all_local_boids[to_boid], from_boid);
                            interactions++;
                        }
                    }
                }
            }
        }
    }
    RPROF_STOP();

    RPROF_START("Apply forces");
    for (int i = 0; i < boid_count; i++) {
        Boid *boid = &world->boids[i];
        Vector2 acc = { 0, 0 };

        BoidList *local_boids = &all_local_boids[i];

        if (local_boids->count > 0) {
            Vector2 separation_force = { 0, 0 };
            Vector2 flock_center = { 0, 0 };
            Vector2 flock_heading = { 0, 0 };

            uboid_t local_boid_id;
            BoidsListNodeIterator it = boid_list_get_iterator(local_boids);
            while (boid_list_iterator_next(&it, &local_boid_id)) {
                Boid *local_boid = &boids[local_boid_id];
                flock_heading = Vector2Add(flock_heading, local_boid->dir);
                flock_center  = Vector2Add(flock_center , local_boid->pos);

                Vector2 pos_diff = Vector2Subtract(boid->pos, local_boid->pos);
                float dist_sqr = Vector2LengthSqr(pos_diff);
                // Boids at exactly the same position have no separation
                // direction; dividing by their zero distance would give NaN
                // and permanently corrupt the boid.
                if (dist_sqr > 0 && dist_sqr <= world->separation_radius * world->separation_radius) {
                    separation_force = Vector2Add(separation_force, vector2_div_value(pos_diff, dist_sqr));
                }
            }
            flock_center = vector2_div_value(flock_center, local_boids->count);

            Vector2 alignment_force = Vector2Normalize(flock_heading);
            acc = Vector2Add(acc, vector2_mul_value(alignment_force, world->alignment_strength));

            Vector2 cohesion_force = Vector2Normalize(Vector2Subtract(flock_center, boid->pos));
            acc = Vector2Add(acc, vector2_mul_value(cohesion_force, world->cohesion_strength));

            separation_force = Vector2Normalize(separation_force);
            acc = Vector2Add(acc, vector2_mul_value(separation_force, world->separation_strength));
        }

        // Apply obstacle avoidance to acceleration
        Vector2 collision_avoidance = get_collision_avoidance_dir(world, boid);
        acc = Vector2Add(acc, vector2_mul_value(collision_avoidance, world->collision_avoidance_strength));

        acc = vector2_mul_value(acc, world->max_speed);

        // Clamp acceleration
        Vector2 clamped_acc = acc;
        float acc_size = Vector2Length(acc);
        if (acc_size > world->max_steer_speed) {
            clamped_acc = vector2_mul_value(Vector2Normalize(acc), world->max_steer_speed);
        }

        // Apply acceleration
        Vector2 velocity = Vector2Multiply(boid->dir, { boid->speed, boid->speed });
        velocity = Vector2Add(velocity, vector2_mul_value(clamped_acc, dt));

        boid->dir = Vector2Normalize(velocity);
        boid->speed = Vector2Length(velocity);
        boid->speed = Clamp(boid->speed, world->min_speed, world->max_speed);

        Vector2 step = vector2_mul_value(boid->dir, boid->speed * dt);
        Vector2 target_pos = Vector2Add(boid->pos, step);

        // Check collisions: only move when the ray cast from the target
        // position reports no hit (-1) or a hit further than 2 away.
        RayHitResult hit_result;
        get_intersect_with_world(&hit_result, target_pos, step, world);
        if (hit_result.hit == -1 || hit_result.hit > 2) {
            boid->pos = target_pos;
        }

        if (world->looping_walls) {
            // Wrap around to the opposite edge.
            if (boid->pos.x >= world->size.x) {
                boid->pos.x -= world->size.x;
            } else if (boid->pos.x < 0) {
                boid->pos.x += world->size.x;
            }
            if (boid->pos.y >= world->size.y) {
                boid->pos.y -= world->size.y;
            } else if (boid->pos.y < 0) {
                boid->pos.y += world->size.y;
            }
        } else {
            // Keep the boid inside the world rectangle.
            if (boid->pos.x >= world->size.x) {
                boid->pos.x = world->size.x-1;
            } else if (boid->pos.x < 0) {
                boid->pos.x = 0;
            }
            if (boid->pos.y >= world->size.y) {
                boid->pos.y = world->size.y-1;
            } else if (boid->pos.y < 0) {
                boid->pos.y = 0;
            }
        }
    }
    RPROF_STOP();
}
// --------------------- Draw ------------------------
// Draws `obstacle` as a triangle fan around its centre: one triangle
// (point[j], centre, point[j+1]) per outline edge, including the closing edge
// from the last point back to the first.
static void draw_obstacle(Obstacle *obstacle, Color color) {
    std::vector<Vector2> *points = &obstacle->points;
    int point_count = points->size();

    rlBegin(RL_TRIANGLES);
    {
        rlColor4ub(color.r, color.g, color.b, color.a);
        // The modulo wraps the last edge back to point 0, so an obstacle
        // without points emits no vertices instead of reading points[-1].
        for (int j = 0; j < point_count; j++) {
            Vector2 *point1 = &(*points)[j];
            Vector2 *point2 = &(*points)[(j+1) % point_count];
            rlVertex2f(point1->x, point1->y);
            rlVertex2f(obstacle->center.x, obstacle->center.y);
            rlVertex2f(point2->x, point2->y);
        }
    }
    rlEnd();
}
// Debug drawing of a boid's collision-avoidance rays. Rays that hit something
// within `collision_avoidance_distance` are drawn in blue up to the hit point,
// which is marked with a small circle; all other rays are drawn in green at
// full avoidance distance.
static void draw_obstacle_avoidance_rays(Visuals *visuals, World *world, Boid *boid) {
    int ray_count = world->collision_avoidance_ray_count * 2 + 1;
    float ray_angles[ray_count];
    fill_avoidance_ray_angles(ray_angles, ray_count, world->collision_avoidance_ray_angle);

    Vector2 origin = boid->pos;
    float facing = std::atan2(boid->dir.y, boid->dir.x);

    for (int ray = 0; ray < ray_count; ray++) {
        const float angle = facing + ray_angles[ray];
        Vector2 ray_dir = { std::cos(angle), std::sin(angle) };

        RayHitResult hit_result;
        get_intersect_with_world(&hit_result, origin, ray_dir, world);

        // A `hit` of -1 is treated as "nothing hit".
        const bool blocked = (hit_result.hit != -1 && hit_result.hit <= world->collision_avoidance_distance);
        const Color ray_color = blocked ? BLUE : GREEN;
        const float ray_length = blocked ? hit_result.hit : world->collision_avoidance_distance;

        Vector2 ray_end = Vector2Add(origin, Vector2Multiply(ray_dir, { ray_length, ray_length }));
        DrawLine(origin.x, origin.y, ray_end.x, ray_end.y, ray_color);
        if (blocked) {
            DrawCircle(ray_end.x, ray_end.y, visuals->boid_edge_size * 0.05, ray_color);
        }
    }
}
// Draws a filled circle sector between `start_angle` and `end_angle`
// (radians) as `segments` triangles that all share the centre vertex.
static void draw_circle_sector(Vector2 center, float radius, float start_angle, float end_angle, int segments, Color color) {
    const float angle_step = (end_angle - start_angle) / segments;

    rlBegin(RL_TRIANGLES);
    for (int segment = 0; segment < segments; segment++) {
        rlColor4ub(color.r, color.g, color.b, color.a);

        const float from_angle = start_angle + segment * angle_step;
        const float to_angle = start_angle + (segment+1) * angle_step;

        // Vertex order: centre, far edge of the segment, near edge.
        rlVertex2f(center.x, center.y);
        rlVertex2f(center.x + cosf(to_angle)*radius, center.y + sinf(to_angle)*radius);
        rlVertex2f(center.x + cosf(from_angle)*radius, center.y + sinf(from_angle)*radius);
    }
    rlEnd();
}
// Renders the world: obstacles first, then (optionally) every boid's view
// cone, then the boids themselves together with whichever debug overlays are
// enabled in `visuals`.
static void world_draw(World *world, Visuals *visuals) {
    for (auto &obstacle : world->obstacles) {
        draw_obstacle(&obstacle, GRAY);
    }

    if (visuals->draw_view_cone) {
        const Color view_cone_color = Fade(GRAY, 0.4);
        const int segments = 16;
        for (auto &boid : world->boids) {
            const float facing = std::atan2(boid.dir.y, boid.dir.x);
            const float half_view_angle = world->view_angle/2;
            draw_circle_sector(boid.pos, world->view_radius, facing - half_view_angle, facing + half_view_angle, segments, view_cone_color);
        }
    }

    // Boid shape in local space, pointing along +x.
    float boid_length = visuals->boid_edge_size * std::sqrt(3)/2;
    float boid_width = visuals->boid_edge_size * 0.6;
    const Vector2 local_triangle[3] = {
        { boid_length*2/3.0f, 0 },
        { -boid_length*1/3.0f, -boid_width/2 },
        { -boid_length*1/3.0f, boid_width/2 },
    };

    for (auto &boid : world->boids) {
        if (visuals->draw_collision_avoidance_rays) {
            draw_obstacle_avoidance_rays(visuals, world, &boid);
        }

        if (visuals->draw_separation_radius) {
            DrawCircleLines(boid.pos.x, boid.pos.y, world->separation_radius, MAGENTA);
        }

        // Rotate the local triangle to the boid's heading and move it to the
        // boid's position.
        const float facing = std::atan2(boid.dir.y, boid.dir.x);
        Vector2 corners[3];
        for (int corner = 0; corner < 3; corner++) {
            corners[corner] = Vector2Add(boid.pos, Vector2Rotate(local_triangle[corner], facing));
        }
        DrawTriangle(corners[0], corners[1], corners[2], visuals->boid_color);

        if (visuals->draw_boid_direction) {
            DrawCircle(boid.pos.x, boid.pos.y, visuals->boid_edge_size * 0.05, RED);
            Vector2 look_pos = Vector2Add(boid.pos, vector2_mul_value(boid.dir, visuals->boid_edge_size*1.5));
            DrawLine(boid.pos.x, boid.pos.y, look_pos.x, look_pos.y, RED);
        }
    }
}