optimize checking neighbours

This commit is contained in:
Rokas Puzonas 2023-07-20 23:17:21 +03:00
parent c6fb55cbe6
commit 2184918195
6 changed files with 313 additions and 152 deletions

View File

@ -6,22 +6,28 @@ PLATFORM ?= desktop
SHELL := /bin/zsh
TOP_BUILD_DIR := build
EXECUTABLE := boids-playground
WEB_SHELL := src/shell.html
SUBMODULES_PATH := depends
COMPILER_FLAGS := -std=c++17 -Wno-enum-compare
# default stack - 65536
# 67108864 = 64MiB
WEB_HEAP_SIZE := 1105199104
WEB_STACK_SIZE := 262144
WEB_SHELL := src/shell.html
COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -s -O0
LINKER_FLAGS := -lraylib
# SOURCES := $(wildcard src/*.cpp)
SOURCES := src/main.cpp
COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raygui/src/
# ----------------- Prepare variables for targets ------------------
EXT :=
EMSDK_PATH := $(SUBMODULES_PATH)/emsdk
RAYLIB_PLATFORM := PLATFORM_DESKTOP
COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raygui/src/
COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raylib/src
COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raylib-cpp/include
@ -65,10 +71,13 @@ ifeq ($(PLATFORM), web)
EMSCRIPTEN_PATH ?= $(EMSDK_PATH)/upstream/emscripten
COMPILER_FLAGS += -I$(EMSCRIPTEN_PATH)/cache/sysroot/include
COMPILER_FLAGS += -D_DEFAULT_SOURCE
LINKER_FLAGS += -s USE_GLFW=3
LINKER_FLAGS += -s FORCE_FILESYSTEM=1
LINKER_FLAGS += $(RAYLIB_RELEASE_PATH)/libraylib.a
LINKER_FLAGS += --shell-file $(WEB_SHELL)
LINKER_FLAGS += -s TOTAL_MEMORY=$(WEB_HEAP_SIZE)
LINKER_FLAGS += -s STACK_SIZE=$(WEB_STACK_SIZE)
LIB_DEPENDENCIES += emsdk
endif

View File

@ -4,10 +4,14 @@
#include <assert.h>
#include <raylib-cpp.hpp>
#include "memory-arena.hpp"
#define ARRAY_LEN(arr) (sizeof(arr)/sizeof(arr[0]))
#define LogTrace(...) TraceLog(LOG_TRACE, __VA_ARGS__)
#define ASSERT(...) assert(__VA_ARGS__)
#define MAX_BOIDS 65536 // 65536 = 2^16
struct Boid {
Vector2 pos;
Vector2 dir;
@ -32,28 +36,29 @@ struct World {
std::vector<Boid> boids;
std::vector<Obstacle> obstacles;
float view_radius = 100;
MemoryArena frame_arena;
float view_radius = 15;
float view_angle = PI*1.5;
float min_speed = 50;
float max_speed = 80;
float min_speed = 30;
float max_speed = 50;
float max_steer_speed = 100;
float separation_radius = 50;
float separation_radius = 10;
float alignment_strength = 1;
float cohesion_strength = 1;
float separation_strength = 5;
float collision_avoidance_strength = 50;
float collision_avoidance_distance = 75;
float collision_avoidance_distance = 30;
float collision_avoidance_ray_angle = PI/1.5;
int collision_avoidance_ray_count = 3;
// TODO: Function `get_boids_in_view_cone` doesn't work as expected with looping walls
bool looping_walls = false;
bool looping_walls = true;
};
struct Visuals {
float boid_edge_size = 40;
float boid_edge_size = 8;
Color boid_color = BLACK;
Color bg_color = RAYWHITE;
@ -77,3 +82,16 @@ struct UI {
bool separation_strength_edit = false;
bool collision_avoidance_strength_edit = false;
};
// Number of boid ids stored in a single list node.
#define BOIDS_PER_NODE 32
// One fixed-capacity chunk of a singly linked list of boid ids.
// Each id is an index into the world's boid array.
struct BoidsListNode {
// Id storage; how many slots are in use is tracked by the list's owner, not here.
uint16_t boid_ids[BOIDS_PER_NODE];
// Next chunk in the chain, or NULL when this is the last one.
BoidsListNode *next;
};
// Cursor over a chain of BoidsListNode chunks.
// Created by boid_list_get_iterator() and advanced by boid_list_iterator_next().
struct BoidsListNodeIterator {
// Node currently being read.
BoidsListNode *node;
// Index of the next slot to read inside `node`.
int i;
// Number of ids that have not been returned yet.
uint16_t count;
};

View File

@ -4,7 +4,7 @@
#include <cmath>
#include <optional>
#ifdef PLATFORM_WEB
#ifdef __EMSCRIPTEN__
#include <emscripten/emscripten.h>
#endif
@ -17,6 +17,7 @@
#include "boid-playground.hpp"
#include "raycast.cpp"
#include "memory-arena.cpp"
#define FRAMERATE 60
#define TIME_PER_FRAME (1.0/FRAMERATE)
@ -52,6 +53,15 @@ static void boid_rand_init(World *world, Boid *boid, float border) {
boid->speed = GetRandomValue(world->min_speed, world->max_speed);
}
// Prepares `world` for simulation.
//
// Allocates the per-frame scratch arena (released by world_free()) and records
// the world dimensions.
//
//   world  - world to initialise
//   width  - world width
//   height - world height
static void world_init(World *world, float width, float height) {
    // 1 GiB of scratch space.
    arena_init(&world->frame_arena, 1024 * 1024 * 1024);
    // Write through the parameter instead of the global so that the function
    // initialises whichever world the caller passed in.
    world->size = { width, height };
}
// Releases the resources acquired by world_init(): the per-frame scratch arena.
static void world_free(World *world) {
    MemoryArena *scratch = &world->frame_arena;
    arena_free(scratch);
}
static Vector2 get_center_point(std::vector<Vector2> &points) {
Vector2 center = { 0, 0 };
for (int i = 0; i < points.size(); i++) {
@ -179,47 +189,98 @@ static void draw_circle_sector(Vector2 center, float radius, float start_angle,
rlEnd();
}
static int get_boids_in_view_cone(Boid **boids_in_view, Boid *boid, float view_radius, float view_angle, Boid *boids, int boid_count) {
int count = 0;
float dot_threshold = Vector2DotProduct(boid->dir, Vector2Rotate(boid->dir, view_angle/2));
// Builds an iterator positioned at the first slot of `node` that will yield
// `count` ids in total.
static BoidsListNodeIterator boid_list_get_iterator(BoidsListNode *node, uint16_t count) {
    BoidsListNodeIterator iterator;
    iterator.node = node;
    iterator.i = 0;
    iterator.count = count;
    return iterator;
}
for (int i = 0; i < boid_count; i++) {
if (&boids[i] == boid) continue;
Vector2 dir_to_boid = Vector2Normalize(Vector2Subtract(boids[i].pos, boid->pos));
float dot = Vector2DotProduct(boid->dir, dir_to_boid);
if (dot >= dot_threshold && Vector2DistanceSqr(boids[i].pos, boid->pos) <= view_radius * view_radius) {
boids_in_view[count] = &boids[i];
count++;
}
// Fetches the next boid id from `iterator` into `*value`.
// Returns false, leaving `*value` untouched, once all ids have been returned;
// returns true otherwise.
static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uint16_t *value) {
if (iterator->count == 0) {
return false;
}
return count;
// Every slot of the current node has been read: continue in the next node.
if (iterator->i == BOIDS_PER_NODE) {
iterator->i = 0;
iterator->node = iterator->node->next;
}
*value = iterator->node->boid_ids[iterator->i];
iterator->i++;
iterator->count--;
return true;
}
// Appends `new_boid` to the chunked id list headed by `local_boids`, unless the
// id is already present.
//
//   arena       - arena that additional list nodes are allocated from
//   local_boids - head node of the list; its `next` must be NULL or a valid node
//   count       - number of ids currently stored; incremented on insertion
//   new_boid    - id to insert
//
// The id is silently dropped if a new node is needed and the arena cannot
// provide one.
static void boid_list_append_unique(MemoryArena *arena, BoidsListNode *local_boids, uint16_t *count, uint16_t new_boid) {
    // Ids that still have to be scanned, counted from the start of `tail`.
    int remaining = *count;
    BoidsListNode *tail = local_boids;

    while (true) {
        int used = remaining < BOIDS_PER_NODE ? remaining : BOIDS_PER_NODE;
        for (int i = 0; i < used; i++) {
            if (tail->boid_ids[i] == new_boid) return;
        }

        // `tail` has a free slot at index `remaining`.
        if (remaining < BOIDS_PER_NODE) break;

        // `tail` is full: move on to the next node, creating it on demand.
        // Always advancing to the node that owns the insertion slot avoids
        // writing into slot 0 of the previous (full) node when the count is an
        // exact multiple of BOIDS_PER_NODE.
        remaining -= BOIDS_PER_NODE;
        if (tail->next == NULL) {
            BoidsListNode *node = (BoidsListNode*)arena_malloc(arena, sizeof(BoidsListNode));
            if (node == NULL) return;
            node->next = NULL;
            tail->next = node;
        }
        tail = tail->next;
    }

    tail->boid_ids[remaining] = new_boid;
    (*count)++;
}
// Records the mutual visibility of two boids.
//
// If the boids are within `world->view_radius` of each other, each boid whose
// view cone (`world->view_angle` wide, centred on its own heading) contains the
// other gets the other's id appended to its neighbour list.
//
//   world             - supplies view radius, view angle and the frame arena
//   local_boids       - per-boid neighbour list heads, indexed by boid id
//   local_boid_counts - per-boid neighbour counts, indexed by boid id
//   boids             - boid array the ids index into
//   boid1, boid2      - ids of the pair to test
static void assign_local_boids(World *world, BoidsListNode *local_boids, uint16_t *local_boid_counts, Boid *boids, uint16_t boid1, uint16_t boid2) {
    // Points from boid2 towards boid1.
    Vector2 offset = Vector2Subtract(boids[boid1].pos, boids[boid2].pos);
    bool with_in_range = Vector2LengthSqr(offset) <= (world->view_radius * world->view_radius);
    if (!with_in_range) return;

    // Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
    // NOTE(review): this simplification assumes `dir` is unit length - confirm.
    float dot_threshold = cosf(world->view_angle/2);
    Vector2 dir_to_boid1 = Vector2Normalize(offset);

    // boid1 looks along -offset to see boid2.
    float boid1_dot = -Vector2DotProduct(boids[boid1].dir, dir_to_boid1);
    if (boid1_dot >= dot_threshold) {
        boid_list_append_unique(&world->frame_arena, &local_boids[boid1], &local_boid_counts[boid1], boid2);
    }

    // boid2 looks along +offset to see boid1; its own heading decides whether
    // boid1 falls inside its view cone.
    float boid2_dot = Vector2DotProduct(boids[boid2].dir, dir_to_boid1);
    if (boid2_dot >= dot_threshold) {
        boid_list_append_unique(&world->frame_arena, &local_boids[boid2], &local_boid_counts[boid2], boid1);
    }
}
static void world_update(World *world, float dt) {
arena_clear(&world->frame_arena);
Boid *boids = world->boids.data();
int boid_count = world->boids.size();
RPROF_START("Create groups");
Boid **all_local_boids[boid_count];
int all_local_boid_counts[boid_count];
assert(boid_count <= MAX_BOIDS);
RPROF_START("Alloc groups");
// LocalBoidsListNode all_local_boids[boid_count];
BoidsListNode *all_local_boids = (BoidsListNode*)arena_malloc(&world->frame_arena, boid_count * sizeof(BoidsListNode));
uint16_t all_local_boid_counts[boid_count];
for (int i = 0; i < boid_count; i++) {
all_local_boids[i] = (Boid **)malloc(boid_count * sizeof(Boid*));
// all_local_boids[i] = (uint16_t *)arena_malloc(&world->frame_arena, boid_count * sizeof(uint16_t));
all_local_boids[i].next = NULL;
all_local_boid_counts[i] = 0;
}
RPROF_STOP();
// float chunk_size = world->size.x;
float chunk_size = world->view_radius/2;
int chunks_wide = std::ceil(world->size.x / chunk_size);
int chunks_high = std::ceil(world->size.y / chunk_size);
size_t alloc_chunks = world->frame_arena.offset;
float chunk_size = std::max(world->view_radius, 15.0f);
int chunks_wide = std::ceil(world->size.x / chunk_size) + 1;
int chunks_high = std::ceil(world->size.y / chunk_size) + 1;
RPROF_START("Alloc chunks");
int *chunks[chunks_high][chunks_wide];
int chunk_boid_counts[chunks_high][chunks_wide];
uint16_t *chunks[chunks_high][chunks_wide];
uint16_t chunk_boid_counts[chunks_high][chunks_wide];
for (int y = 0; y < chunks_high; y++) {
for (int x = 0; x < chunks_wide; x++) {
chunks[y][x] = (int*)malloc(boid_count * sizeof(int));
chunks[y][x] = (uint16_t*)arena_malloc(&world->frame_arena, boid_count * sizeof(uint16_t));
chunk_boid_counts[y][x] = 0;
}
}
@ -231,111 +292,77 @@ static void world_update(World *world, float dt) {
int chunk_x = boid->pos.x / chunk_size;
int chunk_y = boid->pos.y / chunk_size;
int *boids_in_chunk = &chunk_boid_counts[chunk_y][chunk_x];
chunks[chunk_y][chunk_x][*boids_in_chunk] = i;
(*boids_in_chunk)++;
}
RPROF_STOP();
// Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
float dot_threshold = cosf(world->view_angle/2);
RPROF_START("alloc checked_local_boids");
bool *checked_local_boids[boid_count];
for (int i = 0; i < boid_count; i++) {
checked_local_boids[i] = (bool*)calloc(boid_count, sizeof(bool));
uint16_t *count = &chunk_boid_counts[chunk_y][chunk_x];
chunks[chunk_y][chunk_x][*count] = i;
(*count)++;
}
RPROF_STOP();
RPROF_START("Calc dot products and ranges (chunked)");
for (int i = 0; i < boid_count; i++) {
Boid *boid = &boids[i];
int boid_chunk_x = boid->pos.x / chunk_size;
int boid_chunk_y = boid->pos.y / chunk_size;
for (int y = 0; y < chunks_high; y++) {
for (int x = 0; x < chunks_high; x++) {
uint16_t *chunk = chunks[y][x];
size_t chunk_boid_count = chunk_boid_counts[y][x];
Vector2 chunk_offset[] = {
{ -1, -1, }, { 0, -1, }, { 1, -1, },
{ -1, 0, }, { 0, 0, }, { 1, 0, },
{ -1, 1, }, { 0, 1, }, { 1, 1, },
};
for (int j = 0; j < 9; j++) {
int chunk_x = boid_chunk_x + chunk_offset[j].x;
int chunk_y = boid_chunk_y + chunk_offset[j].y;
if (chunk_x < 0 || chunk_y < 0 || chunk_x >= chunks_wide || chunk_y >= chunks_high) continue;
for (int oy = -1; oy <= 1; oy++) {
int chunk_y = x + oy;
if (chunk_y < 0 || chunk_y >= chunks_high) continue;
int *chunk = chunks[chunk_y][chunk_x];
int boids_in_chunk = chunk_boid_counts[chunk_y][chunk_x];
for (int k = 0; k < boids_in_chunk; k++) {
int other_boid = chunk[k];
if (i == other_boid) continue;
if (checked_local_boids[i][other_boid]) continue;
if (checked_local_boids[other_boid][other_boid]) continue;
for (int ox = -1; ox <= 1; ox++) {
int chunk_x = y + ox;
if (chunk_x < 0 || chunk_x >= chunks_wide) continue;
Vector2 offset = Vector2Subtract(boids[i].pos, boids[other_boid].pos);
uint16_t *neighbour_chunk = chunks[chunk_y][chunk_x];
size_t neighbour_chunk_boid_count = chunk_boid_counts[chunk_y][chunk_x];
bool with_in_range = Vector2LengthSqr(offset) <= (world->view_radius * world->view_radius);
if (with_in_range) {
float dot = Vector2DotProduct(boids[i].dir, Vector2Normalize(offset));
for (int i = 0; i < chunk_boid_count; i++) {
int boid1 = chunk[i];
for (int j = 0; j < neighbour_chunk_boid_count; j++) {
int boid2 = chunk[j];
if (boid1 == boid2) continue;
if (-dot >= dot_threshold) {
int *count = &all_local_boid_counts[i];
all_local_boids[i][*count] = &boids[other_boid];
(*count)++;
}
if (dot >= dot_threshold) {
int *count = &all_local_boid_counts[other_boid];
all_local_boids[other_boid][*count] = &boids[i];
(*count)++;
assign_local_boids(world, all_local_boids, all_local_boid_counts, boids, boid1, boid2);
}
}
// uint16_t boid1;
// BoidsListNodeIterator it1 = boid_list_get_iterator(chunk, chunk_boid_count);
// while (boid_list_iterator_next(&it1, &boid1)) {
// uint16_t boid2;
// BoidsListNodeIterator it2 = boid_list_get_iterator(neighbour_chunk, neighbour_chunk_boid_count);
// while (boid_list_iterator_next(&it2, &boid2)) {
// if (boid1 == boid2) continue;
//
// assign_local_boids(world, all_local_boids, all_local_boid_counts, boids, boid1, boid2);
// }
// }
}
checked_local_boids[i][other_boid] = true;
checked_local_boids[other_boid][i] = true;
}
}
}
RPROF_STOP();
RPROF_START("Free chunks");
for (int y = 0; y < chunks_high; y++) {
for (int x = 0; x < chunks_wide; x++) {
free(chunks[y][x]);
}
}
RPROF_STOP();
RPROF_START("free checked_local_boids");
for (int i = 0; i < boid_count; i++) {
free(checked_local_boids[i]);
}
RPROF_STOP();
RPROF_START("Apply forces");
for (int i = 0; i < boid_count; i++) {
Boid *boid = &world->boids[i];
Vector2 acc = { 0, 0 };
// Boid **local_boids = NULL;
// int local_boids_count = 0;
Boid **local_boids = all_local_boids[i];
BoidsListNode *local_boids = &all_local_boids[i];
int local_boids_count = all_local_boid_counts[i];
// Boid *local_boids[world->boids.size()];
// int local_boids_count = get_boids_in_view_cone(local_boids, boid, world->view_radius, world->view_angle, world->boids.data(), world->boids.size());
if (local_boids_count > 0) {
// LogTrace("i:%d", i);
// LogTrace("%d", local_boids_count);
Vector2 separation_force = { 0, 0 };
Vector2 flock_center = { 0, 0 };
Vector2 flock_heading = { 0, 0 };
for (int j = 0; j < local_boids_count; j++) {
// LogTrace("%d", local_boids);
flock_heading = Vector2Add(flock_heading, local_boids[j]->dir);
flock_center = Vector2Add(flock_center , local_boids[j]->pos);
Vector2 pos_diff = Vector2Subtract(boid->pos, local_boids[j]->pos);
uint16_t local_boid_id;
BoidsListNodeIterator it = boid_list_get_iterator(local_boids, local_boids_count);
while (boid_list_iterator_next(&it, &local_boid_id)) {
Boid *local_boid = &boids[local_boid_id];
flock_heading = Vector2Add(flock_heading, local_boid->dir);
flock_center = Vector2Add(flock_center , local_boid->pos);
Vector2 pos_diff = Vector2Subtract(boid->pos, local_boid->pos);
float dist_sqr = Vector2LengthSqr(pos_diff);
if (dist_sqr <= world->separation_radius * world->separation_radius) {
separation_force = Vector2Add(separation_force, vector2_div_value(pos_diff, dist_sqr));
@ -385,25 +412,30 @@ static void world_update(World *world, float dt) {
}
if (world->looping_walls) {
if (boid->pos.x > world->size.x) {
if (boid->pos.x >= world->size.x) {
boid->pos.x -= world->size.x;
} else if (boid->pos.x < 0) {
boid->pos.x += world->size.x;
}
if (boid->pos.y > world->size.y) {
if (boid->pos.y >= world->size.y) {
boid->pos.y -= world->size.y;
} else if (boid->pos.y < 0) {
boid->pos.y += world->size.y;
}
} else {
if (boid->pos.x >= world->size.x) {
boid->pos.x = world->size.x-1;
} else if (boid->pos.x < 0) {
boid->pos.x = 0;
}
if (boid->pos.y >= world->size.y) {
boid->pos.y = world->size.y-1;
} else if (boid->pos.y < 0) {
boid->pos.y = 0;
}
}
}
RPROF_STOP();
RPROF_START("Free groups");
for (int i = 0; i < boid_count; i++) {
free(all_local_boids[i]);
}
RPROF_STOP();
}
static void world_draw(World *world, Visuals *visuals) {
@ -513,7 +545,7 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) {
GuiCheckBox(next_in_layout(&layout, 15, 15), "Show separation radius", &visuals->draw_separation_radius);
GuiCheckBox(next_in_layout(&layout, 15, 15), "Show collision rays", &visuals->draw_collision_avoidance_rays);
GuiCheckBox(next_in_layout(&layout, 15, 15), "Show pulling forces", &visuals->draw_pulling_forces);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Boid size", &visuals->boid_edge_size, 5, 150);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Boid size", &visuals->boid_edge_size, 2.5, 50);
Rectangle boid_color_rect = next_in_layout(&layout, 50, 50);
GuiColorPicker(boid_color_rect, NULL, &visuals->boid_color);
@ -532,15 +564,15 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) {
VerticalLayout layout = { .x = 230, .y = 65, .gap = 8 };
GuiCheckBox(next_in_layout(&layout, 15, 15), "Looping walls", &world->looping_walls);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 10, 200);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 10, 400);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 2.5, 150);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 2.5, 150);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View angle", &world->view_angle, 0, 2*PI);
gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Min speed", &world->min_speed, 0, world->max_speed, &ui->min_speed_edit);
gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Max speed", &world->max_speed, 0, 1000, &ui->max_speed_edit);
gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Steer speed", &world->max_steer_speed, 0, 1000, &ui->steer_speed_edit);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision distance", &world->collision_avoidance_distance, 10, 200);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision distance", &world->collision_avoidance_distance, 5, 100);
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision ray angle", &world->collision_avoidance_ray_angle, 0, PI);
GuiSpinner(next_in_layout(&layout, 100, 15, 95), "Collision ray count", &world->collision_avoidance_ray_count, 1, 10, false);
}
@ -556,6 +588,10 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) {
float window_width = GetScreenWidth();
DrawFPS(window_width - 90, 10);
char boid_label[128] = { 0 };
snprintf(boid_label, sizeof(boid_label), "%lu boids", world->boids.size());
DrawText(boid_label, window_width - 125, 35, 20, GREEN);
}
void UpdateDrawFrame() {
@ -600,7 +636,7 @@ int estimate_maximum_boid_count(World *world) {
}
uint64_t best_duration = UINT64_MAX;
for (int i = 0; i < 10; i++) {
for (int i = 0; i < 20; i++) {
uint64_t start = rprof_read_cpu_timer();
world_update(&g_world, TIME_PER_FRAME);
uint64_t end = rprof_read_cpu_timer();
@ -634,25 +670,24 @@ int estimate_maximum_boid_count(World *world) {
}
int main() {
// printf("%d\n", estimate_maximum_boid_count(&g_world));
rprof_init();
int screen_width = 1280;
int screen_height = 720;
g_world.size = { (float)screen_width, (float)screen_height };
world_init(&g_world, 1280, 720);
float border = g_visuals.boid_edge_size;
for (int i = 0; i < 5000; i++) {
for (int i = 0; i < 50000; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
}
rprof_init();
for (int i = 0; i < 5; i++) {
for (int i = 0; i < 1; i++) {
world_update(&g_world, TIME_PER_FRAME);
}
printf("arena: %ld (%.03fMiB)\n", g_world.frame_arena.offset, (float)g_world.frame_arena.offset / 1024 / 1024);
world_free(&g_world);
rprof_end();
rprof_output(NULL);
@ -660,7 +695,7 @@ int main() {
return 0;
}
int test_main() {
int foo_main() {
SetTraceLogLevel(LOG_TRACE);
int screen_width = 1280;
@ -670,23 +705,20 @@ int test_main() {
raylib::Window window(screen_width, screen_height, "Boid Playground");
window.SetState(FLAG_VSYNC_HINT);
g_world.size = { (float)screen_width, (float)screen_height };
GuiLoadStyleDefault();
rprof_init();
world_init(&g_world, screen_width, screen_height);
float border = g_world.collision_avoidance_distance;
for (int i = 0; i < 3000; i++) {
for (int i = 0; i < 50000; i++) {
Boid boid;
boid_rand_init(&g_world, &boid, border);
g_world.boids.push_back(boid);
}
// g_world.boids.push_back({ .pos = { 100, 100 }, .dir = { 1, 0 }, .speed = 10 });
// g_world.boids.push_back({ .pos = { 100, 500 }, .dir = { 1, 0 }, .speed = 10 });
GuiLoadStyleDefault();
rprof_init();
#ifdef PLATFORM_WEB
#ifdef __EMSCRIPTEN__
emscripten_set_main_loop(UpdateDrawFrame, 0, 1);
#else
SetTargetFPS(FRAMERATE);
@ -696,6 +728,7 @@ int test_main() {
#endif
window.Close();
world_free(&g_world);
rprof_end();

65
src/memory-arena.cpp Normal file
View File

@ -0,0 +1,65 @@
#include <cstdio>
#include <cstring>
#include <cassert>
#include <cstdlib>
#include "memory-arena.hpp"
// Returns true when `x` is an exact power of two (1, 2, 4, ...).
// Zero is not a power of two and yields false.
static bool is_power_of_two(uintptr_t x) {
    // A power of two has exactly one bit set, so clearing the lowest set bit
    // leaves zero. The explicit zero check rejects x == 0, which would
    // otherwise pass the bit test.
    return x != 0 && (x & (x - 1)) == 0;
}
// Rounds `ptr` up to the nearest multiple of `align`.
// `align` must be a power of two (checked with an assert).
static uintptr_t align_forward(uintptr_t ptr, size_t align) {
    assert(is_power_of_two(align));

    // Equivalent to (ptr % align), but cheaper because 'align' is a power of two.
    const uintptr_t misalignment = ptr & (align - 1);
    if (misalignment == 0) {
        // Already on an aligned address.
        return ptr;
    }

    // Step forward to the next aligned address.
    return ptr + align - misalignment;
}
// Initialises `arena` with a freshly allocated backing buffer of `size` bytes.
//
// If the buffer cannot be allocated the arena is left empty (NULL buffer,
// capacity 0), so later arena_malloc() calls fail through the out-of-space
// path instead of handing out pointers into a NULL buffer.
// Release the buffer with arena_free().
void arena_init(MemoryArena *arena, size_t size) {
    arena->buffer = (uint8_t*)malloc(size);
    // Only advertise capacity that actually exists.
    arena->size = (arena->buffer != NULL) ? size : 0;
    arena->offset = 0;
}
// Releases the arena's backing buffer and marks the arena as having no storage.
void arena_free(MemoryArena *arena) {
    uint8_t *backing = arena->buffer;

    // Detach the buffer from the arena before handing it back to the allocator.
    arena->buffer = NULL;
    arena->size = 0;

    free(backing);
}
// Allocates `size` bytes from `arena`, aligned to `align` (a power of two).
//
// Returns a pointer into the arena's buffer, or NULL when the request does not
// fit in the remaining space (asserts first in debug builds). The memory is
// not zeroed; use arena_calloc() for that.
void* arena_malloc(MemoryArena *arena, size_t size, size_t align) {
    uintptr_t base = (uintptr_t)arena->buffer;
    // Offset of the first suitably aligned byte at or after the current cursor.
    size_t offset = align_forward(base + arena->offset, align) - base;

    // Compare against the remaining space instead of computing offset + size:
    // this cannot overflow and lets an allocation end exactly at the end of
    // the buffer.
    if (offset > arena->size || size > arena->size - offset) {
        // TODO: grow arena
        assert(false && "Arena ran out of space");
        return NULL;
    }

    arena->offset = offset + size;
    return &arena->buffer[offset];
}
// Same as arena_malloc(), but the returned memory is filled with zero bytes.
// Returns NULL when arena_malloc() does.
void* arena_calloc(MemoryArena *arena, size_t size, size_t align) {
    void *block = arena_malloc(arena, size, align);
    if (block == NULL) {
        return NULL;
    }

    memset(block, 0, size);
    return block;
}
// Rewinds the allocation cursor to the start of the buffer so the whole arena
// can be reused. The buffer itself is neither freed nor overwritten.
void arena_clear(MemoryArena *arena)
{
    arena->offset = 0;
}

20
src/memory-arena.hpp Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <cstddef>
#include <inttypes.h>
// Alignment used by arena_malloc()/arena_calloc() when the caller does not pass
// one. May be overridden by defining the macro before including this header.
#ifndef ARENA_DEFAULT_ALIGNMENT
// Parenthesised as a whole so the macro behaves as a single value inside larger
// expressions such as `n / ARENA_DEFAULT_ALIGNMENT`.
#define ARENA_DEFAULT_ALIGNMENT (2*sizeof(void*))
#endif
// Linear (bump) allocator over a single heap buffer.
struct MemoryArena {
// Backing storage obtained in arena_init(); set to NULL by arena_free().
uint8_t *buffer;
// Capacity of `buffer` in bytes.
size_t size;
// Byte offset into `buffer` where the next allocation is placed (before alignment).
size_t offset;
};
// Allocates a backing buffer of `size` bytes and resets the allocation cursor.
void arena_init(MemoryArena *arena, size_t size);
// Frees the backing buffer and sets the arena's buffer to NULL and size to 0.
void arena_free(MemoryArena *arena);
// Returns `size` bytes aligned to `align` (must be a power of two) from the
// arena, or NULL when the arena has no room left. Memory is not initialised.
void* arena_malloc(MemoryArena *arena, size_t size, size_t align = ARENA_DEFAULT_ALIGNMENT);
// Like arena_malloc(), but the returned memory is zero-filled.
void* arena_calloc(MemoryArena *arena, size_t size, size_t align = ARENA_DEFAULT_ALIGNMENT);
// Resets the allocation cursor to 0 so the buffer can be reused; nothing is freed.
void arena_clear(MemoryArena *arena);

View File

@ -80,7 +80,6 @@ void rprof_output(prof_sort_cmp_cb sort_cb);
// ------------------------ CPU Timing -------------------------
#ifdef WIN32
#include <intrin.h>
#include <windows.h>
static uint64_t rprof_get_os_timer_hz(void)
@ -97,7 +96,6 @@ void rprof_output(prof_sort_cmp_cb sort_cb);
return Value.QuadPart;
}
#else
#include <x86intrin.h>
#include <time.h>
static uint64_t rprof_get_os_timer_hz(void)
@ -114,10 +112,28 @@ void rprof_output(prof_sort_cmp_cb sort_cb);
#endif // WIN32
static uint64_t rprof_read_cpu_timer(void)
{
return __rdtsc();
}
// rprof_read_cpu_timer(): returns the current value of the tick counter used
// for profiling measurements. The implementation is selected per platform.
#ifdef WIN32
#include <intrin.h>
// Windows: read the CPU time stamp counter.
static uint64_t rprof_read_cpu_timer(void)
{
    return __rdtsc();
}
// `defined(...)` keeps the test well-formed even when the macro is absent or
// defined without a value.
#elif defined(__EMSCRIPTEN__)
// Emscripten: no __rdtsc() is used here; fall back to the OS timer.
static uint64_t rprof_read_cpu_timer(void)
{
    return rprof_read_os_timer();
}
#else
#include <x86intrin.h>
// Other platforms: read the CPU time stamp counter.
static uint64_t rprof_read_cpu_timer(void)
{
    return __rdtsc();
}
#endif
static uint64_t rprof_get_cpu_timer_hz(uint64_t measure_time_ms)
{
@ -257,7 +273,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb);
}
printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time);
printf("\nTotal time taken: %.3fms (%llu)\n", (float)total_time*1000/cpu_hz, total_time);
uint32_t duration_max_width = 0;
uint32_t percent_max_width = 0;
@ -301,7 +317,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
uint64_t total_time = g_rprof.end_time - g_rprof.init_time;
uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);
printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time);
printf("\nTotal time taken: %.3fms (%llu)\n", (float)total_time*1000/cpu_hz, total_time);
}
#endif // RPROF_ONLY_TOTAL_TIME