From 218491819589b9be25a14c8ae3bbbf3546fe39a7 Mon Sep 17 00:00:00 2001 From: Rokas Puzonas Date: Thu, 20 Jul 2023 23:17:21 +0300 Subject: [PATCH] optimize checking neighbours --- Makefile | 15 +- src/boid-playground.hpp | 32 ++++- src/main.cpp | 301 ++++++++++++++++++++++------------------ src/memory-arena.cpp | 65 +++++++++ src/memory-arena.hpp | 20 +++ src/rprof.h | 32 +++-- 6 files changed, 313 insertions(+), 152 deletions(-) create mode 100644 src/memory-arena.cpp create mode 100644 src/memory-arena.hpp diff --git a/Makefile b/Makefile index e79573b..dcd7a11 100644 --- a/Makefile +++ b/Makefile @@ -6,22 +6,28 @@ PLATFORM ?= desktop SHELL := /bin/zsh TOP_BUILD_DIR := build EXECUTABLE := boids-playground -WEB_SHELL := src/shell.html SUBMODULES_PATH := depends -COMPILER_FLAGS := -std=c++17 -Wno-enum-compare +# default stack - 65536 +# 67108864 = 64MiB +WEB_HEAP_SIZE := 1105199104 +WEB_STACK_SIZE := 262144 +WEB_SHELL := src/shell.html + +COMPILER_FLAGS := -std=c++17 -Wno-enum-compare -s -O0 LINKER_FLAGS := -lraylib # SOURCES := $(wildcard src/*.cpp) SOURCES := src/main.cpp +COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raygui/src/ + # ----------------- Prepare variables for targets ------------------ EXT := EMSDK_PATH := $(SUBMODULES_PATH)/emsdk RAYLIB_PLATFORM := PLATFORM_DESKTOP -COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raygui/src/ COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raylib/src COMPILER_FLAGS += -I$(SUBMODULES_PATH)/raylib-cpp/include @@ -65,10 +71,13 @@ ifeq ($(PLATFORM), web) EMSCRIPTEN_PATH ?= $(EMSDK_PATH)/upstream/emscripten COMPILER_FLAGS += -I$(EMSCRIPTEN_PATH)/cache/sysroot/include + COMPILER_FLAGS += -D_DEFAULT_SOURCE LINKER_FLAGS += -s USE_GLFW=3 LINKER_FLAGS += -s FORCE_FILESYSTEM=1 LINKER_FLAGS += $(RAYLIB_RELEASE_PATH)/libraylib.a LINKER_FLAGS += --shell-file $(WEB_SHELL) + LINKER_FLAGS += -s TOTAL_MEMORY=$(WEB_HEAP_SIZE) + LINKER_FLAGS += -s STACK_SIZE=$(WEB_STACK_SIZE) LIB_DEPENDENCIES += emsdk endif diff --git a/src/boid-playground.hpp b/src/boid-playground.hpp index e55ea88..22ef10c 100644 --- a/src/boid-playground.hpp +++ b/src/boid-playground.hpp @@ -4,10 +4,14 @@ #include #include +#include "memory-arena.hpp" + #define ARRAY_LEN(arr) (sizeof(arr)/sizeof(arr[0])) #define LogTrace(...) TraceLog(LOG_TRACE, __VA_ARGS__) #define ASSERT(...) assert(__VA_ARGS__) +#define MAX_BOIDS 65536 // 65536 = 2^16 + struct Boid { Vector2 pos; Vector2 dir; @@ -32,28 +36,29 @@ struct World { std::vector boids; std::vector obstacles; - float view_radius = 100; + MemoryArena frame_arena; + float view_radius = 15; float view_angle = PI*1.5; - float min_speed = 50; - float max_speed = 80; + float min_speed = 30; + float max_speed = 50; float max_steer_speed = 100; - float separation_radius = 50; + float separation_radius = 10; float alignment_strength = 1; float cohesion_strength = 1; float separation_strength = 5; float collision_avoidance_strength = 50; - float collision_avoidance_distance = 75; + float collision_avoidance_distance = 30; float collision_avoidance_ray_angle = PI/1.5; int collision_avoidance_ray_count = 3; // TODO: Function `get_boids_in_view_cone` doesn't work as expected with looping walls - bool looping_walls = false; + bool looping_walls = true; }; struct Visuals { - float boid_edge_size = 40; + float boid_edge_size = 8; Color boid_color = BLACK; Color bg_color = RAYWHITE; @@ -77,3 +82,16 @@ struct UI { bool separation_strength_edit = false; bool collision_avoidance_strength_edit = false; }; + +#define BOIDS_PER_NODE 32 + +struct BoidsListNode { + uint16_t boid_ids[BOIDS_PER_NODE]; + BoidsListNode *next; +}; + +struct BoidsListNodeIterator { + BoidsListNode *node; + int i; + uint16_t count; +}; diff --git a/src/main.cpp b/src/main.cpp index 5fa0d03..3999404 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,7 +4,7 @@ #include #include -#ifdef PLATFORM_WEB +#ifdef __EMSCRIPTEN__ #include #endif @@ -17,6 +17,7 @@ #include "boid-playground.hpp" #include "raycast.cpp" +#include "memory-arena.cpp" #define FRAMERATE 60 #define TIME_PER_FRAME (1.0/FRAMERATE) @@ -52,6 +53,15 @@ static void boid_rand_init(World *world, Boid *boid, float border) { boid->speed = GetRandomValue(world->min_speed, world->max_speed); } +static void world_init(World *world, float width, float height) { + arena_init(&world->frame_arena, 1024 * 1024 * 1024); + g_world.size = { width, height }; +} + +static void world_free(World *world) { + arena_free(&world->frame_arena); +} + static Vector2 get_center_point(std::vector &points) { Vector2 center = { 0, 0 }; for (int i = 0; i < points.size(); i++) { @@ -179,47 +189,98 @@ static void draw_circle_sector(Vector2 center, float radius, float start_angle, rlEnd(); } -static int get_boids_in_view_cone(Boid **boids_in_view, Boid *boid, float view_radius, float view_angle, Boid *boids, int boid_count) { - int count = 0; - float dot_threshold = Vector2DotProduct(boid->dir, Vector2Rotate(boid->dir, view_angle/2)); +static BoidsListNodeIterator boid_list_get_iterator(BoidsListNode *node, uint16_t count) { + return { .node = node, .i = 0, .count = count }; +} - for (int i = 0; i < boid_count; i++) { - if (&boids[i] == boid) continue; - - Vector2 dir_to_boid = Vector2Normalize(Vector2Subtract(boids[i].pos, boid->pos)); - float dot = Vector2DotProduct(boid->dir, dir_to_boid); - if (dot >= dot_threshold && Vector2DistanceSqr(boids[i].pos, boid->pos) <= view_radius * view_radius) { - boids_in_view[count] = &boids[i]; - count++; - } +static bool boid_list_iterator_next(BoidsListNodeIterator *iterator, uint16_t *value) { + if (iterator->count == 0) { + return false; } - return count; + if (iterator->i == BOIDS_PER_NODE) { + iterator->i = 0; + iterator->node = iterator->node->next; + } + + *value = iterator->node->boid_ids[iterator->i]; + iterator->i++; + iterator->count--; + return true; +} + +static void boid_list_append_unique(MemoryArena *arena, BoidsListNode *local_boids, uint16_t *count, uint16_t new_boid) { + int left_count = *count; + BoidsListNode *last = local_boids; + BoidsListNode *curr = local_boids; + while (left_count > 0 && curr) { + for (int i = 0; i < MIN(left_count, BOIDS_PER_NODE); i++) { + if (curr->boid_ids[i] == new_boid) return; + } + + last = curr; + curr = curr->next; + left_count -= BOIDS_PER_NODE; + } + + int idx = (*count) % BOIDS_PER_NODE; + if (idx == BOIDS_PER_NODE-1) { + last->next = (BoidsListNode*)arena_malloc(arena, sizeof(BoidsListNode)); + last->next->next = NULL; + } + + last->boid_ids[idx] = new_boid; + (*count)++; +} + +static void assign_local_boids(World *world, BoidsListNode *local_boids, uint16_t *local_boid_counts, Boid *boids, uint16_t boid1, uint16_t boid2) { + // Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2)); + float dot_threshold = cosf(world->view_angle/2); + + Vector2 offset = Vector2Subtract(boids[boid1].pos, boids[boid2].pos); + + bool with_in_range = Vector2LengthSqr(offset) <= (world->view_radius * world->view_radius); + if (with_in_range) { + float dot = Vector2DotProduct(boids[boid1].dir, Vector2Normalize(offset)); + + if (-dot >= dot_threshold) { + boid_list_append_unique(&world->frame_arena, &local_boids[boid1], &local_boid_counts[boid1], boid2); + } + if (dot >= dot_threshold) { + boid_list_append_unique(&world->frame_arena, &local_boids[boid2], &local_boid_counts[boid2], boid1); + } + } } static void world_update(World *world, float dt) { + arena_clear(&world->frame_arena); + Boid *boids = world->boids.data(); int boid_count = world->boids.size(); - RPROF_START("Create groups"); - Boid **all_local_boids[boid_count]; - int all_local_boid_counts[boid_count]; + assert(boid_count <= MAX_BOIDS); + + RPROF_START("Alloc groups"); + // LocalBoidsListNode all_local_boids[boid_count]; + BoidsListNode *all_local_boids = (BoidsListNode*)arena_malloc(&world->frame_arena, boid_count * sizeof(BoidsListNode)); + uint16_t all_local_boid_counts[boid_count]; for (int i = 0; i < boid_count; i++) { - all_local_boids[i] = (Boid **)malloc(boid_count * sizeof(Boid*)); + // all_local_boids[i] = (uint16_t *)arena_malloc(&world->frame_arena, boid_count * sizeof(uint16_t)); + all_local_boids[i].next = NULL; all_local_boid_counts[i] = 0; } RPROF_STOP(); - // float chunk_size = world->size.x; - float chunk_size = world->view_radius/2; - int chunks_wide = std::ceil(world->size.x / chunk_size); - int chunks_high = std::ceil(world->size.y / chunk_size); + size_t alloc_chunks = world->frame_arena.offset; + float chunk_size = std::max(world->view_radius, 15.0f); + int chunks_wide = std::ceil(world->size.x / chunk_size) + 1; + int chunks_high = std::ceil(world->size.y / chunk_size) + 1; RPROF_START("Alloc chunks"); - int *chunks[chunks_high][chunks_wide]; - int chunk_boid_counts[chunks_high][chunks_wide]; + uint16_t *chunks[chunks_high][chunks_wide]; + uint16_t chunk_boid_counts[chunks_high][chunks_wide]; for (int y = 0; y < chunks_high; y++) { for (int x = 0; x < chunks_wide; x++) { - chunks[y][x] = (int*)malloc(boid_count * sizeof(int)); + chunks[y][x] = (uint16_t*)arena_malloc(&world->frame_arena, boid_count * sizeof(uint16_t)); chunk_boid_counts[y][x] = 0; } } @@ -231,111 +292,77 @@ static void world_update(World *world, float dt) { int chunk_x = boid->pos.x / chunk_size; int chunk_y = boid->pos.y / chunk_size; - int *boids_in_chunk = &chunk_boid_counts[chunk_y][chunk_x]; - chunks[chunk_y][chunk_x][*boids_in_chunk] = i; - (*boids_in_chunk)++; - } - RPROF_STOP(); - - // Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2)); - float dot_threshold = cosf(world->view_angle/2); - - RPROF_START("alloc checked_local_boids"); - bool *checked_local_boids[boid_count]; - for (int i = 0; i < boid_count; i++) { - checked_local_boids[i] = (bool*)calloc(boid_count, sizeof(bool)); + uint16_t *count = &chunk_boid_counts[chunk_y][chunk_x]; + chunks[chunk_y][chunk_x][*count] = i; + (*count)++; } RPROF_STOP(); RPROF_START("Calc dot products and ranges (chunked)"); - for (int i = 0; i < boid_count; i++) { - Boid *boid = &boids[i]; - int boid_chunk_x = boid->pos.x / chunk_size; - int boid_chunk_y = boid->pos.y / chunk_size; + for (int y = 0; y < chunks_high; y++) { + for (int x = 0; x < chunks_high; x++) { + uint16_t *chunk = chunks[y][x]; + size_t chunk_boid_count = chunk_boid_counts[y][x]; - Vector2 chunk_offset[] = { - { -1, -1, }, { 0, -1, }, { 1, -1, }, - { -1, 0, }, { 0, 0, }, { 1, 0, }, - { -1, 1, }, { 0, 1, }, { 1, 1, }, - }; - for (int j = 0; j < 9; j++) { - int chunk_x = boid_chunk_x + chunk_offset[j].x; - int chunk_y = boid_chunk_y + chunk_offset[j].y; - if (chunk_x < 0 || chunk_y < 0 || chunk_x >= chunks_wide || chunk_y >= chunks_high) continue; + for (int oy = -1; oy <= 1; oy++) { + int chunk_y = x + oy; + if (chunk_y < 0 || chunk_y >= chunks_high) continue; - int *chunk = chunks[chunk_y][chunk_x]; - int boids_in_chunk = chunk_boid_counts[chunk_y][chunk_x]; - for (int k = 0; k < boids_in_chunk; k++) { - int other_boid = chunk[k]; - if (i == other_boid) continue; - if (checked_local_boids[i][other_boid]) continue; - if (checked_local_boids[other_boid][other_boid]) continue; + for (int ox = -1; ox <= 1; ox++) { + int chunk_x = y + ox; + if (chunk_x < 0 || chunk_x >= chunks_wide) continue; - Vector2 offset = Vector2Subtract(boids[i].pos, boids[other_boid].pos); + uint16_t *neighbour_chunk = chunks[chunk_y][chunk_x]; + size_t neighbour_chunk_boid_count = chunk_boid_counts[chunk_y][chunk_x]; - bool with_in_range = Vector2LengthSqr(offset) <= (world->view_radius * world->view_radius); - if (with_in_range) { - float dot = Vector2DotProduct(boids[i].dir, Vector2Normalize(offset)); + for (int i = 0; i < chunk_boid_count; i++) { + int boid1 = chunk[i]; + for (int j = 0; j < neighbour_chunk_boid_count; j++) { + int boid2 = chunk[j]; + if (boid1 == boid2) continue; - if (-dot >= dot_threshold) { - int *count = &all_local_boid_counts[i]; - all_local_boids[i][*count] = &boids[other_boid]; - (*count)++; - } - if (dot >= dot_threshold) { - int *count = &all_local_boid_counts[other_boid]; - all_local_boids[other_boid][*count] = &boids[i]; - (*count)++; + assign_local_boids(world, all_local_boids, all_local_boid_counts, boids, boid1, boid2); + } } + + // uint16_t boid1; + // BoidsListNodeIterator it1 = boid_list_get_iterator(chunk, chunk_boid_count); + // while (boid_list_iterator_next(&it1, &boid1)) { + // uint16_t boid2; + // BoidsListNodeIterator it2 = boid_list_get_iterator(neighbour_chunk, neighbour_chunk_boid_count); + // while (boid_list_iterator_next(&it2, &boid2)) { + // if (boid1 == boid2) continue; + // + // assign_local_boids(world, all_local_boids, all_local_boid_counts, boids, boid1, boid2); + // } + // } } - - checked_local_boids[i][other_boid] = true; - checked_local_boids[other_boid][i] = true; } } } RPROF_STOP(); - RPROF_START("Free chunks"); - for (int y = 0; y < chunks_high; y++) { - for (int x = 0; x < chunks_wide; x++) { - free(chunks[y][x]); - } - } - RPROF_STOP(); - - RPROF_START("free checked_local_boids"); - for (int i = 0; i < boid_count; i++) { - free(checked_local_boids[i]); - } - RPROF_STOP(); - RPROF_START("Apply forces"); for (int i = 0; i < boid_count; i++) { Boid *boid = &world->boids[i]; Vector2 acc = { 0, 0 }; - // Boid **local_boids = NULL; - // int local_boids_count = 0; - - Boid **local_boids = all_local_boids[i]; + BoidsListNode *local_boids = &all_local_boids[i]; int local_boids_count = all_local_boid_counts[i]; - // Boid *local_boids[world->boids.size()]; - // int local_boids_count = get_boids_in_view_cone(local_boids, boid, world->view_radius, world->view_angle, world->boids.data(), world->boids.size()); - if (local_boids_count > 0) { - // LogTrace("i:%d", i); - // LogTrace("%d", local_boids_count); Vector2 separation_force = { 0, 0 }; Vector2 flock_center = { 0, 0 }; Vector2 flock_heading = { 0, 0 }; - for (int j = 0; j < local_boids_count; j++) { - // LogTrace("%d", local_boids); - flock_heading = Vector2Add(flock_heading, local_boids[j]->dir); - flock_center = Vector2Add(flock_center , local_boids[j]->pos); - Vector2 pos_diff = Vector2Subtract(boid->pos, local_boids[j]->pos); + uint16_t local_boid_id; + BoidsListNodeIterator it = boid_list_get_iterator(local_boids, local_boids_count); + while (boid_list_iterator_next(&it, &local_boid_id)) { + Boid *local_boid = &boids[local_boid_id]; + flock_heading = Vector2Add(flock_heading, local_boid->dir); + flock_center = Vector2Add(flock_center , local_boid->pos); + + Vector2 pos_diff = Vector2Subtract(boid->pos, local_boid->pos); float dist_sqr = Vector2LengthSqr(pos_diff); if (dist_sqr <= world->separation_radius * world->separation_radius) { separation_force = Vector2Add(separation_force, vector2_div_value(pos_diff, dist_sqr)); @@ -385,25 +412,30 @@ static void world_update(World *world, float dt) { } if (world->looping_walls) { - if (boid->pos.x > world->size.x) { + if (boid->pos.x >= world->size.x) { boid->pos.x -= world->size.x; } else if (boid->pos.x < 0) { boid->pos.x += world->size.x; } - if (boid->pos.y > world->size.y) { + if (boid->pos.y >= world->size.y) { boid->pos.y -= world->size.y; } else if (boid->pos.y < 0) { boid->pos.y += world->size.y; } + } else { + if (boid->pos.x >= world->size.x) { + boid->pos.x = world->size.x-1; + } else if (boid->pos.x < 0) { + boid->pos.x = 0; + } + if (boid->pos.y >= world->size.y) { + boid->pos.y = world->size.y-1; + } else if (boid->pos.y < 0) { + boid->pos.y = 0; + } } } RPROF_STOP(); - - RPROF_START("Free groups"); - for (int i = 0; i < boid_count; i++) { - free(all_local_boids[i]); - } - RPROF_STOP(); } static void world_draw(World *world, Visuals *visuals) { @@ -513,7 +545,7 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) { GuiCheckBox(next_in_layout(&layout, 15, 15), "Show separation radius", &visuals->draw_separation_radius); GuiCheckBox(next_in_layout(&layout, 15, 15), "Show collision rays", &visuals->draw_collision_avoidance_rays); GuiCheckBox(next_in_layout(&layout, 15, 15), "Show pulling forces", &visuals->draw_pulling_forces); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Boid size", &visuals->boid_edge_size, 5, 150); + GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Boid size", &visuals->boid_edge_size, 2.5, 50); Rectangle boid_color_rect = next_in_layout(&layout, 50, 50); GuiColorPicker(boid_color_rect, NULL, &visuals->boid_color); @@ -532,15 +564,15 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) { VerticalLayout layout = { .x = 230, .y = 65, .gap = 8 }; GuiCheckBox(next_in_layout(&layout, 15, 15), "Looping walls", &world->looping_walls); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 10, 200); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 10, 400); + GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 2.5, 150); + GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 2.5, 150); GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View angle", &world->view_angle, 0, 2*PI); gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Min speed", &world->min_speed, 0, world->max_speed, &ui->min_speed_edit); gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Max speed", &world->max_speed, 0, 1000, &ui->max_speed_edit); gui_valuebox_float(next_in_layout(&layout, 50, 15, 60), "Steer speed", &world->max_steer_speed, 0, 1000, &ui->steer_speed_edit); - GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision distance", &world->collision_avoidance_distance, 10, 200); + GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision distance", &world->collision_avoidance_distance, 5, 100); GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Collision ray angle", &world->collision_avoidance_ray_angle, 0, PI); GuiSpinner(next_in_layout(&layout, 100, 15, 95), "Collision ray count", &world->collision_avoidance_ray_count, 1, 10, false); } @@ -556,6 +588,10 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) { float window_width = GetScreenWidth(); DrawFPS(window_width - 90, 10); + + char boid_label[128] = { 0 }; + snprintf(boid_label, sizeof(boid_label), "%lu boids", world->boids.size()); + DrawText(boid_label, window_width - 125, 35, 20, GREEN); } void UpdateDrawFrame() { @@ -600,7 +636,7 @@ int estimate_maximum_boid_count(World *world) { } uint64_t best_duration = UINT64_MAX; - for (int i = 0; i < 10; i++) { + for (int i = 0; i < 20; i++) { uint64_t start = rprof_read_cpu_timer(); world_update(&g_world, TIME_PER_FRAME); uint64_t end = rprof_read_cpu_timer(); @@ -634,25 +670,24 @@ int estimate_maximum_boid_count(World *world) { } int main() { - // printf("%d\n", estimate_maximum_boid_count(&g_world)); + rprof_init(); - int screen_width = 1280; - int screen_height = 720; - g_world.size = { (float)screen_width, (float)screen_height }; + world_init(&g_world, 1280, 720); float border = g_visuals.boid_edge_size; - for (int i = 0; i < 5000; i++) { + for (int i = 0; i < 50000; i++) { Boid boid; boid_rand_init(&g_world, &boid, border); g_world.boids.push_back(boid); } - rprof_init(); - - for (int i = 0; i < 5; i++) { + for (int i = 0; i < 1; i++) { world_update(&g_world, TIME_PER_FRAME); } + printf("arena: %ld (%.03fMiB)\n", g_world.frame_arena.offset, (float)g_world.frame_arena.offset / 1024 / 1024); + world_free(&g_world); + rprof_end(); rprof_output(NULL); @@ -660,7 +695,7 @@ int main() { return 0; } -int test_main() { +int foo_main() { SetTraceLogLevel(LOG_TRACE); int screen_width = 1280; @@ -670,23 +705,20 @@ int test_main() { raylib::Window window(screen_width, screen_height, "Boid Playground"); window.SetState(FLAG_VSYNC_HINT); - g_world.size = { (float)screen_width, (float)screen_height }; + GuiLoadStyleDefault(); + + rprof_init(); + + world_init(&g_world, screen_width, screen_height); float border = g_world.collision_avoidance_distance; - for (int i = 0; i < 3000; i++) { + for (int i = 0; i < 50000; i++) { Boid boid; boid_rand_init(&g_world, &boid, border); g_world.boids.push_back(boid); } - // g_world.boids.push_back({ .pos = { 100, 100 }, .dir = { 1, 0 }, .speed = 10 }); - // g_world.boids.push_back({ .pos = { 100, 500 }, .dir = { 1, 0 }, .speed = 10 }); - - GuiLoadStyleDefault(); - - rprof_init(); - -#ifdef PLATFORM_WEB +#ifdef __EMSCRIPTEN__ emscripten_set_main_loop(UpdateDrawFrame, 0, 1); #else SetTargetFPS(FRAMERATE); @@ -696,6 +728,7 @@ int test_main() { #endif window.Close(); + world_free(&g_world); rprof_end(); diff --git a/src/memory-arena.cpp b/src/memory-arena.cpp new file mode 100644 index 0000000..1470314 --- /dev/null +++ b/src/memory-arena.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include + +#include "memory-arena.hpp" + +static bool is_power_of_two(uintptr_t x) { + return (x & (x-1)) == 0; +} + +static uintptr_t align_forward(uintptr_t ptr, size_t align) { + assert(is_power_of_two(align)); + + // Same as (ptr % align) but faster as 'align' is a power of two + uintptr_t modulo = ptr & (align - 1); + + if (modulo != 0) { + // If 'ptr' address is not aligned, push the address to the + // next value which is aligned + return ptr + align - modulo; + } else { + return ptr; + } +} + +void arena_init(MemoryArena *arena, size_t size) { + arena->buffer = (uint8_t*)malloc(size); + arena->size = size; + arena->offset = 0; +} + +void arena_free(MemoryArena *arena) { + free(arena->buffer); + arena->buffer = NULL; + arena->size = 0; +} + +void* arena_malloc(MemoryArena *arena, size_t size, size_t align) { + uintptr_t curr_ptr = (uintptr_t)arena->buffer + arena->offset; + uintptr_t offset = align_forward(curr_ptr, align) - (uintptr_t)arena->buffer; + + if (offset + size >= arena->size) { + // TODO: grow arena + assert(false && "Arena ran out of space"); + return NULL; + } + + uint8_t *ptr = &arena->buffer[offset]; + arena->offset = offset + size; + + return ptr; +} + +void* arena_calloc(MemoryArena *arena, size_t size, size_t align) { + void* ptr = arena_malloc(arena, size, align); + if (ptr != NULL) { + memset(ptr, 0, size); + } + return ptr; +} + +void arena_clear(MemoryArena *arena) { + arena->offset = 0; +} diff --git a/src/memory-arena.hpp b/src/memory-arena.hpp new file mode 100644 index 0000000..af111c7 --- /dev/null +++ b/src/memory-arena.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include +#include + +#ifndef ARENA_DEFAULT_ALIGNMENT + #define ARENA_DEFAULT_ALIGNMENT 2*(sizeof(void*)) +#endif + +struct MemoryArena { + uint8_t *buffer; + size_t size; + size_t offset; +}; + +void arena_init(MemoryArena *arena, size_t size); +void arena_free(MemoryArena *arena); +void* arena_malloc(MemoryArena *arena, size_t size, size_t align = ARENA_DEFAULT_ALIGNMENT); +void* arena_calloc(MemoryArena *arena, size_t size, size_t align = ARENA_DEFAULT_ALIGNMENT); +void arena_clear(MemoryArena *arena); diff --git a/src/rprof.h b/src/rprof.h index 5dc1712..365bc4e 100644 --- a/src/rprof.h +++ b/src/rprof.h @@ -80,7 +80,6 @@ void rprof_output(prof_sort_cmp_cb sort_cb); // ------------------------ CPU Timing ------------------------- #ifdef WIN32 - #include #include static uint64_t rprof_get_os_timer_hz(void) @@ -97,7 +96,6 @@ void rprof_output(prof_sort_cmp_cb sort_cb); return Value.QuadPart; } #else - #include #include static uint64_t rprof_get_os_timer_hz(void) @@ -114,10 +112,28 @@ void rprof_output(prof_sort_cmp_cb sort_cb); #endif // WIN32 -static uint64_t rprof_read_cpu_timer(void) -{ - return __rdtsc(); -} +#ifdef WIN32 + #include + + static uint64_t rprof_read_cpu_timer(void) + { + return __rdtsc(); + } +#elif __EMSCRIPTEN__ + + static uint64_t rprof_read_cpu_timer(void) + { + return rprof_read_os_timer(); + } +#else + #include + + static uint64_t rprof_read_cpu_timer(void) + { + return __rdtsc(); + } +#endif + static uint64_t rprof_get_cpu_timer_hz(uint64_t measure_time_ms) { @@ -257,7 +273,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B) qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb); } - printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time); + printf("\nTotal time taken: %.3fms (%llu)\n", (float)total_time*1000/cpu_hz, total_time); uint32_t duration_max_width = 0; uint32_t percent_max_width = 0; @@ -301,7 +317,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B) uint64_t total_time = g_rprof.end_time - g_rprof.init_time; uint64_t cpu_hz = rprof_get_cpu_timer_hz(100); - printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time); + printf("\nTotal time taken: %.3fms (%llu)\n", (float)total_time*1000/cpu_hz, total_time); } #endif // RPROF_ONLY_TOTAL_TIME