generated from rpuzonas/raylib-cpp-template
Compare commits
2 Commits
2b6f8014d8
...
c6fb55cbe6
Author | SHA1 | Date | |
---|---|---|---|
c6fb55cbe6 | |||
ca52f12fda |
239
src/main.cpp
239
src/main.cpp
@ -11,6 +11,10 @@
|
||||
#define RAYGUI_IMPLEMENTATION
|
||||
#include "raygui.h"
|
||||
|
||||
#define RPROF_IMPLEMENTATION
|
||||
// #define RPROF_ONLY_TOTAL_TIME
|
||||
#include "rprof.h"
|
||||
|
||||
#include "boid-playground.hpp"
|
||||
#include "raycast.cpp"
|
||||
|
||||
@ -194,18 +198,140 @@ static int get_boids_in_view_cone(Boid **boids_in_view, Boid *boid, float view_r
|
||||
}
|
||||
|
||||
static void world_update(World *world, float dt) {
|
||||
for (int i = 0; i < world->boids.size(); i++) {
|
||||
Boid *boid = &world->boids[i];
|
||||
Vector2 acc = { 1, 0 };
|
||||
Boid *boids = world->boids.data();
|
||||
int boid_count = world->boids.size();
|
||||
|
||||
Boid *local_boids[world->boids.size()];
|
||||
int local_boids_count = get_boids_in_view_cone(local_boids, boid, world->view_radius, world->view_angle, world->boids.data(), world->boids.size());
|
||||
RPROF_START("Create groups");
|
||||
Boid **all_local_boids[boid_count];
|
||||
int all_local_boid_counts[boid_count];
|
||||
for (int i = 0; i < boid_count; i++) {
|
||||
all_local_boids[i] = (Boid **)malloc(boid_count * sizeof(Boid*));
|
||||
all_local_boid_counts[i] = 0;
|
||||
}
|
||||
RPROF_STOP();
|
||||
|
||||
// float chunk_size = world->size.x;
|
||||
float chunk_size = world->view_radius/2;
|
||||
int chunks_wide = std::ceil(world->size.x / chunk_size);
|
||||
int chunks_high = std::ceil(world->size.y / chunk_size);
|
||||
RPROF_START("Alloc chunks");
|
||||
int *chunks[chunks_high][chunks_wide];
|
||||
int chunk_boid_counts[chunks_high][chunks_wide];
|
||||
for (int y = 0; y < chunks_high; y++) {
|
||||
for (int x = 0; x < chunks_wide; x++) {
|
||||
chunks[y][x] = (int*)malloc(boid_count * sizeof(int));
|
||||
chunk_boid_counts[y][x] = 0;
|
||||
}
|
||||
}
|
||||
RPROF_STOP();
|
||||
|
||||
RPROF_START("Creating chunks");
|
||||
for (int i = 0; i < boid_count; i++) {
|
||||
Boid *boid = &boids[i];
|
||||
int chunk_x = boid->pos.x / chunk_size;
|
||||
int chunk_y = boid->pos.y / chunk_size;
|
||||
|
||||
int *boids_in_chunk = &chunk_boid_counts[chunk_y][chunk_x];
|
||||
chunks[chunk_y][chunk_x][*boids_in_chunk] = i;
|
||||
(*boids_in_chunk)++;
|
||||
}
|
||||
RPROF_STOP();
|
||||
|
||||
// Simplified from: float dot_threshold = Vector2DotProduct(dir, Vector2Rotate(dir, world->view_angle/2));
|
||||
float dot_threshold = cosf(world->view_angle/2);
|
||||
|
||||
RPROF_START("alloc checked_local_boids");
|
||||
bool *checked_local_boids[boid_count];
|
||||
for (int i = 0; i < boid_count; i++) {
|
||||
checked_local_boids[i] = (bool*)calloc(boid_count, sizeof(bool));
|
||||
}
|
||||
RPROF_STOP();
|
||||
|
||||
RPROF_START("Calc dot products and ranges (chunked)");
|
||||
for (int i = 0; i < boid_count; i++) {
|
||||
Boid *boid = &boids[i];
|
||||
int boid_chunk_x = boid->pos.x / chunk_size;
|
||||
int boid_chunk_y = boid->pos.y / chunk_size;
|
||||
|
||||
Vector2 chunk_offset[] = {
|
||||
{ -1, -1, }, { 0, -1, }, { 1, -1, },
|
||||
{ -1, 0, }, { 0, 0, }, { 1, 0, },
|
||||
{ -1, 1, }, { 0, 1, }, { 1, 1, },
|
||||
};
|
||||
for (int j = 0; j < 9; j++) {
|
||||
int chunk_x = boid_chunk_x + chunk_offset[j].x;
|
||||
int chunk_y = boid_chunk_y + chunk_offset[j].y;
|
||||
if (chunk_x < 0 || chunk_y < 0 || chunk_x >= chunks_wide || chunk_y >= chunks_high) continue;
|
||||
|
||||
int *chunk = chunks[chunk_y][chunk_x];
|
||||
int boids_in_chunk = chunk_boid_counts[chunk_y][chunk_x];
|
||||
for (int k = 0; k < boids_in_chunk; k++) {
|
||||
int other_boid = chunk[k];
|
||||
if (i == other_boid) continue;
|
||||
if (checked_local_boids[i][other_boid]) continue;
|
||||
if (checked_local_boids[other_boid][other_boid]) continue;
|
||||
|
||||
Vector2 offset = Vector2Subtract(boids[i].pos, boids[other_boid].pos);
|
||||
|
||||
bool with_in_range = Vector2LengthSqr(offset) <= (world->view_radius * world->view_radius);
|
||||
if (with_in_range) {
|
||||
float dot = Vector2DotProduct(boids[i].dir, Vector2Normalize(offset));
|
||||
|
||||
if (-dot >= dot_threshold) {
|
||||
int *count = &all_local_boid_counts[i];
|
||||
all_local_boids[i][*count] = &boids[other_boid];
|
||||
(*count)++;
|
||||
}
|
||||
if (dot >= dot_threshold) {
|
||||
int *count = &all_local_boid_counts[other_boid];
|
||||
all_local_boids[other_boid][*count] = &boids[i];
|
||||
(*count)++;
|
||||
}
|
||||
}
|
||||
|
||||
checked_local_boids[i][other_boid] = true;
|
||||
checked_local_boids[other_boid][i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
RPROF_STOP();
|
||||
|
||||
RPROF_START("Free chunks");
|
||||
for (int y = 0; y < chunks_high; y++) {
|
||||
for (int x = 0; x < chunks_wide; x++) {
|
||||
free(chunks[y][x]);
|
||||
}
|
||||
}
|
||||
RPROF_STOP();
|
||||
|
||||
RPROF_START("free checked_local_boids");
|
||||
for (int i = 0; i < boid_count; i++) {
|
||||
free(checked_local_boids[i]);
|
||||
}
|
||||
RPROF_STOP();
|
||||
|
||||
RPROF_START("Apply forces");
|
||||
for (int i = 0; i < boid_count; i++) {
|
||||
Boid *boid = &world->boids[i];
|
||||
Vector2 acc = { 0, 0 };
|
||||
|
||||
// Boid **local_boids = NULL;
|
||||
// int local_boids_count = 0;
|
||||
|
||||
Boid **local_boids = all_local_boids[i];
|
||||
int local_boids_count = all_local_boid_counts[i];
|
||||
|
||||
// Boid *local_boids[world->boids.size()];
|
||||
// int local_boids_count = get_boids_in_view_cone(local_boids, boid, world->view_radius, world->view_angle, world->boids.data(), world->boids.size());
|
||||
|
||||
if (local_boids_count > 0) {
|
||||
// LogTrace("i:%d", i);
|
||||
// LogTrace("%d", local_boids_count);
|
||||
Vector2 separation_force = { 0, 0 };
|
||||
Vector2 flock_center = { 0, 0 };
|
||||
Vector2 flock_heading = { 0, 0 };
|
||||
for (int j = 0; j < local_boids_count; j++) {
|
||||
// LogTrace("%d", local_boids);
|
||||
flock_heading = Vector2Add(flock_heading, local_boids[j]->dir);
|
||||
flock_center = Vector2Add(flock_center , local_boids[j]->pos);
|
||||
|
||||
@ -271,6 +397,13 @@ static void world_update(World *world, float dt) {
|
||||
}
|
||||
}
|
||||
}
|
||||
RPROF_STOP();
|
||||
|
||||
RPROF_START("Free groups");
|
||||
for (int i = 0; i < boid_count; i++) {
|
||||
free(all_local_boids[i]);
|
||||
}
|
||||
RPROF_STOP();
|
||||
}
|
||||
|
||||
static void world_draw(World *world, Visuals *visuals) {
|
||||
@ -398,6 +531,7 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) {
|
||||
{
|
||||
VerticalLayout layout = { .x = 230, .y = 65, .gap = 8 };
|
||||
|
||||
GuiCheckBox(next_in_layout(&layout, 15, 15), "Looping walls", &world->looping_walls);
|
||||
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "Separation radius", &world->separation_radius, 10, 200);
|
||||
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View radius", &world->view_radius, 10, 400);
|
||||
GuiSlider(next_in_layout(&layout, 100, 15), NULL, "View angle", &world->view_angle, 0, 2*PI);
|
||||
@ -419,11 +553,15 @@ static void ui_draw(World *world, Visuals *visuals, UI *ui) {
|
||||
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Separation strength", &world->separation_strength, 0, 100, &ui->separation_strength_edit);
|
||||
gui_valuebox_float(next_in_layout(&layout, 50, 15), "Collision avoidance strength", &world->collision_avoidance_strength, 0, 100, &ui->collision_avoidance_strength_edit);
|
||||
}
|
||||
|
||||
float window_width = GetScreenWidth();
|
||||
DrawFPS(window_width - 90, 10);
|
||||
}
|
||||
|
||||
void UpdateDrawFrame() {
|
||||
float dt = GetFrameTime();
|
||||
|
||||
RPROF_START("Update");
|
||||
#ifdef PLATFORM_WEB
|
||||
// If user goes to another tab and comes back, the time that the user was gone needs to be ignored.
|
||||
// So boids wouldn't tunnel through walls and do other shenanigans.
|
||||
@ -433,18 +571,96 @@ void UpdateDrawFrame() {
|
||||
#else
|
||||
world_update(&g_world, dt);
|
||||
#endif
|
||||
RPROF_STOP();
|
||||
|
||||
// Draw
|
||||
BeginDrawing();
|
||||
RPROF_START("Draw");
|
||||
ClearBackground(g_visuals.bg_color);
|
||||
|
||||
world_draw(&g_world, &g_visuals);
|
||||
ui_draw(&g_world, &g_visuals, &g_ui);
|
||||
RPROF_STOP();
|
||||
|
||||
EndDrawing();
|
||||
}
|
||||
|
||||
// Estimates how many boids `world` can update within one frame (TIME_PER_FRAME).
//
// Each iteration refills `world->boids` with `boid_count` randomly initialised
// boids, times `world_update()` ten times and keeps the fastest run. If the update
// was too slow the candidate count is halved, if it was fast enough it is doubled;
// once the previous candidate lies on the other side of the budget the two are
// bisected instead. The search ends when two consecutive candidates differ by at
// most 10 boids.
//
// NOTE: whatever boids `world` held before the call are discarded, and the world
// is left populated with the boids from the last measurement.
int estimate_maximum_boid_count(World *world) {
    int boid_count = 1000;
    int prev_boid_count = 0;

    uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);

    do {
        // Populate the world that was passed in (not the global one), so the
        // boids that get timed are exactly the ones that were just created.
        world->boids.clear();
        for (int i = 0; i < boid_count; i++) {
            Boid boid;
            boid_rand_init(world, &boid, 0);
            world->boids.push_back(boid);
        }

        // Best-of-10 to filter out scheduling noise.
        uint64_t best_duration = UINT64_MAX;
        for (int i = 0; i < 10; i++) {
            uint64_t start = rprof_read_cpu_timer();
            world_update(world, TIME_PER_FRAME);
            uint64_t end = rprof_read_cpu_timer();
            best_duration = std::min(best_duration, (end - start));
        }

        float duration_secs = (float)best_duration/cpu_hz;
        // printf("duration: %f, err:%f", duration_secs, TIME_PER_FRAME - duration_secs);

        // Defaults to the current count: when the update takes exactly one frame
        // (diff == 0) neither branch below runs and the search terminates here.
        int new_boid_count = boid_count;
        float diff = TIME_PER_FRAME - duration_secs;
        if (diff < 0) {
            // Too slow: shrink.
            if (prev_boid_count < boid_count) {
                new_boid_count = (prev_boid_count + boid_count)/2;
            } else {
                new_boid_count = boid_count/2;
            }
        } else if (diff > 0) {
            // Time to spare: grow.
            if (prev_boid_count > boid_count) {
                new_boid_count = (prev_boid_count + boid_count)/2;
            } else {
                new_boid_count = boid_count*2;
            }
        }

        prev_boid_count = boid_count;
        boid_count = new_boid_count;
    } while (std::abs(boid_count - prev_boid_count) > 10);

    return boid_count;
}
|
||||
|
||||
int main() {
|
||||
// printf("%d\n", estimate_maximum_boid_count(&g_world));
|
||||
|
||||
int screen_width = 1280;
|
||||
int screen_height = 720;
|
||||
g_world.size = { (float)screen_width, (float)screen_height };
|
||||
|
||||
float border = g_visuals.boid_edge_size;
|
||||
for (int i = 0; i < 5000; i++) {
|
||||
Boid boid;
|
||||
boid_rand_init(&g_world, &boid, border);
|
||||
g_world.boids.push_back(boid);
|
||||
}
|
||||
|
||||
rprof_init();
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
world_update(&g_world, TIME_PER_FRAME);
|
||||
}
|
||||
|
||||
rprof_end();
|
||||
|
||||
rprof_output(NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_main() {
|
||||
SetTraceLogLevel(LOG_TRACE);
|
||||
|
||||
int screen_width = 1280;
|
||||
@ -456,15 +672,20 @@ int main() {
|
||||
|
||||
g_world.size = { (float)screen_width, (float)screen_height };
|
||||
|
||||
float border = g_visuals.boid_edge_size;
|
||||
for (int i = 0; i < 50; i++) {
|
||||
float border = g_world.collision_avoidance_distance;
|
||||
for (int i = 0; i < 3000; i++) {
|
||||
Boid boid;
|
||||
boid_rand_init(&g_world, &boid, border);
|
||||
g_world.boids.push_back(boid);
|
||||
}
|
||||
|
||||
// g_world.boids.push_back({ .pos = { 100, 100 }, .dir = { 1, 0 }, .speed = 10 });
|
||||
// g_world.boids.push_back({ .pos = { 100, 500 }, .dir = { 1, 0 }, .speed = 10 });
|
||||
|
||||
GuiLoadStyleDefault();
|
||||
|
||||
rprof_init();
|
||||
|
||||
#ifdef PLATFORM_WEB
|
||||
emscripten_set_main_loop(UpdateDrawFrame, 0, 1);
|
||||
#else
|
||||
@ -476,5 +697,9 @@ int main() {
|
||||
|
||||
window.Close();
|
||||
|
||||
rprof_end();
|
||||
|
||||
rprof_output(NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
311
src/rprof.h
Normal file
311
src/rprof.h
Normal file
@ -0,0 +1,311 @@
|
||||
#ifndef RPROF_H
|
||||
#define RPROF_H
|
||||
|
||||
// TODO: Maybe remove `assert()`, to lower overhead? Put them behind a macro?
|
||||
|
||||
// Available defines for configuration:
|
||||
// RPROF_IMPLEMENTATION:
|
||||
// Enable implementation of library
|
||||
//
|
||||
// RPROF_MAX_STACK (default 128):
|
||||
// To record nested blocks rprof uses a stack, this defines the maximum size of that stack
|
||||
//
|
||||
// RPROF_MAX_SLOTS (default 32):
|
||||
// When using `rprof_start()`, you need to specify into which slot the timing will be saved.
|
||||
// This defines how many slots you have available.
|
||||
//
|
||||
// RPROF_ONLY_TOTAL_TIME:
|
||||
// Don't time blocks marked between `rprof_start()` and `rprof_stop()`.
|
||||
// Useful for checking the overhead added by the profiler.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
#ifndef RPROF_MAX_STACK
|
||||
#define RPROF_MAX_STACK 128
|
||||
#endif
|
||||
|
||||
#ifndef RPROF_MAX_SLOTS
|
||||
#define RPROF_MAX_SLOTS 32
|
||||
#endif
|
||||
|
||||
// Accumulated statistics for one profiled block. RPROF_START() assigns every
// call site its own slot index via __COUNTER__.
typedef struct rprof_slot {
    // Label given to the latest rprof_start() on this slot; NULL while the slot
    // has never been used.
    const char *label;

    // How many times rprof_start() was called for this slot.
    uint32_t calls;
    // CPU timer ticks spent inside the block, nested profiled blocks included.
    uint64_t inclusive_duration;
    // CPU timer ticks spent inside the block, nested profiled blocks excluded.
    uint64_t exclusive_duration;
} rprof_slot;
|
||||
|
||||
typedef struct {
|
||||
bool started;
|
||||
bool finished;
|
||||
|
||||
uint64_t init_time;
|
||||
uint64_t end_time;
|
||||
|
||||
uint32_t stack_size;
|
||||
size_t slot_stack[RPROF_MAX_STACK];
|
||||
uint64_t duration_stack[RPROF_MAX_STACK];
|
||||
uint64_t timer_stack[RPROF_MAX_STACK];
|
||||
|
||||
rprof_slot slots[RPROF_MAX_SLOTS];
|
||||
} rprof;
|
||||
|
||||
typedef int prof_sort_cmp_cb(const rprof_slot **A, const rprof_slot **B);
|
||||
typedef int qsort_cmp(const void*,const void*);
|
||||
|
||||
static rprof g_rprof = { 0 };
|
||||
|
||||
void rprof_init();
|
||||
void rprof_end();
|
||||
void rprof_start(size_t slot_idx, const char *label);
|
||||
void rprof_stop();
|
||||
|
||||
int rprof_cmp_by_calls(const rprof_slot **A, const rprof_slot **B);
|
||||
int rprof_cmp_by_exclusive_duration(const rprof_slot **A, const rprof_slot **B);
|
||||
int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B);
|
||||
void rprof_output(prof_sort_cmp_cb sort_cb);
|
||||
|
||||
#define RPROF_START(label) rprof_start(__COUNTER__, label)
|
||||
#define RPROF_STOP() rprof_stop()
|
||||
|
||||
#ifdef RPROF_IMPLEMENTATION
|
||||
|
||||
// ------------------------ CPU Timing -------------------------
|
||||
|
||||
// Windows is detected through `_WIN32`, which the compiler itself predefines.
// Plain `WIN32` is only supplied by some toolchains / SDK headers, so relying on
// it alone can silently select the POSIX branch on Windows.
#if defined(_WIN32) || defined(WIN32)
#include <intrin.h>
#include <windows.h>

// Returns the frequency of the OS timer in ticks per second.
static uint64_t rprof_get_os_timer_hz(void)
{
    LARGE_INTEGER Freq;
    QueryPerformanceFrequency(&Freq);
    return Freq.QuadPart;
}

// Returns the current OS timer value, in units of rprof_get_os_timer_hz().
static uint64_t rprof_read_os_timer(void)
{
    LARGE_INTEGER Value;
    QueryPerformanceCounter(&Value);
    return Value.QuadPart;
}
#else
#include <x86intrin.h>
#include <time.h>

// Returns the frequency of the OS timer in ticks per second.
// The timer below is expressed in nanoseconds, hence 10^9.
static uint64_t rprof_get_os_timer_hz(void)
{
    return 1000000000;
}

// Returns the current CLOCK_MONOTONIC_RAW reading converted to nanoseconds.
static uint64_t rprof_read_os_timer(void)
{
    struct timespec time;
    clock_gettime(CLOCK_MONOTONIC_RAW, &time);
    return rprof_get_os_timer_hz()*time.tv_sec + time.tv_nsec;
}
#endif // _WIN32 || WIN32
|
||||
|
||||
|
||||
// Returns the current value of the CPU timestamp counter (`__rdtsc()`).
static uint64_t rprof_read_cpu_timer(void)
{
    const uint64_t ticks = __rdtsc();
    return ticks;
}
|
||||
|
||||
static uint64_t rprof_get_cpu_timer_hz(uint64_t measure_time_ms)
|
||||
{
|
||||
uint64_t os_freq = rprof_get_os_timer_hz();
|
||||
uint64_t os_start = rprof_read_os_timer();
|
||||
uint64_t os_elapsed = 0;
|
||||
|
||||
uint64_t cpu_start = rprof_read_cpu_timer();
|
||||
|
||||
uint64_t wait_duration = os_freq * measure_time_ms / 1000;
|
||||
while (os_elapsed < wait_duration) {
|
||||
os_elapsed = rprof_read_os_timer() - os_start;
|
||||
}
|
||||
|
||||
uint64_t cpu_elapsed = rprof_read_cpu_timer() - cpu_start;
|
||||
if (os_elapsed) {
|
||||
return os_freq * cpu_elapsed / os_elapsed;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------ Profiling -------------------------
|
||||
|
||||
void rprof_init()
|
||||
{
|
||||
assert(!g_rprof.started);
|
||||
g_rprof.init_time = rprof_read_cpu_timer();
|
||||
g_rprof.started = true;
|
||||
}
|
||||
|
||||
void rprof_end()
|
||||
{
|
||||
assert(!g_rprof.finished);
|
||||
g_rprof.end_time = rprof_read_cpu_timer();
|
||||
g_rprof.finished = true;
|
||||
g_rprof.started = false;
|
||||
}
|
||||
|
||||
// Three-way comparison that sorts in DESCENDING order:
// returns -1 when A > B, 1 when A < B and 0 when both are equal.
static int rprof_cmp_u32(uint32_t A, uint32_t B)
{
    if (A > B) {
        return -1;
    }
    return (A < B) ? 1 : 0;
}
|
||||
|
||||
int rprof_cmp_by_calls(const rprof_slot **A, const rprof_slot **B)
|
||||
{
|
||||
return rprof_cmp_u32((*A)->calls, (*B)->calls);
|
||||
}
|
||||
|
||||
int rprof_cmp_by_exclusive_duration(const rprof_slot **A, const rprof_slot **B)
|
||||
{
|
||||
return rprof_cmp_u32((*A)->exclusive_duration, (*B)->exclusive_duration);
|
||||
}
|
||||
|
||||
int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
|
||||
{
|
||||
return rprof_cmp_u32((*A)->inclusive_duration, (*B)->inclusive_duration);
|
||||
}
|
||||
|
||||
#ifndef RPROF_ONLY_TOTAL_TIME
|
||||
|
||||
#define RPROF_ARRAY_LEN(x) (sizeof(x)/sizeof(x[0]))
|
||||
|
||||
void rprof_start(size_t slot_idx, const char *label)
|
||||
{
|
||||
assert(slot_idx < RPROF_MAX_SLOTS);
|
||||
assert(g_rprof.stack_size < RPROF_MAX_STACK-1);
|
||||
|
||||
rprof_slot *slot = &g_rprof.slots[slot_idx];
|
||||
slot->label = label;
|
||||
slot->calls++;
|
||||
|
||||
g_rprof.duration_stack[g_rprof.stack_size] = slot->inclusive_duration;
|
||||
g_rprof.slot_stack[g_rprof.stack_size] = slot_idx;
|
||||
g_rprof.timer_stack[g_rprof.stack_size] = rprof_read_cpu_timer();
|
||||
g_rprof.stack_size++;
|
||||
}
|
||||
|
||||
void rprof_stop()
|
||||
{
|
||||
uint64_t now = rprof_read_cpu_timer();
|
||||
g_rprof.stack_size--;
|
||||
uint64_t start = g_rprof.timer_stack[g_rprof.stack_size];
|
||||
size_t slot_idx = g_rprof.slot_stack[g_rprof.stack_size];
|
||||
size_t inclusive_duration = g_rprof.duration_stack[g_rprof.stack_size];
|
||||
|
||||
uint64_t duration = (now - start);
|
||||
|
||||
if (g_rprof.stack_size > 0) {
|
||||
size_t parent_slot = g_rprof.slot_stack[g_rprof.stack_size-1];
|
||||
g_rprof.slots[parent_slot].exclusive_duration -= duration;
|
||||
}
|
||||
|
||||
g_rprof.slots[slot_idx].exclusive_duration += duration;
|
||||
g_rprof.slots[slot_idx].inclusive_duration = inclusive_duration + duration;
|
||||
}
|
||||
|
||||
// Number of decimal digits needed to print `value` (1 for 0).
static uint32_t rprof_count_digits(uint64_t value)
{
    uint32_t digits = 1;
    while (value >= 10) {
        value /= 10;
        digits++;
    }
    return digits;
}

// Prints the profiling report to stdout: the total session time followed by one
// line per used slot (label, inclusive ticks, percentages of the total, call
// count) plus an "<other>" line for time not covered by any slot.
//
// sort_cb: optional ordering for the lines (e.g. rprof_cmp_by_calls); pass NULL
//          to keep slot order with "<other>" last.
//
// Requires rprof_end() to have been called (asserted).
void rprof_output(prof_sort_cmp_cb sort_cb)
{
    assert(g_rprof.finished);

    uint64_t total_time = g_rprof.end_time - g_rprof.init_time;
    uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);

    // +1 leaves room for the synthetic "<other>" entry.
    rprof_slot *slots[RPROF_MAX_SLOTS+1] = { 0 };
    uint32_t slot_count = 0;
    uint64_t profiled_duration = 0;
    uint32_t label_width = 0;

    for (int i = 0; i < RPROF_MAX_SLOTS; i++) {
        rprof_slot *slot = &g_rprof.slots[i];
        if (slot->label) {
            slots[slot_count] = slot;
            slot_count++;

            uint32_t label_length = (uint32_t)strlen(slot->label);
            if (label_length > label_width) {
                label_width = label_length;
            }
            profiled_duration += slot->exclusive_duration;
        }
    }

    // Clamp instead of wrapping around when the slots account for more time
    // than the session itself (e.g. blocks timed outside rprof_init/rprof_end).
    uint64_t other_duration = 0;
    if (total_time > profiled_duration) {
        other_duration = total_time - profiled_duration;
    }
    rprof_slot other_slot = {
        .label = "<other>",
        .calls = 1,
        .inclusive_duration = other_duration,
        .exclusive_duration = other_duration
    };

    slots[slot_count++] = &other_slot;
    {
        // The synthetic entry takes part in the label column as well.
        uint32_t other_length = (uint32_t)strlen(other_slot.label);
        if (other_length > label_width) {
            label_width = other_length;
        }
    }

    if (sort_cb) {
        qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb);
    }

    printf("\nTotal time taken: %.3fms (%" PRIu64 ")\n", (float)total_time*1000/cpu_hz, total_time);

    // First pass: render the percentage column and measure the column widths.
    uint32_t duration_max_width = 0;
    uint32_t percent_max_width = 0;
    char percent_column[RPROF_MAX_SLOTS+1][128];
    for (uint32_t i = 0; i < slot_count; i++) {
        rprof_slot *slot = slots[i];

        float percent = (float)slot->inclusive_duration*100/total_time;
        float exclusive_percent = (float)slot->exclusive_duration*100/total_time;
        int length;
        if (slot->inclusive_duration == slot->exclusive_duration) {
            length = snprintf(percent_column[i], 128, "(%6.3f%%)", exclusive_percent);
        } else {
            length = snprintf(percent_column[i], 128, "(%6.3f%%, %6.3f%% w/children)", exclusive_percent, percent);
        }
        if (length > 0 && (uint32_t)length > percent_max_width) {
            percent_max_width = (uint32_t)length;
        }

        // Integer digit count: well defined for 0 and exact for large values.
        uint32_t duration_width = rprof_count_digits(slot->inclusive_duration);
        if (duration_width > duration_max_width) {
            duration_max_width = duration_width;
        }
    }

    // Second pass: print the aligned lines. Widths are passed through `*` and the
    // fixed-width integers use the <inttypes.h> macros, so the format stays
    // correct wherever `unsigned long` is not 64 bits wide.
    for (uint32_t i = 0; i < slot_count; i++) {
        rprof_slot *slot = slots[i];

        printf(" %*s - %*" PRIu64 " %-*s [%" PRIu32 "]\n",
               (int)label_width, slot->label,
               (int)duration_max_width, slot->inclusive_duration,
               (int)percent_max_width, percent_column[i],
               slot->calls);
    }
}
|
||||
|
||||
static_assert(__COUNTER__ < RPROF_MAX_SLOTS, "__COUNTER__ reached max profiler slots");
|
||||
|
||||
#else
|
||||
|
||||
#define rprof_start(...)
|
||||
#define rprof_stop(...)
|
||||
|
||||
// RPROF_ONLY_TOTAL_TIME variant: prints only the time between rprof_init() and
// rprof_end(), in milliseconds and in raw CPU timer ticks. `sort_cb` is unused
// because no per-block data is collected in this mode.
//
// Requires rprof_end() to have been called (asserted).
void rprof_output(prof_sort_cmp_cb sort_cb)
{
    (void)sort_cb;
    assert(g_rprof.finished);

    uint64_t total_time = g_rprof.end_time - g_rprof.init_time;
    uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);

    // PRIu64 rather than %lu: `unsigned long` is not 64 bits wide on every platform.
    printf("\nTotal time taken: %.3fms (%" PRIu64 ")\n", (float)total_time*1000/cpu_hz, total_time);
}
|
||||
|
||||
#endif // RPROF_ONLY_TOTAL_TIME
|
||||
|
||||
#endif // RPROF_IMPLEMENTATION
|
||||
|
||||
#endif //RPROF_H
|
Loading…
Reference in New Issue
Block a user