generated from rpuzonas/raylib-cpp-template
add option to profiler for compiling out everything
This commit is contained in:
parent
189be11d10
commit
3c627cd9e1
199
src/rprof.h
199
src/rprof.h
@ -14,6 +14,10 @@
|
|||||||
// When using `rprof_start()`, you need to specify into which slot the timing will be saved.
|
// When using `rprof_start()`, you need to specify into which slot the timing will be saved.
|
||||||
// This defines how many slots you have available.
|
// This defines how many slots you have available.
|
||||||
//
|
//
|
||||||
|
// RPROF_STUB_OUT:
|
||||||
|
// Stub out `rprof_init()`, `rprof_end()`, `rprof_output()`, `rprof_start()`, `rprof_stop()` so they would
|
||||||
|
// empty. So it could be compiled out.
|
||||||
|
//
|
||||||
// RPROF_ONLY_TOTAL_TIME:
|
// RPROF_ONLY_TOTAL_TIME:
|
||||||
// Don't time block marked between `rprof_start()` and `rprof_end()`.
|
// Don't time block marked between `rprof_start()` and `rprof_end()`.
|
||||||
// Useful for checking the overhead added by the profiler.
|
// Useful for checking the overhead added by the profiler.
|
||||||
@ -34,6 +38,8 @@
|
|||||||
#define RPROF_MAX_SLOTS 32
|
#define RPROF_MAX_SLOTS 32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define RPROF_ARRAY_LEN(x) (sizeof(x)/sizeof(x[0]))
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *label;
|
const char *label;
|
||||||
|
|
||||||
@ -75,6 +81,7 @@ void rprof_output(prof_sort_cmp_cb sort_cb);
|
|||||||
#define RPROF_START(label) rprof_start(__COUNTER__, label)
|
#define RPROF_START(label) rprof_start(__COUNTER__, label)
|
||||||
#define RPROF_STOP() rprof_stop()
|
#define RPROF_STOP() rprof_stop()
|
||||||
|
|
||||||
|
#define RPROF_IMPLEMENTATION
|
||||||
#ifdef RPROF_IMPLEMENTATION
|
#ifdef RPROF_IMPLEMENTATION
|
||||||
|
|
||||||
// ------------------------ CPU Timing -------------------------
|
// ------------------------ CPU Timing -------------------------
|
||||||
@ -158,20 +165,94 @@ static uint64_t rprof_get_cpu_timer_hz(uint64_t measure_time_ms)
|
|||||||
|
|
||||||
// ------------------------ Profiling -------------------------
|
// ------------------------ Profiling -------------------------
|
||||||
|
|
||||||
void rprof_init()
|
#ifndef RPROF_STUB_OUT
|
||||||
{
|
void rprof_init()
|
||||||
assert(!g_rprof.started);
|
{
|
||||||
g_rprof.init_time = rprof_read_cpu_timer();
|
assert(!g_rprof.started);
|
||||||
g_rprof.started = true;
|
g_rprof.init_time = rprof_read_cpu_timer();
|
||||||
}
|
g_rprof.started = true;
|
||||||
|
}
|
||||||
|
|
||||||
void rprof_end()
|
void rprof_end()
|
||||||
{
|
{
|
||||||
assert(!g_rprof.finished);
|
assert(!g_rprof.finished);
|
||||||
g_rprof.end_time = rprof_read_cpu_timer();
|
g_rprof.end_time = rprof_read_cpu_timer();
|
||||||
g_rprof.finished = true;
|
g_rprof.finished = true;
|
||||||
g_rprof.started = false;
|
g_rprof.started = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void rprof_output(prof_sort_cmp_cb sort_cb)
|
||||||
|
{
|
||||||
|
assert(g_rprof.finished);
|
||||||
|
|
||||||
|
uint64_t total_time = g_rprof.end_time - g_rprof.init_time;
|
||||||
|
uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);
|
||||||
|
|
||||||
|
rprof_slot *slots[RPROF_MAX_SLOTS+1] = { 0 };
|
||||||
|
uint32_t slot_count = 0;
|
||||||
|
uint64_t profiled_duration = 0;
|
||||||
|
uint32_t label_width = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < RPROF_MAX_SLOTS; i++) {
|
||||||
|
rprof_slot *slot = &g_rprof.slots[i];
|
||||||
|
if (slot->label) {
|
||||||
|
slots[slot_count] = slot;
|
||||||
|
slot_count++;
|
||||||
|
label_width = MAX(label_width, strlen(slot->label));
|
||||||
|
profiled_duration += slot->exclusive_duration;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t other_duration = total_time - profiled_duration;
|
||||||
|
rprof_slot other_slot = {
|
||||||
|
.label = "<other>",
|
||||||
|
.calls = 1,
|
||||||
|
.inclusive_duration = other_duration,
|
||||||
|
.exclusive_duration = other_duration
|
||||||
|
};
|
||||||
|
|
||||||
|
if (slot_count > 0) {
|
||||||
|
slots[slot_count++] = &other_slot;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sort_cb) {
|
||||||
|
qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time);
|
||||||
|
|
||||||
|
uint32_t duration_max_width = 0;
|
||||||
|
uint32_t percent_max_width = 0;
|
||||||
|
char percent_column[RPROF_MAX_SLOTS+1][128];
|
||||||
|
for (int i = 0; i < slot_count; i++) {
|
||||||
|
rprof_slot *slot = slots[i];
|
||||||
|
|
||||||
|
float percent = (float)slot->inclusive_duration*100/total_time;
|
||||||
|
float exclusive_percent = (float)slot->exclusive_duration*100/total_time;
|
||||||
|
uint32_t length;
|
||||||
|
if (slot->inclusive_duration == slot->exclusive_duration) {
|
||||||
|
length = snprintf(percent_column[i], 128, "(%6.3f%%)", exclusive_percent);
|
||||||
|
} else {
|
||||||
|
length = snprintf(percent_column[i], 128, "(%6.3f%%, %6.3f%% w/children)", exclusive_percent, percent);
|
||||||
|
}
|
||||||
|
percent_max_width = MAX(percent_max_width, length);
|
||||||
|
duration_max_width = MAX(duration_max_width, (int)log10(slot->inclusive_duration) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
char line_format[128];
|
||||||
|
snprintf(line_format, RPROF_ARRAY_LEN(line_format), " %%%ds - %%%dlu %%-%ds [%%d]\n", label_width, duration_max_width, percent_max_width);
|
||||||
|
|
||||||
|
for (int i = 0; i < slot_count; i++) {
|
||||||
|
rprof_slot *slot = slots[i];
|
||||||
|
|
||||||
|
printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define rprof_init()
|
||||||
|
#define rprof_end()
|
||||||
|
#define rprof_output(...)
|
||||||
|
#endif // RPROF_STUB_OUT
|
||||||
|
|
||||||
static int rprof_cmp_u32(uint32_t A, uint32_t B)
|
static int rprof_cmp_u32(uint32_t A, uint32_t B)
|
||||||
{
|
{
|
||||||
@ -199,10 +280,10 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
|
|||||||
return rprof_cmp_u32((*A)->inclusive_duration, (*B)->inclusive_duration);
|
return rprof_cmp_u32((*A)->inclusive_duration, (*B)->inclusive_duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RPROF_ONLY_TOTAL_TIME
|
#if defined(RPROF_ONLY_TOTAL_TIME) || defined(RPROF_STUB_OUT)
|
||||||
|
#define rprof_start(...)
|
||||||
#define RPROF_ARRAY_LEN(x) (sizeof(x)/sizeof(x[0]))
|
#define rprof_stop(...)
|
||||||
|
#else
|
||||||
void rprof_start(size_t slot_idx, const char *label)
|
void rprof_start(size_t slot_idx, const char *label)
|
||||||
{
|
{
|
||||||
assert(slot_idx < RPROF_MAX_SLOTS);
|
assert(slot_idx < RPROF_MAX_SLOTS);
|
||||||
@ -237,90 +318,8 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
|
|||||||
g_rprof.slots[slot_idx].inclusive_duration = inclusive_duration + duration;
|
g_rprof.slots[slot_idx].inclusive_duration = inclusive_duration + duration;
|
||||||
}
|
}
|
||||||
|
|
||||||
void rprof_output(prof_sort_cmp_cb sort_cb)
|
|
||||||
{
|
|
||||||
assert(g_rprof.finished);
|
|
||||||
|
|
||||||
uint64_t total_time = g_rprof.end_time - g_rprof.init_time;
|
|
||||||
uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);
|
|
||||||
|
|
||||||
rprof_slot *slots[RPROF_MAX_SLOTS+1] = { 0 };
|
|
||||||
uint32_t slot_count = 0;
|
|
||||||
uint64_t profiled_duration = 0;
|
|
||||||
uint32_t label_width = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < RPROF_MAX_SLOTS; i++) {
|
|
||||||
rprof_slot *slot = &g_rprof.slots[i];
|
|
||||||
if (slot->label) {
|
|
||||||
slots[slot_count] = slot;
|
|
||||||
slot_count++;
|
|
||||||
label_width = MAX(label_width, strlen(slot->label));
|
|
||||||
profiled_duration += slot->exclusive_duration;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t other_duration = total_time - profiled_duration;
|
|
||||||
rprof_slot other_slot = {
|
|
||||||
.label = "<other>",
|
|
||||||
.calls = 1,
|
|
||||||
.inclusive_duration = other_duration,
|
|
||||||
.exclusive_duration = other_duration
|
|
||||||
};
|
|
||||||
|
|
||||||
slots[slot_count++] = &other_slot;
|
|
||||||
|
|
||||||
if (sort_cb) {
|
|
||||||
qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("\nTotal time taken: %.3fms (%llu)\n", (float)total_time*1000/cpu_hz, total_time);
|
|
||||||
|
|
||||||
uint32_t duration_max_width = 0;
|
|
||||||
uint32_t percent_max_width = 0;
|
|
||||||
char percent_column[RPROF_MAX_SLOTS+1][128];
|
|
||||||
for (int i = 0; i < slot_count; i++) {
|
|
||||||
rprof_slot *slot = slots[i];
|
|
||||||
|
|
||||||
float percent = (float)slot->inclusive_duration*100/total_time;
|
|
||||||
float exclusive_percent = (float)slot->exclusive_duration*100/total_time;
|
|
||||||
uint32_t length;
|
|
||||||
if (slot->inclusive_duration == slot->exclusive_duration) {
|
|
||||||
length = snprintf(percent_column[i], 128, "(%6.3f%%)", exclusive_percent);
|
|
||||||
} else {
|
|
||||||
length = snprintf(percent_column[i], 128, "(%6.3f%%, %6.3f%% w/children)", exclusive_percent, percent);
|
|
||||||
}
|
|
||||||
percent_max_width = MAX(percent_max_width, length);
|
|
||||||
duration_max_width = MAX(duration_max_width, (int)log10(slot->inclusive_duration) + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
char line_format[128];
|
|
||||||
snprintf(line_format, RPROF_ARRAY_LEN(line_format), " %%%ds - %%%dlu %%-%ds [%%d]\n", label_width, duration_max_width, percent_max_width);
|
|
||||||
|
|
||||||
for (int i = 0; i < slot_count; i++) {
|
|
||||||
rprof_slot *slot = slots[i];
|
|
||||||
|
|
||||||
printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static_assert(__COUNTER__ < RPROF_MAX_SLOTS, "__COUNTER__ reached max profiler slots");
|
static_assert(__COUNTER__ < RPROF_MAX_SLOTS, "__COUNTER__ reached max profiler slots");
|
||||||
|
#endif
|
||||||
#else
|
|
||||||
|
|
||||||
#define rprof_start(...)
|
|
||||||
#define rprof_stop(...)
|
|
||||||
|
|
||||||
void rprof_output(prof_sort_cmp_cb sort_cb)
|
|
||||||
{
|
|
||||||
assert(g_rprof.finished);
|
|
||||||
|
|
||||||
uint64_t total_time = g_rprof.end_time - g_rprof.init_time;
|
|
||||||
uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);
|
|
||||||
|
|
||||||
printf("\nTotal time taken: %.3fms (%llu)\n", (float)total_time*1000/cpu_hz, total_time);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // RPROF_ONLY_TOTAL_TIME
|
|
||||||
|
|
||||||
#endif // RPROF_IMPLEMENTATION
|
#endif // RPROF_IMPLEMENTATION
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user