From 3c627cd9e1cf3e93722dd7d3ebc890fa6435279b Mon Sep 17 00:00:00 2001 From: Rokas Puzonas Date: Thu, 20 Jul 2023 23:42:01 +0300 Subject: [PATCH] add option to profiler for compiling out everything --- src/rprof.h | 199 ++++++++++++++++++++++++++-------------------------- 1 file changed, 99 insertions(+), 100 deletions(-) diff --git a/src/rprof.h b/src/rprof.h index 365bc4e..644770d 100644 --- a/src/rprof.h +++ b/src/rprof.h @@ -14,6 +14,10 @@ // When using `rprof_start()`, you need to specify into which slot the timing will be saved. // This defines how many slots you have available. // +// RPROF_STUB_OUT: +// Stub out `rprof_init()`, `rprof_end()`, `rprof_output()`, `rprof_start()`, `rprof_stop()` so they would +// empty. So it could be compiled out. +// // RPROF_ONLY_TOTAL_TIME: // Don't time block marked between `rprof_start()` and `rprof_end()`. // Useful for checking the overhead added by the profiler. @@ -34,6 +38,8 @@ #define RPROF_MAX_SLOTS 32 #endif +#define RPROF_ARRAY_LEN(x) (sizeof(x)/sizeof(x[0])) + typedef struct { const char *label; @@ -75,6 +81,7 @@ void rprof_output(prof_sort_cmp_cb sort_cb); #define RPROF_START(label) rprof_start(__COUNTER__, label) #define RPROF_STOP() rprof_stop() +#define RPROF_IMPLEMENTATION #ifdef RPROF_IMPLEMENTATION // ------------------------ CPU Timing ------------------------- @@ -158,20 +165,94 @@ static uint64_t rprof_get_cpu_timer_hz(uint64_t measure_time_ms) // ------------------------ Profiling ------------------------- -void rprof_init() -{ - assert(!g_rprof.started); - g_rprof.init_time = rprof_read_cpu_timer(); - g_rprof.started = true; -} +#ifndef RPROF_STUB_OUT + void rprof_init() + { + assert(!g_rprof.started); + g_rprof.init_time = rprof_read_cpu_timer(); + g_rprof.started = true; + } -void rprof_end() -{ - assert(!g_rprof.finished); - g_rprof.end_time = rprof_read_cpu_timer(); - g_rprof.finished = true; - g_rprof.started = false; -} + void rprof_end() + { + assert(!g_rprof.finished); + g_rprof.end_time = rprof_read_cpu_timer(); + g_rprof.finished = true; + g_rprof.started = false; + } + + void rprof_output(prof_sort_cmp_cb sort_cb) + { + assert(g_rprof.finished); + + uint64_t total_time = g_rprof.end_time - g_rprof.init_time; + uint64_t cpu_hz = rprof_get_cpu_timer_hz(100); + + rprof_slot *slots[RPROF_MAX_SLOTS+1] = { 0 }; + uint32_t slot_count = 0; + uint64_t profiled_duration = 0; + uint32_t label_width = 0; + + for (int i = 0; i < RPROF_MAX_SLOTS; i++) { + rprof_slot *slot = &g_rprof.slots[i]; + if (slot->label) { + slots[slot_count] = slot; + slot_count++; + label_width = MAX(label_width, strlen(slot->label)); + profiled_duration += slot->exclusive_duration; + } + } + + uint64_t other_duration = total_time - profiled_duration; + rprof_slot other_slot = { + .label = "", + .calls = 1, + .inclusive_duration = other_duration, + .exclusive_duration = other_duration + }; + + if (slot_count > 0) { + slots[slot_count++] = &other_slot; + } + + if (sort_cb) { + qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb); + } + + printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time); + + uint32_t duration_max_width = 0; + uint32_t percent_max_width = 0; + char percent_column[RPROF_MAX_SLOTS+1][128]; + for (int i = 0; i < slot_count; i++) { + rprof_slot *slot = slots[i]; + + float percent = (float)slot->inclusive_duration*100/total_time; + float exclusive_percent = (float)slot->exclusive_duration*100/total_time; + uint32_t length; + if (slot->inclusive_duration == slot->exclusive_duration) { + length = snprintf(percent_column[i], 128, "(%6.3f%%)", exclusive_percent); + } else { + length = snprintf(percent_column[i], 128, "(%6.3f%%, %6.3f%% w/children)", exclusive_percent, percent); + } + percent_max_width = MAX(percent_max_width, length); + duration_max_width = MAX(duration_max_width, (int)log10(slot->inclusive_duration) + 1); + } + + char line_format[128]; + snprintf(line_format, RPROF_ARRAY_LEN(line_format), " %%%ds - %%%dlu %%-%ds [%%d]\n", label_width, duration_max_width, percent_max_width); + + for (int i = 0; i < slot_count; i++) { + rprof_slot *slot = slots[i]; + + printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls); + } + } +#else + #define rprof_init() + #define rprof_end() + #define rprof_output(...) +#endif // RPROF_STUB_OUT static int rprof_cmp_u32(uint32_t A, uint32_t B) { @@ -199,10 +280,10 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B) return rprof_cmp_u32((*A)->inclusive_duration, (*B)->inclusive_duration); } -#ifndef RPROF_ONLY_TOTAL_TIME - - #define RPROF_ARRAY_LEN(x) (sizeof(x)/sizeof(x[0])) - +#if defined(RPROF_ONLY_TOTAL_TIME) || defined(RPROF_STUB_OUT) + #define rprof_start(...) + #define rprof_stop(...) +#else void rprof_start(size_t slot_idx, const char *label) { assert(slot_idx < RPROF_MAX_SLOTS); @@ -237,90 +318,8 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B) g_rprof.slots[slot_idx].inclusive_duration = inclusive_duration + duration; } - void rprof_output(prof_sort_cmp_cb sort_cb) - { - assert(g_rprof.finished); - - uint64_t total_time = g_rprof.end_time - g_rprof.init_time; - uint64_t cpu_hz = rprof_get_cpu_timer_hz(100); - - rprof_slot *slots[RPROF_MAX_SLOTS+1] = { 0 }; - uint32_t slot_count = 0; - uint64_t profiled_duration = 0; - uint32_t label_width = 0; - - for (int i = 0; i < RPROF_MAX_SLOTS; i++) { - rprof_slot *slot = &g_rprof.slots[i]; - if (slot->label) { - slots[slot_count] = slot; - slot_count++; - label_width = MAX(label_width, strlen(slot->label)); - profiled_duration += slot->exclusive_duration; - } - } - - uint64_t other_duration = total_time - profiled_duration; - rprof_slot other_slot = { - .label = "", - .calls = 1, - .inclusive_duration = other_duration, - .exclusive_duration = other_duration - }; - - slots[slot_count++] = &other_slot; - - if (sort_cb) { - qsort(slots, slot_count, sizeof(rprof_slot*), (qsort_cmp*)sort_cb); - } - - printf("\nTotal time taken: %.3fms (%llu)\n", (float)total_time*1000/cpu_hz, total_time); - - uint32_t duration_max_width = 0; - uint32_t percent_max_width = 0; - char percent_column[RPROF_MAX_SLOTS+1][128]; - for (int i = 0; i < slot_count; i++) { - rprof_slot *slot = slots[i]; - - float percent = (float)slot->inclusive_duration*100/total_time; - float exclusive_percent = (float)slot->exclusive_duration*100/total_time; - uint32_t length; - if (slot->inclusive_duration == slot->exclusive_duration) { - length = snprintf(percent_column[i], 128, "(%6.3f%%)", exclusive_percent); - } else { - length = snprintf(percent_column[i], 128, "(%6.3f%%, %6.3f%% w/children)", exclusive_percent, percent); - } - percent_max_width = MAX(percent_max_width, length); - duration_max_width = MAX(duration_max_width, (int)log10(slot->inclusive_duration) + 1); - } - - char line_format[128]; - snprintf(line_format, RPROF_ARRAY_LEN(line_format), " %%%ds - %%%dlu %%-%ds [%%d]\n", label_width, duration_max_width, percent_max_width); - - for (int i = 0; i < slot_count; i++) { - rprof_slot *slot = slots[i]; - - printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls); - } - } - static_assert(__COUNTER__ < RPROF_MAX_SLOTS, "__COUNTER__ reached max profiler slots"); - -#else - - #define rprof_start(...) - #define rprof_stop(...) - - void rprof_output(prof_sort_cmp_cb sort_cb) - { - assert(g_rprof.finished); - - uint64_t total_time = g_rprof.end_time - g_rprof.init_time; - uint64_t cpu_hz = rprof_get_cpu_timer_hz(100); - - printf("\nTotal time taken: %.3fms (%llu)\n", (float)total_time*1000/cpu_hz, total_time); - } - -#endif // RPROF_ONLY_TOTAL_TIME +#endif #endif // RPROF_IMPLEMENTATION