1
0

update profiler to support sorting rows

This commit is contained in:
Rokas Puzonas 2023-07-12 20:07:04 +03:00
parent 973bfe4bd6
commit 4b7c184634
2 changed files with 79 additions and 23 deletions

View File

@ -197,7 +197,7 @@ int main(int argc, char **argv)
printf("Diff of reference harvensine sum: %.16f\n", harvensine_sum - reference_harvensine_sum);
}
prof_output();
prof_output(prof_cmp_by_exclusive_duration);
return 0;
}

View File

@ -15,10 +15,10 @@
typedef struct {
char *label;
u32 calls;
u64 duration;
u64 children_duration;
u32 calls;
u64 inclusive_duration;
u64 exclusive_duration;
} prof_slot;
typedef struct {
@ -29,12 +29,15 @@ typedef struct {
u64 end_time;
size_t slot_stack[PROFILER_MAX_STACK];
u64 duration_stack[PROFILER_MAX_STACK];
u64 timer_stack[PROFILER_MAX_STACK];
u32 stack_size;
prof_slot slots[PROFILER_MAX_SLOTS];
} prof;
typedef int prof_sort_cmp_cb(const prof_slot **A, const prof_slot **B);
static prof g_prof = { 0 };
void prof_init()
@ -63,8 +66,9 @@ void prof_start(size_t slot_idx, char *label)
slot->label = label;
slot->calls++;
g_prof.slot_stack[g_prof.stack_size] = slot_idx;
g_prof.timer_stack[g_prof.stack_size] = read_cpu_timer();
g_prof.duration_stack[g_prof.stack_size] = slot->inclusive_duration;
g_prof.slot_stack[g_prof.stack_size] = slot_idx;
g_prof.timer_stack[g_prof.stack_size] = read_cpu_timer();
g_prof.stack_size++;
}
@ -74,26 +78,55 @@ void prof_stop()
g_prof.stack_size--;
u64 start = g_prof.timer_stack[g_prof.stack_size];
size_t slot_idx = g_prof.slot_stack[g_prof.stack_size];
size_t inclusive_duration = g_prof.duration_stack[g_prof.stack_size];
u64 duration = (now - start);
g_prof.slots[slot_idx].duration += duration;
if (g_prof.stack_size > 0) {
size_t parent_slot = g_prof.slot_stack[g_prof.stack_size-1];
g_prof.slots[parent_slot].children_duration += duration;
g_prof.slots[parent_slot].exclusive_duration -= duration;
}
g_prof.slots[slot_idx].exclusive_duration += duration;
g_prof.slots[slot_idx].inclusive_duration = inclusive_duration + duration;
}
static int prof_cmp_u32(u32 A, u32 B)
{
if (A == B) {
return 0;
} else if (A < B) {
return 1;
} else {
return -1;
}
}
void prof_output()
int prof_cmp_by_calls(const prof_slot **A, const prof_slot **B)
{
return prof_cmp_u32((*A)->calls, (*B)->calls);
}
int prof_cmp_by_exclusive_duration(const prof_slot **A, const prof_slot **B)
{
return prof_cmp_u32((*A)->exclusive_duration, (*B)->exclusive_duration);
}
int prof_cmp_by_inclusive_duration(const prof_slot **A, const prof_slot **B)
{
return prof_cmp_u32((*A)->inclusive_duration, (*B)->inclusive_duration);
}
void prof_output(prof_sort_cmp_cb sort_cb)
{
assert(g_prof.finished);
u64 total_time = g_prof.end_time - g_prof.init_time;
u64 cpu_hz = get_cpu_timer_hz(100);
prof_slot *slots[PROFILER_MAX_SLOTS] = { 0 };
prof_slot *slots[PROFILER_MAX_SLOTS+1] = { 0 };
u32 slot_count = 0;
u64 profiled_duration = 0;
u32 label_width = 0;
for (int i = 0; i < PROFILER_MAX_SLOTS; i++) {
@ -102,31 +135,54 @@ void prof_output()
slots[slot_count] = slot;
slot_count++;
label_width = MAX(label_width, strlen(slot->label));
profiled_duration += slot->exclusive_duration;
}
}
u64 other_duration = total_time - profiled_duration;
prof_slot other_slot = {
.label = "<other>",
.calls = 1,
.inclusive_duration = other_duration,
.exclusive_duration = other_duration
};
slots[slot_count++] = &other_slot;
if (sort_cb) {
qsort(slots, slot_count, sizeof(prof_slot*), (void*)sort_cb);
}
printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time);
char line_format[128];
snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%9lu (%%6.3f%%%%, %%7.3f%%%% w/children) [%%d]\n", label_width);
u64 profiled_duration = 0;
u32 percent_max_width = 0;
char percent_column[PROFILER_MAX_SLOTS+1][128];
for (int i = 0; i < slot_count; i++) {
prof_slot *slot = slots[i];
f32 percent = (f32)slot->duration*100/total_time;
u64 exclusive_duration = slot->duration - slot->children_duration;
f32 exclusive_percent = (f32)exclusive_duration*100/total_time;
profiled_duration += exclusive_duration;
printf(line_format, slot->label, slot->duration, exclusive_percent, percent, slot->calls);
f32 percent = (f32)slot->inclusive_duration*100/total_time;
f32 exclusive_percent = (f32)slot->exclusive_duration*100/total_time;
u32 length;
if (slot->inclusive_duration == slot->exclusive_duration) {
length = snprintf(percent_column[i], 128, "(%6.3f%%)", exclusive_percent);
} else {
length = snprintf(percent_column[i], 128, "(%6.3f%%, %6.3f%% w/children)", exclusive_percent, percent);
}
percent_max_width = MAX(percent_max_width, length);
}
u64 other_duration = total_time - profiled_duration;
f32 other_percent = (f32)other_duration*100/total_time;
printf(line_format, "Other", other_duration, other_percent, other_percent, 1);
char line_format[128];
snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%9lu %%-%ds [%%d]\n", label_width, percent_max_width);
for (int i = 0; i < slot_count; i++) {
prof_slot *slot = slots[i];
printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls);
}
}
#define PROF_START(label) prof_start(__COUNTER__, label)
#define PROF_STOP() prof_stop()
static_assert(__COUNTER__ < PROFILER_MAX_SLOTS, "__COUNTER__ reached max profiler slots");
#endif //PROFILER_H_