update profiler to support sorting rows
This commit is contained in:
parent
973bfe4bd6
commit
4b7c184634
@ -197,7 +197,7 @@ int main(int argc, char **argv)
|
||||
printf("Diff of reference harvensine sum: %.16f\n", harvensine_sum - reference_harvensine_sum);
|
||||
}
|
||||
|
||||
prof_output();
|
||||
prof_output(prof_cmp_by_exclusive_duration);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -15,10 +15,10 @@
|
||||
|
||||
typedef struct {
|
||||
char *label;
|
||||
u32 calls;
|
||||
|
||||
u64 duration;
|
||||
u64 children_duration;
|
||||
u32 calls;
|
||||
u64 inclusive_duration;
|
||||
u64 exclusive_duration;
|
||||
} prof_slot;
|
||||
|
||||
typedef struct {
|
||||
@ -29,12 +29,15 @@ typedef struct {
|
||||
u64 end_time;
|
||||
|
||||
size_t slot_stack[PROFILER_MAX_STACK];
|
||||
u64 duration_stack[PROFILER_MAX_STACK];
|
||||
u64 timer_stack[PROFILER_MAX_STACK];
|
||||
u32 stack_size;
|
||||
|
||||
prof_slot slots[PROFILER_MAX_SLOTS];
|
||||
} prof;
|
||||
|
||||
typedef int prof_sort_cmp_cb(const prof_slot **A, const prof_slot **B);
|
||||
|
||||
static prof g_prof = { 0 };
|
||||
|
||||
void prof_init()
|
||||
@ -63,6 +66,7 @@ void prof_start(size_t slot_idx, char *label)
|
||||
slot->label = label;
|
||||
slot->calls++;
|
||||
|
||||
g_prof.duration_stack[g_prof.stack_size] = slot->inclusive_duration;
|
||||
g_prof.slot_stack[g_prof.stack_size] = slot_idx;
|
||||
g_prof.timer_stack[g_prof.stack_size] = read_cpu_timer();
|
||||
g_prof.stack_size++;
|
||||
@ -74,26 +78,55 @@ void prof_stop()
|
||||
g_prof.stack_size--;
|
||||
u64 start = g_prof.timer_stack[g_prof.stack_size];
|
||||
size_t slot_idx = g_prof.slot_stack[g_prof.stack_size];
|
||||
size_t inclusive_duration = g_prof.duration_stack[g_prof.stack_size];
|
||||
|
||||
u64 duration = (now - start);
|
||||
|
||||
g_prof.slots[slot_idx].duration += duration;
|
||||
|
||||
if (g_prof.stack_size > 0) {
|
||||
size_t parent_slot = g_prof.slot_stack[g_prof.stack_size-1];
|
||||
g_prof.slots[parent_slot].children_duration += duration;
|
||||
g_prof.slots[parent_slot].exclusive_duration -= duration;
|
||||
}
|
||||
|
||||
g_prof.slots[slot_idx].exclusive_duration += duration;
|
||||
g_prof.slots[slot_idx].inclusive_duration = inclusive_duration + duration;
|
||||
}
|
||||
|
||||
static int prof_cmp_u32(u32 A, u32 B)
|
||||
{
|
||||
if (A == B) {
|
||||
return 0;
|
||||
} else if (A < B) {
|
||||
return 1;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
void prof_output()
|
||||
int prof_cmp_by_calls(const prof_slot **A, const prof_slot **B)
|
||||
{
|
||||
return prof_cmp_u32((*A)->calls, (*B)->calls);
|
||||
}
|
||||
|
||||
int prof_cmp_by_exclusive_duration(const prof_slot **A, const prof_slot **B)
|
||||
{
|
||||
return prof_cmp_u32((*A)->exclusive_duration, (*B)->exclusive_duration);
|
||||
}
|
||||
|
||||
int prof_cmp_by_inclusive_duration(const prof_slot **A, const prof_slot **B)
|
||||
{
|
||||
return prof_cmp_u32((*A)->inclusive_duration, (*B)->inclusive_duration);
|
||||
}
|
||||
|
||||
void prof_output(prof_sort_cmp_cb sort_cb)
|
||||
{
|
||||
assert(g_prof.finished);
|
||||
|
||||
u64 total_time = g_prof.end_time - g_prof.init_time;
|
||||
u64 cpu_hz = get_cpu_timer_hz(100);
|
||||
|
||||
prof_slot *slots[PROFILER_MAX_SLOTS] = { 0 };
|
||||
prof_slot *slots[PROFILER_MAX_SLOTS+1] = { 0 };
|
||||
u32 slot_count = 0;
|
||||
u64 profiled_duration = 0;
|
||||
u32 label_width = 0;
|
||||
|
||||
for (int i = 0; i < PROFILER_MAX_SLOTS; i++) {
|
||||
@ -102,31 +135,54 @@ void prof_output()
|
||||
slots[slot_count] = slot;
|
||||
slot_count++;
|
||||
label_width = MAX(label_width, strlen(slot->label));
|
||||
profiled_duration += slot->exclusive_duration;
|
||||
}
|
||||
}
|
||||
|
||||
u64 other_duration = total_time - profiled_duration;
|
||||
prof_slot other_slot = {
|
||||
.label = "<other>",
|
||||
.calls = 1,
|
||||
.inclusive_duration = other_duration,
|
||||
.exclusive_duration = other_duration
|
||||
};
|
||||
|
||||
slots[slot_count++] = &other_slot;
|
||||
|
||||
if (sort_cb) {
|
||||
qsort(slots, slot_count, sizeof(prof_slot*), (void*)sort_cb);
|
||||
}
|
||||
|
||||
printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time);
|
||||
|
||||
char line_format[128];
|
||||
snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%9lu (%%6.3f%%%%, %%7.3f%%%% w/children) [%%d]\n", label_width);
|
||||
|
||||
u64 profiled_duration = 0;
|
||||
u32 percent_max_width = 0;
|
||||
char percent_column[PROFILER_MAX_SLOTS+1][128];
|
||||
for (int i = 0; i < slot_count; i++) {
|
||||
prof_slot *slot = slots[i];
|
||||
f32 percent = (f32)slot->duration*100/total_time;
|
||||
u64 exclusive_duration = slot->duration - slot->children_duration;
|
||||
f32 exclusive_percent = (f32)exclusive_duration*100/total_time;
|
||||
|
||||
profiled_duration += exclusive_duration;
|
||||
printf(line_format, slot->label, slot->duration, exclusive_percent, percent, slot->calls);
|
||||
f32 percent = (f32)slot->inclusive_duration*100/total_time;
|
||||
f32 exclusive_percent = (f32)slot->exclusive_duration*100/total_time;
|
||||
u32 length;
|
||||
if (slot->inclusive_duration == slot->exclusive_duration) {
|
||||
length = snprintf(percent_column[i], 128, "(%6.3f%%)", exclusive_percent);
|
||||
} else {
|
||||
length = snprintf(percent_column[i], 128, "(%6.3f%%, %6.3f%% w/children)", exclusive_percent, percent);
|
||||
}
|
||||
percent_max_width = MAX(percent_max_width, length);
|
||||
}
|
||||
|
||||
u64 other_duration = total_time - profiled_duration;
|
||||
f32 other_percent = (f32)other_duration*100/total_time;
|
||||
printf(line_format, "Other", other_duration, other_percent, other_percent, 1);
|
||||
char line_format[128];
|
||||
snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%9lu %%-%ds [%%d]\n", label_width, percent_max_width);
|
||||
|
||||
for (int i = 0; i < slot_count; i++) {
|
||||
prof_slot *slot = slots[i];
|
||||
|
||||
printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls);
|
||||
}
|
||||
}
|
||||
|
||||
#define PROF_START(label) prof_start(__COUNTER__, label)
|
||||
#define PROF_STOP() prof_stop()
|
||||
static_assert(__COUNTER__ < PROFILER_MAX_SLOTS, "__COUNTER__ reached max profiler slots");
|
||||
|
||||
#endif //PROFILER_H_
|
||||
|
Loading…
Reference in New Issue
Block a user