diff --git a/Makefile b/Makefile index 930014e..1109089 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ build/gen_data: src/gen_data.c mkdir -p build gcc -o build/gen_data src/gen_data.c $(CFLAGS) -build/main: src/main.c src/json_parser.c +build/main: src/main.c src/json_parser.c src/profiler.c mkdir -p build gcc -o build/main src/main.c $(CFLAGS) diff --git a/src/harvensine_compute.h b/src/harvensine_compute.h index 1480376..cde0544 100644 --- a/src/harvensine_compute.h +++ b/src/harvensine_compute.h @@ -3,6 +3,8 @@ #define ARRAY_LEN(x) (sizeof(x)/sizeof(x[0])) typedef double f64; +typedef float f32; +typedef uint8_t u8; typedef uint32_t u32; typedef uint64_t u64; diff --git a/src/json_parser.c b/src/json_parser.c index a78c619..d8eaf4b 100644 --- a/src/json_parser.c +++ b/src/json_parser.c @@ -8,7 +8,7 @@ #include "harvensine_compute.h" #include "json_parser.h" -typedef uint8_t u8; +#include "profiler.c" void free_json_object(struct json_object *object) { @@ -76,12 +76,15 @@ static bool is_whitespace(char c) static size_t skip_ws(char **data, size_t *data_size) { + PROF_START("skip_ws()"); size_t ws_count = 0; while (data_size > 0) { if (!is_whitespace(**data)) break; inc_cursor(data, data_size, 1); ws_count++; } + PROF_STOP(); + return ws_count; } @@ -148,6 +151,7 @@ static bool expect_number_char(char *data, size_t data_size) int parse_json_value(struct json_value *result, char *data, size_t data_size) { + PROF_START("parse_json_value()"); size_t bytes_parsed = 0; bytes_parsed += skip_ws(&data, &data_size); @@ -173,6 +177,8 @@ int parse_json_value(struct json_value *result, char *data, size_t data_size) assert(false && "todo"); } + PROF_STOP(); + return bytes_parsed; } @@ -268,6 +274,7 @@ int parse_json_array(struct json_array *result, char *data, size_t data_size) int parse_json_number(f64 *result, char *data, size_t data_size) { + PROF_START("parse_json_number()"); int bytes_parsed = 0; for (int i = 0; i < data_size; i++) { if (!is_number_char(data[i])) break; @@ -278,11 +285,13 @@ int parse_json_number(f64 *result, char *data, size_t data_size) // Use alternative parsing, so `data_size` can be enforced *result = strtod(data, NULL); + PROF_STOP(); return bytes_parsed; } int parse_json_string(struct json_string *result, char *data, size_t data_size) { + PROF_START("parse_json_string()"); assert(expect_char(data, data_size, '"')); int json_string_size = get_json_string_size(data, data_size); @@ -326,6 +335,7 @@ int parse_json_string(struct json_string *result, char *data, size_t data_size) result->str = realloc(result->str, result->size*sizeof(char)); + PROF_STOP(); return json_string_size; } diff --git a/src/main.c b/src/main.c index 95c2dc6..98dbb31 100644 --- a/src/main.c +++ b/src/main.c @@ -5,8 +5,10 @@ #include #include + #include "harvensine_compute.h" #include "timer.c" +#include "profiler.c" #include "json_parser.c" #include "harvensine_formula.c" @@ -89,7 +91,7 @@ static void free_point_pairs(struct point_pairs *pairs) int main(int argc, char **argv) { - struct timer total_timer = START_TIMER("Total time"); + prof_init(); if (argc <= 1) { print_usage(argv[0]); @@ -104,7 +106,7 @@ int main(int argc, char **argv) return -1; } - struct timer read_files_timer = START_TIMER("Read files"); + PROF_START("Read files"); size_t json_size = get_file_size(f); char json_data[json_size]; size_t bytes_read = fread(json_data, 1, json_size, f); @@ -136,18 +138,18 @@ int main(int argc, char **argv) fclose(f); } - END_TIMER(read_files_timer); + PROF_STOP(); // Step 1. Read json file - struct timer json_parse_timer = START_TIMER("Parse JSON"); + PROF_START("Parse JSON"); struct json_value *parsed = NULL; { parsed = malloc(sizeof(struct json_value)); int bytes_parsed = parse_json_value(parsed, json_data, json_size); assert(bytes_parsed >= 0); } - END_TIMER(json_parse_timer); + PROF_STOP(); u32 row_count = parsed->object->values[0]->array->count; if (reference_harvensines_count > 0) { @@ -156,36 +158,35 @@ int main(int argc, char **argv) // Step 2. Convert json object to struct - struct timer struct_convert_timer = START_TIMER("Conver to struct"); + PROF_START("Convert to struct"); struct point_pairs *pairs = convert_to_struct(parsed); - END_TIMER(struct_convert_timer); + PROF_STOP(); // Step 3. Calculate harvensine distances - struct timer harvensines_timer = START_TIMER("Compute harvensines"); + PROF_START("Compute harvensines"); f64 *harvensines = malloc(pairs->count*sizeof(f64)); for (int i = 0; i < pairs->count; i++) { struct point_pair *p = &pairs->pairs[i]; harvensines[i] = get_harvensine_distance(p->x0, p->y0, p->x1, p->y1, EARTH_RADIUS); } - END_TIMER(harvensines_timer); + PROF_STOP(); - struct timer sum_harvensines_timer = START_TIMER("Sum harvensines"); + PROF_START("Sum harvensines"); f64 harvensine_sum = 0; for (int i = 0; i < pairs->count; i++) { harvensine_sum += harvensines[i]; } - END_TIMER(sum_harvensines_timer); + PROF_STOP(); - - struct timer freeing_timer = START_TIMER("Free memory"); + PROF_START("Free memory"); free(reference_harvensines); free(harvensines); free_json_value(parsed); free_point_pairs(pairs); - END_TIMER(freeing_timer); + PROF_STOP(); - END_TIMER(total_timer); + prof_end(); // Output results @@ -196,24 +197,7 @@ int main(int argc, char **argv) printf("Diff of reference harvensine sum: %.16f\n", harvensine_sum - reference_harvensine_sum); } - u64 cpu_hz = get_cpu_timer_hz(100); - u64 total_time = TIMER_DURATION(total_timer); - - printf("\nTotal time taken: %.3fms\n", (float)total_time*1000/cpu_hz); - struct timer timers[] = { - read_files_timer, - json_parse_timer, - struct_convert_timer, - harvensines_timer, - sum_harvensines_timer, - freeing_timer - }; - for (size_t i = 0; i < ARRAY_LEN(timers); i++) { - u64 duration = TIMER_DURATION(timers[i]); - printf("\t%20s - %9lu (%.03f%%)\n", timers[i].name, duration, (float)duration*100/total_time); - } - u64 other_duration = total_time - sum_timer(timers, ARRAY_LEN(timers)); - printf("\t%20s - %9lu (%.03f%%)\n", "Other", other_duration, (float)other_duration*100/total_time); + prof_output(); return 0; } diff --git a/src/profiler.c b/src/profiler.c new file mode 100644 index 0000000..ebf8e85 --- /dev/null +++ b/src/profiler.c @@ -0,0 +1,131 @@ +#ifndef PROFILER_H_ +#define PROFILER_H_ + +#include +#include +#include +#include +#include + +#include "harvensine_compute.h" +#include "timer.c" + +#define PROFILER_MAX_STACK 128 +#define PROFILER_MAX_SLOTS 32 + +typedef struct { + char *label; + u32 calls; + + u64 duration; + u64 children_duration; +} prof_slot; + +typedef struct { + bool started; + bool finished; + + u64 init_time; + u64 end_time; + + size_t slot_stack[PROFILER_MAX_STACK]; + u64 timer_stack[PROFILER_MAX_STACK]; + u32 stack_size; + + prof_slot slots[PROFILER_MAX_SLOTS]; +} prof; + +static prof g_prof = { 0 }; + +void prof_init() +{ + assert(!g_prof.started); + g_prof.init_time = read_cpu_timer(); + g_prof.started = true; +} + +void prof_end() +{ + assert(!g_prof.finished); + g_prof.end_time = read_cpu_timer(); + g_prof.finished = true; +} + +void prof_start(size_t slot_idx, char *label) +{ + assert(g_prof.started); + assert(!g_prof.finished); + + assert(slot_idx < PROFILER_MAX_SLOTS); + assert(g_prof.stack_size < PROFILER_MAX_STACK-1); + + prof_slot *slot = &g_prof.slots[slot_idx]; + slot->label = label; + slot->calls++; + + g_prof.slot_stack[g_prof.stack_size] = slot_idx; + g_prof.timer_stack[g_prof.stack_size] = read_cpu_timer(); + g_prof.stack_size++; +} + +void prof_stop() +{ + u64 now = read_cpu_timer(); + g_prof.stack_size--; + u64 start = g_prof.timer_stack[g_prof.stack_size]; + size_t slot_idx = g_prof.slot_stack[g_prof.stack_size]; + + u64 duration = (now - start); + + g_prof.slots[slot_idx].duration += duration; + + if (g_prof.stack_size > 0) { + size_t parent_slot = g_prof.slot_stack[g_prof.stack_size-1]; + g_prof.slots[parent_slot].children_duration += duration; + } +} + +void prof_output() +{ + assert(g_prof.finished); + + u64 total_time = g_prof.end_time - g_prof.init_time; + u64 cpu_hz = get_cpu_timer_hz(100); + + prof_slot *slots[PROFILER_MAX_SLOTS] = { 0 }; + u32 slot_count = 0; + u32 label_width = 0; + + for (int i = 0; i < PROFILER_MAX_SLOTS; i++) { + prof_slot *slot = &g_prof.slots[i]; + if (slot->label) { + slots[slot_count] = slot; + slot_count++; + label_width = MAX(label_width, strlen(slot->label)); + } + } + + printf("\nTotal time taken: %.3fms\n", (float)total_time*1000/cpu_hz); + + char line_format[128]; + snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%9lu (%%6.3f%%%%, %%7.3f%%%% w/children) [%%d]\n", label_width); + + u64 profiled_duration = 0; + for (int i = 0; i < slot_count; i++) { + prof_slot *slot = slots[i]; + f32 percent = (f32)slot->duration*100/total_time; + u64 exclusive_duration = slot->duration - slot->children_duration; + f32 exclusive_percent = (f32)exclusive_duration*100/total_time; + + profiled_duration += exclusive_duration; + printf(line_format, slot->label, slot->duration, exclusive_percent, percent, slot->calls); + } + + u64 other_duration = total_time - profiled_duration; + printf(line_format, "Other", other_duration, (f32)other_duration*100/total_time, 0.0, 1); +} + +#define PROF_START(label) prof_start(__COUNTER__, label) +#define PROF_STOP() prof_stop() + +#endif //PROFILER_H_ diff --git a/src/timer.c b/src/timer.c index 549305f..23d6527 100644 --- a/src/timer.c +++ b/src/timer.c @@ -1,3 +1,6 @@ +#ifndef TIMER_ +#define TIMER_ + #include "harvensine_compute.h" #ifdef WIN32 @@ -88,3 +91,5 @@ static u64 sum_timer(struct timer *timers, size_t count) } return sum; } + +#endif //TIMER_