1
0

Add byte-count annotations to profiler blocks so memory throughput is reported

This commit is contained in:
Rokas Puzonas 2023-08-19 10:53:17 +03:00
parent 0cf0e112ca
commit 6ccdbed005
2 changed files with 32 additions and 11 deletions

View File

@ -106,8 +106,8 @@ int main(int argc, char **argv)
return -1; return -1;
} }
RPROF_START("Read files");
size_t json_size = get_file_size(f); size_t json_size = get_file_size(f);
RPROF_START_BYTES("Read JSON file", json_size);
char *json_data = malloc(json_size); char *json_data = malloc(json_size);
size_t bytes_read = fread(json_data, 1, json_size, f); size_t bytes_read = fread(json_data, 1, json_size, f);
if (bytes_read != json_size) { if (bytes_read != json_size) {
@ -115,12 +115,14 @@ int main(int argc, char **argv)
return -1; return -1;
} }
fclose(f); fclose(f);
RPROF_STOP();
f64 *reference_harvensines = NULL; f64 *reference_harvensines = NULL;
size_t reference_harvensines_count = 0; size_t reference_harvensines_count = 0;
f64 reference_harvensine_sum = 0; f64 reference_harvensine_sum = 0;
if (argc >= 3) if (argc >= 3)
{ {
RPROF_START("Read answer file");
char *answers_filename = argv[2]; char *answers_filename = argv[2];
FILE *f = fopen(answers_filename, "r"); FILE *f = fopen(answers_filename, "r");
if (f == NULL) { if (f == NULL) {
@ -137,12 +139,12 @@ int main(int argc, char **argv)
fread(&reference_harvensine_sum, sizeof(f64), 1, f); fread(&reference_harvensine_sum, sizeof(f64), 1, f);
fclose(f); fclose(f);
RPROF_STOP();
} }
RPROF_STOP();
// Step 1. Read json file // Step 1. Read json file
RPROF_START("Parse JSON"); RPROF_START_BYTES("Parse JSON", json_size);
struct json_value *parsed = NULL; struct json_value *parsed = NULL;
{ {
parsed = malloc(sizeof(struct json_value)); parsed = malloc(sizeof(struct json_value));
@ -164,7 +166,7 @@ int main(int argc, char **argv)
// Step 3. Calculate harvensine distances // Step 3. Calculate harvensine distances
RPROF_START("Compute harvensines"); RPROF_START_BYTES("Compute harvensines", sizeof(struct point_pair)*pairs->count);
f64 *harvensines = malloc(pairs->count*sizeof(f64)); f64 *harvensines = malloc(pairs->count*sizeof(f64));
for (int i = 0; i < pairs->count; i++) { for (int i = 0; i < pairs->count; i++) {
struct point_pair *p = &pairs->pairs[i]; struct point_pair *p = &pairs->pairs[i];
@ -172,19 +174,24 @@ int main(int argc, char **argv)
} }
RPROF_STOP(); RPROF_STOP();
RPROF_START("Sum harvensines"); RPROF_START_BYTES("Sum harvensines", sizeof(f64)*pairs->count);
f64 harvensine_sum = 0; f64 harvensine_sum = 0;
for (int i = 0; i < pairs->count; i++) { for (int i = 0; i < pairs->count; i++) {
harvensine_sum += harvensines[i]; harvensine_sum += harvensines[i];
} }
RPROF_STOP(); RPROF_STOP();
RPROF_START("Free memory"); RPROF_START_BYTES("Free struct memory", sizeof(f64)*pairs->count + sizeof(struct point_pair)*pairs->count);
free(reference_harvensines); free(reference_harvensines);
free(harvensines); free(harvensines);
RPROF_STOP();
RPROF_START("Free json memory");
free_json_value(parsed); free_json_value(parsed);
free_point_pairs(pairs); free_point_pairs(pairs);
RPROF_STOP(); RPROF_STOP();
RPROF_START_BYTES("Free json file", json_size);
free(json_data);
RPROF_STOP();
rprof_end(); rprof_end();

View File

@ -40,6 +40,7 @@ typedef struct {
uint32_t calls; uint32_t calls;
uint64_t inclusive_duration; uint64_t inclusive_duration;
uint64_t exclusive_duration; uint64_t exclusive_duration;
uint64_t bytes_processed;
} rprof_slot; } rprof_slot;
typedef struct { typedef struct {
@ -63,7 +64,7 @@ static rprof g_rprof = { 0 };
void rprof_init(); void rprof_init();
void rprof_end(); void rprof_end();
void rprof_start(size_t slot_idx, char *label); void rprof_start(size_t slot_idx, char *label, uint64_t bytes_processed);
void rprof_stop(); void rprof_stop();
int rprof_cmp_by_calls(const rprof_slot **A, const rprof_slot **B); int rprof_cmp_by_calls(const rprof_slot **A, const rprof_slot **B);
@ -71,7 +72,8 @@ int rprof_cmp_by_exclusive_duration(const rprof_slot **A, const rprof_slot **B);
int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B); int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B);
void rprof_output(prof_sort_cmp_cb sort_cb); void rprof_output(prof_sort_cmp_cb sort_cb);
#define RPROF_START(label) rprof_start(__COUNTER__, label) #define RPROF_START(label) rprof_start(__COUNTER__, label, 0)
#define RPROF_START_BYTES(label, bytes) rprof_start(__COUNTER__, label, bytes)
#define RPROF_STOP() rprof_stop() #define RPROF_STOP() rprof_stop()
#ifdef RPROF_IMPLEMENTATION #ifdef RPROF_IMPLEMENTATION
@ -187,7 +189,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
#define ARRAY_LEN(x) (sizeof(x)/sizeof(x[0])) #define ARRAY_LEN(x) (sizeof(x)/sizeof(x[0]))
#endif #endif
void rprof_start(size_t slot_idx, char *label) void rprof_start(size_t slot_idx, char *label, uint64_t bytes_processed)
{ {
assert(g_rprof.started); assert(g_rprof.started);
assert(!g_rprof.finished); assert(!g_rprof.finished);
@ -198,6 +200,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
rprof_slot *slot = &g_rprof.slots[slot_idx]; rprof_slot *slot = &g_rprof.slots[slot_idx];
slot->label = label; slot->label = label;
slot->calls++; slot->calls++;
slot->bytes_processed += bytes_processed;
g_rprof.duration_stack[g_rprof.stack_size] = slot->inclusive_duration; g_rprof.duration_stack[g_rprof.stack_size] = slot->inclusive_duration;
g_rprof.slot_stack[g_rprof.stack_size] = slot_idx; g_rprof.slot_stack[g_rprof.stack_size] = slot_idx;
@ -230,6 +233,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
uint64_t total_time = g_rprof.end_time - g_rprof.init_time; uint64_t total_time = g_rprof.end_time - g_rprof.init_time;
uint64_t cpu_hz = rprof_get_cpu_timer_hz(100); uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);
float total_time_secs = (float)total_time / cpu_hz;
rprof_slot *slots[RPROF_MAX_SLOTS+1] = { 0 }; rprof_slot *slots[RPROF_MAX_SLOTS+1] = { 0 };
uint32_t slot_count = 0; uint32_t slot_count = 0;
@ -260,7 +264,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
qsort(slots, slot_count, sizeof(rprof_slot*), (void*)sort_cb); qsort(slots, slot_count, sizeof(rprof_slot*), (void*)sort_cb);
} }
printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time); printf("\nTotal time taken: %.3fms (%lu) (CPU: ~%.3fGHz)\n", total_time_secs*1000, total_time, (float)cpu_hz/1000000000);
uint32_t duration_max_width = 0; uint32_t duration_max_width = 0;
uint32_t percent_max_width = 0; uint32_t percent_max_width = 0;
@ -281,12 +285,22 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
} }
char line_format[128]; char line_format[128];
snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%%dlu %%-%ds [%%d]\n", label_width, duration_max_width, percent_max_width); snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%%dlu %%-%ds [%%d]", label_width, duration_max_width, percent_max_width);
for (int i = 0; i < slot_count; i++) { for (int i = 0; i < slot_count; i++) {
rprof_slot *slot = slots[i]; rprof_slot *slot = slots[i];
printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls); printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls);
if (slot->bytes_processed > 0) {
float time_spent = (float)slot->inclusive_duration / cpu_hz;
float megabytes = (float)slot->bytes_processed / (1024 * 1024);
if (megabytes > 10) {
printf(" at %.3fgb/s", (megabytes / 1024) / time_spent);
} else {
printf(" at %.3fmb/s", megabytes / time_spent);
}
}
printf("\n");
} }
} }