1
0

add annotation to see memory throughput

This commit is contained in:
Rokas Puzonas 2023-08-19 10:53:17 +03:00
parent 0cf0e112ca
commit 6ccdbed005
2 changed files with 32 additions and 11 deletions

View File

@ -106,8 +106,8 @@ int main(int argc, char **argv)
return -1;
}
RPROF_START("Read files");
size_t json_size = get_file_size(f);
RPROF_START_BYTES("Read JSON file", json_size);
char *json_data = malloc(json_size);
size_t bytes_read = fread(json_data, 1, json_size, f);
if (bytes_read != json_size) {
@ -115,12 +115,14 @@ int main(int argc, char **argv)
return -1;
}
fclose(f);
RPROF_STOP();
f64 *reference_harvensines = NULL;
size_t reference_harvensines_count = 0;
f64 reference_harvensine_sum = 0;
if (argc >= 3)
{
RPROF_START("Read answer file");
char *answers_filename = argv[2];
FILE *f = fopen(answers_filename, "r");
if (f == NULL) {
@ -137,12 +139,12 @@ int main(int argc, char **argv)
fread(&reference_harvensine_sum, sizeof(f64), 1, f);
fclose(f);
RPROF_STOP();
}
RPROF_STOP();
// Step 1. Read json file
RPROF_START("Parse JSON");
RPROF_START_BYTES("Parse JSON", json_size);
struct json_value *parsed = NULL;
{
parsed = malloc(sizeof(struct json_value));
@ -164,7 +166,7 @@ int main(int argc, char **argv)
// Step 3. Calculate harvensine distances
RPROF_START("Compute harvensines");
RPROF_START_BYTES("Compute harvensines", sizeof(struct point_pair)*pairs->count);
f64 *harvensines = malloc(pairs->count*sizeof(f64));
for (int i = 0; i < pairs->count; i++) {
struct point_pair *p = &pairs->pairs[i];
@ -172,19 +174,24 @@ int main(int argc, char **argv)
}
RPROF_STOP();
RPROF_START("Sum harvensines");
RPROF_START_BYTES("Sum harvensines", sizeof(f64)*pairs->count);
f64 harvensine_sum = 0;
for (int i = 0; i < pairs->count; i++) {
harvensine_sum += harvensines[i];
}
RPROF_STOP();
RPROF_START("Free memory");
RPROF_START_BYTES("Free struct memory", sizeof(f64)*pairs->count + sizeof(struct point_pair)*pairs->count);
free(reference_harvensines);
free(harvensines);
RPROF_STOP();
RPROF_START("Free json memory");
free_json_value(parsed);
free_point_pairs(pairs);
RPROF_STOP();
RPROF_START_BYTES("Free json file", json_size);
free(json_data);
RPROF_STOP();
rprof_end();

View File

@ -40,6 +40,7 @@ typedef struct {
uint32_t calls;
uint64_t inclusive_duration;
uint64_t exclusive_duration;
uint64_t bytes_processed;
} rprof_slot;
typedef struct {
@ -63,7 +64,7 @@ static rprof g_rprof = { 0 };
void rprof_init();
void rprof_end();
void rprof_start(size_t slot_idx, char *label);
void rprof_start(size_t slot_idx, char *label, uint64_t bytes_processed);
void rprof_stop();
int rprof_cmp_by_calls(const rprof_slot **A, const rprof_slot **B);
@ -71,7 +72,8 @@ int rprof_cmp_by_exclusive_duration(const rprof_slot **A, const rprof_slot **B);
int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B);
void rprof_output(prof_sort_cmp_cb sort_cb);
#define RPROF_START(label) rprof_start(__COUNTER__, label)
#define RPROF_START(label) rprof_start(__COUNTER__, label, 0)
#define RPROF_START_BYTES(label, bytes) rprof_start(__COUNTER__, label, bytes)
#define RPROF_STOP() rprof_stop()
#ifdef RPROF_IMPLEMENTATION
@ -187,7 +189,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
#define ARRAY_LEN(x) (sizeof(x)/sizeof(x[0]))
#endif
void rprof_start(size_t slot_idx, char *label)
void rprof_start(size_t slot_idx, char *label, uint64_t bytes_processed)
{
assert(g_rprof.started);
assert(!g_rprof.finished);
@ -198,6 +200,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
rprof_slot *slot = &g_rprof.slots[slot_idx];
slot->label = label;
slot->calls++;
slot->bytes_processed += bytes_processed;
g_rprof.duration_stack[g_rprof.stack_size] = slot->inclusive_duration;
g_rprof.slot_stack[g_rprof.stack_size] = slot_idx;
@ -230,6 +233,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
uint64_t total_time = g_rprof.end_time - g_rprof.init_time;
uint64_t cpu_hz = rprof_get_cpu_timer_hz(100);
float total_time_secs = (float)total_time / cpu_hz;
rprof_slot *slots[RPROF_MAX_SLOTS+1] = { 0 };
uint32_t slot_count = 0;
@ -260,7 +264,7 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
qsort(slots, slot_count, sizeof(rprof_slot*), (void*)sort_cb);
}
printf("\nTotal time taken: %.3fms (%lu)\n", (float)total_time*1000/cpu_hz, total_time);
printf("\nTotal time taken: %.3fms (%lu) (CPU: ~%.3fGHz)\n", total_time_secs*1000, total_time, (float)cpu_hz/1000000000);
uint32_t duration_max_width = 0;
uint32_t percent_max_width = 0;
@ -281,12 +285,22 @@ int rprof_cmp_by_inclusive_duration(const rprof_slot **A, const rprof_slot **B)
}
char line_format[128];
snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%%dlu %%-%ds [%%d]\n", label_width, duration_max_width, percent_max_width);
snprintf(line_format, ARRAY_LEN(line_format), " %%%ds - %%%dlu %%-%ds [%%d]", label_width, duration_max_width, percent_max_width);
for (int i = 0; i < slot_count; i++) {
rprof_slot *slot = slots[i];
printf(line_format, slot->label, slot->inclusive_duration, percent_column[i], slot->calls);
if (slot->bytes_processed > 0) {
float time_spent = (float)slot->inclusive_duration / cpu_hz;
float megabytes = (float)slot->bytes_processed / (1024 * 1024);
if (megabytes > 10) {
printf(" at %.3fgb/s", (megabytes / 1024) / time_spent);
} else {
printf(" at %.3fmb/s", megabytes / time_spent);
}
}
printf("\n");
}
}