add cache size tests
This commit is contained in:
parent
d275d207a7
commit
4fb4fdbc36
22
src/cyclic_load_bytes.asm
Normal file
22
src/cyclic_load_bytes.asm
Normal file
@ -0,0 +1,22 @@
|
||||
global cyclic_load_bytes
|
||||
|
||||
section .text
|
||||
|
||||
; void cyclic_load_bytes(uint8_t *buffer, uint64_t byte_count, uint64_t mask)
;
; Issues 32-byte AVX loads, 128 bytes per iteration, until a running offset
; that starts at 0 and advances by 128 reaches byte_count. The address of each
; 128-byte group is buffer + (offset & mask), so the loads keep cycling over
; the part of the buffer selected by mask. Loaded data is discarded.
;
; rdi - buffer      (must be readable for (mask & ~127) + 128 bytes)
; rsi - byte_count  (effectively rounded up to a multiple of 128;
;                    0 performs no loads)
; rdx - mask        (ANDed with the running offset)
;
; Clobbers: rcx, r8, ymm0, flags.
cyclic_load_bytes:
    xor ecx, ecx                ; rcx = running offset (32-bit write zero-extends)
    test rsi, rsi
    jz .done                    ; nothing requested: do not touch the buffer

align 64
.loop:
    mov r8, rcx
    and r8, rdx                 ; wrap the offset with the mask
    add r8, rdi                 ; r8 = address of this 128-byte group
    vmovdqu ymm0, [r8]
    vmovdqu ymm0, [r8 + 32]
    vmovdqu ymm0, [r8 + 64]
    vmovdqu ymm0, [r8 + 96]
    add rcx, 128
    cmp rcx, rsi
    jb .loop

.done:
    vzeroupper                  ; clear upper YMM state before returning to the caller
    ret
|
3
src/cyclic_load_bytes.h
Normal file
3
src/cyclic_load_bytes.h
Normal file
@ -0,0 +1,3 @@
|
||||
#ifndef CYCLIC_LOAD_BYTES_H
#define CYCLIC_LOAD_BYTES_H

#include <stdint.h>

// Implemented in cyclic_load_bytes.asm.
//
// Performs 32-byte loads in groups of 128 bytes while a running offset
// (starting at 0, advancing by 128) is below byte_count. Each group is read
// from buffer + (offset & mask), so the loads keep cycling over the part of
// the buffer selected by mask. The loaded data is discarded.
//
// buffer     - start of the region to read; at least (mask & ~127) + 128
//              bytes must be readable.
// byte_count - total number of bytes to load; effectively rounded up to a
//              multiple of 128.
// mask       - ANDed with the running offset before it is added to buffer.
void cyclic_load_bytes(uint8_t *buffer, uint64_t byte_count, uint64_t mask);

#endif // CYCLIC_LOAD_BYTES_H
|
@ -165,6 +165,12 @@ static float divide_safe(float a, float b) {
|
||||
return b != 0 ? a / b : 0;
|
||||
}
|
||||
|
||||
float repetitor_get_best_bandwidth(struct repetitor *repetitor) {
|
||||
uint64_t min_time_taken = repetitor->min_time_taken;
|
||||
uint64_t min_byte_count = repetitor->min_byte_count;
|
||||
return bytes_to_gb_s(repetitor, min_byte_count, min_time_taken);
|
||||
}
|
||||
|
||||
void repetitor_print_results(struct repetitor *repetitor) {
|
||||
uint64_t avg_time_taken = repetitor->total_time_taken/repetitor->repetition_count;
|
||||
uint64_t min_time_taken = repetitor->min_time_taken;
|
||||
|
75
src/tests/cache_size.c
Normal file
75
src/tests/cache_size.c
Normal file
@ -0,0 +1,75 @@
|
||||
#include "repetition_tester.c"
|
||||
#include "cyclic_load_bytes.h"
|
||||
#include <sys/mman.h>
|
||||
|
||||
// One benchmark configuration: a display label and the offset mask that is
// passed to cyclic_load_bytes().
struct testcase {
    char *name;
    uint64_t mask;
};

// Mask with the low `bits` bits set, i.e. 2^bits - 1.
// Computed instead of spelled out as a long binary literal so the value can be
// checked at a glance and does not depend on the 0b literal extension.
#define LOW_BITS_MASK(bits) ((UINT64_C(1) << (bits)) - 1)

// Each mask is (working set size - 1), so the label names the number of bytes
// the loads cycle over.
// NOTE(review): there are no 256KiB, 256MiB or 512MiB cases - confirm whether
// the gaps are intentional.
struct testcase cases[] = {
    { .name = "load 4KiB"  , .mask = LOW_BITS_MASK(12) },
    { .name = "load 8KiB"  , .mask = LOW_BITS_MASK(13) },
    { .name = "load 16KiB" , .mask = LOW_BITS_MASK(14) },
    { .name = "load 32KiB" , .mask = LOW_BITS_MASK(15) },
    { .name = "load 64KiB" , .mask = LOW_BITS_MASK(16) },
    { .name = "load 128KiB", .mask = LOW_BITS_MASK(17) },

    { .name = "load 512KiB", .mask = LOW_BITS_MASK(19) },
    { .name = "load 1MiB"  , .mask = LOW_BITS_MASK(20) },
    { .name = "load 2MiB"  , .mask = LOW_BITS_MASK(21) },
    { .name = "load 4MiB"  , .mask = LOW_BITS_MASK(22) },
    { .name = "load 8MiB"  , .mask = LOW_BITS_MASK(23) },
    { .name = "load 16MiB" , .mask = LOW_BITS_MASK(24) },
    { .name = "load 32MiB" , .mask = LOW_BITS_MASK(25) },
    { .name = "load 64MiB" , .mask = LOW_BITS_MASK(26) },
    { .name = "load 128MiB", .mask = LOW_BITS_MASK(27) },

    { .name = "load 1GiB"  , .mask = LOW_BITS_MASK(30) },
};
|
||||
|
||||
int main() {
|
||||
struct repetitor repetitor = {};
|
||||
repetitor_init(&repetitor);
|
||||
printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));
|
||||
|
||||
uint64_t byte_count = 1024 * 1024 * 1024;
|
||||
if (byte_count % 4096) {
|
||||
printf("ERROR: Size of buffer is not page aligned\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint8_t *buffer = mmap(0, byte_count, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (buffer == NULL) {
|
||||
printf("ERROR: Failed to allocate buffer\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((size_t)buffer % 64 != 0) {
|
||||
printf("ERROR: Allocated buffer is not cache line aligned, it is %ld\n", (size_t)buffer % 64);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Touch pages so they would be mapped in, to avoid page faults during tests
|
||||
for (uint64_t i = 0; i < byte_count; i += 4096) {
|
||||
buffer[i] = (uint8_t)i;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ARRAY_LEN(cases); i++) {
|
||||
struct testcase *testcase = &cases[i];
|
||||
repetitor_clear(&repetitor);
|
||||
while (repetitor_repeat(&repetitor, 2)) {
|
||||
repetitor_start(&repetitor);
|
||||
repetitor_measure_start(&repetitor);
|
||||
cyclic_load_bytes(buffer, byte_count, testcase->mask);
|
||||
repetitor_measure_stop(&repetitor, byte_count);
|
||||
repetitor_stop(&repetitor);
|
||||
}
|
||||
repetitor_print_results_label(&repetitor, testcase->name);
|
||||
// printf("%ld;%f\n", testcase->mask, repetitor_get_best_bandwidth(&repetitor));
|
||||
}
|
||||
|
||||
munmap(buffer, byte_count);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,20 +1,20 @@
|
||||
#include "repetition_tester.c"
|
||||
#include "load_uop.h"
|
||||
|
||||
typedef void (*test_cb)(uint8_t *buffer, uint64_t byte_count);
|
||||
struct testcase {
|
||||
char *name;
|
||||
test_cb cb;
|
||||
};
|
||||
|
||||
struct testcase cases[] = {
|
||||
{ .name = "mov_load_x1()", .cb = mov_load_x1 },
|
||||
{ .name = "mov_load_x2()", .cb = mov_load_x2 },
|
||||
{ .name = "mov_load_x3()", .cb = mov_load_x3 },
|
||||
{ .name = "mov_load_x4()", .cb = mov_load_x4 },
|
||||
};
|
||||
|
||||
int main() {
|
||||
typedef void (*test_cb)(uint8_t *buffer, uint64_t byte_count);
|
||||
struct testcase {
|
||||
char *name;
|
||||
test_cb cb;
|
||||
};
|
||||
|
||||
struct testcase cases[] = {
|
||||
{ .name = "mov_load_x1()", .cb = mov_load_x1 },
|
||||
{ .name = "mov_load_x2()", .cb = mov_load_x2 },
|
||||
{ .name = "mov_load_x3()", .cb = mov_load_x3 },
|
||||
{ .name = "mov_load_x4()", .cb = mov_load_x4 },
|
||||
};
|
||||
|
||||
struct repetitor repetitor = {};
|
||||
repetitor_init(&repetitor);
|
||||
printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));
|
||||
|
Loading…
Reference in New Issue
Block a user