From 71a004e23c372c52c45e9322a3544da5b02e1c53 Mon Sep 17 00:00:00 2001 From: Rokas Puzonas Date: Thu, 15 Feb 2024 21:18:21 +0200 Subject: [PATCH] add short load uop tests --- Makefile | 7 +++-- src/main.c | 3 +++ src/main_short_load_uop.c | 41 +++++++++++++++++++++++++++++ src/short_load_uop.asm | 54 +++++++++++++++++++++++++++++++++++++++ src/short_load_uop.h | 6 +++++ 5 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 src/main_short_load_uop.c create mode 100644 src/short_load_uop.asm create mode 100644 src/short_load_uop.h diff --git a/Makefile b/Makefile index ada7fda..feacc98 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,11 @@ CFLAGS=-lm -g -Wall -O1 -build/main: src/main.c src/repetition_tester.c build/multi_nop_loop.o build/write_loops.o build/load_uop.o build/store_uop.o src/rprof.h +build/main: src/main.c src/repetition_tester.c build/multi_nop_loop.o build/write_loops.o build/load_uop.o build/store_uop.o build/short_load_uop.o src/rprof.h mkdir -p build - gcc -o build/main src/main.c build/multi_nop_loop.o build/write_loops.o build/load_uop.o build/store_uop.o $(CFLAGS) + gcc -o build/main src/main.c build/multi_nop_loop.o build/write_loops.o build/load_uop.o build/store_uop.o build/short_load_uop.o $(CFLAGS) + +build/short_load_uop.o: src/short_load_uop.asm + nasm -g -f elf64 -o build/short_load_uop.o src/short_load_uop.asm build/load_uop.o: src/load_uop.asm nasm -g -f elf64 -o build/load_uop.o src/load_uop.asm diff --git a/src/main.c b/src/main.c index 5a39cd3..6922aa1 100644 --- a/src/main.c +++ b/src/main.c @@ -16,6 +16,7 @@ #include "main_write_loop.c" #include "main_load_uop.c" #include "main_store_uop.c" +#include "main_short_load_uop.c" int main(int argc, char **argv) { if (argc < 2) { @@ -33,6 +34,8 @@ int main(int argc, char **argv) { return main_test_load_uop(); } else if (!strncmp(test_name, "store_uop", sizeof("store_uop"))) { return main_test_store_uop(); + } else if (!strncmp(test_name, "short_load_uop", 
sizeof("short_load_uop"))) { + return main_test_short_load_uop(); } else if (!strncmp(test_name, "write_loop", sizeof("write_loop"))) { return main_test_write_loop(); } else if (!strncmp(test_name, "write_backward", sizeof("write_backward"))) { diff --git a/src/main_short_load_uop.c b/src/main_short_load_uop.c new file mode 100644 index 0000000..961fb06 --- /dev/null +++ b/src/main_short_load_uop.c @@ -0,0 +1,41 @@ +#include "repetition_tester.c" +#include "short_load_uop.h" + +int main_test_short_load_uop() { + typedef void (*test_cb)(uint8_t *buffer, uint64_t byte_count); + struct testcase { + char *name; + test_cb cb; + }; + + struct testcase cases[] = { + { .name = "mov_load_1x2()", .cb = mov_load_1x2 }, + { .name = "mov_load_2x2()", .cb = mov_load_2x2 }, + { .name = "mov_load_4x2()", .cb = mov_load_4x2 }, + { .name = "mov_load_8x2()", .cb = mov_load_8x2 }, + }; + + struct repetitor repetitor = {}; + repetitor_init(&repetitor); + printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000)); + + uint64_t byte_count = 4096 * 1024; + uint8_t buffer[byte_count]; + + for (int i = 0; i < ARRAY_LEN(cases); i++) { + struct testcase *testcase = &cases[i]; + repetitor_clear(&repetitor); + while (repetitor_repeat(&repetitor, 2)) { + repetitor_start(&repetitor); + repetitor_measure_start(&repetitor); + testcase->cb(buffer, byte_count); + repetitor_measure_stop(&repetitor, byte_count); + repetitor_stop(&repetitor); + } + repetitor_print_results_label(&repetitor, testcase->name); + } + + return 0; +} + + diff --git a/src/short_load_uop.asm b/src/short_load_uop.asm new file mode 100644 index 0000000..e8823c2 --- /dev/null +++ b/src/short_load_uop.asm @@ -0,0 +1,54 @@ +global mov_load_1x2 +global mov_load_2x2 +global mov_load_4x2 +global mov_load_8x2 + +section .text + +; rsi - byte_count +; rdi - buffer +mov_load_1x2: + mov rcx, rsi + align 64 +.loop: + mov al, [rdi] + mov al, [rdi] + sub rcx, 2 + jnle .loop + ret + +; rsi - 
byte_count +; rdi - buffer +mov_load_2x2: + mov rcx, rsi + align 64 +.loop: + mov ax, [rdi] + mov ax, [rdi] + sub rcx, 2 + jnle .loop + ret + +; rsi - byte_count +; rdi - buffer +mov_load_4x2: + mov rcx, rsi + align 64 +.loop: + mov eax, [rdi] + mov eax, [rdi] + sub rcx, 2 + jnle .loop + ret + +; rsi - byte_count +; rdi - buffer +mov_load_8x2: + mov rcx, rsi + align 64 +.loop: + mov rax, [rdi] + mov rax, [rdi] + sub rcx, 2 + jnle .loop + ret diff --git a/src/short_load_uop.h b/src/short_load_uop.h new file mode 100644 index 0000000..ab21f7b --- /dev/null +++ b/src/short_load_uop.h @@ -0,0 +1,6 @@ +#include <stdint.h> + +void mov_load_1x2(uint8_t *buffer, uint64_t byte_count); +void mov_load_2x2(uint8_t *buffer, uint64_t byte_count); +void mov_load_4x2(uint8_t *buffer, uint64_t byte_count); +void mov_load_8x2(uint8_t *buffer, uint64_t byte_count);