1
0

Compare commits

...

2 Commits

Author SHA1 Message Date
4fb4fdbc36 add cache size tests 2024-03-01 00:44:20 +02:00
d275d207a7 replace Makefile with build.zig 2024-02-29 22:30:50 +02:00
18 changed files with 228 additions and 53 deletions

3
.gitignore vendored
View File

@ -1 +1,2 @@
build
zig-cache
zig-out

View File

@ -1,31 +0,0 @@
# Flags appended to the single gcc invocation that compiles and links build/main
# (-lm: link libm, -g: debug info, -Wall: warnings, -O1: light optimization).
CFLAGS=-lm -g -Wall -O1
# TODO: Move this to a build.zig. This is becoming a mess, I don't want to deal with Makefiles anymore
# Main binary: src/main.c compiled together with every NASM object listed below.
# NOTE(review): recipe lines must begin with a TAB in a real Makefile; the indentation is not visible in this view.
# NOTE(review): only this recipe creates build/, yet the object rules (which run first as prerequisites)
# write into build/ as well - confirm that a clean checkout actually builds.
build/main: src/main.c src/repetition_tester.c build/multi_nop_loop.o build/write_loops.o build/load_uop.o build/store_uop.o build/short_load_uop.o src/rprof.h build/read_widths.o
mkdir -p build
gcc -o build/main src/main.c build/multi_nop_loop.o build/write_loops.o build/load_uop.o build/store_uop.o build/short_load_uop.o build/read_widths.o $(CFLAGS)
# One rule per assembly routine: nasm assembles src/<name>.asm into the ELF64 object
# build/<name>.o with debug information (-g).
build/short_load_uop.o: src/short_load_uop.asm
nasm -g -f elf64 -o build/short_load_uop.o src/short_load_uop.asm
build/load_uop.o: src/load_uop.asm
nasm -g -f elf64 -o build/load_uop.o src/load_uop.asm
build/write_loops.o: src/write_loops.asm
nasm -g -f elf64 -o build/write_loops.o src/write_loops.asm
build/multi_nop_loop.o: src/multi_nop_loop.asm
nasm -g -f elf64 -o build/multi_nop_loop.o src/multi_nop_loop.asm
build/store_uop.o: src/store_uop.asm
nasm -g -f elf64 -o build/store_uop.o src/store_uop.asm
build/read_widths.o: src/read_widths.asm
nasm -g -f elf64 -o build/read_widths.o src/read_widths.asm
# Builds (if needed) and runs the binary, passing $(TEST_NAME) as its only argument.
# NOTE(review): neither run nor clean is declared .PHONY in the lines shown here.
run: ./build/main
./build/main $(TEST_NAME)
# Removes the whole build directory (rm -r without -f reports an error when build/ does not exist).
clean:
rm -r build

83
build.zig Normal file
View File

@ -0,0 +1,83 @@
const std = @import("std");
const Builder = std.build.Builder;
/// Registers a `nasm` system command that assembles `filename` into an ELF64
/// object (with debug info) and returns the lazy path of the generated object.
///
/// The object file is named after the stem of `filename` with `.o` appended.
/// Fails only if allocating that name fails.
fn addLinuxAssembly(b: *Builder, filename: []const u8) !std.Build.LazyPath {
    const stem = std.fs.path.stem(filename);
    const object_name = try std.mem.concat(b.allocator, u8, &.{ stem, ".o" });

    // Argument order matters: the generated output path has to directly
    // follow `-o`, and the assembly source comes last.
    const nasm = b.addSystemCommand(&.{ "nasm", "-g", "-f", "elf64", "-o" });
    const object_file = nasm.addOutputFileArg(object_name);
    nasm.addFileArg(.{ .path = filename });

    return object_file;
}
/// Scans the `src` directory (relative to the current working directory) for
/// regular files ending in `.asm` and registers a nasm step for each of them
/// through `addLinuxAssembly`.
///
/// Returns the lazy paths of the objects those steps produce. The caller owns
/// the returned list and must call `deinit` on it. Errors from opening or
/// iterating the directory and allocation failures are propagated; the
/// partially built list is released in that case.
fn addAllLinuxAssmeblies(b: *Builder) !std.ArrayList(std.Build.LazyPath) {
    var linux_assemblies = std.ArrayList(std.Build.LazyPath).init(b.allocator);
    errdefer linux_assemblies.deinit();

    var dir = try std.fs.cwd().openIterableDir("src", .{});
    // Release the directory handle on every exit path (it used to be leaked).
    defer dir.close();

    var it = dir.iterate();
    while (try it.next()) |file| {
        if (file.kind != .file) continue;

        const ext = std.fs.path.extension(file.name);
        if (!std.mem.eql(u8, ext, ".asm")) continue;

        const assembly_path = try std.mem.concat(b.allocator, u8, &.{ "src/", file.name });
        // NOTE(review): freeing the path at the end of the iteration relies on
        // the registered step keeping its own copy - confirm for the targeted
        // Zig version.
        defer b.allocator.free(assembly_path);

        try linux_assemblies.append(try addLinuxAssembly(b, assembly_path));
    }

    return linux_assemblies;
}
/// Build script entry point.
///
/// Assembles every `src/*.asm` file once, then turns each `src/tests/*.c`
/// file into its own executable that links libc and all assembled objects.
/// For every executable a top-level step named after the file's stem is
/// registered, so `zig build <name>` runs that test; arguments given to the
/// build are forwarded to the test program.
pub fn build(b: *Builder) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    var linux_assemblies = try addAllLinuxAssmeblies(b);
    defer linux_assemblies.deinit();

    var dir = try std.fs.cwd().openIterableDir("src/tests", .{});
    // Release the directory handle on every exit path (it used to be leaked).
    defer dir.close();

    var it = dir.iterate();
    while (try it.next()) |file| {
        if (file.kind != .file) continue;

        const ext = std.fs.path.extension(file.name);
        if (!std.mem.eql(u8, ext, ".c")) continue;

        const source_file_path = try std.mem.concat(b.allocator, u8, &.{ "src/tests/", file.name });
        // NOTE(review): this free (and the one for the description below)
        // relies on the build API copying the strings it is given - confirm
        // for the targeted Zig version.
        defer b.allocator.free(source_file_path);

        // The executable and its run step share the source file's stem as name.
        const executable_name = std.fs.path.stem(file.name);
        const exe = b.addExecutable(.{
            .name = executable_name,
            .root_source_file = .{ .path = source_file_path },
            .optimize = optimize,
            .target = target,
        });
        // Tests include shared sources and headers straight from `src`.
        exe.addIncludePath(.{ .path = "src" });
        exe.linkLibC();
        for (linux_assemblies.items) |obj| {
            exe.addObjectFile(obj);
        }
        b.installArtifact(exe);

        const run_exe = b.addRunArtifact(exe);
        if (b.args) |args| {
            run_exe.addArgs(args);
        }

        const step_description = try std.fmt.allocPrint(b.allocator, "Run '{s}' test", .{source_file_path});
        defer b.allocator.free(step_description);

        const run_step = b.step(executable_name, step_description);
        run_step.dependOn(&run_exe.step);
    }
}

1
compile_flags.txt Normal file
View File

@ -0,0 +1 @@
-Isrc

22
src/cyclic_load_bytes.asm Normal file
View File

@ -0,0 +1,22 @@
global cyclic_load_bytes
section .text
; cyclic_load_bytes: issues 32-byte loads over a window of the buffer selected by mask.
; Each loop iteration performs four 32-byte loads (128 bytes) starting at
; buffer + (counter & mask), then advances the counter by 128 until it reaches
; byte_count. The loaded data is discarded (every load targets ymm0).
; rdi - buffer
; rsi - byte_count
; rdx - mask
cyclic_load_bytes:
; rcx = running byte counter, starts at zero
xor rcx, rcx
; pad so that the loop body below starts on a 64-byte boundary
align 64
.loop:
; r8 = buffer + (counter & mask): the mask wraps the offset back into the window
mov r8, rcx
and r8, rdx
add r8, rdi
; four consecutive 32-byte loads = 128 bytes per iteration
vmovdqu ymm0, [r8]
vmovdqu ymm0, [r8 + 32]
vmovdqu ymm0, [r8 + 64]
vmovdqu ymm0, [r8 + 96]
add rcx, 128
; unsigned compare: keep looping while counter < byte_count.
; The check sits at the bottom, so the body runs at least once even when byte_count is 0.
cmp rcx, rsi
jb .loop
ret

3
src/cyclic_load_bytes.h Normal file
View File

@ -0,0 +1,3 @@
#include <stdint.h>
/*
 * Repeatedly loads from `buffer` in 128-byte steps until `byte_count` bytes
 * have been requested; the offset of every step is ANDed with `mask`, so the
 * reads stay inside the window selected by the mask. Implemented in
 * cyclic_load_bytes.asm.
 * NOTE(review): presumably `mask` is (power of two) - 1 and `buffer` must be
 * readable for at least mask + 1 bytes - confirm against the callers.
 */
void cyclic_load_bytes(uint8_t *buffer, uint64_t byte_count, uint64_t mask);

View File

@ -165,6 +165,12 @@ static float divide_safe(float a, float b) {
return b != 0 ? a / b : 0;
}
// Returns the result of bytes_to_gb_s() for the repetitor's recorded
// min_byte_count / min_time_taken pair.
// NOTE(review): presumably this is the bandwidth of the fastest repetition - confirm against bytes_to_gb_s().
float repetitor_get_best_bandwidth(struct repetitor *repetitor) {
    return bytes_to_gb_s(repetitor,
                         (uint64_t)repetitor->min_byte_count,
                         (uint64_t)repetitor->min_time_taken);
}
void repetitor_print_results(struct repetitor *repetitor) {
uint64_t avg_time_taken = repetitor->total_time_taken/repetitor->repetition_count;
uint64_t min_time_taken = repetitor->min_time_taken;

75
src/tests/cache_size.c Normal file
View File

@ -0,0 +1,75 @@
#include "repetition_tester.c"
#include "cyclic_load_bytes.h"
#include <sys/mman.h>
// One benchmark configuration: the label printed with the results and the
// offset mask handed to cyclic_load_bytes().
struct testcase {
    char *name;
    uint64_t mask;
};

// Every mask is (working-set size in bytes) - 1, i.e. all low bits set.
// NOTE(review): the table has no 256KiB, 256MiB or 512MiB entries - confirm the gaps are intentional.
struct testcase cases[] = {
    { "load 4KiB",   0xFFF      },
    { "load 8KiB",   0x1FFF     },
    { "load 16KiB",  0x3FFF     },
    { "load 32KiB",  0x7FFF     },
    { "load 64KiB",  0xFFFF     },
    { "load 128KiB", 0x1FFFF    },
    { "load 512KiB", 0x7FFFF    },
    { "load 1MiB",   0xFFFFF    },
    { "load 2MiB",   0x1FFFFF   },
    { "load 4MiB",   0x3FFFFF   },
    { "load 8MiB",   0x7FFFFF   },
    { "load 16MiB",  0xFFFFFF   },
    { "load 32MiB",  0x1FFFFFF  },
    { "load 64MiB",  0x3FFFFFF  },
    { "load 128MiB", 0x7FFFFFF  },
    { "load 1GiB",   0x3FFFFFFF },
};
// Cache-size benchmark: maps a 1GiB anonymous buffer, pre-faults it, and for
// every entry of `cases` repeatedly times cyclic_load_bytes() over the whole
// byte count while the entry's mask limits the reads to a smaller window.
// Returns 0 on success and -1 when the buffer cannot be set up.
int main() {
    struct repetitor repetitor = {};
    repetitor_init(&repetitor);
    printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));

    uint64_t byte_count = 1024 * 1024 * 1024;
    if (byte_count % 4096) {
        printf("ERROR: Size of buffer is not page aligned\n");
        return -1;
    }

    uint8_t *buffer = mmap(0, byte_count, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // mmap() reports failure with MAP_FAILED ((void *)-1), never with NULL.
    if (buffer == MAP_FAILED) {
        printf("ERROR: Failed to allocate buffer\n");
        return -1;
    }

    if ((size_t)buffer % 64 != 0) {
        // %zu is the matching conversion for size_t.
        printf("ERROR: Allocated buffer is not cache line aligned, it is %zu\n", (size_t)buffer % 64);
        munmap(buffer, byte_count);
        return -1;
    }

    // Write one byte per 4096-byte page so every page is mapped in before the
    // measurements start; otherwise page faults would be timed as well.
    for (uint64_t i = 0; i < byte_count; i += 4096) {
        buffer[i] = (uint8_t)i;
    }

    for (size_t i = 0; i < ARRAY_LEN(cases); i++) {
        struct testcase *testcase = &cases[i];

        repetitor_clear(&repetitor);
        while (repetitor_repeat(&repetitor, 2)) {
            repetitor_start(&repetitor);

            // Only the load loop itself sits between measure_start and measure_stop.
            repetitor_measure_start(&repetitor);
            cyclic_load_bytes(buffer, byte_count, testcase->mask);
            repetitor_measure_stop(&repetitor, byte_count);

            repetitor_stop(&repetitor);
        }

        repetitor_print_results_label(&repetitor, testcase->name);
        // printf("%ld;%f\n", testcase->mask, repetitor_get_best_bandwidth(&repetitor));
    }

    munmap(buffer, byte_count);
    return 0;
}

View File

@ -1,20 +1,20 @@
#include "repetition_tester.c"
#include "load_uop.h"
int main_test_load_uop() {
typedef void (*test_cb)(uint8_t *buffer, uint64_t byte_count);
struct testcase {
char *name;
test_cb cb;
};
typedef void (*test_cb)(uint8_t *buffer, uint64_t byte_count);
struct testcase {
char *name;
test_cb cb;
};
struct testcase cases[] = {
{ .name = "mov_load_x1()", .cb = mov_load_x1 },
{ .name = "mov_load_x2()", .cb = mov_load_x2 },
{ .name = "mov_load_x3()", .cb = mov_load_x3 },
{ .name = "mov_load_x4()", .cb = mov_load_x4 },
};
struct testcase cases[] = {
{ .name = "mov_load_x1()", .cb = mov_load_x1 },
{ .name = "mov_load_x2()", .cb = mov_load_x2 },
{ .name = "mov_load_x3()", .cb = mov_load_x3 },
{ .name = "mov_load_x4()", .cb = mov_load_x4 },
};
int main() {
struct repetitor repetitor = {};
repetitor_init(&repetitor);
printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));

View File

@ -54,7 +54,14 @@ static void read_malloc_file_with_read(bool should_alloc, struct repetitor *repe
free_buffer(should_alloc, buffer);
}
int main_test_malloc_read(char *filename) {
int main(int argc, char **argv) {
if (argc < 2) {
printf("Usage: %s <filename>\n", argv[0]);
return -1;
}
char *filename = argv[1];
typedef void (*read_file_b)(bool should_alloc, struct repetitor *repetitor, uint8_t *buffer, uint64_t buffer_size, char *filename);
struct testcase {
char *name;

View File

@ -46,7 +46,15 @@ void read_file_with_read(struct repetitor *repetitor, uint8_t *buffer, uint64_t
close(file);
}
int main_test_read_file(char *filename) {
int main(int argc, char **argv) {
if (argc < 2) {
printf("Usage: %s <filename>\n", argv[0]);
return -1;
}
char *filename = argv[1];
typedef void (*read_file_b)(struct repetitor *repetitor, uint8_t *buffer, uint64_t buffer_size, char *filename);
struct testcase {
char *name;

View File

@ -1,7 +1,7 @@
#include "repetition_tester.c"
#include "read_widths.h"
int main_test_read_widths() {
int main() {
typedef void (*test_cb)(uint8_t *buffer, uint64_t byte_count);
struct testcase {
char *name;

View File

@ -1,7 +1,7 @@
#include "repetition_tester.c"
#include "short_load_uop.h"
int main_test_short_load_uop() {
int main() {
typedef void (*test_cb)(uint8_t *buffer, uint64_t byte_count);
struct testcase {
char *name;

View File

@ -1,7 +1,7 @@
#include "repetition_tester.c"
#include "store_uop.h"
int main_test_store_uop() {
int main() {
typedef void (*test_cb)(uint8_t *buffer, uint64_t byte_count);
struct testcase {
char *name;

View File

@ -19,7 +19,7 @@ static void test_write_to_all_bytes(struct repetitor *repetitor, uint8_t *buffer
}
}
int main_test_write_all_bytes() {
int main() {
struct repetitor repetitor = {};
repetitor_init(&repetitor);
printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));

View File

@ -1,6 +1,6 @@
#include "repetition_tester.c"
int main_test_write_backward() {
int main() {
struct repetitor repetitor = {};
repetitor_init(&repetitor);
printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));

View File

@ -1,7 +1,7 @@
#include "repetition_tester.c"
#include "multi_nop_loop.h"
int main_test_write_bytes_asm() {
int main() {
typedef void (*write_bytes_cb)(uint8_t *buffer, uint64_t byte_count);
struct testcase {
char *name;

View File

@ -1,7 +1,7 @@
#include "repetition_tester.c"
#include "write_loops.h"
int main_test_write_loop() {
int main() {
struct repetitor repetitor = {};
repetitor_init(&repetitor);
printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));