From 3f66b12c923bb2b8b7694d56a9c31838cd84a2c4 Mon Sep 17 00:00:00 2001
From: Rokas Puzonas
Date: Fri, 17 May 2024 00:08:46 +0300
Subject: [PATCH] add cache set tests

---
 README.md                        |  9 ++++
 build.zig                        | 14 +++---
 src/13_cache_sets/load_bytes.asm | 23 ++++++++++
 src/13_cache_sets/main.c         | 76 ++++++++++++++++++++++++++++++++
 4 files changed, 115 insertions(+), 7 deletions(-)
 create mode 100644 README.md
 create mode 100644 src/13_cache_sets/load_bytes.asm
 create mode 100644 src/13_cache_sets/main.c

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8a38cc0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+# Repetition tester
+
+For: https://www.computerenhance.com/
+
+Zig version used: 0.12.0
+
+## Linux calling convention register order
+
+RDI, RSI, RDX, RCX, R8, R9, [XYZ]MM0–7
diff --git a/build.zig b/build.zig
index c0ce427..2453faa 100644
--- a/build.zig
+++ b/build.zig
@@ -1,7 +1,7 @@
 const std = @import("std");
-const Builder = std.build.Builder;
+const Build = std.Build;
 
-fn addLinuxAssembly(b: *Builder, filename: []const u8) !std.Build.LazyPath {
+fn addLinuxAssembly(b: *Build, filename: []const u8) !std.Build.LazyPath {
     const obj_basename = try std.mem.concat(b.allocator, u8, &.{
         std.fs.path.stem(filename),
         ".o"
@@ -14,13 +14,13 @@ fn addLinuxAssembly(b: *Builder, filename: []const u8) !std.Build.LazyPath {
     return output_obj;
 }
 
-fn addAllLinuxAssmeblies(b: *Builder, path: []const u8) !std.ArrayList(std.Build.LazyPath) {
+fn addAllLinuxAssmeblies(b: *Build, path: []const u8) !std.ArrayList(std.Build.LazyPath) {
     const allocator = b.allocator;
 
     var linux_assemblies = std.ArrayList(std.Build.LazyPath).init(allocator);
     errdefer linux_assemblies.deinit();
 
-    var dir = try std.fs.cwd().openIterableDir(path, .{ });
+    var dir = try std.fs.cwd().openDir(path, .{ .iterate = true });
     var it = dir.iterate();
     while (try it.next()) |file| {
         if (file.kind != .file) continue;
@@ -37,13 +37,13 @@ fn addAllLinuxAssmeblies(b: *Builder, path: []const u8) !std.ArrayList(std.Build
     return linux_assemblies;
 }
 
-pub fn build(b: *Builder) !void {
+pub fn build(b: *Build) !void {
     const target = b.standardTargetOptions(.{});
     const optimize = b.standardOptimizeOption(.{});
 
     const allocator = b.allocator;
 
-    var dir = try std.fs.cwd().openIterableDir("src", .{ });
+    var dir = try std.fs.cwd().openDir("src", .{ .iterate = true });
     var it = dir.iterate();
     while (try it.next()) |entry| {
         if (entry.kind != .directory) continue;
@@ -56,12 +56,12 @@ pub fn build(b: *Builder) !void {
 
         const exe = b.addExecutable(.{
             .name = entry.name,
-            .root_source_file = .{ .path = main_c },
             .optimize = optimize,
             .target = target
         });
         exe.addIncludePath(.{ .path = program_dir });
         exe.addIncludePath(.{ .path = "src" });
+        exe.addCSourceFile(.{ .file = b.path(main_c) });
         exe.linkLibC();
 
         var assemblies = try addAllLinuxAssmeblies(b, program_dir);
diff --git a/src/13_cache_sets/load_bytes.asm b/src/13_cache_sets/load_bytes.asm
new file mode 100644
index 0000000..a1fa2cb
--- /dev/null
+++ b/src/13_cache_sets/load_bytes.asm
@@ -0,0 +1,23 @@
+global load_bytes
+
+section .text
+
+; rdi - buffer
+; rsi - inner_loop_count
+; rdx - outer_loop_count
+; rcx - step_size
+load_bytes:
+    align 64                        ; pad so .outer_loop starts on a 64-byte boundary
+.outer_loop:
+    mov r8, rdi                     ; every outer pass restarts at the buffer base
+    mov r9, rsi                     ; reload the inner loop counter
+    .inner_loop:
+        vmovdqu ymm0, [r8]          ; read 64 bytes as two 32-byte loads
+        vmovdqu ymm0, [r8 + 32]
+        add r8, rcx                 ; advance the read pointer by step_size bytes
+        dec r9
+        jnz .inner_loop
+
+    dec rdx
+    jnz .outer_loop
+    ret
diff --git a/src/13_cache_sets/main.c b/src/13_cache_sets/main.c
new file mode 100644
index 0000000..611bf87
--- /dev/null
+++ b/src/13_cache_sets/main.c
@@ -0,0 +1,76 @@
+#include "repetition_tester.c"
+#include <assert.h>
+#include <sys/mman.h>
+
+// Implemented in load_bytes.asm, which reads the three counts as full 64-bit registers (rsi, rdx, rcx).
+void load_bytes(uint8_t *buffer, uint64_t inner_loop, uint64_t outer_loop, uint64_t step_size);
+
+static uint64_t kibibytes(uint64_t count) {
+    return 1024 * count;
+}
+
+static uint64_t mebibytes(uint64_t count) {
+    return 1024 * kibibytes(count);
+}
+
+static uint64_t gibibytes(uint64_t count) {
+    return 1024 * mebibytes(count);
+}
+
+int main() {
+    uint32_t byte_count = kibibytes(16);
+    assert(byte_count % 64 == 0); // Must be divisible by 64, the cache line size checked below
+
+    struct repetitor repetitor = {};
+    repetitor_init(&repetitor);
+    printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));
+
+    uint64_t buffer_size = gibibytes(1);
+    if (buffer_size % 4096) {
+        printf("ERROR: Size of buffer is not page aligned\n");
+        return -1;
+    }
+
+    uint8_t *buffer = mmap(0, buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (buffer == MAP_FAILED) { // mmap signals failure with MAP_FAILED, not NULL
+        printf("ERROR: Failed to allocate buffer\n");
+        return -1;
+    }
+
+    if ((size_t)buffer % 64 != 0) {
+        printf("ERROR: Allocated buffer is not cache line aligned, it is %zu\n", (size_t)buffer % 64);
+        return -1;
+    }
+
+    // Touch pages so they would be mapped in, to avoid page faults during tests
+    for (uint64_t i = 0; i < buffer_size; i += 4096) {
+        buffer[i] = (uint8_t)i;
+    }
+
+    for (int i = 0; i < 128; i++) {
+        uint64_t step_size = i*64; // stride between consecutive 64-byte reads, in bytes
+
+        uint64_t outer_loop = 64;
+        uint64_t inner_loop = 256;
+
+        repetitor_clear(&repetitor);
+        while (repetitor_repeat(&repetitor, 2)) {
+            repetitor_start(&repetitor);
+            repetitor_measure_start(&repetitor);
+            load_bytes(buffer, inner_loop, outer_loop, step_size);
+            repetitor_measure_stop(&repetitor, outer_loop * inner_loop * 64); // 64 bytes are read per inner iteration
+            repetitor_stop(&repetitor);
+        }
+
+        // char name[128] = { 0 };
+        // snprintf(name, sizeof(name), "%ld step_size", step_size);
+        // repetitor_print_results_label(&repetitor, name);
+
+        printf("%lu;%f\n", step_size, repetitor_get_best_bandwidth(&repetitor));
+    }
+
+    munmap(buffer, buffer_size);
+
+    return 0;
+}
+