add cache set tests
This commit is contained in:
parent
19b4bf5fbf
commit
3f66b12c92
9
README.md
Normal file
9
README.md
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Repetition tester
|
||||||
|
|
||||||
|
For: https://www.computerenhance.com/
|
||||||
|
|
||||||
|
Zig version used: 0.12.0
|
||||||
|
|
||||||
|
## Linux calling convention register order
|
||||||
|
|
||||||
|
RDI, RSI, RDX, RCX, R8, R9, [XYZ]MM0–7
|
14
build.zig
14
build.zig
@ -1,7 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const Builder = std.build.Builder;
|
const Build = std.Build;
|
||||||
|
|
||||||
fn addLinuxAssembly(b: *Builder, filename: []const u8) !std.Build.LazyPath {
|
fn addLinuxAssembly(b: *Build, filename: []const u8) !std.Build.LazyPath {
|
||||||
const obj_basename = try std.mem.concat(b.allocator, u8, &.{
|
const obj_basename = try std.mem.concat(b.allocator, u8, &.{
|
||||||
std.fs.path.stem(filename),
|
std.fs.path.stem(filename),
|
||||||
".o"
|
".o"
|
||||||
@ -14,13 +14,13 @@ fn addLinuxAssembly(b: *Builder, filename: []const u8) !std.Build.LazyPath {
|
|||||||
return output_obj;
|
return output_obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn addAllLinuxAssmeblies(b: *Builder, path: []const u8) !std.ArrayList(std.Build.LazyPath) {
|
fn addAllLinuxAssmeblies(b: *Build, path: []const u8) !std.ArrayList(std.Build.LazyPath) {
|
||||||
const allocator = b.allocator;
|
const allocator = b.allocator;
|
||||||
|
|
||||||
var linux_assemblies = std.ArrayList(std.Build.LazyPath).init(allocator);
|
var linux_assemblies = std.ArrayList(std.Build.LazyPath).init(allocator);
|
||||||
errdefer linux_assemblies.deinit();
|
errdefer linux_assemblies.deinit();
|
||||||
|
|
||||||
var dir = try std.fs.cwd().openIterableDir(path, .{ });
|
var dir = try std.fs.cwd().openDir(path, .{ .iterate = true });
|
||||||
var it = dir.iterate();
|
var it = dir.iterate();
|
||||||
while (try it.next()) |file| {
|
while (try it.next()) |file| {
|
||||||
if (file.kind != .file) continue;
|
if (file.kind != .file) continue;
|
||||||
@ -37,13 +37,13 @@ fn addAllLinuxAssmeblies(b: *Builder, path: []const u8) !std.ArrayList(std.Build
|
|||||||
return linux_assemblies;
|
return linux_assemblies;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build(b: *Builder) !void {
|
pub fn build(b: *Build) !void {
|
||||||
const target = b.standardTargetOptions(.{});
|
const target = b.standardTargetOptions(.{});
|
||||||
const optimize = b.standardOptimizeOption(.{});
|
const optimize = b.standardOptimizeOption(.{});
|
||||||
|
|
||||||
const allocator = b.allocator;
|
const allocator = b.allocator;
|
||||||
|
|
||||||
var dir = try std.fs.cwd().openIterableDir("src", .{ });
|
var dir = try std.fs.cwd().openDir("src", .{ .iterate = true });
|
||||||
var it = dir.iterate();
|
var it = dir.iterate();
|
||||||
while (try it.next()) |entry| {
|
while (try it.next()) |entry| {
|
||||||
if (entry.kind != .directory) continue;
|
if (entry.kind != .directory) continue;
|
||||||
@ -56,12 +56,12 @@ pub fn build(b: *Builder) !void {
|
|||||||
|
|
||||||
const exe = b.addExecutable(.{
|
const exe = b.addExecutable(.{
|
||||||
.name = entry.name,
|
.name = entry.name,
|
||||||
.root_source_file = .{ .path = main_c },
|
|
||||||
.optimize = optimize,
|
.optimize = optimize,
|
||||||
.target = target
|
.target = target
|
||||||
});
|
});
|
||||||
exe.addIncludePath(.{ .path = program_dir });
|
exe.addIncludePath(.{ .path = program_dir });
|
||||||
exe.addIncludePath(.{ .path = "src" });
|
exe.addIncludePath(.{ .path = "src" });
|
||||||
|
exe.addCSourceFile(.{ .file = b.path(main_c) });
|
||||||
exe.linkLibC();
|
exe.linkLibC();
|
||||||
|
|
||||||
var assemblies = try addAllLinuxAssmeblies(b, program_dir);
|
var assemblies = try addAllLinuxAssmeblies(b, program_dir);
|
||||||
|
23
src/13_cache_sets/load_bytes.asm
Normal file
23
src/13_cache_sets/load_bytes.asm
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
global load_bytes
|
||||||
|
|
||||||
|
section .text
|
||||||
|
|
||||||
|
; load_bytes(buffer, inner_loop_count, outer_loop_count, step_size)
;
; Repeatedly loads 64 bytes (two unaligned 32-byte AVX loads) from a strided
; sequence of addresses. Each outer pass restarts at `buffer`; each inner
; iteration loads from the current address and then advances it by
; `step_size` bytes. The loaded data is discarded (both loads target ymm0).
;
; Arguments (all read as full 64-bit registers):
;   rdi - buffer
;   rsi - inner_loop_count
;   rdx - outer_loop_count
;   rcx - step_size
;
; Clobbers: rdx, r8, r9, ymm0, flags.
load_bytes:
    ; A zero count would wrap around on the first `dec` and run the loop
    ; roughly 2^64 times, reading far beyond the buffer. Return early instead.
    test rsi, rsi
    jz .done
    test rdx, rdx
    jz .done

align 64
.outer_loop:
    mov r8, rdi                 ; r8 = current load address, restarts at buffer
    mov r9, rsi                 ; r9 = remaining inner iterations
.inner_loop:
    vmovdqu ymm0, [r8]          ; first 32 bytes at the current address
    vmovdqu ymm0, [r8 + 32]     ; next 32 bytes
    add r8, rcx                 ; advance by step_size bytes
    dec r9
    jnz .inner_loop

    dec rdx
    jnz .outer_loop

.done:
    ; Clear the upper halves of the YMM registers so the caller does not pay
    ; an AVX/SSE transition penalty after the 256-bit loads above.
    vzeroupper
    ret
|
76
src/13_cache_sets/main.c
Normal file
76
src/13_cache_sets/main.c
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
#include "repetition_tester.c"
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
// Implemented in load_bytes.asm. The routine reads its two loop counts and the
// step size from the full 64-bit RSI, RDX and RCX registers, so the parameters
// are declared 64 bits wide: with 32-bit parameter types the upper halves of
// those registers are not guaranteed to be zero by the calling convention.
void load_bytes(uint8_t *buffer, uint64_t inner_loop, uint64_t outer_loop, uint64_t step_size);
|
||||||
|
|
||||||
|
/* Number of bytes in `count` KiB (count * 2^10, wrapping modulo 2^64). */
static uint64_t kibibytes(uint64_t count) {
    return count << 10;
}
|
||||||
|
|
||||||
|
/* Number of bytes in `count` MiB, built on top of kibibytes(). */
static uint64_t mibibytes(uint64_t count) {
    const uint64_t scaled = kibibytes(count);
    return scaled * 1024;
}
|
||||||
|
|
||||||
|
/* Number of bytes in `count` GiB, built on top of mibibytes(). */
static uint64_t gibibytes(uint64_t count) {
    const uint64_t scaled = mibibytes(count);
    return scaled * 1024;
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
uint32_t byte_count = kibibytes(16);
|
||||||
|
assert(byte_count % 64 == 0); // Must be divisible by 128
|
||||||
|
|
||||||
|
struct repetitor repetitor = {};
|
||||||
|
repetitor_init(&repetitor);
|
||||||
|
printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));
|
||||||
|
|
||||||
|
uint64_t buffer_size = gibibytes(1);
|
||||||
|
if (buffer_size % 4096) {
|
||||||
|
printf("ERROR: Size of buffer is not page aligned\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t *buffer = mmap(0, buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
if (buffer == NULL) {
|
||||||
|
printf("ERROR: Failed to allocate buffer\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((size_t)buffer % 64 != 0) {
|
||||||
|
printf("ERROR: Allocated buffer is not cache line aligned, it is %ld\n", (size_t)buffer % 64);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Touch pages so they would be mapped in, to avoid page faults during tests
|
||||||
|
for (uint64_t i = 0; i < buffer_size; i += 4096) {
|
||||||
|
buffer[i] = (uint8_t)i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// uint64_t byte_count = unadjusted_byte_count - 64;
|
||||||
|
|
||||||
|
for (int i = 0; i < 128; i++) {
|
||||||
|
uint64_t step_size = i*64;
|
||||||
|
|
||||||
|
uint64_t outer_loop = 64;
|
||||||
|
uint64_t inner_loop = 256;
|
||||||
|
|
||||||
|
repetitor_clear(&repetitor);
|
||||||
|
while (repetitor_repeat(&repetitor, 2)) {
|
||||||
|
repetitor_start(&repetitor);
|
||||||
|
repetitor_measure_start(&repetitor);
|
||||||
|
load_bytes(buffer, inner_loop, outer_loop, step_size);
|
||||||
|
repetitor_measure_stop(&repetitor, outer_loop * inner_loop * 64);
|
||||||
|
repetitor_stop(&repetitor);
|
||||||
|
}
|
||||||
|
|
||||||
|
// char name[128] = { 0 };
|
||||||
|
// snprintf(name, sizeof(name), "%ld step_size", step_size);
|
||||||
|
// repetitor_print_results_label(&repetitor, name);
|
||||||
|
|
||||||
|
printf("%ld;%f\n", step_size, repetitor_get_best_bandwidth(&repetitor));
|
||||||
|
}
|
||||||
|
|
||||||
|
munmap(buffer, buffer_size);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user