1
0

Compare commits

..

2 Commits

Author SHA1 Message Date
19b4bf5fbf add unaligned load penalty tests 2024-04-08 19:40:54 +03:00
adcb0891fb restructure project layout 2024-04-07 12:50:27 +03:00
30 changed files with 135 additions and 85 deletions

View File

@ -14,11 +14,13 @@ fn addLinuxAssembly(b: *Builder, filename: []const u8) !std.Build.LazyPath {
return output_obj;
}
fn addAllLinuxAssmeblies(b: *Builder) !std.ArrayList(std.Build.LazyPath) {
var linux_assemblies = std.ArrayList(std.Build.LazyPath).init(b.allocator);
fn addAllLinuxAssmeblies(b: *Builder, path: []const u8) !std.ArrayList(std.Build.LazyPath) {
const allocator = b.allocator;
var linux_assemblies = std.ArrayList(std.Build.LazyPath).init(allocator);
errdefer linux_assemblies.deinit();
var dir = try std.fs.cwd().openIterableDir("src", .{ });
var dir = try std.fs.cwd().openIterableDir(path, .{ });
var it = dir.iterate();
while (try it.next()) |file| {
if (file.kind != .file) continue;
@ -26,8 +28,8 @@ fn addAllLinuxAssmeblies(b: *Builder) !std.ArrayList(std.Build.LazyPath) {
const ext = std.fs.path.extension(file.name);
if (!std.mem.eql(u8, ext, ".asm")) continue;
const assembly_path = try std.mem.concat(b.allocator, u8, &.{ "src/", file.name });
defer b.allocator.free(assembly_path);
const assembly_path = try std.fs.path.join(allocator, &.{ path, file.name });
defer allocator.free(assembly_path);
try linux_assemblies.append(try addLinuxAssembly(b, assembly_path));
}
@ -39,32 +41,32 @@ pub fn build(b: *Builder) !void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
var linux_assemblies = try addAllLinuxAssmeblies(b);
defer linux_assemblies.deinit();
const allocator = b.allocator;
var dir = try std.fs.cwd().openIterableDir("src/tests", .{ });
var dir = try std.fs.cwd().openIterableDir("src", .{ });
var it = dir.iterate();
while (try it.next()) |file| {
if (file.kind != .file) continue;
while (try it.next()) |entry| {
if (entry.kind != .directory) continue;
const ext = std.fs.path.extension(file.name);
if (!std.mem.eql(u8, ext, ".c")) continue;
const program_dir = try std.fs.path.join(allocator, &.{ "src", entry.name });
defer allocator.free(program_dir);
const source_file_path = try std.mem.concat(b.allocator, u8, &.{ "src/tests/", file.name });
defer b.allocator.free(source_file_path);
const executable_name = std.fs.path.stem(file.name);
const main_c = try std.fs.path.join(allocator, &.{ program_dir, "main.c" });
defer allocator.free(main_c);
const exe = b.addExecutable(.{
.name = executable_name,
.root_source_file = .{ .path = source_file_path },
.name = entry.name,
.root_source_file = .{ .path = main_c },
.optimize = optimize,
.target = target
});
exe.addIncludePath(.{ .path = program_dir });
exe.addIncludePath(.{ .path = "src" });
exe.linkLibC();
for (linux_assemblies.items) |obj| {
var assemblies = try addAllLinuxAssmeblies(b, program_dir);
defer assemblies.deinit();
for (assemblies.items) |obj| {
exe.addObjectFile(obj);
}
@ -75,9 +77,9 @@ pub fn build(b: *Builder) !void {
run_exe.addArgs(args);
}
const step_description = try std.fmt.allocPrint(b.allocator, "Run '{s}' test", .{source_file_path});
const step_description = try std.fmt.allocPrint(b.allocator, "Run '{s}' test", .{ entry.name });
defer b.allocator.free(step_description);
const run_step = b.step(executable_name, step_description);
const run_step = b.step(entry.name, step_description);
run_step.dependOn(&run_exe.step);
}
}

View File

@ -119,3 +119,4 @@ int main(int argc, char **argv) {
return 0;
}

View File

@ -54,3 +54,4 @@ int main() {
return 0;
}

View File

@ -38,3 +38,4 @@ int main() {
return 0;
}

View File

@ -38,3 +38,4 @@ int main() {
return 0;
}

View File

@ -37,3 +37,4 @@ int main() {
return 0;
}

View File

@ -0,0 +1,27 @@
global cyclic_load_bytes

section .text

; void cyclic_load_bytes(uint8_t *buffer, uint64_t inner_loop_count, uint64_t outer_loop_count)
;
; Repeatedly reads the same region of memory with 32-byte unaligned AVX loads.
; The loaded data is discarded (every load targets ymm0); only the memory
; traffic matters.
;
; Arguments:
;   rdi - buffer: start address of the region to read
;   rsi - inner_loop_count: number of bytes to read per pass; one inner
;         iteration consumes 128 bytes, so the amount actually read is
;         rounded up to the next multiple of 128
;   rdx - outer_loop_count: number of passes over the region
;
; Scratch registers: rcx (read cursor), r8 (pass counter), r9 (bytes read in
; the current pass), ymm0.
;
; Both loops test their condition at the bottom, so one pass of at least
; 128 bytes is performed even when rsi or rdx is zero.
cyclic_load_bytes:
    xor r8, r8                  ; completed passes = 0
.outer_loop:
    mov rcx, rdi                ; restart the cursor at the buffer start
    xor r9, r9                  ; bytes read in this pass = 0
.inner_loop:
    vmovdqu ymm0, [rcx]
    vmovdqu ymm0, [rcx + 32]
    vmovdqu ymm0, [rcx + 64]
    vmovdqu ymm0, [rcx + 96]
    add rcx, 128
    add r9, 128
    cmp r9, rsi
    jb .inner_loop
    inc r8
    cmp r8, rdx
    jb .outer_loop
    ; The upper halves of the ymm registers are dirty after the loads above.
    ; Clear them so SSE code in the caller does not pay AVX/SSE transition
    ; penalties. Placed after both loops so the measured loop is unchanged.
    vzeroupper
    ret

View File

@ -0,0 +1,3 @@
#ifndef CYCLIC_LOAD_BYTES_H
#define CYCLIC_LOAD_BYTES_H

#include <stdint.h>

/*
 * Reads memory starting at `buffer` with 32-byte unaligned vector loads and
 * discards the result. Implemented in assembly.
 *
 * buffer           - start of the region to read
 * inner_loop_count - number of bytes to read per pass; reads are issued in
 *                    128-byte steps, so the amount is rounded up to a
 *                    multiple of 128 and at least 128 bytes are always read
 * outer_loop_count - how many times the region is read from the start; at
 *                    least one pass is always performed
 */
void cyclic_load_bytes(uint8_t *buffer, uint64_t inner_loop_count, uint64_t outer_loop_count);

#endif /* CYCLIC_LOAD_BYTES_H */

View File

@ -0,0 +1,77 @@
#include "repetition_tester.c"
#include "cyclic_load_bytes.h"
#include <sys/mman.h>
/* Converts a number of KiB into bytes (1 KiB = 2^10 bytes). */
static uint64_t kibibytes(uint64_t count) {
    const uint64_t byte_total = count << 10;
    return byte_total;
}
/* Converts a number of MiB into bytes (1 MiB = 1024 KiB). */
static uint64_t mibibytes(uint64_t count) {
    const uint64_t kibibyte_total = count << 10;
    return kibibytes(kibibyte_total);
}
/* Converts a number of GiB into bytes (1 GiB = 1024 MiB). */
static uint64_t gibibytes(uint64_t count) {
    const uint64_t mibibyte_total = count << 10;
    return mibibytes(mibibyte_total);
}
/*
 * Measures how the starting offset of a read region affects read throughput.
 *
 * A 1 GiB anonymous mapping is allocated and every page is touched up front.
 * For each configured byte count and each offset, cyclic_load_bytes() is
 * timed through the repetition tester and the result is printed with a
 * "<offset> offset, <size> size" label.
 *
 * Returns 0 on success and -1 if the buffer cannot be set up or a configured
 * byte count is unusable.
 */
int main() {
    // uint32_t byte_counts[] = { kibibytes(16), kibibytes(64), mibibytes(1) };
    uint32_t byte_counts[] = { mibibytes(1) };
    uint64_t offsets[] = { 0, 1, 2, 4, 8, 16, 32, 64 };

    struct repetitor repetitor = {};
    repetitor_init(&repetitor);
    printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));

    uint64_t buffer_size = gibibytes(1);
    if (buffer_size % 4096) {
        printf("ERROR: Size of buffer is not page aligned\n");
        return -1;
    }

    // mmap() reports failure with MAP_FAILED, never with NULL.
    void *mapping = mmap(0, buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (mapping == MAP_FAILED) {
        printf("ERROR: Failed to allocate buffer\n");
        return -1;
    }
    uint8_t *buffer = mapping;

    if ((size_t)buffer % 64 != 0) {
        printf("ERROR: Allocated buffer is not cache line aligned, it is %zu\n", (size_t)buffer % 64);
        munmap(buffer, buffer_size);
        return -1;
    }

    // Touch pages so they would be mapped in, to avoid page faults during tests
    for (uint64_t i = 0; i < buffer_size; i += 4096) {
        buffer[i] = (uint8_t)i;
    }

    for (size_t i = 0; i < ARRAY_LEN(byte_counts); i++) {
        uint64_t unadjusted_byte_count = byte_counts[i];
        assert(unadjusted_byte_count % 128 == 0); // Must be divisible by 128

        // The adjusted count below is used as a divisor, so it must stay
        // non-zero; this check also holds when assert() is compiled out.
        if (unadjusted_byte_count <= 128) {
            printf("ERROR: Byte count %llu must be greater than 128\n", (unsigned long long)unadjusted_byte_count);
            munmap(buffer, buffer_size);
            return -1;
        }

        // NOTE(review): presumably 128 bytes are dropped so that the region
        // shifted by the largest offset (64) still fits inside the
        // unadjusted footprint -- confirm.
        uint64_t byte_count = unadjusted_byte_count - 128;

        // The pass count and reported volume do not depend on the offset.
        uint64_t outer_loop_count = buffer_size / byte_count;
        uint64_t bytes_read = outer_loop_count * byte_count;

        for (size_t j = 0; j < ARRAY_LEN(offsets); j++) {
            uint64_t offset = offsets[j];

            repetitor_clear(&repetitor);
            while (repetitor_repeat(&repetitor, 5)) {
                repetitor_start(&repetitor);
                repetitor_measure_start(&repetitor);
                cyclic_load_bytes(buffer + offset, byte_count, outer_loop_count);
                repetitor_measure_stop(&repetitor, bytes_read);
                repetitor_stop(&repetitor);
            }

            char name[128] = { 0 };
            snprintf(name, sizeof(name), "%llu offset, %llu size", (unsigned long long)offset, (unsigned long long)byte_count);
            repetitor_print_results_label(&repetitor, name);
        }
    }

    munmap(buffer, buffer_size);

    return 0;
}

View File

@ -1,64 +0,0 @@
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/stat.h>
#include "repetition_tester.c"
#define RPROF_IMPLEMENTATION
#include "rprof.h"
#include "main_read_file.c"
#include "main_write_bytes_asm.c"
#include "main_write_all_bytes.c"
#include "main_malloc_read.c"
#include "main_write_backward.c"
#include "main_write_loop.c"
#include "main_load_uop.c"
#include "main_store_uop.c"
#include "main_short_load_uop.c"
#include "main_read_widths.c"
/*
 * Test launcher: runs the test named by the first command-line argument.
 *
 * argv[1] selects the test; "read_file" and "malloc_read" additionally
 * require a file name in argv[2]. Names must match exactly.
 *
 * Returns the selected test's result, or -1 when arguments are missing or the
 * test name is unknown.
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        printf("Usage: %s <test-name>\n", argv[0]);
        return -1;
    }

    char *test_name = argv[1];

    // Exact comparison: a length-limited compare with a mismatched literal
    // size would accept any name sharing a prefix with the test name.
    if (!strcmp(test_name, "write_bytes_asm")) {
        return main_test_write_bytes_asm();
    } else if (!strcmp(test_name, "read_widths")) {
        return main_test_read_widths();
    } else if (!strcmp(test_name, "write_all_bytes")) {
        return main_test_write_all_bytes();
    } else if (!strcmp(test_name, "load_uop")) {
        return main_test_load_uop();
    } else if (!strcmp(test_name, "store_uop")) {
        return main_test_store_uop();
    } else if (!strcmp(test_name, "short_load_uop")) {
        return main_test_short_load_uop();
    } else if (!strcmp(test_name, "write_loop")) {
        return main_test_write_loop();
    } else if (!strcmp(test_name, "write_backward")) {
        return main_test_write_backward();
    } else if (!strcmp(test_name, "read_file")) {
        if (argc < 3) {
            printf("Usage: %s read_file <filename>\n", argv[0]);
            return -1;
        }
        return main_test_read_file(argv[2]);
    } else if (!strcmp(test_name, "malloc_read")) {
        if (argc < 3) {
            printf("Usage: %s malloc_read <filename>\n", argv[0]);
            return -1;
        }
        return main_test_malloc_read(argv[2]);
    } else {
        printf("ERROR: Unknown test case '%s'\n", test_name);
        return -1;
    }
}