Compare commits
2 Commits
cef071d28f
...
19b4bf5fbf
Author | SHA1 | Date | |
---|---|---|---|
19b4bf5fbf | |||
adcb0891fb |
44
build.zig
44
build.zig
@ -14,11 +14,13 @@ fn addLinuxAssembly(b: *Builder, filename: []const u8) !std.Build.LazyPath {
|
|||||||
return output_obj;
|
return output_obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn addAllLinuxAssmeblies(b: *Builder) !std.ArrayList(std.Build.LazyPath) {
|
fn addAllLinuxAssmeblies(b: *Builder, path: []const u8) !std.ArrayList(std.Build.LazyPath) {
|
||||||
var linux_assemblies = std.ArrayList(std.Build.LazyPath).init(b.allocator);
|
const allocator = b.allocator;
|
||||||
|
|
||||||
|
var linux_assemblies = std.ArrayList(std.Build.LazyPath).init(allocator);
|
||||||
errdefer linux_assemblies.deinit();
|
errdefer linux_assemblies.deinit();
|
||||||
|
|
||||||
var dir = try std.fs.cwd().openIterableDir("src", .{ });
|
var dir = try std.fs.cwd().openIterableDir(path, .{ });
|
||||||
var it = dir.iterate();
|
var it = dir.iterate();
|
||||||
while (try it.next()) |file| {
|
while (try it.next()) |file| {
|
||||||
if (file.kind != .file) continue;
|
if (file.kind != .file) continue;
|
||||||
@ -26,8 +28,8 @@ fn addAllLinuxAssmeblies(b: *Builder) !std.ArrayList(std.Build.LazyPath) {
|
|||||||
const ext = std.fs.path.extension(file.name);
|
const ext = std.fs.path.extension(file.name);
|
||||||
if (!std.mem.eql(u8, ext, ".asm")) continue;
|
if (!std.mem.eql(u8, ext, ".asm")) continue;
|
||||||
|
|
||||||
const assembly_path = try std.mem.concat(b.allocator, u8, &.{ "src/", file.name });
|
const assembly_path = try std.fs.path.join(allocator, &.{ path, file.name });
|
||||||
defer b.allocator.free(assembly_path);
|
defer allocator.free(assembly_path);
|
||||||
|
|
||||||
try linux_assemblies.append(try addLinuxAssembly(b, assembly_path));
|
try linux_assemblies.append(try addLinuxAssembly(b, assembly_path));
|
||||||
}
|
}
|
||||||
@ -39,32 +41,32 @@ pub fn build(b: *Builder) !void {
|
|||||||
const target = b.standardTargetOptions(.{});
|
const target = b.standardTargetOptions(.{});
|
||||||
const optimize = b.standardOptimizeOption(.{});
|
const optimize = b.standardOptimizeOption(.{});
|
||||||
|
|
||||||
var linux_assemblies = try addAllLinuxAssmeblies(b);
|
const allocator = b.allocator;
|
||||||
defer linux_assemblies.deinit();
|
|
||||||
|
|
||||||
var dir = try std.fs.cwd().openIterableDir("src/tests", .{ });
|
var dir = try std.fs.cwd().openIterableDir("src", .{ });
|
||||||
var it = dir.iterate();
|
var it = dir.iterate();
|
||||||
while (try it.next()) |file| {
|
while (try it.next()) |entry| {
|
||||||
if (file.kind != .file) continue;
|
if (entry.kind != .directory) continue;
|
||||||
|
|
||||||
const ext = std.fs.path.extension(file.name);
|
const program_dir = try std.fs.path.join(allocator, &.{ "src", entry.name });
|
||||||
if (!std.mem.eql(u8, ext, ".c")) continue;
|
defer allocator.free(program_dir);
|
||||||
|
|
||||||
const source_file_path = try std.mem.concat(b.allocator, u8, &.{ "src/tests/", file.name });
|
const main_c = try std.fs.path.join(allocator, &.{ program_dir, "main.c" });
|
||||||
defer b.allocator.free(source_file_path);
|
defer allocator.free(main_c);
|
||||||
|
|
||||||
const executable_name = std.fs.path.stem(file.name);
|
|
||||||
|
|
||||||
const exe = b.addExecutable(.{
|
const exe = b.addExecutable(.{
|
||||||
.name = executable_name,
|
.name = entry.name,
|
||||||
.root_source_file = .{ .path = source_file_path },
|
.root_source_file = .{ .path = main_c },
|
||||||
.optimize = optimize,
|
.optimize = optimize,
|
||||||
.target = target
|
.target = target
|
||||||
});
|
});
|
||||||
|
exe.addIncludePath(.{ .path = program_dir });
|
||||||
exe.addIncludePath(.{ .path = "src" });
|
exe.addIncludePath(.{ .path = "src" });
|
||||||
exe.linkLibC();
|
exe.linkLibC();
|
||||||
|
|
||||||
for (linux_assemblies.items) |obj| {
|
var assemblies = try addAllLinuxAssmeblies(b, program_dir);
|
||||||
|
defer assemblies.deinit();
|
||||||
|
for (assemblies.items) |obj| {
|
||||||
exe.addObjectFile(obj);
|
exe.addObjectFile(obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -75,9 +77,9 @@ pub fn build(b: *Builder) !void {
|
|||||||
run_exe.addArgs(args);
|
run_exe.addArgs(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
const step_description = try std.fmt.allocPrint(b.allocator, "Run '{s}' test", .{source_file_path});
|
const step_description = try std.fmt.allocPrint(b.allocator, "Run '{s}' test", .{ entry.name });
|
||||||
defer b.allocator.free(step_description);
|
defer b.allocator.free(step_description);
|
||||||
const run_step = b.step(executable_name, step_description);
|
const run_step = b.step(entry.name, step_description);
|
||||||
run_step.dependOn(&run_exe.step);
|
run_step.dependOn(&run_exe.step);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -119,3 +119,4 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -54,3 +54,4 @@ int main() {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -38,3 +38,4 @@ int main() {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -38,3 +38,4 @@ int main() {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -37,3 +37,4 @@ int main() {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
27
src/12_unaligned_load_penalties/cyclic_load_bytes.asm
Normal file
27
src/12_unaligned_load_penalties/cyclic_load_bytes.asm
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
global cyclic_load_bytes
|
||||||
|
|
||||||
|
section .text
|
||||||
|
|
||||||
|
; cyclic_load_bytes - repeatedly reads the same memory region with 32-byte loads.
;
; Arguments:
;   rdi - buffer:           address of the first byte to load from
;   rsi - inner_loop_count: number of bytes covered by one pass over the region
;   rdx - outer_loop_count: number of passes over the region
;
; Every inner iteration issues four unaligned 32-byte loads (128 bytes in total)
; and discards the data: each load simply overwrites ymm0.
; Both loops test their condition after the body, so at least one pass of at
; least 128 bytes is always performed, even when a count is zero.
;
; Clobbers: rcx, r8, r9, ymm0, flags.
cyclic_load_bytes:
    xor r8, r8                  ; r8 = number of completed passes
.outer_loop:
    mov rcx, rdi                ; rcx = read cursor, rewound to the region start
    xor r9, r9                  ; r9 = bytes covered in the current pass

.inner_loop:
    vmovdqu ymm0, [rcx]
    vmovdqu ymm0, [rcx + 32]
    vmovdqu ymm0, [rcx + 64]
    vmovdqu ymm0, [rcx + 96]
    add rcx, 128
    add r9, 128
    cmp r9, rsi
    jb .inner_loop              ; unsigned compare: keep going while r9 < rsi

    inc r8
    cmp r8, rdx
    jb .outer_loop              ; unsigned compare: keep going while r8 < rdx

    ; The loads above left the upper halves of the YMM registers dirty; clear
    ; them so SSE code executed by the caller does not pay transition penalties.
    vzeroupper
    ret
|
3
src/12_unaligned_load_penalties/cyclic_load_bytes.h
Normal file
3
src/12_unaligned_load_penalties/cyclic_load_bytes.h
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// Reads memory starting at `buffer` with 32-byte loads, 128 bytes per step,
// until `inner_loop_count` bytes have been covered, then starts over from
// `buffer`; this is done `outer_loop_count` times. The loaded data is discarded.
// Implemented in cyclic_load_bytes.asm. Both loops there check their condition
// after the body, so at least 128 bytes are always read.
void cyclic_load_bytes(uint8_t *buffer, uint64_t inner_loop_count, uint64_t outer_loop_count);
|
77
src/12_unaligned_load_penalties/main.c
Normal file
77
src/12_unaligned_load_penalties/main.c
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
#include "repetition_tester.c"
|
||||||
|
#include "cyclic_load_bytes.h"
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
// Converts a number of KiB into a number of bytes.
static uint64_t kibibytes(uint64_t count) {
    const uint64_t bytes_per_kibibyte = 1024;
    return count * bytes_per_kibibyte;
}
|
||||||
|
|
||||||
|
// Converts a number of MiB into a number of bytes.
static uint64_t mibibytes(uint64_t count) {
    const uint64_t as_kibibyte_bytes = kibibytes(count);
    return as_kibibyte_bytes * 1024;
}
|
||||||
|
|
||||||
|
// Converts a number of GiB into a number of bytes.
static uint64_t gibibytes(uint64_t count) {
    const uint64_t as_mibibyte_bytes = mibibytes(count);
    return as_mibibyte_bytes * 1024;
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
// uint32_t byte_counts[] = { kibibytes(16), kibibytes(64), mibibytes(1) };
|
||||||
|
uint32_t byte_counts[] = { mibibytes(1) };
|
||||||
|
uint64_t offsets[] = { 0, 1, 2, 4, 8, 16, 32, 64 };
|
||||||
|
|
||||||
|
struct repetitor repetitor = {};
|
||||||
|
repetitor_init(&repetitor);
|
||||||
|
printf("CPU Frequency: %ldHz (~%.2fGHz)\n", repetitor.cpu_freq, (float)repetitor.cpu_freq/(1000*1000*1000));
|
||||||
|
|
||||||
|
uint64_t buffer_size = gibibytes(1);
|
||||||
|
if (buffer_size % 4096) {
|
||||||
|
printf("ERROR: Size of buffer is not page aligned\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t *buffer = mmap(0, buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
if (buffer == NULL) {
|
||||||
|
printf("ERROR: Failed to allocate buffer\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((size_t)buffer % 64 != 0) {
|
||||||
|
printf("ERROR: Allocated buffer is not cache line aligned, it is %ld\n", (size_t)buffer % 64);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Touch pages so they would be mapped in, to avoid page faults during tests
|
||||||
|
for (uint64_t i = 0; i < buffer_size; i += 4096) {
|
||||||
|
buffer[i] = (uint8_t)i;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < ARRAY_LEN(byte_counts); i++) {
|
||||||
|
uint64_t unadjusted_byte_count = byte_counts[i];
|
||||||
|
assert(unadjusted_byte_count % 128 == 0); // Must be divisible by 128
|
||||||
|
uint64_t byte_count = unadjusted_byte_count - 128;
|
||||||
|
|
||||||
|
for (int j = 0; j < ARRAY_LEN(offsets); j++) {
|
||||||
|
uint64_t offset = offsets[j];
|
||||||
|
|
||||||
|
uint64_t outer_loop_count = buffer_size / byte_count;
|
||||||
|
uint64_t bytes_read = outer_loop_count * byte_count;
|
||||||
|
|
||||||
|
repetitor_clear(&repetitor);
|
||||||
|
while (repetitor_repeat(&repetitor, 5)) {
|
||||||
|
repetitor_start(&repetitor);
|
||||||
|
repetitor_measure_start(&repetitor);
|
||||||
|
cyclic_load_bytes(buffer + offset, byte_count, outer_loop_count);
|
||||||
|
repetitor_measure_stop(&repetitor, bytes_read);
|
||||||
|
repetitor_stop(&repetitor);
|
||||||
|
}
|
||||||
|
|
||||||
|
char name[128] = { 0 };
|
||||||
|
snprintf(name, sizeof(name), "%ld offset, %ld size", offset, byte_count);
|
||||||
|
repetitor_print_results_label(&repetitor, name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
munmap(buffer, buffer_size);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
64
src/main.c
64
src/main.c
@ -1,64 +0,0 @@
|
|||||||
#include <fcntl.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
|
|
||||||
#include "repetition_tester.c"
|
|
||||||
|
|
||||||
#define RPROF_IMPLEMENTATION
|
|
||||||
#include "rprof.h"
|
|
||||||
|
|
||||||
#include "main_read_file.c"
|
|
||||||
#include "main_write_bytes_asm.c"
|
|
||||||
#include "main_write_all_bytes.c"
|
|
||||||
#include "main_malloc_read.c"
|
|
||||||
#include "main_write_backward.c"
|
|
||||||
#include "main_write_loop.c"
|
|
||||||
#include "main_load_uop.c"
|
|
||||||
#include "main_store_uop.c"
|
|
||||||
#include "main_short_load_uop.c"
|
|
||||||
#include "main_read_widths.c"
|
|
||||||
|
|
||||||
// Runs the test selected by argv[1]; the name must match exactly.
// "read_file" and "malloc_read" additionally require a file name in argv[2].
// Returns the selected test's result, or -1 on a usage error or unknown name.
int main(int argc, char **argv) {
    if (argc < 2) {
        printf("Usage: %s <test-name>\n", argv[0]);
        return -1;
    }

    char *test_name = argv[1];

    // strcmp gives whole-string equality. The previous strncmp calls passed
    // sizeof(<literal>) as the length, and the "write_all_bytes" branch used the
    // size of a different, shorter literal, so it accepted any name that merely
    // started with "write_all_by".
    if (!strcmp(test_name, "write_bytes_asm")) {
        return main_test_write_bytes_asm();
    } else if (!strcmp(test_name, "read_widths")) {
        return main_test_read_widths();
    } else if (!strcmp(test_name, "write_all_bytes")) {
        return main_test_write_all_bytes();
    } else if (!strcmp(test_name, "load_uop")) {
        return main_test_load_uop();
    } else if (!strcmp(test_name, "store_uop")) {
        return main_test_store_uop();
    } else if (!strcmp(test_name, "short_load_uop")) {
        return main_test_short_load_uop();
    } else if (!strcmp(test_name, "write_loop")) {
        return main_test_write_loop();
    } else if (!strcmp(test_name, "write_backward")) {
        return main_test_write_backward();
    } else if (!strcmp(test_name, "read_file")) {
        if (argc < 3) {
            printf("Usage: %s read_file <filename>\n", argv[0]);
            return -1;
        }

        return main_test_read_file(argv[2]);
    } else if (!strcmp(test_name, "malloc_read")) {
        if (argc < 3) {
            printf("Usage: %s malloc_read <filename>\n", argv[0]);
            return -1;
        }

        return main_test_malloc_read(argv[2]);
    } else {
        printf("ERROR: Unknown test case '%s'\n", test_name);
        return -1;
    }
}
|
|
Loading…
Reference in New Issue
Block a user