From 495260ab9b0b4e244d6b4a4f76561777594e1629 Mon Sep 17 00:00:00 2001 From: Rokas Puzonas Date: Sat, 14 Sep 2024 15:10:05 +0300 Subject: [PATCH] add nasm backend --- README.md | 2 +- build.zig | 5 ++ examples/collatz.bf | 33 +++++++++++ examples/hello.bf | 3 + examples/sierpinski.bf | 12 ++++ include/bf_compiler.h | 5 +- src/bf_compiler.c | 14 ++++- src/bf_compiler_nasm.c | 129 +++++++++++++++++++++++++++++++++++++++++ src/main.c | 42 +++++++++++--- 9 files changed, 233 insertions(+), 12 deletions(-) create mode 100644 examples/collatz.bf create mode 100644 examples/hello.bf create mode 100644 examples/sierpinski.bf create mode 100644 src/bf_compiler_nasm.c diff --git a/README.md b/README.md index 8162eb2..f5551ea 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ zig build cdb ## Feature wishlist * Update TinyCC backend, so host would not need to have it installed. Relies on host having include paths used by TinyCC. -* Add more backends. Maybe LLVM, nasm or create my own +* Add more backends. Maybe LLVM or create my own * Perform optimizations? Like removing not needed `[]` blocks which are known never to be ran? Idk, what could be optimized * Add WASM/Web build diff --git a/build.zig b/build.zig index 1d1f897..7db99be 100644 --- a/build.zig +++ b/build.zig @@ -45,6 +45,11 @@ pub fn build(b: *Build) !void { } } + if (b.option(bool, "nasm", "Toggle 'nasm' compiler backend") orelse false) { + // TODO: Get nasm source code and embed it to executable. 
+ exe.root_module.addCMacro("BF_BACKEND_NASM", ""); + } + zcc.createStep(b, "cdb", .{ .target = exe }); b.installArtifact(exe); diff --git a/examples/collatz.bf b/examples/collatz.bf new file mode 100644 index 0000000..c2dfe4a --- /dev/null +++ b/examples/collatz.bf @@ -0,0 +1,33 @@ +>,[ + [ + ----------[ + >>>[>>>>]+[[-]+<[->>>>++>>>>+[>>>>]++[->+<<<<<]]<<<] + ++++++[>------<-]>--[>>[->>>>]+>+[<<<<]>-],< + ]> + ]>>>++>+>>[ + <<[>>>>[-]+++++++++<[>-<-]+++++++++>[-[<->-]+[<<<<]]<[>+<-]>] + >[>[>>>>]+[[-]<[+[->>>>]>+<]>[<+>[<<<<]]+<<<<]>>>[->>>>]+>+[<<<<]] + >[[>+>>[<<<<+>>>>-]>]<<<<[-]>[-<<<<]]>>>>>>> + ]>>+[[-]++++++>>>>]<<<<[[<++++++++>-]<.[-]<[-]<[-]<]<, +] + +[The Collatz problem or 3n+1 problem is as follows. Take a natural number n. +If it's even, halve it; if odd, triple it and add one. Repeat the process with +the resulting number, and continue indefinitely. If n is 0, the resulting +sequence is 0, 0, 0, 0... It is conjectured but not proven that for any +positive integer n, the resulting sequence will end in 1, 4, 2, 1... +See also http://www.research.att.com/projects/OEIS?Anum=A006577 + +This program takes a series of decimal numbers, followed by linefeeds (10). +The entire series is terminated by an EOF (0 or "no change"). For each number +input, the program outputs, in decimal, the number of steps from that number +to zero or one, when following the rule above. It's quite fast; on a Sun +machine, it took three seconds for a random 640-digit number. + +One more note. 
This program was originally written for Tristan Parker's +Brainfuck Texas Holdem contest, and won by default (it was the only entry); +the version I submitted before the contest deadline is at +http://www.hevanet.com/cristofd/brainfuck/oldcollatz.b + +Daniel B Cristofani (cristofdathevanetdotcom) +http://www.hevanet.com/cristofd/brainfuck/] diff --git a/examples/hello.bf b/examples/hello.bf new file mode 100644 index 0000000..2570b01 --- /dev/null +++ b/examples/hello.bf @@ -0,0 +1,3 @@ +>++++++++[<+++++++++>-]<.>++++[<+++++++>-]<+.+++++++..+++.>>++++++[<+++++++>-]<+ ++.------------.>++++++[<+++++++++>-]<+.<.+++.------.--------.>>>++++[<++++++++>- +]<+. diff --git a/examples/sierpinski.bf b/examples/sierpinski.bf new file mode 100644 index 0000000..71f2abf --- /dev/null +++ b/examples/sierpinski.bf @@ -0,0 +1,12 @@ +[sierpinski.b -- display Sierpinski triangle +(c) 2016 Daniel B. Cristofani +http://brainfuck.org/] + +++++++++[>+>++++<<-]>++>>+<[-[>>+<<-]+>>]>+[ + -<<<[ + ->[+[-]+>++>>>-<<]<[<]>>++++++[<<+++++>>-]+<<++.[-]<< + ]>.>+[>>]>+ +] + +[Shows an ASCII representation of the Sierpinski triangle +(iteration 5).] 
diff --git a/include/bf_compiler.h b/include/bf_compiler.h
index adbed7a..4a815ef 100644
--- a/include/bf_compiler.h
+++ b/include/bf_compiler.h
@@ -5,7 +5,8 @@
 #include
 
 enum bf_compiler_backend {
-    BF_COMPILER_TINYCC
+    BF_COMPILER_TINYCC,
+    BF_COMPILER_NASM,
 };
 
 struct bf_compiler {
@@ -14,7 +15,7 @@ struct bf_compiler {
     uint32_t cell_size;
 };
 
-void bf_compiler_init(struct bf_compiler *compiler, uint32_t cell_size, uint32_t data_len);
+void bf_compiler_init(struct bf_compiler *compiler, enum bf_compiler_backend backend, uint32_t cell_size, uint32_t data_len);
 int bf_compiler_compile(struct bf_compiler *compiler, const char *output_filename, const char *program, size_t program_len);
 
 #endif //BF_COMPILER_H_
diff --git a/src/bf_compiler.c b/src/bf_compiler.c
index c5e8ba0..503400e 100644
--- a/src/bf_compiler.c
+++ b/src/bf_compiler.c
@@ -7,11 +7,17 @@
 #include "bf_compiler_tinycc.c"
 #endif
 
-void bf_compiler_init(struct bf_compiler *compiler, uint32_t cell_size, uint32_t data_len)
+#ifdef BF_BACKEND_NASM
+#include "bf_compiler_nasm.c"
+#endif
+
+void bf_compiler_init(struct bf_compiler *compiler, enum bf_compiler_backend backend, uint32_t cell_size, uint32_t data_len)
 {
     assert(cell_size == 8 || cell_size == 16 || cell_size == 32);
     compiler->cell_size = cell_size;
     compiler->data_len = data_len;
+
+    compiler->backend = backend;
 }
 
 int bf_compiler_compile(struct bf_compiler *compiler, const char *output_filename, const char *program, size_t program_len)
@@ -22,5 +28,11 @@ int bf_compiler_compile(struct bf_compiler *compiler, const char *output_filenam
     }
 #endif
 
+#ifdef BF_BACKEND_NASM
+    if (compiler->backend == BF_COMPILER_NASM) {
+        return bf_compiler_compile_nasm(compiler, output_filename, program, program_len);
+    }
+#endif
+
     return -1;
 }
diff --git a/src/bf_compiler_nasm.c b/src/bf_compiler_nasm.c
new file mode 100644
index 0000000..6ccca90
--- /dev/null
+++ b/src/bf_compiler_nasm.c
@@ -0,0 +1,183 @@
+#include "bf_compiler.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Deepest '[' nesting that the generated-label stack can track.
+enum { BF_NASM_MAX_LOOP_DEPTH = 128 };
+
+// Writes `str` to `f` without appending anything.
+static void fwrite_str(const char *str, FILE *f)
+{
+    fputs(str, f);
+}
+
+// Writes `str` to `f` followed by a newline.
+static void fwrite_str_line(const char *str, FILE *f)
+{
+    fwrite_str(str, f);
+    fwrite_str("\n", f);
+}
+
+// Translates `program` (`program_len` bytes of Brainfuck) into NASM assembly
+// for Linux x86-64 and writes it to `f`. The tape is `data_len` bytes in .bss;
+// the data pointer is kept in `bl`, so it wraps modulo 256.
+//
+// Returns 0 on success and -1 when the brackets are unbalanced or nested deeper
+// than BF_NASM_MAX_LOOP_DEPTH.
+static int bf_nasm_write_program(FILE *f, uint32_t data_len, const char *program, size_t program_len)
+{
+    // Indices of the loops that are currently open, innermost last.
+    uint32_t loop_stack[BF_NASM_MAX_LOOP_DEPTH] = { 0 };
+    uint32_t loop_stack_len = 0;
+    uint32_t next_loop_index = 0;
+
+    fwrite_str_line("section .bss", f);
+    fprintf(f, "data resb %" PRIu32 "\n", data_len);
+
+    fwrite_str_line("section .text", f);
+    fwrite_str_line("global _start", f);
+    fwrite_str_line("_start:", f);
+
+    // Clear all of rbx, not only bl: the upper bits take part in every
+    // `[data + rbx]` address and are not guaranteed to be zero on entry.
+    fwrite_str_line("xor ebx, ebx", f);
+
+    for (size_t i = 0; i < program_len; i++) {
+        char inst = program[i];
+
+        if (inst == '>') {
+            fwrite_str_line("; >", f);
+            fwrite_str_line("inc bl", f);
+        } else if (inst == '<') {
+            fwrite_str_line("; <", f);
+            fwrite_str_line("dec bl", f);
+        } else if (inst == '+') {
+            fwrite_str_line("; +", f);
+            fwrite_str_line("inc byte [data + rbx]", f);
+        } else if (inst == '-') {
+            fwrite_str_line("; -", f);
+            fwrite_str_line("dec byte [data + rbx]", f);
+        } else if (inst == '.') {
+            fwrite_str_line("; .", f);
+            // sys_write
+            fwrite_str_line("mov rax, 1", f); // Syscall id
+            fwrite_str_line("mov rdi, 1", f); // fd
+            fwrite_str_line("lea rsi, [data + rbx]", f); // buff
+            fwrite_str_line("mov rdx, 1", f); // count
+            fwrite_str_line("syscall", f);
+        } else if (inst == ',') {
+            fwrite_str_line("; ,", f);
+            // sys_read
+            fwrite_str_line("mov rax, 0", f); // Syscall id
+            fwrite_str_line("mov rdi, 0", f); // fd
+            fwrite_str_line("lea rsi, [data + rbx]", f); // buff
+            fwrite_str_line("mov rdx, 1", f); // count
+            fwrite_str_line("syscall", f);
+        } else if (inst == '[') {
+            if (loop_stack_len == BF_NASM_MAX_LOOP_DEPTH) {
+                fprintf(stderr, "ERROR: Loops are nested deeper than %d levels\n", BF_NASM_MAX_LOOP_DEPTH);
+                return -1;
+            }
+
+            uint32_t loop_index = next_loop_index++;
+            loop_stack[loop_stack_len++] = loop_index;
+
+            fwrite_str_line("; [", f);
+            fprintf(f, "S%" PRIu32 "_start:\n", loop_index);
+            fwrite_str_line("cmp byte [data + rbx], 0", f);
+            fprintf(f, "jz S%" PRIu32 "_end\n", loop_index);
+        } else if (inst == ']') {
+            if (loop_stack_len == 0) {
+                fprintf(stderr, "ERROR: Unmatched ']' at offset %zu\n", i);
+                return -1;
+            }
+
+            uint32_t loop_index = loop_stack[--loop_stack_len];
+
+            fwrite_str_line("; ]", f);
+            fprintf(f, "jmp S%" PRIu32 "_start\n", loop_index);
+            fprintf(f, "S%" PRIu32 "_end:\n", loop_index);
+        }
+    }
+
+    if (loop_stack_len != 0) {
+        fprintf(stderr, "ERROR: Unmatched '['\n");
+        return -1;
+    }
+
+    fwrite_str_line("; exit(0)", f);
+    fwrite_str_line("mov rax, 60", f);
+    fwrite_str_line("mov rdi, 0", f);
+    fwrite_str_line("syscall", f);
+
+    return 0;
+}
+
+// Compiles `program` into a Linux x86-64 executable at `output_filename`:
+// writes NASM assembly to "tmp.asm", assembles it to "tmp.o" with `nasm` and
+// links it with `ld`. Both tools are looked up through the shell.
+//
+// Only 8-bit cells and a tape of at least 256 cells are supported.
+//
+// Returns 0 on success and -1 on any failure (unsupported configuration,
+// invalid brackets, I/O error, or a failing nasm/ld invocation).
+static int bf_compiler_compile_nasm(struct bf_compiler *compiler, const char *output_filename, const char *program, size_t program_len)
+{
+    int rc = -1;
+    FILE *f = NULL;
+    char ld_command[256] = { 0 };
+    int ld_command_len = 0;
+    int write_failed = 0;
+
+    if (compiler->cell_size != 8) {
+        return -1;
+    }
+
+    // `inc bl`/`dec bl` can reach every offset in 0..255, so a shorter tape
+    // would let the generated program access memory outside of `data`.
+    if (compiler->data_len < 256) {
+        return -1;
+    }
+
+    // TODO: Write to temp directory
+    f = fopen("tmp.asm", "w");
+    if (!f) {
+        goto err;
+    }
+
+    if (bf_nasm_write_program(f, compiler->data_len, program, program_len)) {
+        goto err;
+    }
+
+    // fclose() flushes buffered output, so a write error can surface there too.
+    write_failed = ferror(f);
+    if (fclose(f) != 0) {
+        write_failed = 1;
+    }
+    f = NULL;
+    if (write_failed) {
+        goto err;
+    }
+
+    // TODO: Use exec instead of system
+    // TODO: Don't hardcode the platform
+    if (system("nasm -felf64 tmp.asm -o tmp.o") != 0) {
+        goto err;
+    }
+
+    ld_command_len = snprintf(ld_command, sizeof(ld_command), "ld tmp.o -o %s", output_filename);
+    if (ld_command_len < 0 || (size_t)ld_command_len >= sizeof(ld_command)) {
+        // The path does not fit; do not run a truncated command line.
+        goto err;
+    }
+    if (system(ld_command) != 0) {
+        goto err;
+    }
+
+    rc = 0;
+err:
+    if (f) fclose(f);
+    return rc;
+}
diff --git a/src/main.c b/src/main.c
index ed9de21..3644ddf 100644
--- a/src/main.c
+++ b/src/main.c
@@ -50,6 +50,12 @@ err:
     return result;
 }
 
+// Prints the command-line synopsis to stdout.
+void show_usage(void) {
+    printf("Usage: brainfuck run <file>\n");
+    printf("       brainfuck compile [--nasm] [--tinycc] <input> <output>\n");
+}
+
 int run_command(int argc, const char **argv) {
     const char *filename = argv[0];
 
@@ -71,15 +77,43 @@ int run_command(int argc, const char **argv) {
 }
 
+// Handles `brainfuck compile [--nasm] [--tinycc] <input> <output>`.
+// `argc`/`argv` cover only the arguments that follow "compile".
+// Returns 0 on success and -1 when the arguments are invalid.
 int compile_command(int argc, const char **argv) {
-    const char *input_filename = argv[0];
-    const char *output_filename = argv[1];
+    enum bf_compiler_backend backend = BF_COMPILER_TINYCC;
+    uint32_t cell_size = 8; // TODO: Make this configurable using CLI
+    uint32_t data_len = 256; // TODO: Make this configurable using CLI
+
+    // Leading "--" arguments select the backend; the first argument without
+    // that prefix starts the positional <input> <output> pair.
+    int first_positional = 0;
+    while (first_positional < argc && !strncmp(argv[first_positional], "--", 2)) {
+        const char *option = argv[first_positional++];
+
+        if (!strcmp(option, "--nasm")) {
+            backend = BF_COMPILER_NASM;
+        } else if (!strcmp(option, "--tinycc")) {
+            backend = BF_COMPILER_TINYCC;
+        } else {
+            printf("Invalid argument: \"%s\"\n", option);
+            return -1;
+        }
+    }
+
+    if (argc - first_positional != 2) {
+        show_usage();
+        return -1;
+    }
+
+    const char *input_filename = argv[first_positional];
+    const char *output_filename = argv[first_positional + 1];
 
     size_t program_len = 0;
     char *program = read_file(input_filename, &program_len);
     assert(program != NULL);
 
     struct bf_compiler compiler = { 0 };
-    bf_compiler_init(&compiler, 16, 4096);
+    bf_compiler_init(&compiler, backend, cell_size, data_len);
 
     if (bf_compiler_compile(&compiler, output_filename, program, program_len)) {
         printf("ERROR: Failed to compile program\n");
@@ -89,11 +123,6 @@ int compile_command(int argc, const char **argv) {
     return 0;
 }
 
-void show_usage() {
-    printf("Usage: brainfuck run \n");
-    printf(" brainfuck compile \n");
-}
-
 int main(int argc, const char **argv) {
     if (argc >= 3 && !strncmp(argv[1], "run", sizeof("run"))) {
         return run_command(argc - 2, argv + 2);