From 7c0024436d4828bafa0e005eb7bfee546ee1ef02 Mon Sep 17 00:00:00 2001 From: Rokas Puzonas Date: Sun, 7 May 2023 14:01:04 +0300 Subject: [PATCH] update decoder to decode from memory --- src/main.c | 37 ++++++++++++------- src/sim8086.h | 7 +++- src/sim8086_decoder.c | 84 ++++++++++++++++++++++++------------------- src/sim8086_memory.c | 36 +++++++++++++++++++ 4 files changed, 115 insertions(+), 49 deletions(-) create mode 100644 src/sim8086_memory.c diff --git a/src/main.c b/src/main.c index ddd659d..e81d83b 100644 --- a/src/main.c +++ b/src/main.c @@ -8,6 +8,7 @@ #include "sim8086.h" #include "sim8086.c" +#include "sim8086_memory.c" #include "sim8086_decoder.c" #include "sim8086_simulator.c" @@ -86,39 +87,51 @@ err: int dissassemble(FILE *src, FILE *dst) { fprintf(dst, "bits 16\n\n"); + struct memory mem = { .mem = { 0xFF } }; + int byte_count = load_mem_from_stream(&mem, src, 0); + if (byte_count == -1) { + fprintf(stderr, "ERROR: Failed to load file to memory\n"); + return -1; + } + char buff[256]; struct instruction inst; - int counter = 1; - while (true) { - enum decode_error err = decode_instruction(src, &inst); + u32 inst_address = 0; + while (inst_address < byte_count) { + enum decode_error err = decode_instruction(&mem, &inst_address, &inst); if (err == DECODE_ERR_EOF) break; if (err != DECODE_OK) { - fprintf(stderr, "ERROR: Failed to decode %d instruction: %s\n", counter, decode_error_to_str(err)); + fprintf(stderr, "ERROR: Failed to decode instruction at 0x%08x: %s\n", inst_address, decode_error_to_str(err)); return -1; } instruction_to_str(buff, sizeof(buff), &inst); fprintf(dst, buff); fprintf(dst, "\n"); - counter += 1; } return 0; } int simulate(FILE *src) { + struct memory mem; + int byte_count = load_mem_from_stream(&mem, src, 0); + if (byte_count == -1) { + fprintf(stderr, "ERROR: Failed to load file to memory\n"); + return -1; + } + struct cpu_state state = { 0 }; struct instruction inst; - int counter = 1; - while (true) { - enum decode_error err = decode_instruction(src, &inst); + u32 inst_address = 0; + while (inst_address < byte_count) { + enum decode_error err = decode_instruction(&mem, &inst_address, &inst); if (err == DECODE_ERR_EOF) break; if (err != DECODE_OK) { - fprintf(stderr, "ERROR: Failed to decode %d instruction: %s\n", counter, decode_error_to_str(err)); + fprintf(stderr, "ERROR: Failed to decode instruction at 0x%08x: %s\n", inst_address, decode_error_to_str(err)); return -1; } execute_instruction(&state, &inst); - counter += 1; } printf("Final registers:\n"); @@ -134,7 +147,7 @@ int simulate(FILE *src) { } void print_usage(const char *program) { - fprintf(stderr, "Usage: %s \n", program); + fprintf(stderr, "Usage: %s \n", program); } int test_decoder(const char *asm_file) { @@ -257,7 +270,7 @@ int main(int argc, char **argv) { } else if (strequal(argv[1], "dump")) { return dump_decompilation(argv[2]); - } else if (strequal(argv[1], "run")) { + } else if (strequal(argv[1], "sim")) { return run_simulation(argv[2]); } else { diff --git a/src/sim8086.h b/src/sim8086.h index b33c6b3..debb120 100644 --- a/src/sim8086.h +++ b/src/sim8086.h @@ -11,7 +11,8 @@ #define panic(...) fprintf(stderr, "PANIC(%s:%d): ", __FILE__, __LINE__); fprintf(stderr, __VA_ARGS__); abort() #define todo(...) fprintf(stderr, "TODO(%s:%d): ", __FILE__, __LINE__); fprintf(stderr, __VA_ARGS__); abort() -#define ARRAY_LEN(arr) sizeof(arr) / sizeof(arr[0]) +#define ARRAY_LEN(arr) (sizeof(arr) / sizeof(arr[0])) +#define MEMORY_SIZE (1024 * 1024) // 1 MiB enum operation { OP_MOV, @@ -101,6 +102,10 @@ struct instruction { i8 jmp_offset; }; +struct memory { + u8 mem[MEMORY_SIZE]; +}; + struct cpu_state { u16 ax; u16 bx; diff --git a/src/sim8086_decoder.c b/src/sim8086_decoder.c index d14b56c..05d3347 100644 --- a/src/sim8086_decoder.c +++ b/src/sim8086_decoder.c @@ -1,5 +1,3 @@ -#include - #define dbg(...) printf("; "); printf(__VA_ARGS__); printf("\n") // TODO: find a way to merge "to/from register" with "to/from accumulator" branches into a single code path @@ -73,24 +71,31 @@ static enum mem_base decode_mem_base(u8 rm) { } // Table 4-10. R/M (Register/Memory) Field Encoding -static void decode_reg_or_mem(struct reg_or_mem_value *value, FILE *src, u8 rm, u8 mod, bool wide) { +static void decode_reg_or_mem( + struct reg_or_mem_value *value, + struct memory *mem, + u32 *addr, + u8 rm, + u8 mod, + bool wide + ) { if (mod == 0b11) { // Mod = 0b11, register value->is_reg = true; value->reg = decode_reg(rm, wide); } else if (mod == 0b10) { // Mod = 0b10, memory with i16 displacement - i16 displacement = fgetc(src) | (fgetc(src) << 8); + i16 displacement = pull_byte_at(mem, addr) | (pull_byte_at(mem, addr) << 8); value->is_reg = false; value->mem.base = decode_mem_base(rm); value->mem.disp = displacement; } else if (mod == 0b01) { // Mod = 0b01, memory with i8 displacement - i8 displacement = fgetc(src); + i8 displacement = pull_byte_at(mem, addr); value->is_reg = false; value->mem.base = decode_mem_base(rm); value->mem.disp = extend_sign_bit(displacement); } else if (mod == 0b00) { // Mod = 0b00, memory no displacement (most of the time) value->is_reg = false; if (rm == 0b110) { // Direct address - u16 address = fgetc(src) | (fgetc(src) << 8); + u16 address = pull_byte_at(mem, addr) | (pull_byte_at(mem, addr) << 8); value->mem.base = MEM_BASE_DIRECT_ADDRESS; value->mem.disp = address; } else { @@ -102,9 +107,16 @@ static void decode_reg_or_mem(struct reg_or_mem_value *value, FILE *src, u8 rm, } } -static void deocde_reg_or_mem_to_src(struct src_value *value, FILE *src, u8 rm, u8 mod, bool wide) { +static void deocde_reg_or_mem_to_src( + struct src_value *value, + struct memory *mem, + u32 *addr, + u8 rm, + u8 mod, + bool wide + ) { struct reg_or_mem_value reg_or_mem; - decode_reg_or_mem(®_or_mem, src, rm, mod, wide); + decode_reg_or_mem(®_or_mem, mem, addr, rm, mod, wide); if (reg_or_mem.is_reg) { value->variant = SRC_VALUE_REG; value->reg = reg_or_mem.reg; @@ -117,13 +129,12 @@ static void deocde_reg_or_mem_to_src(struct src_value *value, FILE *src, u8 rm, // TODO: change to readinf from a byte buffer // TODO: add handling for 'DECODE_ERR_MISSING_BYTES' // Handy reference: Table 4-12. 8086 Instruction Encoding -enum decode_error decode_instruction(FILE *src, struct instruction *output) { - u8 byte1 = fgetc(src); - if (feof(src)) return DECODE_ERR_EOF; +enum decode_error decode_instruction(struct memory *mem, u32 *addr, struct instruction *output) { + u8 byte1 = pull_byte_at(mem, addr); // MOVE: Register memory to/from register if ((byte1 & 0b11111100) == 0b10001000) { - u8 byte2 = fgetc(src); + u8 byte2 = pull_byte_at(mem, addr); bool wide = byte1 & 0b1; bool direction = (byte1 & 0b10) >> 1; @@ -135,11 +146,11 @@ enum decode_error decode_instruction(FILE *src, struct instruction *output) { if (direction) { output->dest.is_reg = true; output->dest.reg = decode_reg(reg, wide); - deocde_reg_or_mem_to_src(&output->src, src, rm, mod, wide); + deocde_reg_or_mem_to_src(&output->src, mem, addr, rm, mod, wide); } else { output->src.variant = SRC_VALUE_REG; output->src.reg = decode_reg(reg, wide); - decode_reg_or_mem(&output->dest, src, rm, mod, wide); + decode_reg_or_mem(&output->dest, mem, addr, rm, mod, wide); } // MOVE: Immediate to register @@ -153,29 +164,30 @@ enum decode_error decode_instruction(FILE *src, struct instruction *output) { if (wide) { output->src.variant = SRC_VALUE_IMMEDIATE16; - output->src.immediate = fgetc(src) | (fgetc(src) << 8); + output->src.immediate = pull_byte_at(mem, addr) | (pull_byte_at(mem, addr) << 8); } else { output->src.variant = SRC_VALUE_IMMEDIATE8; - output->src.immediate = fgetc(src); + output->src.immediate = pull_byte_at(mem, addr); } + // MOVE: Immediate to register/memory } else if ((byte1 & 0b11111110) == 0b11000110) { - u8 byte2 = fgetc(src); + u8 byte2 = pull_byte_at(mem, addr); bool wide = byte1 & 0b1; u8 mod = (byte2 & 0b11000000) >> 6; u8 rm = byte2 & 0b00000111; output->op = OP_MOV; - decode_reg_or_mem(&output->dest, src, rm, mod, wide); + decode_reg_or_mem(&output->dest, mem, addr, rm, mod, wide); if (wide) { output->src.variant = SRC_VALUE_IMMEDIATE16; - output->src.immediate = fgetc(src) | (fgetc(src) << 8); + output->src.immediate = pull_byte_at(mem, addr) | (pull_byte_at(mem, addr) << 8); } else { output->src.variant = SRC_VALUE_IMMEDIATE8; - output->src.immediate = fgetc(src); + output->src.immediate = pull_byte_at(mem, addr); } // MOVE: Memory to accumulator @@ -188,9 +200,9 @@ enum decode_error decode_instruction(FILE *src, struct instruction *output) { bool wide = byte1 & 0b1; if (wide) { - output->src.mem.disp = fgetc(src) | (fgetc(src) << 8); + output->src.mem.disp = pull_byte_at(mem, addr) | (pull_byte_at(mem, addr) << 8); } else { - output->src.mem.disp = fgetc(src); + output->src.mem.disp = pull_byte_at(mem, addr); } // MOVE: Accumulator to memory @@ -204,9 +216,9 @@ enum decode_error decode_instruction(FILE *src, struct instruction *output) { output->dest.mem.base = MEM_BASE_DIRECT_ADDRESS; if (wide) { - output->dest.mem.disp = fgetc(src) | (fgetc(src) << 8); + output->dest.mem.disp = pull_byte_at(mem, addr) | (pull_byte_at(mem, addr) << 8); } else { - output->dest.mem.disp = fgetc(src); + output->dest.mem.disp = pull_byte_at(mem, addr); } // ADD/SUB/CMP: Reg/memory with register to either @@ -223,7 +235,7 @@ enum decode_error decode_instruction(FILE *src, struct instruction *output) { bool wide = byte1 & 0b01; bool direction = (byte1 & 0b10) >> 1; - u8 byte2 = fgetc(src); + u8 byte2 = pull_byte_at(mem, addr); u8 mod = (byte2 & 0b11000000) >> 6; u8 reg = (byte2 & 0b00111000) >> 3; u8 rm = byte2 & 0b00000111; @@ -231,16 +243,16 @@ enum decode_error decode_instruction(FILE *src, struct instruction *output) { if (direction) { output->dest.is_reg = true; output->dest.reg = decode_reg(reg, wide); - deocde_reg_or_mem_to_src(&output->src, src, rm, mod, wide); + deocde_reg_or_mem_to_src(&output->src, mem, addr, rm, mod, wide); } else { output->src.variant = SRC_VALUE_REG; output->src.reg = decode_reg(reg, wide); - decode_reg_or_mem(&output->dest, src, rm, mod, wide); + decode_reg_or_mem(&output->dest, mem, addr, rm, mod, wide); } // ADD/SUB/CMP: immediate with register/memory } else if ((byte1 & 0b11111100) == 0b10000000) { - u8 byte2 = fgetc(src); + u8 byte2 = pull_byte_at(mem, addr); u8 variant = (byte2 & 0b00111000) >> 3; if (variant == 0b000) { @@ -256,19 +268,19 @@ enum decode_error decode_instruction(FILE *src, struct instruction *output) { u8 mod = (byte2 & 0b11000000) >> 6; u8 rm = byte2 & 0b00000111; - decode_reg_or_mem(&output->dest, src, rm, mod, wide); + decode_reg_or_mem(&output->dest, mem, addr, rm, mod, wide); if (wide) { output->src.variant = SRC_VALUE_IMMEDIATE16; if (sign_extend) { - output->src.immediate = fgetc(src); + output->src.immediate = pull_byte_at(mem, addr); output->src.immediate = extend_sign_bit(output->src.immediate); } else { - output->src.immediate = fgetc(src) | (fgetc(src) << 8); + output->src.immediate = pull_byte_at(mem, addr) | (pull_byte_at(mem, addr) << 8); } } else { output->src.variant = SRC_VALUE_IMMEDIATE8; - output->src.immediate = fgetc(src); + output->src.immediate = pull_byte_at(mem, addr); } // ADD/SUB/CMP: immediate with accumulator @@ -289,22 +301,22 @@ enum decode_error decode_instruction(FILE *src, struct instruction *output) { if (wide) { output->src.variant = SRC_VALUE_IMMEDIATE16; - output->src.immediate = fgetc(src) | (fgetc(src) << 8); + output->src.immediate = pull_byte_at(mem, addr) | (pull_byte_at(mem, addr) << 8); } else { output->src.variant = SRC_VALUE_IMMEDIATE8; - output->src.immediate = fgetc(src); + output->src.immediate = pull_byte_at(mem, addr); } // Conditional jumps } else if ((byte1 & 0b11110000) == 0b01110000) { - i8 jmp_offset = fgetc(src); + i8 jmp_offset = pull_byte_at(mem, addr); u8 opcode = byte1 & 0b00001111; output->op = cond_jmp_lookup[opcode]; output->jmp_offset = jmp_offset; // Conditional loop jumps } else if ((byte1 & 0b11111100) == 0b11100000) { - i8 jmp_offset = fgetc(src); + i8 jmp_offset = pull_byte_at(mem, addr); u8 opcode = byte1 & 0b00000011; output->op = cond_loop_jmp_lookup[opcode]; output->jmp_offset = jmp_offset; diff --git a/src/sim8086_memory.c b/src/sim8086_memory.c new file mode 100644 index 0000000..be91079 --- /dev/null +++ b/src/sim8086_memory.c @@ -0,0 +1,36 @@ +#include + +// TODO: add error codes + +int load_mem_from_stream(struct memory *mem, FILE *stream, u32 start) { + u32 offset = 0; + while (true) { + u8 byte = fgetc(stream); + if (feof(stream)) break; + if (start + offset >= MEMORY_SIZE) return -1; + mem->mem[start + offset] = byte; + offset++; + } + return offset; +} + +int load_mem_from_file(struct memory *mem, const char *filename, u32 start) { + FILE *stream = fopen(filename, "rb"); + if (stream == NULL) { + return -1; + } + int byte_count = load_mem_from_stream(mem, stream, start); + fclose(stream); + return byte_count; +} + +// TODO: Make this error some kind of error, when reading past end +u8 read_byte_at(struct memory *mem, u32 address) { + return mem->mem[address % MEMORY_SIZE]; +} + +u8 pull_byte_at(struct memory *mem, u32 *address) { + u8 byte = read_byte_at(mem, *address); + (*address)++; + return byte; +} \ No newline at end of file