diff --git a/src/asm.c b/src/asm.c index b2c7387..0f194ed 100644 --- a/src/asm.c +++ b/src/asm.c @@ -4,19 +4,56 @@ #include #define u32 uint32_t +#define i32 int32_t #define u16 uint16_t -#define u8 uint8_t #define i16 int16_t +#define u8 uint8_t #define i8 int8_t +#define panic(...) fprintf(stderr, "ABORT(%s:%d): ", __FILE__, __LINE__); fprintf(stderr, __VA_ARGS__); abort() #define todo(...) fprintf(stderr, "TODO(%s:%d): ", __FILE__, __LINE__); fprintf(stderr, __VA_ARGS__); abort() #define dbg(...) printf("; "); printf(__VA_ARGS__); printf("\n") +#define ARRAY_LEN(arr) sizeof(arr) / sizeof(arr[0]) -const char *wide_reg_names[8] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" }; -const char *non_wide_reg_names[8] = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" }; +enum decode_error { + DECODE_OK, + DECODE_ERR_EOF, + DECODE_ERR_MISSING_BYTES +}; -// eac - Effective address calculation -const char *eac_names[8] = { +enum operation { + OP_MOVE, + __OP_COUNT +}; +const char *operation_str[__OP_COUNT] = { + "mov" +}; + +// Order and place of these `enum reg_value` enums is IMPORTANT! Don't rearrange! +enum reg_value { + REG_AL, REG_CL, REG_DL, REG_BL, REG_AH, REG_CH, REG_DH, REG_BH, + REG_AX, REG_CX, REG_DX, REG_BX, REG_SP, REG_BP, REG_SI, REG_DI, + __REG_COUNT +}; +const char *reg_value_str[__REG_COUNT] = { + "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", + "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" +}; + +// Order and place of these `enum mem_base` enums is IMPORTANT! Don't rearrange! +enum mem_base { + MEM_BASE_BX_SI, + MEM_BASE_BX_DI, + MEM_BASE_BP_SI, + MEM_BASE_BP_DI, + MEM_BASE_SI, + MEM_BASE_DI, + MEM_BASE_BP, + MEM_BASE_BX, + MEM_BASE_DIRECT_ADDRESS, + __MEM_BASE_COUNT +}; +const char *mem_base_str[8] = { "bx + si", "bx + di", "bp + si", @@ -27,132 +64,291 @@ const char *eac_names[8] = { "bx" }; -const char *lookup_reg_name(u8 idx, bool wide) { - assert(0 <= idx && idx <= 7); - if (wide) { - return wide_reg_names[idx]; +struct mem_value { + enum mem_base base; + i16 disp; + // IMPORTANT! Keep in mind that `disp` should be interpreted as `u16`, if `base == MEM_BASE_DIRECT_ADDRESS` +}; + +struct reg_or_mem_value { + bool is_reg; + union { + enum reg_value reg; + struct mem_value mem; + }; +}; + +enum src_value_variant { + SRC_VALUE_REG, + SRC_VALUE_MEM, + SRC_VALUE_IMMIDIATE8, + SRC_VALUE_IMMIDIATE16 +}; + +struct src_value { + enum src_value_variant variant; + union { + enum reg_value reg; + struct mem_value mem; + u16 immidiate; + }; +}; + +struct instruction { + enum operation op; + struct reg_or_mem_value dest; + struct src_value src; +}; + +static const char *reg_to_str(enum reg_value reg) { + assert(0 <= reg && reg <= __REG_COUNT); + return reg_value_str[reg]; +} + +static void mem_to_str(char *buff, size_t max_size, struct mem_value *mem) { + assert(0 <= mem->base && mem->base <= __MEM_BASE_COUNT); + if (mem->base == MEM_BASE_DIRECT_ADDRESS) { + snprintf(buff, max_size, "[%d]", (u16)mem->disp); + } else if (mem->disp > 0) { + snprintf(buff, max_size, "[%s + %d]", mem_base_str[mem->base], mem->disp); + } else if (mem->disp < 0) { + snprintf(buff, max_size, "[%s - %d]", mem_base_str[mem->base], -mem->disp); } else { - return non_wide_reg_names[idx]; + snprintf(buff, max_size, "[%s]", mem_base_str[mem->base]); } } -void snprintf_mod_with_disp(char *rm_name, size_t size, u8 rm, i16 displacement) { - if (displacement < 0) { - snprintf(rm_name, size, "[%s - %d]", eac_names[rm], -displacement); +static void reg_or_mem_to_str(char *buff, size_t max_size, struct reg_or_mem_value *value) { + if (value->is_reg) { + strncpy(buff, reg_to_str(value->reg), max_size); } else { - snprintf(rm_name, size, "[%s + %d]", eac_names[rm], displacement); + mem_to_str(buff, max_size, &value->mem); } } -void get_rm_name(char *rm_name, size_t size, FILE *src, u8 rm, u8 mod, bool wide) { - if (mod == 0b11000000) { // Mod = 0b11 - strcpy(rm_name, lookup_reg_name(rm, wide)); - } else if (mod == 0b10000000) { // Mod = 0b10 - i16 displacement = fgetc(src) | (fgetc(src) << 8); - snprintf_mod_with_disp(rm_name, size, rm, displacement); - } else if (mod == 0b01000000) { // Mod = 0b01 - i16 displacement = fgetc(src); - if (displacement & 0b10000000) { - displacement |= (0b11111111 << 8); - } - snprintf_mod_with_disp(rm_name, size, rm, displacement); - } else if (mod == 0b00000000) { // Mod = 0b00 - if (rm == 0b110) { // Direct address - u16 displacement = fgetc(src) | (fgetc(src) << 8); - snprintf(rm_name, size, "[%d]", displacement); +static void src_to_str(char *buff, size_t max_size, struct src_value *value) { + switch (value->variant) + { + case SRC_VALUE_REG: + strncpy(buff, reg_to_str(value->reg), max_size); + break; + case SRC_VALUE_MEM: + mem_to_str(buff, max_size, &value->mem); + break; + case SRC_VALUE_IMMIDIATE16: + snprintf(buff, max_size, "%d", value->immidiate); + break; + case SRC_VALUE_IMMIDIATE8: + snprintf(buff, max_size, "%d", (u8)value->immidiate); + break; + } +} + +static const char *operation_to_str(enum operation op) { + assert(0 <= op && op <= __OP_COUNT); + return operation_str[op]; +} + +static void instruction_to_str(char *buff, size_t max_size, struct instruction *inst) { + switch (inst->op) + { + case OP_MOVE: { + char dest[24]; + char src[24]; + reg_or_mem_to_str(dest, sizeof(dest), &inst->dest); + src_to_str(src, sizeof(src), &inst->src); + + bool is_dest_mem = !inst->dest.is_reg; + if (is_dest_mem && inst->src.variant == SRC_VALUE_IMMIDIATE16) { + snprintf(buff, max_size, "mov %s, word %s", dest, src); + } else if (is_dest_mem && inst->src.variant == SRC_VALUE_IMMIDIATE8) { + snprintf(buff, max_size, "mov %s, byte %s", dest, src); } else { - snprintf(rm_name, size, "[%s]", eac_names[rm]); + snprintf(buff, max_size, "mov %s, %s", dest, src); } + break; + } + default: + panic("Invalid instruction opcode %d\n", inst->op); } } +static i16 extend_sign_bit(i8 number) { + if (number & 0b10000000) { + return number | (0b11111111 << 8); + } else { + return number; + } +} + +// This function assumes that the `enum reg_value` values are in a convenient order, for conversion. +// Look at "Table 4-9. REG (Register) Field Encoding" for more details +static enum reg_value decode_reg(u8 reg, bool wide) { + return reg + (u8)(wide) * 8; +} + +// This function assumes that the `enum mem_base` values are in a convenient order, for conversion. +// Look at "Table 4-10. R/M (Register/Memory) Field Encoding" for more details +static enum mem_base decode_mem_base(u8 rm) { + return rm; +} + +// Table 4-10. R/M (Register/Memory) Field Encoding +static void decode_reg_or_mem(struct reg_or_mem_value *value, FILE *src, u8 rm, u8 mod, bool wide) { + if (mod == 0b11000000) { // Mod = 0b11, register + value->is_reg = true; + value->reg = decode_reg(rm, wide); + } else if (mod == 0b10000000) { // Mod = 0b10, memory with i16 displacement + i16 displacement = fgetc(src) | (fgetc(src) << 8); + value->is_reg = false; + value->mem.base = decode_mem_base(rm); + value->mem.disp = displacement; + } else if (mod == 0b01000000) { // Mod = 0b01, memory with i8 displacement + i8 displacement = fgetc(src); + value->is_reg = false; + value->mem.base = decode_mem_base(rm); + value->mem.disp = extend_sign_bit(displacement); + } else if (mod == 0b00000000) { // Mod = 0b00, memory no displacement (most of the time) + value->is_reg = false; + if (rm == 0b110) { // Direct address + u16 address = fgetc(src) | (fgetc(src) << 8); + value->mem.base = MEM_BASE_DIRECT_ADDRESS; + value->mem.disp = address; + } else { + value->mem.base = decode_mem_base(rm); + value->mem.disp = 0; + } + } else { + panic("unknown 'mod' value: %d\n", mod); + } +} + +static void deocde_reg_or_mem_to_src(struct src_value *value, FILE *src, u8 rm, u8 mod, bool wide) { + struct reg_or_mem_value reg_or_mem; + decode_reg_or_mem(®_or_mem, src, rm, mod, wide); + if (reg_or_mem.is_reg) { + value->variant = SRC_VALUE_REG; + value->reg = reg_or_mem.reg; + } else { + value->variant = SRC_VALUE_MEM; + value->mem = reg_or_mem.mem; + } +} + +// TODO: add handling for 'DECODE_ERR_MISSING_BYTES' // Handy reference: Table 4-12. 8086 Instruction Encoding +enum decode_error decode_instruction(FILE *src, struct instruction *output) { + u8 byte1 = fgetc(src); + if (feof(src)) return DECODE_ERR_EOF; + + output->op = OP_MOVE; + + // Register memory to/from register + if ((byte1 & 0b11111100) == 0b10001000) { + u8 byte2 = fgetc(src); + bool wide = byte1 & 0b1; + bool direction = (byte1 & 0b10) >> 1; + + u8 mod = byte2 & 0b11000000; + u8 reg = (byte2 & 0b00111000) >> 3; + u8 rm = byte2 & 0b00000111; + + if (direction) { + output->dest.is_reg = true; + output->dest.reg = decode_reg(reg, wide); + deocde_reg_or_mem_to_src(&output->src, src, rm, mod, wide); + } else { + output->src.variant = SRC_VALUE_REG; + output->src.reg = decode_reg(reg, wide); + decode_reg_or_mem(&output->dest, src, rm, mod, wide); + } + + // Immediate to register + } else if ((byte1 & 0b11110000) == 0b10110000) { + bool wide = (byte1 & 0b1000) >> 3; + u8 reg = byte1 & 0b111; + + output->dest.is_reg = true; + output->dest.reg = decode_reg(reg, wide); + + if (wide) { + output->src.variant = SRC_VALUE_IMMIDIATE16; + output->src.immidiate = fgetc(src) | (fgetc(src) << 8); + } else { + output->src.variant = SRC_VALUE_IMMIDIATE8; + output->src.immidiate = fgetc(src); + } + + // Immediate to register/memory + } else if ((byte1 & 0b11111110) == 0b11000110) { + u8 byte2 = fgetc(src); + + bool wide = byte1 & 0b1; + u8 mod = byte2 & 0b11000000; + u8 rm = byte2 & 0b00000111; + + decode_reg_or_mem(&output->dest, src, rm, mod, wide); + + if (wide) { + output->src.variant = SRC_VALUE_IMMIDIATE16; + output->src.immidiate = fgetc(src) | (fgetc(src) << 8); + } else { + output->src.variant = SRC_VALUE_IMMIDIATE8; + output->src.immidiate = fgetc(src); + } + + // Memory to accumulator + } else if ((byte1 & 0b11111110) == 0b10100000) { + bool wide = byte1 & 0b1; + + output->dest.is_reg = true; + output->dest.reg = REG_AX; + output->src.variant = SRC_VALUE_MEM; + output->src.mem.base = MEM_BASE_DIRECT_ADDRESS; + + if (wide) { + output->src.mem.disp = fgetc(src) | (fgetc(src) << 8); + } else { + output->src.mem.disp = fgetc(src); + } + + // Accumulator to memory + } else if ((byte1 & 0b11111110) == 0b10100010) { + bool wide = byte1 & 0b1; + + output->src.variant = SRC_VALUE_REG; + output->src.reg = REG_AX; + output->dest.is_reg = false; + output->dest.mem.base = MEM_BASE_DIRECT_ADDRESS; + + if (wide) { + output->dest.mem.disp = fgetc(src) | (fgetc(src) << 8); + } else { + output->dest.mem.disp = fgetc(src); + } + + } else { + todo("unhandled byte %d", byte1); + } + + return DECODE_OK; +} + void dissassemble(FILE *src, FILE *dst) { fprintf(dst, "bits 16\n\n"); + + char line[256]; + struct instruction inst; while (true) { - u8 byte1 = fgetc(src); - if (feof(src)) break; - - // Register memory to/from register - if ((byte1 & 0b11111100) == 0b10001000) { - u8 byte2 = fgetc(src); - bool wide = byte1 & 0b1; - bool direction = (byte1 & 0b10) >> 1; - - u8 mod = byte2 & 0b11000000; - u8 reg = (byte2 & 0b00111000) >> 3; - u8 rm = byte2 & 0b00000111; - const char *reg_name = lookup_reg_name(reg, wide); - char rm_name[24] = { 0 }; - get_rm_name(rm_name, sizeof(rm_name), src, rm, mod, wide); - - if (direction) { - fprintf(dst, "mov %s, %s\n", reg_name, rm_name); - } else { - fprintf(dst, "mov %s, %s\n", rm_name, reg_name); - } - - // Immediate to register - } else if ((byte1 & 0b11110000) == 0b10110000) { - bool wide = (byte1 & 0b1000) >> 3; - u8 reg = byte1 & 0b111; - - u16 immediate; - if (wide) { - immediate = fgetc(src) | (fgetc(src) << 8); - } else { - immediate = fgetc(src); - } - - fprintf(dst, "mov %s, %d\n", lookup_reg_name(reg, wide), immediate); - - // Immediate to memory - } else if ((byte1 & 0b11111110) == 0b11000110) { - u8 byte2 = fgetc(src); - - bool wide = byte1 & 0b1; - u8 mod = byte2 & 0b11000000; - u8 rm = byte2 & 0b00000111; - char rm_name[24] = { 0 }; - get_rm_name(rm_name, sizeof(rm_name), src, rm, mod, wide); - - u16 immediate; - if (wide) { - immediate = fgetc(src) | (fgetc(src) << 8); - fprintf(dst, "mov %s, word %d\n", rm_name, immediate); - } else { - immediate = fgetc(src); - fprintf(dst, "mov %s, byte %d\n", rm_name, immediate); - } - - // Memory to accumulator - } else if ((byte1 & 0b11111110) == 0b10100000) { - bool wide = byte1 & 0b1; - - u16 immediate; - if (wide) { - immediate = fgetc(src) | (fgetc(src) << 8); - } else { - immediate = fgetc(src); - } - - fprintf(dst, "mov ax, [%d]\n", immediate); - - // Accumulator to memory - } else if ((byte1 & 0b11111110) == 0b10100010) { - bool wide = byte1 & 0b1; - - u16 immediate; - if (wide) { - immediate = fgetc(src) | (fgetc(src) << 8); - } else { - immediate = fgetc(src); - } - - fprintf(dst, "mov [%d], ax\n", immediate); - - } else { - todo("unhandled byte %d", byte1); + enum decode_error err = decode_instruction(src, &inst); + if (err == DECODE_ERR_EOF) break; + if (err != DECODE_OK) { + fprintf(stderr, "ERROR: Failed to decode instruction, code: %d\n", err); + break; } + + instruction_to_str(line, sizeof(line), &inst); + fprintf(dst, line); + fprintf(dst, "\n"); } } \ No newline at end of file