split decoding and printing instruction to file
This commit is contained in:
parent
44255973d2
commit
5cfffd9dc9
326
src/asm.c
326
src/asm.c
@ -4,19 +4,56 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#define u32 uint32_t
|
#define u32 uint32_t
|
||||||
|
#define i32 int32_t
|
||||||
#define u16 uint16_t
|
#define u16 uint16_t
|
||||||
#define u8 uint8_t
|
|
||||||
#define i16 int16_t
|
#define i16 int16_t
|
||||||
|
#define u8 uint8_t
|
||||||
#define i8 int8_t
|
#define i8 int8_t
|
||||||
|
|
||||||
|
#define panic(...) fprintf(stderr, "ABORT(%s:%d): ", __FILE__, __LINE__); fprintf(stderr, __VA_ARGS__); abort()
|
||||||
#define todo(...) fprintf(stderr, "TODO(%s:%d): ", __FILE__, __LINE__); fprintf(stderr, __VA_ARGS__); abort()
|
#define todo(...) fprintf(stderr, "TODO(%s:%d): ", __FILE__, __LINE__); fprintf(stderr, __VA_ARGS__); abort()
|
||||||
#define dbg(...) printf("; "); printf(__VA_ARGS__); printf("\n")
|
#define dbg(...) printf("; "); printf(__VA_ARGS__); printf("\n")
|
||||||
|
#define ARRAY_LEN(arr) sizeof(arr) / sizeof(arr[0])
|
||||||
|
|
||||||
const char *wide_reg_names[8] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
|
enum decode_error {
|
||||||
const char *non_wide_reg_names[8] = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
|
DECODE_OK,
|
||||||
|
DECODE_ERR_EOF,
|
||||||
|
DECODE_ERR_MISSING_BYTES
|
||||||
|
};
|
||||||
|
|
||||||
// eac - Effective address calculation
|
enum operation {
|
||||||
const char *eac_names[8] = {
|
OP_MOVE,
|
||||||
|
__OP_COUNT
|
||||||
|
};
|
||||||
|
const char *operation_str[__OP_COUNT] = {
|
||||||
|
"mov"
|
||||||
|
};
|
||||||
|
|
||||||
|
// Order and place of these `enum reg_value` enums is IMPORTANT! Don't rearrange!
|
||||||
|
enum reg_value {
|
||||||
|
REG_AL, REG_CL, REG_DL, REG_BL, REG_AH, REG_CH, REG_DH, REG_BH,
|
||||||
|
REG_AX, REG_CX, REG_DX, REG_BX, REG_SP, REG_BP, REG_SI, REG_DI,
|
||||||
|
__REG_COUNT
|
||||||
|
};
|
||||||
|
const char *reg_value_str[__REG_COUNT] = {
|
||||||
|
"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
|
||||||
|
"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"
|
||||||
|
};
|
||||||
|
|
||||||
|
// Order and place of these `enum mem_base` enums is IMPORTANT! Don't rearrange!
|
||||||
|
enum mem_base {
|
||||||
|
MEM_BASE_BX_SI,
|
||||||
|
MEM_BASE_BX_DI,
|
||||||
|
MEM_BASE_BP_SI,
|
||||||
|
MEM_BASE_BP_DI,
|
||||||
|
MEM_BASE_SI,
|
||||||
|
MEM_BASE_DI,
|
||||||
|
MEM_BASE_BP,
|
||||||
|
MEM_BASE_BX,
|
||||||
|
MEM_BASE_DIRECT_ADDRESS,
|
||||||
|
__MEM_BASE_COUNT
|
||||||
|
};
|
||||||
|
const char *mem_base_str[8] = {
|
||||||
"bx + si",
|
"bx + si",
|
||||||
"bx + di",
|
"bx + di",
|
||||||
"bp + si",
|
"bp + si",
|
||||||
@ -27,51 +64,184 @@ const char *eac_names[8] = {
|
|||||||
"bx"
|
"bx"
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *lookup_reg_name(u8 idx, bool wide) {
|
struct mem_value {
|
||||||
assert(0 <= idx && idx <= 7);
|
enum mem_base base;
|
||||||
if (wide) {
|
i16 disp;
|
||||||
return wide_reg_names[idx];
|
// IMPORTANT! Keep in mind that `disp` should be interpreted as `u16`, if `base == MEM_BASE_DIRECT_ADDRESS`
|
||||||
|
};
|
||||||
|
|
||||||
|
struct reg_or_mem_value {
|
||||||
|
bool is_reg;
|
||||||
|
union {
|
||||||
|
enum reg_value reg;
|
||||||
|
struct mem_value mem;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
enum src_value_variant {
|
||||||
|
SRC_VALUE_REG,
|
||||||
|
SRC_VALUE_MEM,
|
||||||
|
SRC_VALUE_IMMIDIATE8,
|
||||||
|
SRC_VALUE_IMMIDIATE16
|
||||||
|
};
|
||||||
|
|
||||||
|
struct src_value {
|
||||||
|
enum src_value_variant variant;
|
||||||
|
union {
|
||||||
|
enum reg_value reg;
|
||||||
|
struct mem_value mem;
|
||||||
|
u16 immidiate;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction {
|
||||||
|
enum operation op;
|
||||||
|
struct reg_or_mem_value dest;
|
||||||
|
struct src_value src;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char *reg_to_str(enum reg_value reg) {
|
||||||
|
assert(0 <= reg && reg <= __REG_COUNT);
|
||||||
|
return reg_value_str[reg];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mem_to_str(char *buff, size_t max_size, struct mem_value *mem) {
|
||||||
|
assert(0 <= mem->base && mem->base <= __MEM_BASE_COUNT);
|
||||||
|
if (mem->base == MEM_BASE_DIRECT_ADDRESS) {
|
||||||
|
snprintf(buff, max_size, "[%d]", (u16)mem->disp);
|
||||||
|
} else if (mem->disp > 0) {
|
||||||
|
snprintf(buff, max_size, "[%s + %d]", mem_base_str[mem->base], mem->disp);
|
||||||
|
} else if (mem->disp < 0) {
|
||||||
|
snprintf(buff, max_size, "[%s - %d]", mem_base_str[mem->base], -mem->disp);
|
||||||
} else {
|
} else {
|
||||||
return non_wide_reg_names[idx];
|
snprintf(buff, max_size, "[%s]", mem_base_str[mem->base]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void snprintf_mod_with_disp(char *rm_name, size_t size, u8 rm, i16 displacement) {
|
static void reg_or_mem_to_str(char *buff, size_t max_size, struct reg_or_mem_value *value) {
|
||||||
if (displacement < 0) {
|
if (value->is_reg) {
|
||||||
snprintf(rm_name, size, "[%s - %d]", eac_names[rm], -displacement);
|
strncpy(buff, reg_to_str(value->reg), max_size);
|
||||||
} else {
|
} else {
|
||||||
snprintf(rm_name, size, "[%s + %d]", eac_names[rm], displacement);
|
mem_to_str(buff, max_size, &value->mem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_rm_name(char *rm_name, size_t size, FILE *src, u8 rm, u8 mod, bool wide) {
|
static void src_to_str(char *buff, size_t max_size, struct src_value *value) {
|
||||||
if (mod == 0b11000000) { // Mod = 0b11
|
switch (value->variant)
|
||||||
strcpy(rm_name, lookup_reg_name(rm, wide));
|
{
|
||||||
} else if (mod == 0b10000000) { // Mod = 0b10
|
case SRC_VALUE_REG:
|
||||||
|
strncpy(buff, reg_to_str(value->reg), max_size);
|
||||||
|
break;
|
||||||
|
case SRC_VALUE_MEM:
|
||||||
|
mem_to_str(buff, max_size, &value->mem);
|
||||||
|
break;
|
||||||
|
case SRC_VALUE_IMMIDIATE16:
|
||||||
|
snprintf(buff, max_size, "%d", value->immidiate);
|
||||||
|
break;
|
||||||
|
case SRC_VALUE_IMMIDIATE8:
|
||||||
|
snprintf(buff, max_size, "%d", (u8)value->immidiate);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char *operation_to_str(enum operation op) {
|
||||||
|
assert(0 <= op && op <= __OP_COUNT);
|
||||||
|
return operation_str[op];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void instruction_to_str(char *buff, size_t max_size, struct instruction *inst) {
|
||||||
|
switch (inst->op)
|
||||||
|
{
|
||||||
|
case OP_MOVE: {
|
||||||
|
char dest[24];
|
||||||
|
char src[24];
|
||||||
|
reg_or_mem_to_str(dest, sizeof(dest), &inst->dest);
|
||||||
|
src_to_str(src, sizeof(src), &inst->src);
|
||||||
|
|
||||||
|
bool is_dest_mem = !inst->dest.is_reg;
|
||||||
|
if (is_dest_mem && inst->src.variant == SRC_VALUE_IMMIDIATE16) {
|
||||||
|
snprintf(buff, max_size, "mov %s, word %s", dest, src);
|
||||||
|
} else if (is_dest_mem && inst->src.variant == SRC_VALUE_IMMIDIATE8) {
|
||||||
|
snprintf(buff, max_size, "mov %s, byte %s", dest, src);
|
||||||
|
} else {
|
||||||
|
snprintf(buff, max_size, "mov %s, %s", dest, src);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
panic("Invalid instruction opcode %d\n", inst->op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static i16 extend_sign_bit(i8 number) {
|
||||||
|
if (number & 0b10000000) {
|
||||||
|
return number | (0b11111111 << 8);
|
||||||
|
} else {
|
||||||
|
return number;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function assumes that the `enum reg_value` values are in a convenient order, for conversion.
|
||||||
|
// Look at "Table 4-9. REG (Register) Field Encoding" for more details
|
||||||
|
static enum reg_value decode_reg(u8 reg, bool wide) {
|
||||||
|
return reg + (u8)(wide) * 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function assumes that the `enum mem_base` values are in a convenient order, for conversion.
|
||||||
|
// Look at "Table 4-10. R/M (Register/Memory) Field Encoding" for more details
|
||||||
|
static enum mem_base decode_mem_base(u8 rm) {
|
||||||
|
return rm;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Table 4-10. R/M (Register/Memory) Field Encoding
|
||||||
|
static void decode_reg_or_mem(struct reg_or_mem_value *value, FILE *src, u8 rm, u8 mod, bool wide) {
|
||||||
|
if (mod == 0b11000000) { // Mod = 0b11, register
|
||||||
|
value->is_reg = true;
|
||||||
|
value->reg = decode_reg(rm, wide);
|
||||||
|
} else if (mod == 0b10000000) { // Mod = 0b10, memory with i16 displacement
|
||||||
i16 displacement = fgetc(src) | (fgetc(src) << 8);
|
i16 displacement = fgetc(src) | (fgetc(src) << 8);
|
||||||
snprintf_mod_with_disp(rm_name, size, rm, displacement);
|
value->is_reg = false;
|
||||||
} else if (mod == 0b01000000) { // Mod = 0b01
|
value->mem.base = decode_mem_base(rm);
|
||||||
i16 displacement = fgetc(src);
|
value->mem.disp = displacement;
|
||||||
if (displacement & 0b10000000) {
|
} else if (mod == 0b01000000) { // Mod = 0b01, memory with i8 displacement
|
||||||
displacement |= (0b11111111 << 8);
|
i8 displacement = fgetc(src);
|
||||||
}
|
value->is_reg = false;
|
||||||
snprintf_mod_with_disp(rm_name, size, rm, displacement);
|
value->mem.base = decode_mem_base(rm);
|
||||||
} else if (mod == 0b00000000) { // Mod = 0b00
|
value->mem.disp = extend_sign_bit(displacement);
|
||||||
|
} else if (mod == 0b00000000) { // Mod = 0b00, memory no displacement (most of the time)
|
||||||
|
value->is_reg = false;
|
||||||
if (rm == 0b110) { // Direct address
|
if (rm == 0b110) { // Direct address
|
||||||
u16 displacement = fgetc(src) | (fgetc(src) << 8);
|
u16 address = fgetc(src) | (fgetc(src) << 8);
|
||||||
snprintf(rm_name, size, "[%d]", displacement);
|
value->mem.base = MEM_BASE_DIRECT_ADDRESS;
|
||||||
|
value->mem.disp = address;
|
||||||
} else {
|
} else {
|
||||||
snprintf(rm_name, size, "[%s]", eac_names[rm]);
|
value->mem.base = decode_mem_base(rm);
|
||||||
|
value->mem.disp = 0;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
panic("unknown 'mod' value: %d\n", mod);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void deocde_reg_or_mem_to_src(struct src_value *value, FILE *src, u8 rm, u8 mod, bool wide) {
|
||||||
|
struct reg_or_mem_value reg_or_mem;
|
||||||
|
decode_reg_or_mem(®_or_mem, src, rm, mod, wide);
|
||||||
|
if (reg_or_mem.is_reg) {
|
||||||
|
value->variant = SRC_VALUE_REG;
|
||||||
|
value->reg = reg_or_mem.reg;
|
||||||
|
} else {
|
||||||
|
value->variant = SRC_VALUE_MEM;
|
||||||
|
value->mem = reg_or_mem.mem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: add handling for 'DECODE_ERR_MISSING_BYTES'
|
||||||
// Handy reference: Table 4-12. 8086 Instruction Encoding
|
// Handy reference: Table 4-12. 8086 Instruction Encoding
|
||||||
void dissassemble(FILE *src, FILE *dst) {
|
enum decode_error decode_instruction(FILE *src, struct instruction *output) {
|
||||||
fprintf(dst, "bits 16\n\n");
|
|
||||||
while (true) {
|
|
||||||
u8 byte1 = fgetc(src);
|
u8 byte1 = fgetc(src);
|
||||||
if (feof(src)) break;
|
if (feof(src)) return DECODE_ERR_EOF;
|
||||||
|
|
||||||
|
output->op = OP_MOVE;
|
||||||
|
|
||||||
// Register memory to/from register
|
// Register memory to/from register
|
||||||
if ((byte1 & 0b11111100) == 0b10001000) {
|
if ((byte1 & 0b11111100) == 0b10001000) {
|
||||||
@ -82,14 +252,15 @@ void dissassemble(FILE *src, FILE *dst) {
|
|||||||
u8 mod = byte2 & 0b11000000;
|
u8 mod = byte2 & 0b11000000;
|
||||||
u8 reg = (byte2 & 0b00111000) >> 3;
|
u8 reg = (byte2 & 0b00111000) >> 3;
|
||||||
u8 rm = byte2 & 0b00000111;
|
u8 rm = byte2 & 0b00000111;
|
||||||
const char *reg_name = lookup_reg_name(reg, wide);
|
|
||||||
char rm_name[24] = { 0 };
|
|
||||||
get_rm_name(rm_name, sizeof(rm_name), src, rm, mod, wide);
|
|
||||||
|
|
||||||
if (direction) {
|
if (direction) {
|
||||||
fprintf(dst, "mov %s, %s\n", reg_name, rm_name);
|
output->dest.is_reg = true;
|
||||||
|
output->dest.reg = decode_reg(reg, wide);
|
||||||
|
deocde_reg_or_mem_to_src(&output->src, src, rm, mod, wide);
|
||||||
} else {
|
} else {
|
||||||
fprintf(dst, "mov %s, %s\n", rm_name, reg_name);
|
output->src.variant = SRC_VALUE_REG;
|
||||||
|
output->src.reg = decode_reg(reg, wide);
|
||||||
|
decode_reg_or_mem(&output->dest, src, rm, mod, wide);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Immediate to register
|
// Immediate to register
|
||||||
@ -97,62 +268,87 @@ void dissassemble(FILE *src, FILE *dst) {
|
|||||||
bool wide = (byte1 & 0b1000) >> 3;
|
bool wide = (byte1 & 0b1000) >> 3;
|
||||||
u8 reg = byte1 & 0b111;
|
u8 reg = byte1 & 0b111;
|
||||||
|
|
||||||
u16 immediate;
|
output->dest.is_reg = true;
|
||||||
|
output->dest.reg = decode_reg(reg, wide);
|
||||||
|
|
||||||
if (wide) {
|
if (wide) {
|
||||||
immediate = fgetc(src) | (fgetc(src) << 8);
|
output->src.variant = SRC_VALUE_IMMIDIATE16;
|
||||||
|
output->src.immidiate = fgetc(src) | (fgetc(src) << 8);
|
||||||
} else {
|
} else {
|
||||||
immediate = fgetc(src);
|
output->src.variant = SRC_VALUE_IMMIDIATE8;
|
||||||
|
output->src.immidiate = fgetc(src);
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(dst, "mov %s, %d\n", lookup_reg_name(reg, wide), immediate);
|
// Immediate to register/memory
|
||||||
|
|
||||||
// Immediate to memory
|
|
||||||
} else if ((byte1 & 0b11111110) == 0b11000110) {
|
} else if ((byte1 & 0b11111110) == 0b11000110) {
|
||||||
u8 byte2 = fgetc(src);
|
u8 byte2 = fgetc(src);
|
||||||
|
|
||||||
bool wide = byte1 & 0b1;
|
bool wide = byte1 & 0b1;
|
||||||
u8 mod = byte2 & 0b11000000;
|
u8 mod = byte2 & 0b11000000;
|
||||||
u8 rm = byte2 & 0b00000111;
|
u8 rm = byte2 & 0b00000111;
|
||||||
char rm_name[24] = { 0 };
|
|
||||||
get_rm_name(rm_name, sizeof(rm_name), src, rm, mod, wide);
|
|
||||||
|
|
||||||
u16 immediate;
|
decode_reg_or_mem(&output->dest, src, rm, mod, wide);
|
||||||
|
|
||||||
if (wide) {
|
if (wide) {
|
||||||
immediate = fgetc(src) | (fgetc(src) << 8);
|
output->src.variant = SRC_VALUE_IMMIDIATE16;
|
||||||
fprintf(dst, "mov %s, word %d\n", rm_name, immediate);
|
output->src.immidiate = fgetc(src) | (fgetc(src) << 8);
|
||||||
} else {
|
} else {
|
||||||
immediate = fgetc(src);
|
output->src.variant = SRC_VALUE_IMMIDIATE8;
|
||||||
fprintf(dst, "mov %s, byte %d\n", rm_name, immediate);
|
output->src.immidiate = fgetc(src);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Memory to accumulator
|
// Memory to accumulator
|
||||||
} else if ((byte1 & 0b11111110) == 0b10100000) {
|
} else if ((byte1 & 0b11111110) == 0b10100000) {
|
||||||
bool wide = byte1 & 0b1;
|
bool wide = byte1 & 0b1;
|
||||||
|
|
||||||
u16 immediate;
|
output->dest.is_reg = true;
|
||||||
if (wide) {
|
output->dest.reg = REG_AX;
|
||||||
immediate = fgetc(src) | (fgetc(src) << 8);
|
output->src.variant = SRC_VALUE_MEM;
|
||||||
} else {
|
output->src.mem.base = MEM_BASE_DIRECT_ADDRESS;
|
||||||
immediate = fgetc(src);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(dst, "mov ax, [%d]\n", immediate);
|
if (wide) {
|
||||||
|
output->src.mem.disp = fgetc(src) | (fgetc(src) << 8);
|
||||||
|
} else {
|
||||||
|
output->src.mem.disp = fgetc(src);
|
||||||
|
}
|
||||||
|
|
||||||
// Accumulator to memory
|
// Accumulator to memory
|
||||||
} else if ((byte1 & 0b11111110) == 0b10100010) {
|
} else if ((byte1 & 0b11111110) == 0b10100010) {
|
||||||
bool wide = byte1 & 0b1;
|
bool wide = byte1 & 0b1;
|
||||||
|
|
||||||
u16 immediate;
|
output->src.variant = SRC_VALUE_REG;
|
||||||
if (wide) {
|
output->src.reg = REG_AX;
|
||||||
immediate = fgetc(src) | (fgetc(src) << 8);
|
output->dest.is_reg = false;
|
||||||
} else {
|
output->dest.mem.base = MEM_BASE_DIRECT_ADDRESS;
|
||||||
immediate = fgetc(src);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(dst, "mov [%d], ax\n", immediate);
|
if (wide) {
|
||||||
|
output->dest.mem.disp = fgetc(src) | (fgetc(src) << 8);
|
||||||
|
} else {
|
||||||
|
output->dest.mem.disp = fgetc(src);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
todo("unhandled byte %d", byte1);
|
todo("unhandled byte %d", byte1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return DECODE_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
void dissassemble(FILE *src, FILE *dst) {
|
||||||
|
fprintf(dst, "bits 16\n\n");
|
||||||
|
|
||||||
|
char line[256];
|
||||||
|
struct instruction inst;
|
||||||
|
while (true) {
|
||||||
|
enum decode_error err = decode_instruction(src, &inst);
|
||||||
|
if (err == DECODE_ERR_EOF) break;
|
||||||
|
if (err != DECODE_OK) {
|
||||||
|
fprintf(stderr, "ERROR: Failed to decode instruction, code: %d\n", err);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
instruction_to_str(line, sizeof(line), &inst);
|
||||||
|
fprintf(dst, line);
|
||||||
|
fprintf(dst, "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user