From a4a37b5a275606686e292fba2a592133f5575df5 Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Mon, 10 Feb 2020 16:31:08 -0500 Subject: [PATCH] Add Disassemble, fix bug in position calculation * Disassemble structure can be used for dumping an object section * Assembler position calculation was messing up, causing jump addresses to be wrong. This is fixed. Signed-off-by: Alek Ratzloff --- src/main.rs | 26 +++- src/vm/disassemble.rs | 294 +++++++++++++++++++++++++++++++++++++ src/vm/mem.rs | 1 + src/vm/mod.rs | 1 + src/vm/obj/assemble/mod.rs | 22 +-- src/vm/reg.rs | 12 +- src/vm/tick.rs | 5 +- src/vm/visit.rs | 107 +++++++------- 8 files changed, 397 insertions(+), 71 deletions(-) create mode 100644 src/vm/disassemble.rs diff --git a/src/main.rs b/src/main.rs index 3d8e21a..c7230c8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,6 @@ #![allow(dead_code)] mod common; -//mod syn; mod vm; use std::{convert::TryFrom, env, fs, io, process}; @@ -32,10 +31,31 @@ fn main() -> Result<()> { } }; let obj = vm::obj::obj::Object::try_from(&ast)?; - //println!("{:#?}", obj); let mut vm = vm::vm::Vm::new(); vm.load_object(obj, 1024 * 1024 * 64)?; // 64mb let status = vm.run()?; - println!("status: {}", status); + println!("{}", status); Ok(()) } + + + /* +fn dump(obj: &Object) { + use vm::obj::obj::Section; + use vm::visit::VisitInst; + let mut stdout = io::stdout(); + for section in &obj.sections { + let mut disasm = match section { + Section::Code { start, contents, .. } + | Section::Data { start, contents, .. } => { + vm::disassemble::Disassemble::new(&mut stdout, contents, *start) + } + Section::Meta { .. } => continue, + }; + while !disasm.is_done() { + disasm.visit_inst()?; + } + println!(); + } +} + */ diff --git a/src/vm/disassemble.rs b/src/vm/disassemble.rs new file mode 100644 index 0000000..942e583 --- /dev/null +++ b/src/vm/disassemble.rs @@ -0,0 +1,294 @@ +use crate::vm::{ + error::*, + inst::*, + mem::MemCursor, + reg::*, + visit::*, + vm::{Addr, HalfWord, Word}, +}; +use std::io::Write; + +const WIDTH: usize = 60; + +pub struct Disassemble<'w, 'o> { + writer: &'w mut dyn Write, + cursor: MemCursor<'o>, + addr_offset: Addr, +} + +impl<'w, 'o> Disassemble<'w, 'o> { + pub fn new(writer: &'w mut dyn Write, content: &'o [u8], addr_offset: Addr) -> Self { + Disassemble { + writer, + cursor: MemCursor::new(content), + addr_offset, + } + } + + pub fn is_done(&self) -> bool { + self.cursor.position() >= (self.cursor.get_ref().len() as u64) + } + + fn adv(&mut self) -> Result<()> { + // note the () - this explicitly clones the cursor + let op = self.cursor().next_u16()?; + let next = self.cursor.position() + (inst_len(op) as u64); + self.cursor.set_position(next); + Ok(()) + } + + fn write_addr(&mut self, addr: Addr) { + write!(self.writer, "{:06x} | ", self.addr_offset + addr).unwrap(); + } + + fn write_bytes(&mut self, bytes: &[u8]) { + for b in bytes { + write!(self.writer, "{:02x} ", b).unwrap(); + } + } + + fn write_inst_bytes(&mut self, op: InstOp) { + let len = inst_len(op); + let start = self.cursor.position() as usize; + let end = start + len; + let bytes = &self.cursor.get_ref()[start..end]; + self.write_bytes(bytes); + } + + fn write_r1_r2_inst(&mut self, addr: Addr, op: InstOp, r1: Reg, r2: Reg) { + let len = inst_len(op); + let line_width = 6 + 3 + (3 * len); + let line_offset = WIDTH - line_width; + self.write_addr(addr); + self.write_inst_bytes(op); + + let iname = inst_name(op).unwrap(); + let r1name = reg_name(r1).unwrap().to_lowercase(); + let r2name = reg_name(r2).unwrap().to_lowercase(); + + writeln!( + self.writer, + "{}| {:>10} %{} %{}", + " ".repeat(line_offset), + iname, + r1name, + r2name + ) + .unwrap(); + } + + fn write_r1_inst(&mut self, addr: Addr, op: InstOp, r1: Reg) { + let line_width = 6 + 3 + (3 * inst_len(op)); + let line_offset = WIDTH - line_width; + self.write_addr(addr); + self.write_inst_bytes(op); + + let iname = inst_name(op).unwrap(); + let r1name = reg_name(r1).unwrap().to_lowercase(); + + writeln!( + self.writer, + "{}| {:>10} %{}", + " ".repeat(line_offset), + iname, + r1name, + ) + .unwrap(); + } + + fn write_r1_imm_inst(&mut self, addr: Addr, op: InstOp, r1: Reg, imm: Word) { + let line_width = 6 + 3 + (3 * inst_len(op)); + let line_offset = WIDTH - line_width; + self.write_addr(addr); + self.write_inst_bytes(op); + + let iname = inst_name(op).unwrap(); + let r1name = reg_name(r1).unwrap().to_lowercase(); + + writeln!( + self.writer, + "{}| {:>10} %{} {:#X}", + " ".repeat(line_offset), + iname, + r1name, + imm, + ) + .unwrap(); + } +} + +impl VisitInst for Disassemble<'_, '_> { + type Out = (); + + fn cursor(&self) -> MemCursor { + self.cursor.clone() + } + + fn add(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), ADD, r1, r2); + self.adv()?; + Ok(()) + } + + fn mul(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), MUL, r1, r2); + self.adv()?; + Ok(()) + } + + fn div(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), DIV, r1, r2); + self.adv()?; + Ok(()) + } + + fn mod_(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), MOD, r1, r2); + self.adv()?; + Ok(()) + } + + fn ineg(&mut self, r1: Reg) -> Result { + self.write_r1_inst(self.cursor.position(), INEG, r1); + self.adv()?; + Ok(()) + } + + fn and(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), AND, r1, r2); + self.adv()?; + Ok(()) + } + + fn or(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), OR, r1, r2); + self.adv()?; + Ok(()) + } + + fn inv(&mut self, r1: Reg) -> Result { + self.write_r1_inst(self.cursor.position(), INV, r1); + self.adv()?; + Ok(()) + } + + fn not(&mut self, r1: Reg) -> Result { + self.write_r1_inst(self.cursor.position(), NOT, r1); + self.adv()?; + Ok(()) + } + + fn xor(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), XOR, r1, r2); + self.adv()?; + Ok(()) + } + + fn shl(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), SHL, r1, r2); + self.adv()?; + Ok(()) + } + + fn shr(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), SHR, r1, r2); + self.adv()?; + Ok(()) + } + + fn cmpeq(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), CMPEQ, r1, r2); + self.adv()?; + Ok(()) + } + + fn cmplt(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), CMPLT, r1, r2); + self.adv()?; + Ok(()) + } + + fn jmp(&mut self, r1: Reg) -> Result { + self.write_r1_inst(self.cursor.position(), JMP, r1); + self.adv()?; + Ok(()) + } + + fn jz(&mut self, r1: Reg) -> Result { + self.write_r1_inst(self.cursor.position(), JZ, r1); + self.adv()?; + Ok(()) + } + + fn jnz(&mut self, r1: Reg) -> Result { + self.write_r1_inst(self.cursor.position(), JNZ, r1); + self.adv()?; + Ok(()) + } + + fn load(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), LOAD, r1, r2); + self.adv()?; + Ok(()) + } + + fn regcopy(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), REGCOPY, r1, r2); + self.adv()?; + Ok(()) + } + + fn storeimm64(&mut self, r1: Reg, w1: Word) -> Result { + self.write_r1_imm_inst(self.cursor.position(), STOREIMM64, r1, w1); + self.adv()?; + Ok(()) + } + + fn storeimm32(&mut self, r1: Reg, w1: HalfWord) -> Result { + self.write_r1_imm_inst(self.cursor.position(), STOREIMM32, r1, w1 as u64); + self.adv()?; + Ok(()) + } + + fn memcopy(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), MEMCOPY, r1, r2); + self.adv()?; + Ok(()) + } + + fn store(&mut self, r1: Reg, r2: Reg) -> Result { + self.write_r1_r2_inst(self.cursor.position(), STORE, r1, r2); + self.adv()?; + Ok(()) + } + + fn halt(&mut self) -> Result { + let line_width = 6 + 3 + 3 + 3; + let line_offset = WIDTH - line_width; + self.write_addr(self.cursor.position()); + self.write_inst_bytes(HALT); + writeln!( + self.writer, + "{}| {:>10}", + " ".repeat(line_offset), + "HALT", + ).unwrap(); + self.adv()?; + Ok(()) + } + + fn nop(&mut self) -> Result { + let line_width = 6 + 3 + 3 + 3; + let line_offset = WIDTH - line_width; + self.write_addr(self.cursor.position()); + self.write_inst_bytes(NOP); + writeln!( + self.writer, + "{}| {:>10}", + " ".repeat(line_offset), + "NOP", + ).unwrap(); + self.adv()?; + Ok(()) + } +} diff --git a/src/vm/mem.rs b/src/vm/mem.rs index 69d8cd2..5f3c0ab 100644 --- a/src/vm/mem.rs +++ b/src/vm/mem.rs @@ -8,6 +8,7 @@ use std::{ const R1_MASK: u16 = 0b1111_1100_0000_0000; const R2_MASK: u16 = 0b0000_0011_1111_0000; +#[derive(Debug, Clone)] pub struct MemCursor<'mem> { cursor: Cursor<&'mem [u8]>, } diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 1f5f3e8..80ef693 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -1,3 +1,4 @@ +pub mod disassemble; pub mod error; pub mod flags; pub mod inst; diff --git a/src/vm/obj/assemble/mod.rs b/src/vm/obj/assemble/mod.rs index 6425827..9532c45 100644 --- a/src/vm/obj/assemble/mod.rs +++ b/src/vm/obj/assemble/mod.rs @@ -60,7 +60,6 @@ impl<'a> Assemble<'a> { for block in self.ast.iter() { let locals = Self::gather_symbols(block, false)?; self.symbols.replace_locals(locals); - match block { SectionBlock::Data { org, body } | SectionBlock::Code { org, body } => { let mut bytes = Vec::new(); @@ -191,19 +190,20 @@ impl<'a> Assemble<'a> { Inst::Jnz(r1) => builder.op(JNZ).r1(*r1), Inst::Load(r1, r2) => builder.op(LOAD).r1(*r1).r2(*r2), Inst::Store(r1, r2) => builder.op(STORE).r1(*r1).r2(*r2), - Inst::StoreImm(r1, imm) => { - let imm = match imm { - ImmValue::Number(num) => *num, - ImmValue::Label(name) => { - self.symbols.get(name).expect("TODO: value label not found") + Inst::StoreImm(r1, imm) => match imm { + ImmValue::Number(num) => { + if *num > (u32::max_value() as u64) { + builder.op(STOREIMM64).imm64(*num) + } else { + builder.op(STOREIMM32).imm32(*num as u32) } - }; - if imm <= (u32::max_value() as u64) { - builder.op(STOREIMM32).r1(*r1).imm32(imm as u32) - } else { - builder.op(STOREIMM64).r1(*r1).imm64(imm) + } + ImmValue::Label(name) => { + let imm = self.symbols.get(name).expect("TODO: value label not found"); + builder.op(STOREIMM64).imm64(imm) } } + .r1(*r1), Inst::MemCopy(r1, r2) => builder.op(MEMCOPY).r1(*r1).r2(*r2), Inst::RegCopy(r1, r2) => builder.op(REGCOPY).r1(*r1).r2(*r2), Inst::Nop => builder.op(NOP), diff --git a/src/vm/reg.rs b/src/vm/reg.rs index ebab219..17a45bd 100644 --- a/src/vm/reg.rs +++ b/src/vm/reg.rs @@ -7,6 +7,15 @@ macro_rules! registers { $( pub const $variant: Reg = $value; )* + + pub fn reg_name(reg: Reg) -> Option<&'static str> { + match reg { + $( + $value => Some(stringify!($variant)), + )* + _ => None, + } + } }; } @@ -91,5 +100,6 @@ registers! { R47 = 61, R48 = 62, R49 = 63, - LAST_REG = R49, } + +pub const LAST_REG: Reg = R49; diff --git a/src/vm/tick.rs b/src/vm/tick.rs index deb5054..ae3e8d7 100644 --- a/src/vm/tick.rs +++ b/src/vm/tick.rs @@ -1,9 +1,8 @@ use crate::vm::{error::*, flags::Flags, inst::*, reg::*, vm::*, visit::*, mem::MemCursor}; -use std::io::stdin; impl Vm { pub fn tick(&mut self) -> Result<()> { - let next_ip = visit_inst(self)?; + let next_ip = self.visit_inst()?; self.set_reg(IP, next_ip); Ok(()) } @@ -23,7 +22,7 @@ impl Vm { } } -impl InstAcceptor for Vm { +impl VisitInst for Vm { type Out = Addr; fn cursor(&self) -> MemCursor { diff --git a/src/vm/visit.rs b/src/vm/visit.rs index c0803bc..bc19725 100644 --- a/src/vm/visit.rs +++ b/src/vm/visit.rs @@ -6,7 +6,7 @@ use crate::vm::{ vm::{HalfWord, Word}, }; -pub trait InstAcceptor { +pub trait VisitInst { type Out; fn cursor(&self) -> MemCursor; @@ -35,61 +35,62 @@ pub trait InstAcceptor { fn store(&mut self, r1: Reg, r2: Reg) -> Result; fn halt(&mut self) -> Result; fn nop(&mut self) -> Result; -} -pub fn visit_inst(acceptor: &mut A) -> Result { - let mut cursor = acceptor.cursor(); - let op = cursor.next_u16()?; + fn visit_inst(&mut self) -> Result { + let mut cursor = self.cursor(); + //panic!("cursor pos: {}", cursor.position()); + let op = cursor.next_u16()?; - macro_rules! r1_r2_inst { - ($fun:ident) => {{ - let (r1, r2) = cursor.next_regs()?; - acceptor.$fun(r1, r2) - }}; - } - macro_rules! r1_inst { - ($fun:ident) => {{ - let r1 = cursor.next_reg()?; - acceptor.$fun(r1) - }}; - } - - match op { - ADD => r1_r2_inst!(add), - MUL => r1_r2_inst!(mul), - DIV => r1_r2_inst!(div), - MOD => r1_r2_inst!(mod_), - INEG => r1_inst!(ineg), - AND => r1_r2_inst!(and), - OR => r1_r2_inst!(or), - INV => r1_inst!(inv), - NOT => r1_inst!(not), - XOR => r1_r2_inst!(xor), - SHL => r1_r2_inst!(shl), - SHR => r1_r2_inst!(shr), - CMPEQ => r1_r2_inst!(cmpeq), - CMPLT => r1_r2_inst!(cmplt), - JMP => r1_inst!(jmp), - JZ => r1_inst!(jz), - JNZ => r1_inst!(jnz), - LOAD => r1_r2_inst!(load), - REGCOPY => r1_r2_inst!(regcopy), - STOREIMM64 => { - let r1 = cursor.next_reg()?; - // skip - cursor.next_u32()?; - let imm = cursor.next_u64()?; - acceptor.storeimm64(r1, imm) + macro_rules! r1_r2_inst { + ($fun:ident) => {{ + let (r1, r2) = cursor.next_regs()?; + self.$fun(r1, r2) + }}; } - STOREIMM32 => { - let r1 = cursor.next_reg()?; - let imm = cursor.next_u32()?; - acceptor.storeimm32(r1, imm) + macro_rules! r1_inst { + ($fun:ident) => {{ + let r1 = cursor.next_reg()?; + self.$fun(r1) + }}; + } + + match op { + ADD => r1_r2_inst!(add), + MUL => r1_r2_inst!(mul), + DIV => r1_r2_inst!(div), + MOD => r1_r2_inst!(mod_), + INEG => r1_inst!(ineg), + AND => r1_r2_inst!(and), + OR => r1_r2_inst!(or), + INV => r1_inst!(inv), + NOT => r1_inst!(not), + XOR => r1_r2_inst!(xor), + SHL => r1_r2_inst!(shl), + SHR => r1_r2_inst!(shr), + CMPEQ => r1_r2_inst!(cmpeq), + CMPLT => r1_r2_inst!(cmplt), + JMP => r1_inst!(jmp), + JZ => r1_inst!(jz), + JNZ => r1_inst!(jnz), + LOAD => r1_r2_inst!(load), + REGCOPY => r1_r2_inst!(regcopy), + STOREIMM64 => { + let r1 = cursor.next_reg()?; + // skip + cursor.next_u32()?; + let imm = cursor.next_u64()?; + self.storeimm64(r1, imm) + } + STOREIMM32 => { + let r1 = cursor.next_reg()?; + let imm = cursor.next_u32()?; + self.storeimm32(r1, imm) + } + MEMCOPY => r1_r2_inst!(memcopy), + STORE => r1_r2_inst!(store), + HALT => self.halt(), + NOP => self.nop(), + _ => Err(VmError::IllegalOp { op }), } - MEMCOPY => r1_r2_inst!(memcopy), - STORE => r1_r2_inst!(store), - HALT => acceptor.halt(), - NOP => acceptor.nop(), - _ => Err(VmError::IllegalOp { op }), } }