diff --git a/src/libvm/Cargo.toml b/src/libvm/Cargo.toml index 5dfd8b1..8c0fadf 100644 --- a/src/libvm/Cargo.toml +++ b/src/libvm/Cargo.toml @@ -18,6 +18,7 @@ cfgrammar = "0.6" lrlex = "0.6" lrpar = "0.6" regex = "*" +prettytable-rs = "0.8" [build-dependencies] cfgrammar = "0.6" diff --git a/src/libvm/src/mem.rs b/src/libvm/src/mem.rs index 8fc5cda..335f24c 100644 --- a/src/libvm/src/mem.rs +++ b/src/libvm/src/mem.rs @@ -2,6 +2,7 @@ use crate::{addr::*, error::*, inst::*, reg::*}; use byteorder::{ReadBytesExt, WriteBytesExt, LE}; use std::io::Cursor; +#[derive(Debug, Clone)] pub struct MemCursor { cursor: Cursor, } @@ -22,6 +23,18 @@ impl MemCursor self.cursor.set_position((position.into()).0) } + pub fn is_end(&self) -> bool { + self.check_addr(self.position()).is_err() + } + + pub fn next_bytes(&mut self, count: usize) -> Result<&[u8]> { + let start = self.position() as usize; + let end = start + count; + self.check_addr(end as u64 - 1)?; + self.cursor.set_position(end as u64); + Ok(&self.cursor.get_ref().as_ref()[start .. 
end]) + } + pub fn next_u8_unchecked(&mut self) -> u8 { self.cursor.read_u8().unwrap() } @@ -36,7 +49,7 @@ impl MemCursor } pub fn next_u16(&mut self) -> Result { - self.check_addr(self.position()) + self.check_addr(self.position() + 1) .map(|_| self.next_u16_unchecked()) } @@ -45,7 +58,7 @@ impl MemCursor } pub fn next_u32(&mut self) -> Result { - self.check_addr(self.position()) + self.check_addr(self.position() + 3) .map(|_| self.next_u32_unchecked()) } @@ -54,12 +67,12 @@ impl MemCursor } pub fn next_u64(&mut self) -> Result { - self.check_addr(self.position()) + self.check_addr(self.position() + 7) .map(|_| self.next_u64_unchecked()) } pub fn next_addr(&mut self) -> Result { - self.check_addr(self.position()) + self.check_addr(self.position() + 7) .map(|_| self.next_addr_unchecked()) } @@ -68,6 +81,15 @@ impl MemCursor } pub fn next_inst(&mut self) -> Result { + let start = self.position(); + let result = self.next_inst_inner(); + if result.is_err() { + self.set_position(start); + } + result + } + + fn next_inst_inner(&mut self) -> Result { let start = self.position(); let op = self.next_u16()?; @@ -201,7 +223,7 @@ impl MemCursor where T: AsRef<[u8]> { fn check_addr(&self, addr: u64) -> Result<()> { - if addr > (self.cursor.get_ref().as_ref().len() as u64) { + if addr >= (self.cursor.get_ref().as_ref().len() as u64) { Err(VmError::MemOutOfBounds { addr: Addr(addr) }) } else { Ok(()) diff --git a/src/libvm/src/obj/assemble.rs b/src/libvm/src/obj/assemble.rs index f83c53e..99a08df 100644 --- a/src/libvm/src/obj/assemble.rs +++ b/src/libvm/src/obj/assemble.rs @@ -61,10 +61,10 @@ impl Asm for DataSection { session.name_stack.push(names); let content_len = self.len() as u64; let (start, end) = match self.org { - SectionOrg::Start(start) => (start, start + (content_len as u64)), + SectionOrg::Start(start) => (start, start + content_len), SectionOrg::StartEnd(start, end) => (start, end), }; - session.pos = Addr(start); + session.pos = start; if start > end { return 
Err(AsmError::StartGreaterThanEnd { start, end }); } @@ -78,9 +78,9 @@ impl Asm for DataSection { } let mut contents = Vec::with_capacity(content_len as usize); - for line in self.lines.iter() { + for (pos, line) in self.lines() { + session.pos = start + (pos as u64); contents.extend(line.assemble(session)?); - session.pos += line.len(); } assert_eq!( contents.len() as u64, @@ -88,6 +88,9 @@ impl Asm for DataSection { "in section {}", self.name ); + // `lines()` yields each line's start offset, so the cursor is still on the last line here; + // move it to the end of the section, which is where the old `+=` loop left it. + session.pos = start + content_len; session.name_stack.pop(); Ok(obj::DataSection { name: self.name.clone(), @@ -299,7 +302,7 @@ impl Asm for Value { .ok_or_else(|| AsmError::UnknownName { name: name.to_string() })?; Ok(value.addr.0.to_le_bytes().to_vec()) } - Value::Here => Ok(session.pos.0.to_le_bytes().to_vec()), + Value::Here => Ok(session.pos.to_le_bytes().to_vec()), Value::Addr(v, _) => { if let Value::Addr(_, _) = &**v { // double deref is not allowed diff --git a/src/libvm/src/obj/assemble/names.rs b/src/libvm/src/obj/assemble/names.rs index c7a30fd..31a56f9 100644 --- a/src/libvm/src/obj/assemble/names.rs +++ b/src/libvm/src/obj/assemble/names.rs @@ -21,7 +21,6 @@ pub struct Name { } pub fn get_section_names(section: &DataSection) -> Result { - let mut pos = Addr(section.org.start()); let mut names = HashMap::new(); let mut exports = HashSet::new(); @@ -39,27 +38,27 @@ pub fn get_section_names(section: &DataSection) -> Result { // are exports whose names are undefined and we can return an UnknownExport error.
// get exported names - for line in section.lines.iter() { + for (_, line) in section.lines() { if let DataLine::Export(name) = line { exports.insert(name); } } // get names - for line in section.lines.iter() { + for (pos, line) in section.lines() { if let DataLine::Label(name) = line { if names.contains_key(name) { return Err(AsmError::DuplicateLabel { name: name.clone() }); } let export = exports.remove(name); - + let start = section.org.start(); + names.insert(name.clone(), Name { name: name.clone(), - addr: pos, + addr: Addr(start + (pos as u64)), export, }); } - pos += line.len(); } // all exports map 1:1 with names diff --git a/src/libvm/src/obj/assemble/session.rs b/src/libvm/src/obj/assemble/session.rs index 5ab353c..c749105 100644 --- a/src/libvm/src/obj/assemble/session.rs +++ b/src/libvm/src/obj/assemble/session.rs @@ -24,7 +24,7 @@ pub struct AsmSession { pub (in super) include_search_paths: Vec, pub (in super) include_stack: Vec, pub (in super) name_stack: Vec, - pub (in super) pos: Addr, + pub (in super) pos: u64, } impl AsmSession { diff --git a/src/libvm/src/obj/disassemble.rs b/src/libvm/src/obj/disassemble.rs new file mode 100644 index 0000000..180db83 --- /dev/null +++ b/src/libvm/src/obj/disassemble.rs @@ -0,0 +1,115 @@ +use crate::{mem::MemCursor, obj::obj::*, inst::Inst}; +use prettytable::{Table, row, cell}; +use std::io::{self, Write as Write}; + +const SEP: &str = "================================================================================"; + +pub trait Disasm { + fn disasm(&self, writer: &mut dyn Write) -> io::Result<()>; +} + +impl Disasm for Vec { + fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> { + let obj = Object::from_bytes(self.as_slice()).expect("invalid object bytes"); + obj.disasm(writer) + } +} + +impl Disasm for Object { + fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> { + for section in self.sections.iter() { + section.disasm(writer)?; + } + + Ok(()) + } +} + +impl Disasm for Section { + fn 
disasm(&self, writer: &mut dyn Write) -> io::Result<()> { + match self { + Section::Data(s) => s.disasm(writer), + Section::Meta(s) => s.disasm(writer), + } + } +} + +// TODO : +// Instruction decoding is borked and I don't know why +// I think it has to do with the DataSection::lines() method, because that's the change that +// introduced it. The 0xdeadbeef program is only getting the value "200" in %r0 when it goes to do +// the comparison, so for some reason that value is being put in there. The disassembler is an +// attempt to make things slightly more readable while I debug. +// +// It appears that instructions are being encoded correctly (at least, as far as instructions +// themselves go) so we will have to look deeper. + +impl Disasm for DataSection { + fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> { + writeln!(writer, "{}", SEP)?; + writeln!(writer, "= DATA SECTION - {}", self.name)?; + writeln!(writer, "{}", SEP)?; + let mut table = Table::new(); + table.add_row(row!["Address", "Bytes", "Info"]); + let mut cursor = MemCursor::new(self.contents.as_slice()); + loop { + if cursor.is_end() { + break; + } + let cursor_pos = cursor.position(); + let pos = self.start + cursor_pos; + if let Ok(inst) = cursor.next_inst() { + let start = cursor_pos as usize; + let end = start + inst.len(); + let data = &self.contents.as_slice()[start .. end]; + table.add_row(row! 
[ + format!("{:016x} <{}+{:x}>", pos, self.name, cursor_pos), + bytes_hex(data), + format!("{:?}", inst), + ]); + } else { + let mut count = 0; + let mut lookahead = MemCursor::new(self.contents.as_slice()); + lookahead.set_position(cursor_pos); + while !lookahead.is_end() && lookahead.next_inst().is_err() { + count += 1; + lookahead.next_u8_unchecked(); + } + let bytes = cursor.next_bytes(count).unwrap(); + table.add_row(row![ + format!("{:016x} <{}+{:x}>", pos, self.name, cursor_pos), + bytes_hex(bytes), + "", + ]); + } + } + table.print(writer)?; + Ok(()) + } +} + +impl Disasm for MetaSection { + fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> { + let mut table = Table::new(); + writeln!(writer, "{}", SEP)?; + writeln!(writer, "= META SECTION")?; + writeln!(writer, "{}", SEP)?; + table.add_row(row!["Name", "Value"]); + for (name, value) in self.entries.iter() { + table.add_row(row![name, format!("0x{:016x}", value)]); + } + table.print(writer)?; + Ok(()) + } +} + +fn bytes_hex(bytes: &[u8]) -> String { + let mut out = String::new(); + for b in bytes.iter() { + out += &format!("{:02x} ", b); + } + if !out.is_empty() { + out.pop(); + } + out +} diff --git a/src/libvm/src/obj/mod.rs b/src/libvm/src/obj/mod.rs index b7c4760..07b63f2 100644 --- a/src/libvm/src/obj/mod.rs +++ b/src/libvm/src/obj/mod.rs @@ -1,4 +1,5 @@ pub mod assemble; +pub mod disassemble; pub mod error; pub mod obj; pub mod syn; diff --git a/src/libvm/src/obj/syn/ast.rs b/src/libvm/src/obj/syn/ast.rs index 649c668..c87a839 100644 --- a/src/libvm/src/obj/syn/ast.rs +++ b/src/libvm/src/obj/syn/ast.rs @@ -28,23 +28,61 @@ pub struct MetaLine { pub struct DataSection { pub name: String, pub org: SectionOrg, - pub lines: Vec, + pub blocks: Vec, } impl DataSection { - pub fn exports(&self) -> impl Iterator { - self.lines.iter() - .filter_map(|line| if let DataLine::Export(s) = line { - Some(s.as_str()) - } else { - None - }) + pub fn len(&self) -> usize { + self.blocks.iter() + 
.map(AlignedBlock::len) + .sum() } - pub fn len(&self) -> usize { - self.lines.iter() - .map(DataLine::len) - .sum() + pub fn lines<'a>(&'a self) -> DataLines<'a> { + DataLines::new(&self.blocks) + } +} + +pub struct DataLines<'a> { + blocks: &'a Vec, + block_idx: usize, + line_idx: usize, + pos: usize, +} + +impl<'a> DataLines<'a> { + fn new(blocks: &'a Vec) -> Self { + DataLines { + blocks, + block_idx: 0, + line_idx: 0, + pos: 0, + } + } +} + +impl<'a> Iterator for DataLines<'a> { + type Item = (usize, &'a DataLine); + + fn next(&mut self) -> Option { + if self.block_idx >= self.blocks.len() { + return None; + } + let block = &self.blocks[self.block_idx]; + if self.line_idx >= block.block.len() { + // next block - advance the position by the padding amount + self.block_idx += 1; + self.line_idx = 0; + let pos = self.pos; + self.pos += block.padding_for(pos); + self.next() + } else { + let pos = self.pos; + let line = &block.block[self.line_idx]; + self.line_idx += 1; + self.pos += line.len(); + Some((pos, line)) + } } } @@ -62,6 +100,26 @@ impl SectionOrg { } } +#[derive(Debug, Clone)] +pub struct AlignedBlock { + pub alignment: IntSize, + pub block: Vec, +} + +impl AlignedBlock { + pub fn len(&self) -> usize { + let block_len = self.block.iter() + .map(DataLine::len) + .sum(); + block_len + self.padding_for(block_len) + } + + pub fn padding_for(&self, len: usize) -> usize { + let align = self.alignment.len(); + (align - len % align) % align + } +} + #[derive(Debug, Clone)] pub enum DataLine { ValueDef(ValueDef), diff --git a/src/libvm/src/obj/syn/lexer.l b/src/libvm/src/obj/syn/lexer.l index 06ed8ca..3076309 100644 --- a/src/libvm/src/obj/syn/lexer.l +++ b/src/libvm/src/obj/syn/lexer.l @@ -27,6 +27,7 @@ u64 "U64" \.string "STR_DEF" \.zstring "ZSTR_DEF" \.interrupt "INTERRUPT_DEF" +\.align "ALIGN_DEF" "([^"]|\\[\\nt0"'])*" "STRING" add "ADD" sub "SUB" diff --git a/src/libvm/src/obj/syn/parser.y b/src/libvm/src/obj/syn/parser.y index adea542..7af4b04 100644 ---
a/src/libvm/src/obj/syn/parser.y +++ b/src/libvm/src/obj/syn/parser.y @@ -8,11 +8,11 @@ Top -> Vec: Directive -> Directive: 'DIR_META' MetaBlock { Directive::Meta(MetaSection { lines: $2 }) } - | 'DIR_SECTION' Name SectionOrg DataBlock { + | 'DIR_SECTION' Name SectionOrg 'LBRACE' DataBlocks 'RBRACE' { Directive::Data(DataSection { name: $2, org: $3, - lines: $4, + blocks: $5, }) } | 'DIR_INCLUDE' String { Directive::Include($2) } @@ -33,7 +33,22 @@ SectionOrg -> SectionOrg: | Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) } ; -DataBlock -> Vec: 'LBRACE' DataLines 'RBRACE' { $2 }; +DataBlocks -> Vec: + DataLines AlignedBlocks { + let front = AlignedBlock { alignment: IntSize::U8, block: $1 }; + $2.insert(0, front); + $2 + } + ; + +AlignedBlocks -> Vec: + AlignedBlocks AlignedBlock { $1.push($2); $1 } + | { Vec::new() } + ; + +AlignedBlock -> AlignedBlock: + 'ALIGN_DEF' IntSize DataLines { AlignedBlock { alignment: $2, block: $3 } } + ; DataLines -> Vec: DataLines DataLine { $1.push($2); $1 } @@ -55,15 +70,20 @@ ValueDef -> ValueDef: | 'STR_DEF' String { ValueDef::String($2) } | 'ZSTR_DEF' String { ValueDef::ZString($2) } | 'INTERRUPT_DEF' Int 'COMMA' ConstValue { ValueDef::Interrupt($2 != 0, $4) } + //| 'ALIGN_DEF' IntSize { ValueDef::Align($2) } + ; + +IntSize -> IntSize: + 'U8' { IntSize::U8 } + | 'U16' { IntSize::U16 } + | 'U32' { IntSize::U32 } + | 'U64' { IntSize::U64 } ; Value -> Value: ConstValue { $1 } | 'LPAREN' Value 'RPAREN' { Value::Addr(Box::new($2), IntSize::U64) } - | 'LPAREN' Value 'RPAREN' 'U8' { Value::Addr(Box::new($2), IntSize::U8) } - | 'LPAREN' Value 'RPAREN' 'U16' { Value::Addr(Box::new($2), IntSize::U16) } - | 'LPAREN' Value 'RPAREN' 'U32' { Value::Addr(Box::new($2), IntSize::U32) } - | 'LPAREN' Value 'RPAREN' 'U64' { Value::Addr(Box::new($2), IntSize::U64) } + | 'LPAREN' Value 'RPAREN' IntSize { Value::Addr(Box::new($2), $4) } //| 'LBRACKET' ArrayValues 'RBRACKET' { Value::Array($2) } ; diff --git a/src/libvm/src/state.rs 
b/src/libvm/src/state.rs index 07b2bad..24161bf 100644 --- a/src/libvm/src/state.rs +++ b/src/libvm/src/state.rs @@ -166,11 +166,11 @@ impl State { } /// Invoke an interrupt. - pub fn interrupt(&mut self, return_ip: u64, index: usize, aux: u64) -> Result<()> { + pub fn interrupt(&mut self, return_ip: u64, index: usize, aux: u64) -> Result { assert!(index < IVT_LENGTH, "invalid interrupt index"); let interrupt = self.ivt()?[index]; if !interrupt.enabled() { - return Ok(()); + return Ok(return_ip); } let fp = self.fp(); @@ -193,11 +193,11 @@ impl State { self.set_reg_unchecked(R00, index as u64); self.set_reg_unchecked(R01, aux); - Ok(()) + Ok(self.ip()) } /// Exit/return from the current interrupt. - pub fn exit_interrupt(&mut self) -> Result<()> { + pub fn exit_interrupt(&mut self) -> Result { let fp = self.fp(); let sp = fp + 48; @@ -209,7 +209,7 @@ impl State { self.pop(Dest::Reg(FLAGS))?; self.pop(Dest::Reg(IP))?; self.pop(Dest::Reg(FP))?; - Ok(()) + Ok(self.ip()) } //////////////////////////////////////////////////////////////////////////////// @@ -247,7 +247,7 @@ impl State { Inst::Div(d, s) => { let src = self.load_source(s)?; if src == 0 { - return self.interrupt(next_ip, DIVIDE_BY_ZERO, 0); + next_ip = self.interrupt(next_ip, DIVIDE_BY_ZERO, 0)?; } else { let value = self.load_dest(d)?.wrapping_div(src); self.store_dest(d, value)?; @@ -359,12 +359,10 @@ impl State { Inst::Int(v, a) => { let vector = self.load_source(v)?; let aux = self.load_source(a)?; - // this method immediately jumps, so don't let the next_ip be set below - return self.interrupt(next_ip, vector as usize, aux); + next_ip = self.interrupt(next_ip, vector as usize, aux)?; } Inst::IRet => { - // this method immediately jumps, so don't let the next_ip be set below - return self.exit_interrupt(); + next_ip = self.exit_interrupt()?; } Inst::Mov(d, s) => { let value = self.load_source(s)?; diff --git a/src/main.rs b/src/main.rs index 264fa61..af49e32 100644 --- a/src/main.rs +++ b/src/main.rs @@ 
-75,9 +75,9 @@ struct Options { #[structopt(short = "o", long)] out: Option, - /// Only run the preprocessor. - #[structopt(short = "E", long)] - preprocess_only: bool, + /// Disassemble object that would be passed to VM and exit before running it. + #[structopt(short = "d", long)] + disassemble: bool, /// Only compile the input file to an object. #[structopt(short = "c", long)] @@ -108,7 +108,7 @@ fn get_writer(path: impl AsRef) -> Result> { fn main() -> Result<()> { use vm::{ state::State, - obj::assemble, + obj::{assemble, disassemble::Disasm}, }; let opt = Options::from_args(); @@ -124,6 +124,13 @@ fn main() -> Result<()> { let mut writer = get_writer(&outfile)?; writer.write(&bytes)?; Ok(()) + } else if opt.disassemble { + let outfile = opt.out.as_ref() + .map(|p| p.as_path()) + .unwrap_or_else(|| Path::new("-")); + let mut writer = get_writer(&outfile)?; + object.disasm(&mut writer)?; + Ok(()) } else { let mut state = State::new(); state.load_object(object, opt.max_mem.unwrap_or(DEFAULT_MAX_MEM))?;