Add alignment syntax, datasection pos/line iterator, disassembler

* Interrupts require functions being aligned on 64-bit boundaries.
  Alignment is now allowed via the .align <intsize> directive, aligning
  the current address to a new alignment.
* Datasection iteration doesn't require keeping track of the current
  position.
* Rudimentary disassembler for breaking down the contents of an object.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-03-07 18:01:31 -05:00
parent 1f93732a7c
commit 90155b0cb3
12 changed files with 275 additions and 50 deletions

View File

@@ -18,6 +18,7 @@ cfgrammar = "0.6"
lrlex = "0.6" lrlex = "0.6"
lrpar = "0.6" lrpar = "0.6"
regex = "*" regex = "*"
prettytable-rs = "0.8"
[build-dependencies] [build-dependencies]
cfgrammar = "0.6" cfgrammar = "0.6"

View File

@@ -2,6 +2,7 @@ use crate::{addr::*, error::*, inst::*, reg::*};
use byteorder::{ReadBytesExt, WriteBytesExt, LE}; use byteorder::{ReadBytesExt, WriteBytesExt, LE};
use std::io::Cursor; use std::io::Cursor;
#[derive(Debug, Clone)]
pub struct MemCursor<T> { pub struct MemCursor<T> {
cursor: Cursor<T>, cursor: Cursor<T>,
} }
@@ -22,6 +23,18 @@ impl<T> MemCursor<T>
self.cursor.set_position((position.into()).0) self.cursor.set_position((position.into()).0)
} }
/// Reports whether the cursor has run out of readable memory.
///
/// Returns `true` when the current position does not pass `check_addr`.
pub fn is_end(&self) -> bool {
    let here = self.position();
    self.check_addr(here).is_err()
}
/// Reads the next `count` bytes and advances the cursor past them.
///
/// A zero-length read always succeeds, returns an empty slice and leaves the
/// position untouched.
///
/// # Errors
///
/// Returns the error produced by `check_addr` when the last requested byte
/// is not a valid address; the position is left unchanged in that case.
pub fn next_bytes(&mut self, count: usize) -> Result<&[u8]> {
    if count == 0 {
        // Nothing is read, so there is no "last byte" to bounds-check. The
        // previous `end - 1` computation underflowed here at position 0.
        return Ok(&[]);
    }
    let start = self.position() as usize;
    // Saturate so an oversized `count` is reported as out of bounds instead
    // of overflowing the addition.
    let end = start.saturating_add(count);
    // The last byte of the requested range lives at `end - 1`.
    self.check_addr((end - 1) as u64)?;
    self.cursor.set_position(end as u64);
    Ok(&self.cursor.get_ref().as_ref()[start..end])
}
pub fn next_u8_unchecked(&mut self) -> u8 { pub fn next_u8_unchecked(&mut self) -> u8 {
self.cursor.read_u8().unwrap() self.cursor.read_u8().unwrap()
} }
@@ -36,7 +49,7 @@ impl<T> MemCursor<T>
} }
pub fn next_u16(&mut self) -> Result<u16> { pub fn next_u16(&mut self) -> Result<u16> {
self.check_addr(self.position()) self.check_addr(self.position() + 1)
.map(|_| self.next_u16_unchecked()) .map(|_| self.next_u16_unchecked())
} }
@@ -45,7 +58,7 @@ impl<T> MemCursor<T>
} }
pub fn next_u32(&mut self) -> Result<u32> { pub fn next_u32(&mut self) -> Result<u32> {
self.check_addr(self.position()) self.check_addr(self.position() + 3)
.map(|_| self.next_u32_unchecked()) .map(|_| self.next_u32_unchecked())
} }
@@ -54,12 +67,12 @@ impl<T> MemCursor<T>
} }
pub fn next_u64(&mut self) -> Result<u64> { pub fn next_u64(&mut self) -> Result<u64> {
self.check_addr(self.position()) self.check_addr(self.position() + 7)
.map(|_| self.next_u64_unchecked()) .map(|_| self.next_u64_unchecked())
} }
pub fn next_addr(&mut self) -> Result<Addr> { pub fn next_addr(&mut self) -> Result<Addr> {
self.check_addr(self.position()) self.check_addr(self.position() + 7)
.map(|_| self.next_addr_unchecked()) .map(|_| self.next_addr_unchecked())
} }
@@ -68,6 +81,15 @@ impl<T> MemCursor<T>
} }
pub fn next_inst(&mut self) -> Result<Inst> { pub fn next_inst(&mut self) -> Result<Inst> {
let start = self.position();
let result = self.next_inst_inner();
if result.is_err() {
self.set_position(start);
}
result
}
fn next_inst_inner(&mut self) -> Result<Inst> {
let start = self.position(); let start = self.position();
let op = self.next_u16()?; let op = self.next_u16()?;
@@ -201,7 +223,7 @@ impl<T> MemCursor<T>
where T: AsRef<[u8]> where T: AsRef<[u8]>
{ {
fn check_addr(&self, addr: u64) -> Result<()> { fn check_addr(&self, addr: u64) -> Result<()> {
if addr > (self.cursor.get_ref().as_ref().len() as u64) { if addr >= (self.cursor.get_ref().as_ref().len() as u64) {
Err(VmError::MemOutOfBounds { addr: Addr(addr) }) Err(VmError::MemOutOfBounds { addr: Addr(addr) })
} else { } else {
Ok(()) Ok(())

View File

@@ -61,10 +61,10 @@ impl Asm for DataSection {
session.name_stack.push(names); session.name_stack.push(names);
let content_len = self.len() as u64; let content_len = self.len() as u64;
let (start, end) = match self.org { let (start, end) = match self.org {
SectionOrg::Start(start) => (start, start + (content_len as u64)), SectionOrg::Start(start) => (start, start + content_len),
SectionOrg::StartEnd(start, end) => (start, end), SectionOrg::StartEnd(start, end) => (start, end),
}; };
session.pos = Addr(start); session.pos = start;
if start > end { if start > end {
return Err(AsmError::StartGreaterThanEnd { start, end }); return Err(AsmError::StartGreaterThanEnd { start, end });
} }
@@ -78,9 +78,9 @@ impl Asm for DataSection {
} }
let mut contents = Vec::with_capacity(content_len as usize); let mut contents = Vec::with_capacity(content_len as usize);
for line in self.lines.iter() { for (pos, line) in self.lines() {
contents.extend(line.assemble(session)?); contents.extend(line.assemble(session)?);
session.pos += line.len(); session.pos = start + (pos as u64);
} }
assert_eq!( assert_eq!(
contents.len() as u64, contents.len() as u64,
@@ -88,6 +88,9 @@ impl Asm for DataSection {
"in section {}", "in section {}",
self.name self.name
); );
assert_eq!(
session.pos - start, content_len
);
session.name_stack.pop(); session.name_stack.pop();
Ok(obj::DataSection { Ok(obj::DataSection {
name: self.name.clone(), name: self.name.clone(),
@@ -299,7 +302,7 @@ impl Asm for Value {
.ok_or_else(|| AsmError::UnknownName { name: name.to_string() })?; .ok_or_else(|| AsmError::UnknownName { name: name.to_string() })?;
Ok(value.addr.0.to_le_bytes().to_vec()) Ok(value.addr.0.to_le_bytes().to_vec())
} }
Value::Here => Ok(session.pos.0.to_le_bytes().to_vec()), Value::Here => Ok(session.pos.to_le_bytes().to_vec()),
Value::Addr(v, _) => { Value::Addr(v, _) => {
if let Value::Addr(_, _) = &**v { if let Value::Addr(_, _) = &**v {
// double deref is not allowed // double deref is not allowed

View File

@@ -21,7 +21,6 @@ pub struct Name {
} }
pub fn get_section_names(section: &DataSection) -> Result<Names> { pub fn get_section_names(section: &DataSection) -> Result<Names> {
let mut pos = Addr(section.org.start());
let mut names = HashMap::new(); let mut names = HashMap::new();
let mut exports = HashSet::new(); let mut exports = HashSet::new();
@@ -39,27 +38,27 @@ pub fn get_section_names(section: &DataSection) -> Result<Names> {
// are exports whose names are undefined and we can return an UnknownExport error. // are exports whose names are undefined and we can return an UnknownExport error.
// get exported names // get exported names
for line in section.lines.iter() { for (_, line) in section.lines() {
if let DataLine::Export(name) = line { if let DataLine::Export(name) = line {
exports.insert(name); exports.insert(name);
} }
} }
// get names // get names
for line in section.lines.iter() { for (pos, line) in section.lines() {
if let DataLine::Label(name) = line { if let DataLine::Label(name) = line {
if names.contains_key(name) { if names.contains_key(name) {
return Err(AsmError::DuplicateLabel { name: name.clone() }); return Err(AsmError::DuplicateLabel { name: name.clone() });
} }
let export = exports.remove(name); let export = exports.remove(name);
let start = section.org.start();
names.insert(name.clone(), Name { names.insert(name.clone(), Name {
name: name.clone(), name: name.clone(),
addr: pos, addr: Addr(start + (pos as u64)),
export, export,
}); });
} }
pos += line.len();
} }
// all exports map 1:1 with names // all exports map 1:1 with names

View File

@@ -24,7 +24,7 @@ pub struct AsmSession {
pub (in super) include_search_paths: Vec<PathBuf>, pub (in super) include_search_paths: Vec<PathBuf>,
pub (in super) include_stack: Vec<PathBuf>, pub (in super) include_stack: Vec<PathBuf>,
pub (in super) name_stack: Vec<Names>, pub (in super) name_stack: Vec<Names>,
pub (in super) pos: Addr, pub (in super) pos: u64,
} }
impl AsmSession { impl AsmSession {

View File

@@ -0,0 +1,115 @@
use crate::{mem::MemCursor, obj::obj::*, inst::Inst};
use prettytable::{Table, row, cell};
use std::io::{self, Write as Write};
const SEP: &str = "================================================================================";
/// Implemented by values that can write a human-readable disassembly of themselves.
pub trait Disasm {
/// Writes the disassembly of `self` to `writer`, returning any I/O error encountered.
fn disasm(&self, writer: &mut dyn Write) -> io::Result<()>;
}
impl Disasm for Vec<u8> {
    /// Parses the bytes as an `Object` and writes that object's disassembly.
    ///
    /// # Panics
    ///
    /// Panics with "invalid object bytes" when the bytes cannot be parsed.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        Object::from_bytes(self.as_slice())
            .expect("invalid object bytes")
            .disasm(writer)
    }
}
impl Disasm for Object {
    /// Writes the disassembly of every section, in order, stopping at the
    /// first I/O error.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        self.sections
            .iter()
            .try_for_each(|section| section.disasm(&mut *writer))
    }
}
impl Disasm for Section {
    /// Forwards to the disassembler of whichever section kind this is.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        match *self {
            Section::Data(ref data) => data.disasm(writer),
            Section::Meta(ref meta) => meta.disasm(writer),
        }
    }
}
// TODO:
// Instruction decoding is broken and the cause is not yet known.
// The suspect is the DataSection::lines() method, because that is the change that introduced
// the problem. The 0xdeadbeef program is only getting the value "200" in %r0 when it goes to do
// the comparison, so for some reason that value is being put in there. The disassembler is an
// attempt to make things slightly more readable while debugging.
//
// It appears that instructions are being encoded correctly (at least, as far as instructions
// themselves go), so the problem has to be looked for deeper.
impl Disasm for DataSection {
    /// Writes a banner followed by an "Address / Bytes / Info" table of the
    /// section contents.
    ///
    /// Bytes that decode as an instruction get one row each, with the
    /// instruction's `Debug` form in the info column. A run of bytes that
    /// does not decode is collapsed into a single row with an empty info
    /// column.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        writeln!(writer, "{}", SEP)?;
        writeln!(writer, "= DATA SECTION - {}", self.name)?;
        writeln!(writer, "{}", SEP)?;

        let mut table = Table::new();
        table.add_row(row!["Address", "Bytes", "Info"]);

        let mut cursor = MemCursor::new(self.contents.as_slice());
        while !cursor.is_end() {
            let offset = cursor.position();
            // Absolute address plus the offset relative to the section name.
            let label = format!("{:016x} <{}+{:x}>", self.start + offset, self.name, offset);
            match cursor.next_inst() {
                Ok(inst) => {
                    let first = offset as usize;
                    let last = first + inst.len();
                    let encoded = &self.contents.as_slice()[first..last];
                    table.add_row(row![label, bytes_hex(encoded), format!("{:?}", inst)]);
                }
                Err(_) => {
                    // Probe ahead with a second cursor, one byte at a time,
                    // until an instruction decodes again or the data ends.
                    let mut probe = MemCursor::new(self.contents.as_slice());
                    probe.set_position(offset);
                    let mut skipped = 0;
                    while !probe.is_end() && probe.next_inst().is_err() {
                        skipped += 1;
                        probe.next_u8_unchecked();
                    }
                    let raw = cursor.next_bytes(skipped).unwrap();
                    table.add_row(row![label, bytes_hex(raw), ""]);
                }
            }
        }

        table.print(writer)?;
        Ok(())
    }
}
impl Disasm for MetaSection {
    /// Writes a banner followed by a "Name / Value" table with one row per
    /// entry; values are shown as zero-padded 16-digit hexadecimal.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        writeln!(writer, "{}", SEP)?;
        writeln!(writer, "= META SECTION")?;
        writeln!(writer, "{}", SEP)?;

        let mut listing = Table::new();
        listing.add_row(row!["Name", "Value"]);
        for (key, val) in self.entries.iter() {
            let rendered = format!("0x{:016x}", val);
            listing.add_row(row![key, rendered]);
        }

        listing.print(writer)?;
        Ok(())
    }
}
/// Formats `bytes` as two-digit lowercase hexadecimal values separated by
/// single spaces, with no trailing space. An empty slice gives an empty string.
fn bytes_hex(bytes: &[u8]) -> String {
    bytes
        .iter()
        .map(|byte| format!("{:02x}", byte))
        .collect::<Vec<_>>()
        .join(" ")
}

View File

@@ -1,4 +1,5 @@
pub mod assemble; pub mod assemble;
pub mod disassemble;
pub mod error; pub mod error;
pub mod obj; pub mod obj;
pub mod syn; pub mod syn;

View File

@@ -28,23 +28,61 @@ pub struct MetaLine {
pub struct DataSection { pub struct DataSection {
pub name: String, pub name: String,
pub org: SectionOrg, pub org: SectionOrg,
pub lines: Vec<DataLine>, pub blocks: Vec<AlignedBlock>,
} }
impl DataSection { impl DataSection {
pub fn exports(&self) -> impl Iterator<Item=&str> { pub fn len(&self) -> usize {
self.lines.iter() self.blocks.iter()
.filter_map(|line| if let DataLine::Export(s) = line { .map(AlignedBlock::len)
Some(s.as_str()) .sum()
} else {
None
})
} }
pub fn len(&self) -> usize { pub fn lines<'a>(&'a self) -> DataLines<'a> {
self.lines.iter() DataLines::new(&self.blocks)
.map(DataLine::len) }
.sum() }
/// Iterator state for walking the data lines of a sequence of aligned blocks
/// while tracking a running byte offset.
pub struct DataLines<'a> {
    /// The blocks being walked. Held as a slice so any contiguous run of
    /// blocks can be iterated, not only a whole `Vec`.
    blocks: &'a [AlignedBlock],
    /// Index of the block currently being read.
    block_idx: usize,
    /// Index of the next line to yield within the current block.
    line_idx: usize,
    /// Running byte offset of the next line to yield.
    pos: usize,
}

impl<'a> DataLines<'a> {
    /// Creates an iterator positioned at the first line of the first block,
    /// with the offset at zero.
    fn new(blocks: &'a [AlignedBlock]) -> Self {
        DataLines {
            blocks,
            block_idx: 0,
            line_idx: 0,
            pos: 0,
        }
    }
}
impl<'a> Iterator for DataLines<'a> {
    /// The byte offset at which a line starts, paired with the line itself.
    type Item = (usize, &'a DataLine);

    /// Yields the next line together with its starting offset, moving on to
    /// the following block (and adding padding) whenever the current block
    /// is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let current = self.blocks.get(self.block_idx)?;
            match current.block.get(self.line_idx) {
                Some(line) => {
                    let line_pos = self.pos;
                    self.line_idx += 1;
                    self.pos += line.len();
                    return Some((line_pos, line));
                }
                None => {
                    // Block exhausted: step to the next one and advance the
                    // offset by the padding amount.
                    // NOTE(review): the padding is computed with the block
                    // that just ended, not the block about to start - confirm
                    // this is the intended meaning of `.align`.
                    let padding = current.padding_for(self.pos);
                    self.block_idx += 1;
                    self.line_idx = 0;
                    self.pos += padding;
                }
            }
        }
    }
}
} }
@@ -62,6 +100,26 @@ impl SectionOrg {
} }
} }
/// A run of data lines that shares one alignment setting.
#[derive(Debug, Clone)]
pub struct AlignedBlock {
/// Integer size whose length is used as the alignment (see `padding_for`).
pub alignment: IntSize,
/// The data lines that make up this block, in order.
pub block: Vec<DataLine>,
}
impl AlignedBlock {
    /// Total length of the block in bytes: the summed length of its lines
    /// plus the padding needed to round that sum up to the block's alignment.
    pub fn len(&self) -> usize {
        let block_len: usize = self.block.iter().map(DataLine::len).sum();
        block_len + self.padding_for(block_len)
    }

    /// Number of padding bytes needed to round `len` up to the next multiple
    /// of this block's alignment. Returns 0 when `len` is already aligned.
    pub fn padding_for(&self, len: usize) -> usize {
        let align = self.alignment.len();
        // `len % align` is how far past the last boundary we are; the padding
        // is the distance to the *next* boundary. The outer `% align` maps the
        // already-aligned case (which would otherwise give `align`) to 0.
        (align - len % align) % align
    }
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum DataLine { pub enum DataLine {
ValueDef(ValueDef), ValueDef(ValueDef),

View File

@@ -27,6 +27,7 @@ u64 "U64"
\.string "STR_DEF" \.string "STR_DEF"
\.zstring "ZSTR_DEF" \.zstring "ZSTR_DEF"
\.interrupt "INTERRUPT_DEF" \.interrupt "INTERRUPT_DEF"
\.align "ALIGN_DEF"
"([^"]|\\[\\nt0"'])*" "STRING" "([^"]|\\[\\nt0"'])*" "STRING"
add "ADD" add "ADD"
sub "SUB" sub "SUB"

View File

@@ -8,11 +8,11 @@ Top -> Vec<Directive>:
Directive -> Directive: Directive -> Directive:
'DIR_META' MetaBlock { Directive::Meta(MetaSection { lines: $2 }) } 'DIR_META' MetaBlock { Directive::Meta(MetaSection { lines: $2 }) }
| 'DIR_SECTION' Name SectionOrg DataBlock { | 'DIR_SECTION' Name SectionOrg 'LBRACE' DataBlocks 'RBRACE' {
Directive::Data(DataSection { Directive::Data(DataSection {
name: $2, name: $2,
org: $3, org: $3,
lines: $4, blocks: $5,
}) })
} }
| 'DIR_INCLUDE' String { Directive::Include($2) } | 'DIR_INCLUDE' String { Directive::Include($2) }
@@ -33,7 +33,22 @@ SectionOrg -> SectionOrg:
| Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) } | Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) }
; ;
DataBlock -> Vec<DataLine>: 'LBRACE' DataLines 'RBRACE' { $2 }; DataBlocks -> Vec<AlignedBlock>:
DataLines AlignedBlocks {
let front = AlignedBlock { alignment: IntSize::U8, block: $1 };
$2.insert(0, front);
$2
}
;
// Zero or more aligned blocks, collected left to right into a vector.
AlignedBlocks -> Vec<AlignedBlock>:
AlignedBlocks AlignedBlock { $1.push($2); $1 }
| { Vec::new() }
;
// An ALIGN_DEF token and an integer size, followed by the data lines that form the block.
AlignedBlock -> AlignedBlock:
'ALIGN_DEF' IntSize DataLines { AlignedBlock { alignment: $2, block: $3 } }
;
DataLines -> Vec<DataLine>: DataLines -> Vec<DataLine>:
DataLines DataLine { $1.push($2); $1 } DataLines DataLine { $1.push($2); $1 }
@@ -55,15 +70,20 @@ ValueDef -> ValueDef:
| 'STR_DEF' String { ValueDef::String($2) } | 'STR_DEF' String { ValueDef::String($2) }
| 'ZSTR_DEF' String { ValueDef::ZString($2) } | 'ZSTR_DEF' String { ValueDef::ZString($2) }
| 'INTERRUPT_DEF' Int 'COMMA' ConstValue { ValueDef::Interrupt($2 != 0, $4) } | 'INTERRUPT_DEF' Int 'COMMA' ConstValue { ValueDef::Interrupt($2 != 0, $4) }
//| 'ALIGN_DEF' IntSize { ValueDef::Align($2) }
;
IntSize -> IntSize:
'U8' { IntSize::U8 }
| 'U16' { IntSize::U16 }
| 'U32' { IntSize::U32 }
| 'U64' { IntSize::U64 }
; ;
Value -> Value: Value -> Value:
ConstValue { $1 } ConstValue { $1 }
| 'LPAREN' Value 'RPAREN' { Value::Addr(Box::new($2), IntSize::U64) } | 'LPAREN' Value 'RPAREN' { Value::Addr(Box::new($2), IntSize::U64) }
| 'LPAREN' Value 'RPAREN' 'U8' { Value::Addr(Box::new($2), IntSize::U8) } | 'LPAREN' Value 'RPAREN' IntSize { Value::Addr(Box::new($2), $4) }
| 'LPAREN' Value 'RPAREN' 'U16' { Value::Addr(Box::new($2), IntSize::U16) }
| 'LPAREN' Value 'RPAREN' 'U32' { Value::Addr(Box::new($2), IntSize::U32) }
| 'LPAREN' Value 'RPAREN' 'U64' { Value::Addr(Box::new($2), IntSize::U64) }
//| 'LBRACKET' ArrayValues 'RBRACKET' { Value::Array($2) } //| 'LBRACKET' ArrayValues 'RBRACKET' { Value::Array($2) }
; ;

View File

@@ -166,11 +166,11 @@ impl State {
} }
/// Invoke an interrupt. /// Invoke an interrupt.
pub fn interrupt(&mut self, return_ip: u64, index: usize, aux: u64) -> Result<()> { pub fn interrupt(&mut self, return_ip: u64, index: usize, aux: u64) -> Result<u64> {
assert!(index < IVT_LENGTH, "invalid interrupt index"); assert!(index < IVT_LENGTH, "invalid interrupt index");
let interrupt = self.ivt()?[index]; let interrupt = self.ivt()?[index];
if !interrupt.enabled() { if !interrupt.enabled() {
return Ok(()); return Ok(return_ip);
} }
let fp = self.fp(); let fp = self.fp();
@@ -193,11 +193,11 @@ impl State {
self.set_reg_unchecked(R00, index as u64); self.set_reg_unchecked(R00, index as u64);
self.set_reg_unchecked(R01, aux); self.set_reg_unchecked(R01, aux);
Ok(()) Ok(self.ip())
} }
/// Exit/return from the current interrupt. /// Exit/return from the current interrupt.
pub fn exit_interrupt(&mut self) -> Result<()> { pub fn exit_interrupt(&mut self) -> Result<u64> {
let fp = self.fp(); let fp = self.fp();
let sp = fp + 48; let sp = fp + 48;
@@ -209,7 +209,7 @@ impl State {
self.pop(Dest::Reg(FLAGS))?; self.pop(Dest::Reg(FLAGS))?;
self.pop(Dest::Reg(IP))?; self.pop(Dest::Reg(IP))?;
self.pop(Dest::Reg(FP))?; self.pop(Dest::Reg(FP))?;
Ok(()) Ok(self.ip())
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@@ -247,7 +247,7 @@ impl State {
Inst::Div(d, s) => { Inst::Div(d, s) => {
let src = self.load_source(s)?; let src = self.load_source(s)?;
if src == 0 { if src == 0 {
return self.interrupt(next_ip, DIVIDE_BY_ZERO, 0); next_ip = self.interrupt(next_ip, DIVIDE_BY_ZERO, 0)?;
} else { } else {
let value = self.load_dest(d)?.wrapping_div(src); let value = self.load_dest(d)?.wrapping_div(src);
self.store_dest(d, value)?; self.store_dest(d, value)?;
@@ -359,12 +359,10 @@ impl State {
Inst::Int(v, a) => { Inst::Int(v, a) => {
let vector = self.load_source(v)?; let vector = self.load_source(v)?;
let aux = self.load_source(a)?; let aux = self.load_source(a)?;
// this method immediately jumps, so don't let the next_ip be set below next_ip = self.interrupt(next_ip, vector as usize, aux)?;
return self.interrupt(next_ip, vector as usize, aux);
} }
Inst::IRet => { Inst::IRet => {
// this method immediately jumps, so don't let the next_ip be set below next_ip = self.exit_interrupt()?;
return self.exit_interrupt();
} }
Inst::Mov(d, s) => { Inst::Mov(d, s) => {
let value = self.load_source(s)?; let value = self.load_source(s)?;

View File

@@ -75,9 +75,9 @@ struct Options {
#[structopt(short = "o", long)] #[structopt(short = "o", long)]
out: Option<PathBuf>, out: Option<PathBuf>,
/// Only run the preprocessor. /// Disassemble object that would be passed to VM and exit before running it.
#[structopt(short = "E", long)] #[structopt(short = "d", long)]
preprocess_only: bool, disassemble: bool,
/// Only compile the input file to an object. /// Only compile the input file to an object.
#[structopt(short = "c", long)] #[structopt(short = "c", long)]
@@ -108,7 +108,7 @@ fn get_writer(path: impl AsRef<Path>) -> Result<Box<dyn Write>> {
fn main() -> Result<()> { fn main() -> Result<()> {
use vm::{ use vm::{
state::State, state::State,
obj::assemble, obj::{assemble, disassemble::Disasm},
}; };
let opt = Options::from_args(); let opt = Options::from_args();
@@ -124,6 +124,13 @@ fn main() -> Result<()> {
let mut writer = get_writer(&outfile)?; let mut writer = get_writer(&outfile)?;
writer.write(&bytes)?; writer.write(&bytes)?;
Ok(()) Ok(())
} else if opt.disassemble {
let outfile = opt.out.as_ref()
.map(|p| p.as_path())
.unwrap_or_else(|| Path::new("-"));
let mut writer = get_writer(&outfile)?;
object.disasm(&mut writer)?;
Ok(())
} else { } else {
let mut state = State::new(); let mut state = State::new();
state.load_object(object, opt.max_mem.unwrap_or(DEFAULT_MAX_MEM))?; state.load_object(object, opt.max_mem.unwrap_or(DEFAULT_MAX_MEM))?;