Add alignment syntax, datasection pos/line iterator, disassembler

* Interrupts require functions being aligned on 64-bit boundaries.
  Alignment is now allowed via the .align <intsize> directive, aligning
  the current address to a new alignment.
* Data section iteration no longer requires callers to keep track of the
  current position; the new line iterator yields each line's offset.
* Rudimentary disassembler for breaking down the contents of an object

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-03-07 18:01:31 -05:00
parent 1f93732a7c
commit 90155b0cb3
12 changed files with 275 additions and 50 deletions

View File

@@ -18,6 +18,7 @@ cfgrammar = "0.6"
lrlex = "0.6"
lrpar = "0.6"
regex = "*"
prettytable-rs = "0.8"
[build-dependencies]
cfgrammar = "0.6"

View File

@@ -2,6 +2,7 @@ use crate::{addr::*, error::*, inst::*, reg::*};
use byteorder::{ReadBytesExt, WriteBytesExt, LE};
use std::io::Cursor;
/// Cursor over a memory buffer of type `T`, wrapping a `std::io::Cursor`.
#[derive(Debug, Clone)]
pub struct MemCursor<T> {
// Underlying I/O cursor; it owns the buffer and tracks the current position.
cursor: Cursor<T>,
}
@@ -22,6 +23,18 @@ impl<T> MemCursor<T>
self.cursor.set_position((position.into()).0)
}
/// Returns `true` when the cursor's current position does not pass `check_addr`,
/// i.e. there is no addressable byte left at the current position.
pub fn is_end(&self) -> bool {
    let here = self.position();
    match self.check_addr(here) {
        Ok(()) => false,
        Err(_) => true,
    }
}
/// Returns the next `count` bytes as a slice borrowed from the underlying buffer and
/// advances the cursor past them.
///
/// A `count` of zero yields an empty slice and leaves the cursor where it is.
///
/// # Errors
///
/// Propagates the error from `check_addr` when the last requested byte does not pass the
/// bounds check. The cursor is not moved in that case.
pub fn next_bytes(&mut self, count: usize) -> Result<&[u8]> {
    if count == 0 {
        // Without this guard, `end - 1` below underflows when the cursor sits at position 0.
        return Ok(&[]);
    }
    let start = self.position() as usize;
    // Saturate instead of overflowing: an end address this large can never be in bounds, so
    // the bounds check below rejects it rather than the addition panicking or wrapping.
    let end = start.saturating_add(count);
    self.check_addr(end as u64 - 1)?;
    self.cursor.set_position(end as u64);
    Ok(&self.cursor.get_ref().as_ref()[start..end])
}
pub fn next_u8_unchecked(&mut self) -> u8 {
self.cursor.read_u8().unwrap()
}
@@ -36,7 +49,7 @@ impl<T> MemCursor<T>
}
pub fn next_u16(&mut self) -> Result<u16> {
self.check_addr(self.position())
self.check_addr(self.position() + 1)
.map(|_| self.next_u16_unchecked())
}
@@ -45,7 +58,7 @@ impl<T> MemCursor<T>
}
pub fn next_u32(&mut self) -> Result<u32> {
self.check_addr(self.position())
self.check_addr(self.position() + 3)
.map(|_| self.next_u32_unchecked())
}
@@ -54,12 +67,12 @@ impl<T> MemCursor<T>
}
pub fn next_u64(&mut self) -> Result<u64> {
self.check_addr(self.position())
self.check_addr(self.position() + 7)
.map(|_| self.next_u64_unchecked())
}
pub fn next_addr(&mut self) -> Result<Addr> {
self.check_addr(self.position())
self.check_addr(self.position() + 7)
.map(|_| self.next_addr_unchecked())
}
@@ -68,6 +81,15 @@ impl<T> MemCursor<T>
}
/// Decodes the next instruction via `next_inst_inner`.
///
/// If decoding fails, the cursor is moved back to the position it had on entry before the
/// error is returned, so a failed decode does not consume any bytes.
pub fn next_inst(&mut self) -> Result<Inst> {
    let rewind_to = self.position();
    match self.next_inst_inner() {
        Ok(inst) => Ok(inst),
        Err(err) => {
            self.set_position(rewind_to);
            Err(err)
        }
    }
}
fn next_inst_inner(&mut self) -> Result<Inst> {
let start = self.position();
let op = self.next_u16()?;
@@ -201,7 +223,7 @@ impl<T> MemCursor<T>
where T: AsRef<[u8]>
{
fn check_addr(&self, addr: u64) -> Result<()> {
if addr > (self.cursor.get_ref().as_ref().len() as u64) {
if addr >= (self.cursor.get_ref().as_ref().len() as u64) {
Err(VmError::MemOutOfBounds { addr: Addr(addr) })
} else {
Ok(())

View File

@@ -61,10 +61,10 @@ impl Asm for DataSection {
session.name_stack.push(names);
let content_len = self.len() as u64;
let (start, end) = match self.org {
SectionOrg::Start(start) => (start, start + (content_len as u64)),
SectionOrg::Start(start) => (start, start + content_len),
SectionOrg::StartEnd(start, end) => (start, end),
};
session.pos = Addr(start);
session.pos = start;
if start > end {
return Err(AsmError::StartGreaterThanEnd { start, end });
}
@@ -78,9 +78,9 @@ impl Asm for DataSection {
}
let mut contents = Vec::with_capacity(content_len as usize);
for line in self.lines.iter() {
for (pos, line) in self.lines() {
contents.extend(line.assemble(session)?);
session.pos += line.len();
session.pos = start + (pos as u64);
}
assert_eq!(
contents.len() as u64,
@@ -88,6 +88,9 @@ impl Asm for DataSection {
"in section {}",
self.name
);
assert_eq!(
session.pos - start, content_len
);
session.name_stack.pop();
Ok(obj::DataSection {
name: self.name.clone(),
@@ -299,7 +302,7 @@ impl Asm for Value {
.ok_or_else(|| AsmError::UnknownName { name: name.to_string() })?;
Ok(value.addr.0.to_le_bytes().to_vec())
}
Value::Here => Ok(session.pos.0.to_le_bytes().to_vec()),
Value::Here => Ok(session.pos.to_le_bytes().to_vec()),
Value::Addr(v, _) => {
if let Value::Addr(_, _) = &**v {
// double deref is not allowed

View File

@@ -21,7 +21,6 @@ pub struct Name {
}
pub fn get_section_names(section: &DataSection) -> Result<Names> {
let mut pos = Addr(section.org.start());
let mut names = HashMap::new();
let mut exports = HashSet::new();
@@ -39,27 +38,27 @@ pub fn get_section_names(section: &DataSection) -> Result<Names> {
// are exports whose names are undefined and we can return an UnknownExport error.
// get exported names
for line in section.lines.iter() {
for (_, line) in section.lines() {
if let DataLine::Export(name) = line {
exports.insert(name);
}
}
// get names
for line in section.lines.iter() {
for (pos, line) in section.lines() {
if let DataLine::Label(name) = line {
if names.contains_key(name) {
return Err(AsmError::DuplicateLabel { name: name.clone() });
}
let export = exports.remove(name);
let start = section.org.start();
names.insert(name.clone(), Name {
name: name.clone(),
addr: pos,
addr: Addr(start + (pos as u64)),
export,
});
}
pos += line.len();
}
// all exports map 1:1 with names

View File

@@ -24,7 +24,7 @@ pub struct AsmSession {
pub (in super) include_search_paths: Vec<PathBuf>,
pub (in super) include_stack: Vec<PathBuf>,
pub (in super) name_stack: Vec<Names>,
pub (in super) pos: Addr,
pub (in super) pos: u64,
}
impl AsmSession {

View File

@@ -0,0 +1,115 @@
use crate::{mem::MemCursor, obj::obj::*, inst::Inst};
use prettytable::{Table, row, cell};
use std::io::{self, Write as Write};
const SEP: &str = "================================================================================";
/// Something that can write a human-readable disassembly of itself.
pub trait Disasm {
/// Writes the disassembly of `self` to `writer`, returning any I/O error encountered.
fn disasm(&self, writer: &mut dyn Write) -> io::Result<()>;
}
impl Disasm for Vec<u8> {
    /// Parses the bytes as an `Object` and writes that object's disassembly to `writer`.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `InvalidData` when the bytes cannot be parsed as an
    /// object (instead of panicking), and otherwise any error from disassembling the object.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        let obj = Object::from_bytes(self.as_slice()).map_err(|err| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("invalid object bytes: {:?}", err),
            )
        })?;
        obj.disasm(writer)
    }
}
impl Disasm for Object {
    /// Disassembles every section of the object in order, stopping at the first error.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        self.sections
            .iter()
            .try_for_each(|section| section.disasm(writer))
    }
}
impl Disasm for Section {
    /// Forwards to the disassembler of whichever section variant this is.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        match *self {
            Section::Data(ref data) => data.disasm(writer),
            Section::Meta(ref meta) => meta.disasm(writer),
        }
    }
}
// TODO :
// Instruction decoding is borked and I don't know why
// I think it has to do with the DataSection::lines() method, because that's the change that
// introduced it. The 0xdeadbeef program is only getting the value "200" in %r0 when it goes to do
// the comparison, so for some reason that value is being put in there. The disassembler is an
// attempt to make things slightly more readable while I debug.
//
// It appears that instructions are being encoded correctly (at least, as far as instructions
// themselves go) so we will have to look deeper.
impl Disasm for DataSection {
    /// Writes a banner naming the section, then a table with one row per decoded
    /// instruction (address, raw bytes, debug rendering of the instruction).
    ///
    /// Bytes that do not decode as an instruction are grouped into a single row with an
    /// empty info column; the group extends until an instruction decodes again or the
    /// contents end.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        writeln!(writer, "{}", SEP)?;
        writeln!(writer, "= DATA SECTION - {}", self.name)?;
        writeln!(writer, "{}", SEP)?;

        let contents = self.contents.as_slice();
        let mut table = Table::new();
        table.add_row(row!["Address", "Bytes", "Info"]);

        let mut cursor = MemCursor::new(contents);
        while !cursor.is_end() {
            // Offset within the section, and the label shared by both kinds of row.
            let offset = cursor.position();
            let label = format!("{:016x} <{}+{:x}>", self.start + offset, self.name, offset);

            match cursor.next_inst() {
                Ok(inst) => {
                    let from = offset as usize;
                    let encoded = &contents[from..from + inst.len()];
                    table.add_row(row![label, bytes_hex(encoded), format!("{:?}", inst)]);
                }
                Err(_) => {
                    // Probe ahead with a separate cursor, one byte at a time, to find out
                    // how many bytes to emit before an instruction decodes again.
                    let mut probe = MemCursor::new(contents);
                    probe.set_position(offset);
                    let mut skipped = 0;
                    while !probe.is_end() && probe.next_inst().is_err() {
                        probe.next_u8_unchecked();
                        skipped += 1;
                    }
                    let raw = cursor.next_bytes(skipped).unwrap();
                    table.add_row(row![label, bytes_hex(raw), ""]);
                }
            }
        }

        table.print(writer)?;
        Ok(())
    }
}
impl Disasm for MetaSection {
    /// Writes a banner followed by a two-column table of the section's entries, with each
    /// value rendered as a zero-padded 16-digit hexadecimal number.
    fn disasm(&self, writer: &mut dyn Write) -> io::Result<()> {
        writeln!(writer, "{}", SEP)?;
        writeln!(writer, "= META SECTION")?;
        writeln!(writer, "{}", SEP)?;

        let mut table = Table::new();
        table.add_row(row!["Name", "Value"]);
        for (key, val) in self.entries.iter() {
            let rendered = format!("0x{:016x}", val);
            table.add_row(row![key, rendered]);
        }

        table.print(writer)?;
        Ok(())
    }
}
/// Renders `bytes` as lowercase two-digit hex values separated by single spaces,
/// e.g. `[0xde, 0xad]` becomes `"de ad"`. An empty slice yields an empty string.
fn bytes_hex(bytes: &[u8]) -> String {
    bytes
        .iter()
        .map(|byte| format!("{:02x}", byte))
        .collect::<Vec<_>>()
        .join(" ")
}

View File

@@ -1,4 +1,5 @@
pub mod assemble;
pub mod disassemble;
pub mod error;
pub mod obj;
pub mod syn;

View File

@@ -28,23 +28,61 @@ pub struct MetaLine {
pub struct DataSection {
pub name: String,
pub org: SectionOrg,
pub lines: Vec<DataLine>,
pub blocks: Vec<AlignedBlock>,
}
impl DataSection {
pub fn exports(&self) -> impl Iterator<Item=&str> {
self.lines.iter()
.filter_map(|line| if let DataLine::Export(s) = line {
Some(s.as_str())
} else {
None
})
pub fn len(&self) -> usize {
self.blocks.iter()
.map(AlignedBlock::len)
.sum()
}
pub fn len(&self) -> usize {
self.lines.iter()
.map(DataLine::len)
.sum()
pub fn lines<'a>(&'a self) -> DataLines<'a> {
DataLines::new(&self.blocks)
}
}
/// Iterator over every line of a sequence of aligned blocks, paired with the line's
/// byte offset relative to the start of the first block.
pub struct DataLines<'a> {
    blocks: &'a Vec<AlignedBlock>,
    /// Index of the block currently being walked.
    block_idx: usize,
    /// Index of the next line to yield within the current block.
    line_idx: usize,
    /// Running byte offset of the next line to yield.
    pos: usize,
}

impl<'a> DataLines<'a> {
    /// Creates an iterator positioned at the first line of the first block, offset 0.
    fn new(blocks: &'a Vec<AlignedBlock>) -> Self {
        DataLines {
            blocks,
            block_idx: 0,
            line_idx: 0,
            pos: 0,
        }
    }
}

impl<'a> Iterator for DataLines<'a> {
    type Item = (usize, &'a DataLine);

    /// Yields `(offset, line)` for the next line, skipping over exhausted (or empty)
    /// blocks. Each time a block is left behind, the offset is advanced by that block's
    /// `padding_for` the current offset.
    // NOTE(review): padding is applied after a block using that block's own alignment;
    // confirm this is the intended placement relative to the `.align` directive.
    fn next(&mut self) -> Option<Self::Item> {
        let blocks = self.blocks;
        while let Some(current) = blocks.get(self.block_idx) {
            if let Some(line) = current.block.get(self.line_idx) {
                let offset = self.pos;
                self.line_idx += 1;
                self.pos += line.len();
                return Some((offset, line));
            }
            // Current block is exhausted: move on to the next one and add its padding.
            self.block_idx += 1;
            self.line_idx = 0;
            self.pos += current.padding_for(self.pos);
        }
        None
    }
}
@@ -62,6 +100,26 @@ impl SectionOrg {
}
}
/// A run of data lines together with the alignment requested for it.
#[derive(Debug, Clone)]
pub struct AlignedBlock {
    /// Alignment of the block; its `len()` is used as the alignment in bytes.
    pub alignment: IntSize,
    /// The lines contained in this block.
    pub block: Vec<DataLine>,
}

impl AlignedBlock {
    /// Total length of the block's lines plus the padding needed to round that length up
    /// to a multiple of the block's alignment.
    // NOTE(review): padding here is computed from the block's own length, not from its
    // absolute position in the section; confirm this matches how the lines are laid out.
    pub fn len(&self) -> usize {
        let block_len: usize = self.block.iter().map(DataLine::len).sum();
        block_len + self.padding_for(block_len)
    }

    /// Number of padding bytes required to advance `len` to the next multiple of this
    /// block's alignment; zero when `len` is already aligned.
    pub fn padding_for(&self, len: usize) -> usize {
        let align = self.alignment.len();
        // `len % align` is how far past the previous boundary we are; the padding is the
        // distance to the *next* boundary, not that remainder itself.
        match len % align {
            0 => 0,
            rem => align - rem,
        }
    }
}
#[derive(Debug, Clone)]
pub enum DataLine {
ValueDef(ValueDef),

View File

@@ -27,6 +27,7 @@ u64 "U64"
\.string "STR_DEF"
\.zstring "ZSTR_DEF"
\.interrupt "INTERRUPT_DEF"
\.align "ALIGN_DEF"
"([^"]|\\[\\nt0"'])*" "STRING"
add "ADD"
sub "SUB"

View File

@@ -8,11 +8,11 @@ Top -> Vec<Directive>:
Directive -> Directive:
'DIR_META' MetaBlock { Directive::Meta(MetaSection { lines: $2 }) }
| 'DIR_SECTION' Name SectionOrg DataBlock {
| 'DIR_SECTION' Name SectionOrg 'LBRACE' DataBlocks 'RBRACE' {
Directive::Data(DataSection {
name: $2,
org: $3,
lines: $4,
blocks: $5,
})
}
| 'DIR_INCLUDE' String { Directive::Include($2) }
@@ -33,7 +33,22 @@ SectionOrg -> SectionOrg:
| Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) }
;
DataBlock -> Vec<DataLine>: 'LBRACE' DataLines 'RBRACE' { $2 };
DataBlocks -> Vec<AlignedBlock>:
DataLines AlignedBlocks {
let front = AlignedBlock { alignment: IntSize::U8, block: $1 };
$2.insert(0, front);
$2
}
;
AlignedBlocks -> Vec<AlignedBlock>:
AlignedBlocks AlignedBlock { $1.push($2); $1 }
| { Vec::new() }
;
AlignedBlock -> AlignedBlock:
'ALIGN_DEF' IntSize DataLines { AlignedBlock { alignment: $2, block: $3 } }
;
DataLines -> Vec<DataLine>:
DataLines DataLine { $1.push($2); $1 }
@@ -55,15 +70,20 @@ ValueDef -> ValueDef:
| 'STR_DEF' String { ValueDef::String($2) }
| 'ZSTR_DEF' String { ValueDef::ZString($2) }
| 'INTERRUPT_DEF' Int 'COMMA' ConstValue { ValueDef::Interrupt($2 != 0, $4) }
//| 'ALIGN_DEF' IntSize { ValueDef::Align($2) }
;
IntSize -> IntSize:
'U8' { IntSize::U8 }
| 'U16' { IntSize::U16 }
| 'U32' { IntSize::U32 }
| 'U64' { IntSize::U64 }
;
Value -> Value:
ConstValue { $1 }
| 'LPAREN' Value 'RPAREN' { Value::Addr(Box::new($2), IntSize::U64) }
| 'LPAREN' Value 'RPAREN' 'U8' { Value::Addr(Box::new($2), IntSize::U8) }
| 'LPAREN' Value 'RPAREN' 'U16' { Value::Addr(Box::new($2), IntSize::U16) }
| 'LPAREN' Value 'RPAREN' 'U32' { Value::Addr(Box::new($2), IntSize::U32) }
| 'LPAREN' Value 'RPAREN' 'U64' { Value::Addr(Box::new($2), IntSize::U64) }
| 'LPAREN' Value 'RPAREN' IntSize { Value::Addr(Box::new($2), $4) }
//| 'LBRACKET' ArrayValues 'RBRACKET' { Value::Array($2) }
;

View File

@@ -166,11 +166,11 @@ impl State {
}
/// Invoke an interrupt.
pub fn interrupt(&mut self, return_ip: u64, index: usize, aux: u64) -> Result<()> {
pub fn interrupt(&mut self, return_ip: u64, index: usize, aux: u64) -> Result<u64> {
assert!(index < IVT_LENGTH, "invalid interrupt index");
let interrupt = self.ivt()?[index];
if !interrupt.enabled() {
return Ok(());
return Ok(return_ip);
}
let fp = self.fp();
@@ -193,11 +193,11 @@ impl State {
self.set_reg_unchecked(R00, index as u64);
self.set_reg_unchecked(R01, aux);
Ok(())
Ok(self.ip())
}
/// Exit/return from the current interrupt.
pub fn exit_interrupt(&mut self) -> Result<()> {
pub fn exit_interrupt(&mut self) -> Result<u64> {
let fp = self.fp();
let sp = fp + 48;
@@ -209,7 +209,7 @@ impl State {
self.pop(Dest::Reg(FLAGS))?;
self.pop(Dest::Reg(IP))?;
self.pop(Dest::Reg(FP))?;
Ok(())
Ok(self.ip())
}
////////////////////////////////////////////////////////////////////////////////
@@ -247,7 +247,7 @@ impl State {
Inst::Div(d, s) => {
let src = self.load_source(s)?;
if src == 0 {
return self.interrupt(next_ip, DIVIDE_BY_ZERO, 0);
next_ip = self.interrupt(next_ip, DIVIDE_BY_ZERO, 0)?;
} else {
let value = self.load_dest(d)?.wrapping_div(src);
self.store_dest(d, value)?;
@@ -359,12 +359,10 @@ impl State {
Inst::Int(v, a) => {
let vector = self.load_source(v)?;
let aux = self.load_source(a)?;
// this method immediately jumps, so don't let the next_ip be set below
return self.interrupt(next_ip, vector as usize, aux);
next_ip = self.interrupt(next_ip, vector as usize, aux)?;
}
Inst::IRet => {
// this method immediately jumps, so don't let the next_ip be set below
return self.exit_interrupt();
next_ip = self.exit_interrupt()?;
}
Inst::Mov(d, s) => {
let value = self.load_source(s)?;

View File

@@ -75,9 +75,9 @@ struct Options {
#[structopt(short = "o", long)]
out: Option<PathBuf>,
/// Only run the preprocessor.
#[structopt(short = "E", long)]
preprocess_only: bool,
/// Disassemble object that would be passed to VM and exit before running it.
#[structopt(short = "d", long)]
disassemble: bool,
/// Only compile the input file to an object.
#[structopt(short = "c", long)]
@@ -108,7 +108,7 @@ fn get_writer(path: impl AsRef<Path>) -> Result<Box<dyn Write>> {
fn main() -> Result<()> {
use vm::{
state::State,
obj::assemble,
obj::{assemble, disassemble::Disasm},
};
let opt = Options::from_args();
@@ -124,6 +124,13 @@ fn main() -> Result<()> {
let mut writer = get_writer(&outfile)?;
writer.write(&bytes)?;
Ok(())
} else if opt.disassemble {
let outfile = opt.out.as_ref()
.map(|p| p.as_path())
.unwrap_or_else(|| Path::new("-"));
let mut writer = get_writer(&outfile)?;
object.disasm(&mut writer)?;
Ok(())
} else {
let mut state = State::new();
state.load_object(object, opt.max_mem.unwrap_or(DEFAULT_MAX_MEM))?;