Finish up parser and assembler with more-or-less complete syntax

Major changes include:

* Bit the bullet and now instructions have their length hard-coded
* Move from_utf8 object parsing to be done by their objects (instead of
  a Parser god object)
* A list of AST sections is assembled into an Object using the new
  vm::obj::assemble module.
* Changed the object layout some in the spec, and adjusted code to match
  this.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-02-09 13:04:56 -05:00
parent 329e61e087
commit e198da5825
19 changed files with 739 additions and 311 deletions

View File

@@ -0,0 +1,19 @@
use snafu::Snafu;
use std::{
fmt::Debug,
io,
};
/// Errors reported while assembling an AST into an object.
#[derive(Debug, Snafu)]
pub enum AssembleError {
/// An I/O operation failed; `source` carries the underlying error.
#[snafu(display("IO error: {}", source))]
Io { source: io::Error },
/// A symbol name was defined more than once.
#[snafu(display("duplicate symbol name: {}", name))]
DuplicateName { name: String },
/// An exported symbol name was defined more than once.
#[snafu(display("duplicate exported symbol name: {}", name))]
DuplicateExportName { name: String },
}
/// `Result` alias whose error type defaults to [`AssembleError`].
pub type Result<T, E = AssembleError> = std::result::Result<T, E>;

View File

344
src/vm/obj/assemble/mod.rs Normal file
View File

@@ -0,0 +1,344 @@
pub mod error;
use crate::vm::{
inst::*,
obj::{assemble::error::*, obj::*, syn::ast::*},
reg::Reg,
};
use byteorder::{WriteBytesExt, LE};
use std::{
collections::{HashMap, HashSet},
convert::TryFrom,
io::Cursor,
mem,
};
/// Layout version stored in the `version` field of every assembled `Object`.
pub const LAYOUT_VERSION: u32 = 0;
/// Assembles a parsed list of section blocks into an [`Object`].
impl TryFrom<&'_ Vec<SectionBlock>> for Object {
    type Error = AssembleError;

    /// Runs a fresh [`Assemble`] pass over `other` and forwards its result
    /// unchanged.
    fn try_from(other: &Vec<SectionBlock>) -> Result<Self, Self::Error> {
        let mut assembler = Assemble::new(other);
        assembler.assemble()
    }
}
/// Assembler state: the AST being assembled and the symbols known so far.
pub struct Assemble<'a> {
/// Borrowed list of section blocks to assemble.
ast: &'a Vec<SectionBlock>,
/// Global and current-section local label values.
symbols: SymbolTable,
}
impl<'a> Assemble<'a> {
pub fn new(ast: &'a Vec<SectionBlock>) -> Self {
Assemble {
ast,
symbols: Default::default(),
}
}
/// Assembles the borrowed AST into an [`Object`].
///
/// Works in two passes over the section blocks:
///
/// 1. The exported labels of every block are merged into the global symbol
///    table.
/// 2. Each block is encoded in order. Before encoding, the block's own
///    labels replace the local symbol table, so a local label shadows a
///    global of the same name. Data and code blocks become byte sections;
///    meta blocks become name/value entry sections.
///
/// # Errors
///
/// * [`AssembleError::DuplicateExportName`] when a block exports a name that
///   an earlier block already exported.
/// * Any error returned by `gather_symbols` (a label defined twice within
///   one block).
///
/// # Panics
///
/// Panics when an immediate value, meta entry or instruction operand names a
/// label that is in neither the local nor the global table.
pub fn assemble(&mut self) -> Result<Object> {
    // Pass 1: gather exported (global) symbols from every block.
    for block in self.ast.iter() {
        let exports = Self::gather_symbols(block, true)?;
        // Look each new export up directly instead of materialising two key
        // sets just to intersect them.
        let globals = self.symbols.globals();
        if let Some(name) = exports.keys().find(|name| globals.contains_key(*name)) {
            return Err(AssembleError::DuplicateExportName {
                name: name.to_string(),
            });
        }
        self.symbols.globals_mut().extend(exports);
    }

    // Pass 2: encode every block with its own labels as the local scope.
    // NOTE(review): label values come from `gather_symbols`, which counts
    // offsets from the start of the block and does not add the section
    // origin -- confirm that section-relative values are what is intended.
    let mut pos = 0;
    let mut sections = Vec::new();
    for block in self.ast.iter() {
        let locals = Self::gather_symbols(block, false)?;
        self.symbols.replace_locals(locals);

        let (org, body, is_code) = match block {
            SectionBlock::Data { org, body } => (org, body, false),
            SectionBlock::Code { org, body } => (org, body, true),
            SectionBlock::Meta { entries } => {
                let entries = entries
                    .iter()
                    .map(|(name, value)| {
                        (
                            name.to_string(),
                            self.get_value(value).expect("TODO : value label not found"),
                        )
                    })
                    .collect();
                sections.push(Section::Meta { entries });
                continue;
            }
        };

        let mut bytes: Vec<u8> = Vec::new();
        for line in body {
            match line {
                Line::Inst(inst) => bytes.extend(self.assemble_inst(inst)),
                Line::ImmValue(value) => {
                    let value = self.get_value(value).expect("TODO : value label not found");
                    bytes.extend_from_slice(&value.to_le_bytes());
                }
                // Labels and export markers emit no bytes.
                Line::LabelDef(_) | Line::Export(_) => {}
            }
        }

        // Without an explicit origin the section is placed right after the
        // previous one.
        // NOTE(review): an explicit range is taken as-is; nothing checks
        // that the encoded bytes fit inside it.
        let (start, end) = match org {
            Some(SectionOrg::Start(start)) => (*start, *start + bytes.len() as u64),
            Some(SectionOrg::Range(start, end)) => (*start, *end),
            None => (pos, pos + bytes.len() as u64),
        };
        pos = end;

        sections.push(if is_code {
            Section::Code {
                start,
                end,
                contents: bytes,
            }
        } else {
            Section::Data {
                start,
                end,
                contents: bytes,
            }
        });
    }

    Ok(Object {
        version: LAYOUT_VERSION,
        sections,
    })
}
/// Collects the labels defined in `block`, keyed by name and mapped to their
/// byte offset from the start of the block.
///
/// The offset advances by `inst.len()` for each instruction line and by 8
/// for each immediate-value line. Meta blocks define no labels and yield an
/// empty map.
///
/// When `export` is `true`, only labels that are also named by an export
/// line in the same block are returned; otherwise every label is returned.
///
/// # Errors
///
/// Returns [`AssembleError::DuplicateName`] when a label is defined more than
/// once in the block.
fn gather_symbols(block: &SectionBlock, export: bool) -> Result<HashMap<String, u64>> {
    let body = match block {
        SectionBlock::Data { body, .. } | SectionBlock::Code { body, .. } => body,
        SectionBlock::Meta { .. } => return Ok(Default::default()),
    };

    let mut offset = 0;
    let mut found = HashMap::new();
    let mut exported = HashSet::new();
    for line in body.iter() {
        match line {
            Line::Inst(inst) => offset += inst.len(),
            Line::ImmValue(_) => offset += 8,
            Line::LabelDef(label) => {
                // A previous value under the same key means a redefinition.
                if found.insert(label.to_string(), offset as u64).is_some() {
                    return Err(AssembleError::DuplicateName {
                        name: label.to_string(),
                    });
                }
            }
            Line::Export(name) if export => {
                exported.insert(name);
            }
            Line::Export(_) => {}
        }
    }

    // Narrow the result to exported labels only when asked to.
    if export {
        found.retain(|name, _| exported.contains(name));
    }
    Ok(found)
}
/// Resolves an immediate operand to a number.
///
/// A numeric literal resolves to itself; a label is looked up in the symbol
/// table, giving `None` when the name is unknown.
fn get_value(&self, value: &ImmValue) -> Option<u64> {
    match value {
        ImmValue::Label(name) => self.symbols.get(name),
        ImmValue::Number(literal) => Some(*literal),
    }
}
fn assemble_inst(&self, inst: &Inst) -> Vec<u8> {
let mut builder = InstBuilder::default();
builder = match inst {
Inst::Add(r1, r2) => builder.op(ADD).r1(*r1).r2(*r2),
Inst::Mul(r1, r2) => builder.op(MUL).r1(*r1).r2(*r2),
Inst::Div(r1, r2) => builder.op(DIV).r1(*r1).r2(*r2),
Inst::Mod(r1, r2) => builder.op(MOD).r1(*r1).r2(*r2),
Inst::INeg(r1) => builder.op(INEG).r1(*r1),
Inst::And(r1, r2) => builder.op(AND).r1(*r1).r2(*r2),
Inst::Or(r1, r2) => builder.op(OR).r1(*r1).r2(*r2),
Inst::Inv(r1) => builder.op(INV).r1(*r1),
Inst::Not(r1) => builder.op(NOT).r1(*r1),
Inst::Xor(r1, r2) => builder.op(XOR).r1(*r1).r2(*r2),
Inst::Shl(r1, r2) => builder.op(SHL).r1(*r1).r2(*r2),
Inst::Shr(r1, r2) => builder.op(SHR).r1(*r1).r2(*r2),
Inst::CmpEq(r1, r2) => builder.op(CMPEQ).r1(*r1).r2(*r2),
Inst::CmpLt(r1, r2) => builder.op(CMPLT).r1(*r1).r2(*r2),
Inst::Jmp(r1) => builder.op(JMP).r1(*r1),
Inst::Jz(r1) => builder.op(JZ).r1(*r1),
Inst::Jnz(r1) => builder.op(JNZ).r1(*r1),
Inst::Load(r1, r2) => builder.op(LOAD).r1(*r1).r2(*r2),
Inst::Store(r1, r2) => builder.op(STORE).r1(*r1).r2(*r2),
Inst::StoreImm(r1, imm) => {
let imm = match imm {
ImmValue::Number(num) => *num,
ImmValue::Label(name) => {
self.symbols.get(name).expect("TODO: value label not found")
}
};
if imm <= (u32::max_value() as u64) {
builder.op(STOREIMM32).r1(*r1).imm32(imm as u32)
} else {
builder.op(STOREIMM64).r1(*r1).imm64(imm)
}
}
Inst::MemCopy(r1, r2) => builder.op(MEMCOPY).r1(*r1).r2(*r2),
Inst::RegCopy(r1, r2) => builder.op(REGCOPY).r1(*r1).r2(*r2),
Inst::Nop => builder.op(NOP),
Inst::Halt => builder.op(HALT),
};
builder.finish()
}
}
/// Collects the parts of one instruction and serialises them to bytes.
///
/// Every field starts out unset; the setters consume and return the builder
/// so calls can be chained, and `finish` produces the encoded bytes.
#[derive(Debug, Default)]
struct InstBuilder {
    /// Opcode; must be set before calling `finish`.
    op: Option<InstOp>,
    /// First register operand.
    r1: Option<Reg>,
    /// Second register operand.
    r2: Option<Reg>,
    /// 32-bit immediate operand.
    imm32: Option<u32>,
    /// 64-bit immediate operand.
    imm64: Option<u64>,
}

impl InstBuilder {
    /// Sets the opcode.
    fn op(self, op: InstOp) -> Self {
        Self {
            op: Some(op),
            ..self
        }
    }

    /// Sets the first register operand.
    fn r1(self, r1: Reg) -> Self {
        Self {
            r1: Some(r1),
            ..self
        }
    }

    /// Sets the second register operand.
    fn r2(self, r2: Reg) -> Self {
        Self {
            r2: Some(r2),
            ..self
        }
    }

    /// Sets the 32-bit immediate operand.
    fn imm32(self, imm32: u32) -> Self {
        Self {
            imm32: Some(imm32),
            ..self
        }
    }

    /// Sets the 64-bit immediate operand.
    fn imm64(self, imm64: u64) -> Self {
        Self {
            imm64: Some(imm64),
            ..self
        }
    }

    /// Serialises the instruction as little-endian bytes.
    ///
    /// The opcode is written first as a `u16`. Accepted operand shapes are:
    /// two registers, one register, one register plus a 32-bit immediate, and
    /// one register plus a 64-bit immediate. Any other shape is accepted only
    /// for `HALT` and `NOP`, which emit the opcode alone.
    ///
    /// # Panics
    ///
    /// Panics when no opcode was set, or when the operand shape is not one of
    /// the accepted ones and the opcode is neither `HALT` nor `NOP`.
    fn finish(self) -> Vec<u8> {
        let opcode = self.op.expect("no op specified");
        // In the 16-bit word after the opcode, the first register is shifted
        // to bit 10 and the second to bit 4.
        let first = |reg: Reg| (reg as u16) << 10;
        let second = |reg: Reg| (reg as u16) << 4;

        let mut out = Cursor::new(Vec::new());
        out.write_u16::<LE>(opcode).unwrap();
        match (self.r1, self.r2, self.imm32, self.imm64) {
            (Some(a), Some(b), None, None) => {
                out.write_u16::<LE>(first(a) | second(b)).unwrap();
            }
            (Some(a), None, None, None) => {
                out.write_u16::<LE>(first(a)).unwrap();
            }
            (Some(a), None, Some(imm), None) => {
                out.write_u16::<LE>(first(a)).unwrap();
                out.write_u32::<LE>(imm).unwrap();
            }
            (Some(a), None, None, Some(imm)) => {
                out.write_u16::<LE>(first(a)).unwrap();
                // A zero u32 precedes the 64-bit immediate; presumably this
                // is padding required by the instruction layout -- confirm
                // against the spec.
                out.write_u32::<LE>(0).unwrap();
                out.write_u64::<LE>(imm).unwrap();
            }
            _ if opcode == HALT || opcode == NOP => {}
            _ => {
                panic!(
                    r#"invalid instruction combo for opcode 0x{:04x}:
r1 : {:?}
r2 : {:?}
imm32 : {:?}
imm64 : {:?}"#,
                    opcode, self.r1, self.r2, self.imm32, self.imm64
                );
            }
        }
        out.into_inner()
    }
}
/// Two-level table of label values: globals shared across sections and
/// locals belonging to the section currently being assembled.
///
/// Lookups consult the locals first, so a local label shadows a global one
/// with the same name.
#[derive(Debug, Clone, Default)]
struct SymbolTable {
    /// Label values visible from every section.
    globals: HashMap<String, u64>,
    /// Label values of the current section only.
    locals: HashMap<String, u64>,
}

impl SymbolTable {
    /// Creates an empty table.
    pub fn new() -> Self {
        Self::default()
    }

    /// Read-only view of the global symbols.
    pub fn globals(&self) -> &HashMap<String, u64> {
        &self.globals
    }

    /// Read-only view of the local symbols.
    pub fn locals(&self) -> &HashMap<String, u64> {
        &self.locals
    }

    /// Mutable access to the global symbols.
    pub fn globals_mut(&mut self) -> &mut HashMap<String, u64> {
        &mut self.globals
    }

    /// Mutable access to the local symbols.
    pub fn locals_mut(&mut self) -> &mut HashMap<String, u64> {
        &mut self.locals
    }

    /// Inserts a global symbol, returning the value it replaced, if any.
    pub fn insert_global(&mut self, name: String, value: u64) -> Option<u64> {
        self.globals.insert(name, value)
    }

    /// Inserts a local symbol, returning the value it replaced, if any.
    pub fn insert_local(&mut self, name: String, value: u64) -> Option<u64> {
        self.locals.insert(name, value)
    }

    /// Swaps in a new set of local symbols and returns the previous set.
    pub fn replace_locals(&mut self, locals: HashMap<String, u64>) -> HashMap<String, u64> {
        mem::replace(&mut self.locals, locals)
    }

    /// Looks `name` up among the locals first and the globals second.
    ///
    /// Takes `&str` so callers need not own a `String`; a `&String` argument
    /// still works through deref coercion.
    pub fn get(&self, name: &str) -> Option<u64> {
        self.locals
            .get(name)
            .or_else(|| self.globals.get(name))
            .copied()
    }
}