Files
rasp/src/vm/obj/assemble.rs
2020-02-25 12:07:24 -05:00

430 lines
14 KiB
Rust

use crate::vm::{
addr::*,
inst,
obj::{obj::*, syn::ast::*},
};
use byteorder::{WriteBytesExt, LE};
use snafu::Snafu;
use std::collections::HashMap;
/// A syntax-tree node that can be assembled into an output value.
///
/// Implementations receive the shared assembler state ([`Asm`]) mutably so
/// they can resolve names and read or advance the current position.
pub trait Assemble {
/// The value produced by a successful assembly of this node.
type Out;
/// Assembles `self` using (and possibly updating) the state in `asm`.
///
/// # Errors
///
/// Returns an [`AssembleError`] when the node cannot be assembled.
fn assemble(&self, asm: &mut Asm) -> Result<Self::Out>;
}
/// Mutable state threaded through every [`Assemble::assemble`] call.
#[derive(Debug, Default)]
pub struct Asm {
// Stack of name -> address tables. `lookup_name` searches it from the last
// pushed table back to the first, so later tables shadow earlier ones.
names: Vec<HashMap<String, Addr>>,
// Current position. Data sections set it to their start address and advance
// it after each line; `Value::Here` assembles to this value.
pos: Addr,
}
impl Asm {
    /// Gets all names defined in a data section, their positions, and puts them into a hashmap.
    ///
    /// The address counter starts at the section's origin and is advanced by
    /// the length of every value definition and instruction; each label is
    /// recorded at the address reached when it is encountered.
    ///
    /// # Errors
    ///
    /// Returns [`AssembleError::DuplicateLabel`] if the same label appears
    /// more than once in the section.
    ///
    /// # Panics
    ///
    /// Panics if the sum of the line lengths disagrees with `section.len()`.
    fn gather_names(&self, section: &DataSection) -> Result<HashMap<String, Addr>> {
        let mut names = HashMap::new();
        let mut addr = Addr(section.org.start());
        for line in section.lines.iter() {
            match line {
                DataLine::ValueDef(v) => addr += v.len(),
                DataLine::Inst(inst) => addr += inst.len(),
                // Exports occupy no space and define nothing by themselves.
                DataLine::Export(_) => {}
                DataLine::Label(label) => {
                    // `insert` returns the previous value when the key was
                    // already present, which means the label is a duplicate.
                    if names.insert(label.to_string(), addr).is_some() {
                        return Err(AssembleError::DuplicateLabel {
                            name: label.to_string(),
                        });
                    }
                }
            }
        }
        // Sanity check: walking the lines must cover exactly the section length.
        assert_eq!(addr, Addr(section.org.start() + (section.len() as u64)));
        Ok(names)
    }

    /// Gets an address value from a name, if it exists. Searches local -> global.
    ///
    /// The name tables are scanned from the most recently pushed one back to
    /// the first, and the first match wins.
    ///
    /// # Errors
    ///
    /// Returns [`AssembleError::UnknownName`] if no table contains `name`.
    fn lookup_name(&self, name: &str) -> Result<Addr> {
        self.names
            .iter()
            .rev()
            .find_map(|scope| scope.get(name).copied())
            .ok_or_else(|| AssembleError::UnknownName {
                name: name.to_string(),
            })
    }
}
impl Assemble for Vec<SectionDef> {
    type Out = Object;

    /// Assembles a whole list of sections into an [`Object`].
    ///
    /// Works in two steps:
    /// 1. Every data section is scanned for its labels, and the labels it
    ///    exports are collected into one global name table.
    /// 2. `asm.names` is reset to contain only that global table, and each
    ///    section is assembled in order.
    ///
    /// # Errors
    ///
    /// * [`AssembleError::UnknownExport`] if a section exports a name it does
    ///   not define as a label.
    /// * [`AssembleError::DuplicateExport`] if the same name is exported more
    ///   than once.
    /// * Any error produced while gathering names or assembling a section.
    fn assemble(&self, asm: &mut Asm) -> Result<Self::Out> {
        // collect globals
        let mut globals = HashMap::new();
        for section in self.iter() {
            // Only data sections can define and export names.
            let section = if let SectionDef::Data(d) = section {
                d
            } else {
                continue;
            };
            let names = asm.gather_names(section)?;
            for export in section.exports() {
                let addr = *names
                    .get(export)
                    .ok_or_else(|| AssembleError::UnknownExport {
                        name: export.to_string(),
                    })?;
                // A previous value coming back from `insert` means another
                // export already claimed this name.
                if globals.insert(export.to_string(), addr).is_some() {
                    return Err(AssembleError::DuplicateExport {
                        name: export.to_string(),
                    });
                }
            }
        }
        // TODO : detect section overlap
        // TODO : single meta section
        asm.names.clear();
        asm.names.push(globals);
        let sections = self
            .iter()
            .map(|section| section.assemble(asm))
            .collect::<Result<_>>()?;
        Ok(Object {
            version: OBJ_VERSION,
            sections,
        })
    }
}
impl Assemble for SectionDef {
    type Out = Section;

    /// Assembles the section by delegating to the wrapped data or meta
    /// section definition.
    fn assemble(&self, asm: &mut Asm) -> Result<Self::Out> {
        match self {
            SectionDef::Meta(meta) => meta.assemble(asm),
            SectionDef::Data(data) => data.assemble(asm),
        }
    }
}
impl Assemble for DataSection {
    type Out = Section;

    /// Assembles a data section into its raw bytes.
    ///
    /// The section's labels are made visible as the innermost name scope
    /// while its lines are assembled, and `asm.pos` tracks the address of the
    /// line currently being assembled. The scope is removed again before
    /// returning, whether or not assembly succeeded.
    ///
    /// When the section declares an explicit end, the end is treated as
    /// exclusive (the same convention used to derive the end of a section
    /// that only declares a start) and is only used for validation: the
    /// resulting [`Section::Data`] always has the length of its contents.
    ///
    /// # Errors
    ///
    /// * [`AssembleError::DuplicateLabel`] if a label is defined twice.
    /// * [`AssembleError::StartGreaterThanEnd`] if the declared range is
    ///   reversed.
    /// * [`AssembleError::SectionTooShort`] if the declared range cannot hold
    ///   the section's contents.
    /// * Any error produced while assembling a line.
    ///
    /// # Panics
    ///
    /// Panics if the number of bytes produced differs from `self.len()`.
    fn assemble(&self, asm: &mut Asm) -> Result<Self::Out> {
        let names = asm.gather_names(self)?;
        let section_len = self.len() as u64;
        let (start, end) = match self.org {
            SectionOrg::Start(start) => (start, start + section_len),
            SectionOrg::StartEnd(start, end) => (start, end),
        };
        asm.pos = Addr(start);
        if start > end {
            return Err(AssembleError::StartGreaterThanEnd { start, end });
        }
        // `start <= end` holds here, so the subtraction cannot underflow
        // (this includes the empty-section case where `start == end`).
        let capacity = end - start;
        if section_len > capacity {
            return Err(AssembleError::SectionTooShort {
                section_end: end,
                section_size: start + section_len,
            });
        }

        // Only enter the local scope once validation has passed, and always
        // leave it again below, so an error never leaves a stale scope on
        // the name stack.
        asm.names.push(names);
        let mut contents = Vec::with_capacity(section_len as usize);
        let mut outcome: Result<()> = Ok(());
        for line in self.lines.iter() {
            match line.assemble(asm) {
                Ok(bytes) => {
                    contents.extend(bytes);
                    asm.pos += line.len();
                }
                Err(err) => {
                    outcome = Err(err);
                    break;
                }
            }
        }
        asm.names.pop();
        outcome?;

        assert_eq!(
            contents.len() as u64,
            section_len,
            "in section {}",
            self.name
        );
        Ok(Section::Data {
            start,
            len: section_len,
            contents,
        })
    }
}
impl Assemble for MetaSection {
    type Out = Section;

    /// Builds the name -> value table of a meta section.
    ///
    /// Integer values are stored as-is, and names are resolved to their
    /// address through the assembler's name tables.
    ///
    /// # Errors
    ///
    /// * [`AssembleError::DuplicateMetaName`] if an entry name repeats.
    /// * [`AssembleError::UnknownName`] if a referenced name is not defined.
    /// * [`AssembleError::IllegalMetaValue`] for register, "here" and
    ///   dereference values.
    fn assemble(&self, asm: &mut Asm) -> Result<Self::Out> {
        let mut entries = HashMap::new();
        for meta in self.lines.iter() {
            let key = &meta.name;
            if entries.contains_key(key) {
                return Err(AssembleError::DuplicateMetaName {
                    name: key.to_string(),
                });
            }
            let resolved = match &meta.value {
                Value::Int(literal) => *literal,
                Value::Name(symbol) => {
                    let target = asm.lookup_name(symbol.as_str())?;
                    target.0
                }
                // TODO :
                // * deref constexpr?
                // * pre-startup static init?
                Value::Reg(_) | Value::Here | Value::Addr(_, _) => {
                    return Err(AssembleError::IllegalMetaValue {
                        name: key.to_string(),
                        value: meta.value.clone(),
                    });
                }
            };
            entries.insert(key.to_string(), resolved);
        }
        Ok(Section::Meta { entries })
    }
}
impl Assemble for DataLine {
    type Out = Vec<u8>;

    /// Produces the bytes contributed by one line of a data section.
    ///
    /// Value definitions and instructions are assembled; exports and labels
    /// contribute no bytes.
    fn assemble(&self, asm: &mut Asm) -> Result<Self::Out> {
        let bytes = match self {
            DataLine::Export(_) | DataLine::Label(_) => Vec::new(),
            DataLine::Inst(instruction) => instruction.assemble(asm)?,
            DataLine::ValueDef(definition) => definition.assemble(asm)?,
        };
        Ok(bytes)
    }
}
impl Assemble for ValueDef {
    type Out = Vec<u8>;

    /// Encodes a value definition as bytes.
    ///
    /// Integers become their little-endian bytes. Strings become the
    /// little-endian bytes of their byte length followed by the string's
    /// bytes. The assembler state is not used.
    fn assemble(&self, _: &mut Asm) -> Result<Self::Out> {
        let encoded = match self {
            ValueDef::Int(number) => number.to_le_bytes().to_vec(),
            ValueDef::String(text) => {
                let prefix = text.len().to_le_bytes();
                let mut buf = Vec::with_capacity(prefix.len() + text.len());
                buf.extend_from_slice(&prefix);
                buf.extend(text.bytes());
                buf
            }
            // NOTE(review): this is encoded exactly like `String` (length
            // prefix, no terminator) - confirm that is intended for ZString.
            ValueDef::ZString(text) => {
                let prefix = text.len().to_le_bytes();
                let mut buf = Vec::with_capacity(prefix.len() + text.len());
                buf.extend_from_slice(&prefix);
                buf.extend(text.bytes());
                buf
            }
        };
        Ok(encoded)
    }
}
impl Assemble for Inst {
type Out = Vec<u8>;
/// Encodes a single instruction as bytes.
///
/// Every instruction starts with its opcode as a little-endian `u16`.
/// Instructions with operands follow that with one byte describing the
/// operand encodings and then the assembled bytes of each operand.
///
/// # Errors
///
/// Returns [`AssembleError::IllegalDestValue`] when the destination operand
/// has no destination encoding, and propagates any error from assembling an
/// operand.
///
/// # Panics
///
/// Panics if the number of bytes produced differs from `self.len()`.
fn assemble(&self, asm: &mut Asm) -> Result<Self::Out> {
let len = self.len();
// Local helper with one arm per instruction shape. It uses `self`, `asm`
// and `len` from the enclosing function. The `unwrap()` calls are on
// writes into an in-memory `Vec<u8>`.
macro_rules! map_inst {
// Two operands: opcode, encoding byte, destination bytes, source bytes.
($op:expr, $dest:expr, $source:expr) => {{
let mut bytes = Vec::with_capacity(len);
bytes.write_u16::<LE>($op).unwrap();
let dest = $dest;
let dest_encoding =
dest.dest_encoding()
.ok_or_else(|| AssembleError::IllegalDestValue {
value: dest.clone(),
})?;
let source = $source;
let source_encoding = source.source_encoding();
// Destination encoding is shifted left by 4 bits and OR-ed with the
// source encoding to form the single encoding byte.
bytes
.write_u8((dest_encoding << 4) | source_encoding)
.unwrap();
bytes.extend(dest.assemble(asm)?);
bytes.extend(source.assemble(asm)?);
assert_eq!(
self.len(),
bytes.len(),
"instruction size mismatch in {} instruction - {:?} produces these bytes {:?}",
stringify!($op),
self,
bytes
);
Ok(bytes)
}};
// One operand: opcode, encoding byte, operand bytes.
($op:expr, $source:expr) => {{
let mut bytes = Vec::with_capacity(len);
bytes.write_u16::<LE>($op).unwrap();
let source = $source;
// With a single operand its encoding is shifted left by 4 bits.
let source_encoding = source.source_encoding() << 4;
bytes.write_u8(source_encoding).unwrap();
bytes.extend(source.assemble(asm)?);
assert_eq!(
self.len(),
bytes.len(),
"instruction size mismatch in {} instruction - {:?} produces these bytes {:?}",
stringify!($op),
self,
bytes
);
Ok(bytes)
}};
// No operands: only the opcode is emitted.
($op:expr) => {{
let mut bytes = Vec::with_capacity(len);
bytes.write_u16::<LE>($op).unwrap();
assert_eq!(
self.len(),
bytes.len(),
"instruction size mismatch in {} instruction - {:?} produces these bytes {:?}",
stringify!($op),
self,
bytes
);
Ok(bytes)
}};
}
// Map each instruction variant to its opcode constant and operands.
match self {
Inst::Add(v1, v2) => map_inst!(inst::ADD, v1, v2),
Inst::Sub(v1, v2) => map_inst!(inst::SUB, v1, v2),
Inst::Mul(v1, v2) => map_inst!(inst::MUL, v1, v2),
Inst::Div(v1, v2) => map_inst!(inst::DIV, v1, v2),
Inst::Mod(v1, v2) => map_inst!(inst::MOD, v1, v2),
Inst::And(v1, v2) => map_inst!(inst::AND, v1, v2),
Inst::Or(v1, v2) => map_inst!(inst::OR, v1, v2),
Inst::Xor(v1, v2) => map_inst!(inst::XOR, v1, v2),
Inst::Shl(v1, v2) => map_inst!(inst::SHL, v1, v2),
Inst::Shr(v1, v2) => map_inst!(inst::SHR, v1, v2),
Inst::INeg(v1, v2) => map_inst!(inst::INEG, v1, v2),
Inst::Inv(v1, v2) => map_inst!(inst::INV, v1, v2),
Inst::Not(v1, v2) => map_inst!(inst::NOT, v1, v2),
// TODO/BUG: CmpEq and CmpLt both take two sources instead of a source and destination
Inst::CmpEq(v1, v2) => map_inst!(inst::CMPEQ, v1, v2),
Inst::CmpLt(v1, v2) => map_inst!(inst::CMPLT, v1, v2),
Inst::Mov(v1, v2) => map_inst!(inst::MOV, v1, v2),
Inst::Jmp(v) => map_inst!(inst::JMP, v),
Inst::Jz(v) => map_inst!(inst::JZ, v),
Inst::Jnz(v) => map_inst!(inst::JNZ, v),
Inst::Halt => map_inst!(inst::HALT),
Inst::Nop => map_inst!(inst::NOP),
Inst::Dump => map_inst!(inst::DUMP),
}
}
}
impl Assemble for Value {
type Out = Vec<u8>;
fn assemble(&self, asm: &mut Asm) -> Result<Self::Out> {
match self {
Value::Int(i) => Ok(i.to_le_bytes().to_vec()),
Value::Reg(r) => Ok(vec![*r]),
Value::Name(name) => {
let value = asm.lookup_name(name.as_str())?;
Ok(value.0.to_le_bytes().to_vec())
}
Value::Here => Ok(asm.pos.0.to_le_bytes().to_vec()),
Value::Addr(v, _) => if let Value::Addr(_, _) = &**v {
// double deref is not allowed
todo!()
} else {
v.assemble(asm)
}
}
}
}
/// Errors that can occur while assembling a parsed program into an object.
#[derive(Debug, Snafu)]
pub enum AssembleError {
// A referenced name was not found in any name table.
#[snafu(display("unknown name: {}", name))]
UnknownName { name: String },
// A section exports a name that it does not define as a label.
#[snafu(display("unknown export name: {}", name))]
UnknownExport { name: String },
// The same label is defined more than once within one data section.
#[snafu(display("duplicate label definition: {}", name))]
DuplicateLabel { name: String },
// The same entry name appears more than once within one meta section.
#[snafu(display("duplicate meta entry name: {}", name))]
DuplicateMetaName { name: String },
// A meta entry holds a kind of value that is not allowed there.
#[snafu(display("illegal meta value for entry name {}: {:?}", name, value))]
IllegalMetaValue { name: String, value: Value },
// The same name is exported more than once.
#[snafu(display("duplicate exported name: {}", name))]
DuplicateExport { name: String },
// A section's declared start address lies after its declared end address.
#[snafu(display("section start ({:#x}) is greater than end ({:#x})", start, end))]
StartGreaterThanEnd { start: u64, end: u64 },
// A section's declared end does not fit its content; `section_size` holds
// the start address plus the content length.
#[snafu(display(
"section end ({:#x}) too short for section content size ({:#x})",
section_end,
section_size
))]
SectionTooShort { section_end: u64, section_size: u64 },
// An instruction's destination operand has no destination encoding.
#[snafu(display("illegal instruction destination value: {:?}", value))]
IllegalDestValue { value: Value },
// A dereference was applied to a value that is itself a dereference.
// Note that the display message does not include `value`.
#[snafu(display("deref of a deref value is not allowed"))]
DoubleDeref { value: Value },
}
/// Result alias used throughout the assembler; the error type defaults to
/// [`AssembleError`].
pub type Result<T, E = AssembleError> = std::result::Result<T, E>;
#[cfg(test)]
mod test {
    use super::*;

    /// Checks that, for every two-operand instruction exercised below, the
    /// length reported by `Inst::len()` matches the number of bytes the
    /// instruction actually assembles to, across a range of destination and
    /// source operand kinds.
    #[test]
    fn test_inst_len() {
        let mut asm = Asm::default();
        // Provide a single name table so `Value::Name("test")` resolves.
        asm.names.push(vec![("test".to_string(), Addr(0u64))].into_iter().collect());
        macro_rules! assert_len {
            ($inst:expr) => {{
                // Evaluate the instruction expression once and reuse it for
                // both the reported and the assembled length.
                let inst = $inst;
                let asm_size = inst.assemble(&mut asm).unwrap().len();
                assert_eq!(inst.len(), asm_size, "Instruction {:?}.len() indicates it should be {} bytes long but was assembled as {} bytes", inst, inst.len(), asm_size);
            }};
        }
        use Inst::*;
        let dummy_dests = &[
            Value::Reg(0),
            Value::Addr(Box::new(Value::Reg(0)), IntSize::U8),
            Value::Addr(Box::new(Value::Here), IntSize::U16),
            Value::Addr(Box::new(Value::Name("test".to_string())), IntSize::U32),
            Value::Addr(Box::new(Value::Int(0)), IntSize::U64),
        ];
        let dummy_sources = &[
            Value::Int(0),
            Value::Reg(0),
            Value::Name("test".to_string()),
            Value::Here,
            Value::Addr(Box::new(Value::Reg(0)), IntSize::U8),
            Value::Addr(Box::new(Value::Here), IntSize::U16),
            Value::Addr(Box::new(Value::Name("test".to_string())), IntSize::U32),
            Value::Addr(Box::new(Value::Int(0)), IntSize::U32),
        ];
        for v1 in dummy_dests {
            for v2 in dummy_sources {
                assert_len!(Add(v1.clone(), v2.clone()));
                assert_len!(Sub(v1.clone(), v2.clone()));
                assert_len!(Mul(v1.clone(), v2.clone()));
                assert_len!(Div(v1.clone(), v2.clone()));
                assert_len!(Mod(v1.clone(), v2.clone()));
                assert_len!(And(v1.clone(), v2.clone()));
                assert_len!(Or(v1.clone(), v2.clone()));
                assert_len!(Xor(v1.clone(), v2.clone()));
                assert_len!(Shl(v1.clone(), v2.clone()));
                assert_len!(Shr(v1.clone(), v2.clone()));
                assert_len!(INeg(v1.clone(), v2.clone()));
                assert_len!(Inv(v1.clone(), v2.clone()));
                assert_len!(Not(v1.clone(), v2.clone()));
                assert_len!(Mov(v1.clone(), v2.clone()));
                // TODO more length tests
            }
        }
    }
}