diff --git a/src/main.rs b/src/main.rs index 882d1ad..f820a7c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,14 +21,29 @@ fn get_input_string() -> io::Result { } fn main() -> Result<()> { - use vm::obj::syn::{lexer, parser}; + use vm::{ + state::State, + obj::{ + assemble::{Asm, Assemble}, + syn::{lexer, parser}, + } + }; let text = get_input_string()?; let lexerdef = lexer::lexerdef(); let lexer = lexerdef.lexer(&text); let (res, errors) = parser::parse(&lexer); - for err in errors { + for err in errors.iter() { println!("{}", err.pp(&lexer, &parser::token_epp)); } - println!("{:?}", res); + if !errors.is_empty() { + return Ok(()); + } + let res = res.unwrap(); + let mut asm = Asm::default(); + let object = res.assemble(&mut asm)?; + let mut state = State::new(); + state.load_object(object, 64 * 1024 * 1024)?; + let status = state.exec()?; + println!("exit status: {}", status); Ok(()) } diff --git a/src/vm/addr.rs b/src/vm/addr.rs index 0b102aa..e13ab95 100644 --- a/src/vm/addr.rs +++ b/src/vm/addr.rs @@ -4,7 +4,7 @@ use std::{ ops::{Add, AddAssign}, }; -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Addr(pub u64); impl LowerHex for Addr { diff --git a/src/vm/error.rs b/src/vm/error.rs index 9d5db5a..3eeb6fd 100644 --- a/src/vm/error.rs +++ b/src/vm/error.rs @@ -5,14 +5,21 @@ use snafu::Snafu; pub enum VmError { #[snafu(display("illegal register: 0x{:02x}", reg))] IllegalReg { reg: Reg }, + #[snafu(display("memory address out of bounds: 0x{:016x}", addr))] MemOutOfBounds { addr: Addr }, + #[snafu(display("illegal instruction opcode: 0x{:04x}", op))] IllegalOp { op: InstOp }, + #[snafu(display("illegal destination specification: 0b{:08b}", spec))] IllegalDestSpec { spec: u8 }, + #[snafu(display("illegal source specification: 0b{:08b}", spec))] IllegalSourceSpec { spec: u8 }, + + #[snafu(display("object to load spans too much memory"))] + ObjectTooLarge { object_size: usize, max_mem: usize }, } pub type Result = std::result::Result; diff --git a/src/vm/inst.rs b/src/vm/inst.rs index 309f6c6..13539e3 100644 --- a/src/vm/inst.rs +++ b/src/vm/inst.rs @@ -46,6 +46,7 @@ instructions! { DUMP = 0xF002, } +#[derive(Debug, Clone, Copy)] pub enum Inst { Add(Dest, Source), Sub(Dest, Source), @@ -98,6 +99,33 @@ impl Inst { Inst::Dump => DUMP, } } + + pub fn len(&self) -> usize { + match self { + Inst::Add(dest, source) + | Inst::Sub(dest, source) + | Inst::Mul(dest, source) + | Inst::Div(dest, source) + | Inst::Mod(dest, source) + | Inst::And(dest, source) + | Inst::Or(dest, source) + | Inst::Xor(dest, source) + | Inst::Shl(dest, source) + | Inst::Shr(dest, source) + | Inst::INeg(dest, source) + | Inst::Inv(dest, source) + | Inst::Not(dest, source) + | Inst::Mov(dest, source) => { 3 + dest.len() + source.len() } + Inst::CmpEq(s1, s2) + | Inst::CmpLt(s1, s2) => { 3 + s1.len() + s2.len() } + Inst::Jmp(v) + | Inst::Jz(v) + | Inst::Jnz(v) => { 3 + v.len() } + Inst::Halt + | Inst::Nop + | Inst::Dump => { 2 } + } + } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -110,6 +138,16 @@ pub enum Source { Imm(u64), } +impl Source { + pub fn len(&self) -> usize { + match self { + Source::Addr64(_) | Source::Addr32(_) | Source::Addr16(_) | Source::Addr8(_) => 8, + Source::Reg(_) => 1, + Source::Imm(_) => 8, + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Dest { Addr64(Addr), @@ -119,6 +157,15 @@ pub enum Dest { Reg(Reg), } +impl Dest { + pub fn len(&self) -> usize { + match self { + Dest::Addr64(_) | Dest::Addr32(_) | Dest::Addr16(_) | Dest::Addr8(_) => 8, + Dest::Reg(_) => 1, + } + } +} + pub const DEST_ADDR64: u8 = 0b0000; pub const DEST_ADDR32: u8 = 0b0001; pub const DEST_ADDR16: u8 = 0b0010; diff --git a/src/vm/mem.rs b/src/vm/mem.rs index 9071a58..79fcb15 100644 --- a/src/vm/mem.rs +++ b/src/vm/mem.rs @@ -1,78 +1,65 @@ use crate::vm::{addr::*, error::*, inst::*, reg::*}; -use std::{convert::TryInto, ops::Index, mem}; +use byteorder::{ReadBytesExt, WriteBytesExt, LE}; +use std::io::Cursor; pub struct MemCursor { - pos: Addr, - mem: T, + cursor: Cursor, } impl MemCursor - where T: AsRef<[u8]> + where Cursor: ReadBytesExt, + T: AsRef<[u8]> { pub fn new(mem: T) -> Self { - MemCursor { pos: Addr(0), mem } + MemCursor { cursor: Cursor::new(mem) } } - pub fn position(&self) -> Addr { - self.pos + pub fn position(&self) -> u64 { + self.cursor.position() } pub fn set_position>(&mut self, position: P) { - self.pos = position.into(); + self.cursor.set_position((position.into()).0) } pub fn next_u8_unchecked(&mut self) -> u8 { - let val = self[self.pos]; - self.pos += 1u64; - val + self.cursor.read_u8().unwrap() } pub fn next_u8(&mut self) -> Result { - self.check_addr(self.pos) + self.check_addr(self.position()) .map(|_| self.next_u8_unchecked()) } pub fn next_u16_unchecked(&mut self) -> u16 { - let (int_bytes, _) = self.mem.as_ref() - .split_at(mem::size_of::()); - let val = u16::from_le_bytes(int_bytes.try_into().unwrap()); - self.pos += 2u64; - val + self.cursor.read_u16::().unwrap() } pub fn next_u16(&mut self) -> Result { - self.check_addr(self.pos) + self.check_addr(self.position()) .map(|_| self.next_u16_unchecked()) } pub fn next_u32_unchecked(&mut self) -> u32 { - let (int_bytes, _) = self.mem.as_ref() - .split_at(mem::size_of::()); - let val = u32::from_le_bytes(int_bytes.try_into().unwrap()); - self.pos += 4u64; - val + self.cursor.read_u32::().unwrap() } pub fn next_u32(&mut self) -> Result { - self.check_addr(self.pos) + self.check_addr(self.position()) .map(|_| self.next_u32_unchecked()) } pub fn next_u64_unchecked(&mut self) -> u64 { - let (int_bytes, _) = self.mem.as_ref() - .split_at(mem::size_of::()); - let val = u64::from_le_bytes(int_bytes.try_into().unwrap()); - self.pos += 8u64; - val + self.cursor.read_u64::().unwrap() } pub fn next_u64(&mut self) -> Result { - self.check_addr(self.pos) + self.check_addr(self.position()) .map(|_| self.next_u64_unchecked()) } pub fn next_addr(&mut self) -> Result { - self.check_addr(self.pos) + self.check_addr(self.position()) .map(|_| self.next_addr_unchecked()) } @@ -81,6 +68,7 @@ impl MemCursor } pub fn next_inst(&mut self) -> Result { + let start = self.position(); let op = self.next_u16()?; macro_rules! dest_source { @@ -102,7 +90,7 @@ impl MemCursor Ok(Inst::$variant(source)) }}; } - match op { + let inst = match op { ADD => dest_source!(Add), SUB => dest_source!(Sub), MUL => dest_source!(Mul), @@ -126,7 +114,11 @@ impl MemCursor NOP => Ok(Inst::Nop), DUMP => Ok(Inst::Dump), _ => Err(VmError::IllegalOp { op }), - } + }?; + let end = self.position(); + let len = (end - start) as usize; + assert_eq!(len, inst.len()); + Ok(inst) } fn next_source_source(&mut self) -> Result<(Source, Source)> { @@ -181,16 +173,62 @@ impl MemCursor Ok(reg) } } +} - fn check_addr(&self, addr: Addr) -> Result<()> { - if addr > self.mem.as_ref().len() { - Err(VmError::MemOutOfBounds { addr }) +impl MemCursor + where T: AsRef<[u8]> +{ + fn check_addr(&self, addr: u64) -> Result<()> { + if addr > (self.cursor.get_ref().as_ref().len() as u64) { + Err(VmError::MemOutOfBounds { addr: Addr(addr) }) } else { Ok(()) } } } +impl MemCursor + where Cursor: WriteBytesExt, + T: AsRef<[u8]> +{ + pub fn write_u8_unchecked(&mut self, value: u8) { + self.cursor.write_u8(value).unwrap(); + } + + pub fn write_u8(&mut self, value: u8) -> Result<()> { + self.check_addr(self.position()) + .map(|_| self.write_u8_unchecked(value)) + } + + pub fn write_u16_unchecked(&mut self, value: u16) { + self.cursor.write_u16::(value).unwrap(); + } + + pub fn write_u16(&mut self, value: u16) -> Result<()> { + self.check_addr(self.position()) + .map(|_| self.write_u16_unchecked(value)) + } + + pub fn write_u32_unchecked(&mut self, value: u32) { + self.cursor.write_u32::(value).unwrap(); + } + + pub fn write_u32(&mut self, value: u32) -> Result<()> { + self.check_addr(self.position()) + .map(|_| self.write_u32_unchecked(value)) + } + + pub fn write_u64_unchecked(&mut self, value: u64) { + self.cursor.write_u64::(value).unwrap(); + } + + pub fn write_u64(&mut self, value: u64) -> Result<()> { + self.check_addr(self.position()) + .map(|_| self.write_u64_unchecked(value)) + } +} + +/* //////////////////////////////////////////////////////////////////////////////// // Index impl //////////////////////////////////////////////////////////////////////////////// @@ -217,3 +255,4 @@ impl> Index for MemCursor { self.index(addr.0) } } +*/ diff --git a/src/vm/obj/assemble.rs b/src/vm/obj/assemble.rs new file mode 100644 index 0000000..b59a352 --- /dev/null +++ b/src/vm/obj/assemble.rs @@ -0,0 +1,325 @@ +use crate::vm::{ + addr::*, + inst, + obj::{obj::*, syn::ast::*}, +}; +use byteorder::{WriteBytesExt, LE}; +use snafu::Snafu; +use std::collections::HashMap; + +pub trait Assemble { + type Out; + fn assemble(&self, asm: &mut Asm) -> Result; +} + +#[derive(Debug, Default)] +pub struct Asm { + names: Vec>, + pos: Addr, +} + +impl Asm { + /// Gets all names defined in a data section, their positions, and puts them into a hashmap. + fn gather_names(&self, section: &DataSection) -> Result> { + let mut names = HashMap::new(); + let mut addr = Addr(0); + for line in section.lines.iter() { + match line { + DataLine::ValueDef(v) => addr += v.len(), + DataLine::Inst(inst) => addr += inst.len(), + DataLine::Export(_) => {} + DataLine::Label(label) => { + if let Some(_) = names.insert(label.to_string(), addr) { + return Err(AssembleError::DuplicateLabel { + name: label.to_string(), + }); + } + } + } + } + assert_eq!(addr, section.len()); + Ok(names) + } + + /// Gets an address value from a name, if it exists. Searches local -> global. + fn lookup_name(&self, name: &str) -> Result { + self.names + .iter() + .rev() + .filter_map(|names| names.get(name).copied()) + .next() + .ok_or_else(|| AssembleError::UnknownName { name: name.to_string() }) + } +} + +impl Assemble for Vec { + type Out = Object; + fn assemble(&self, asm: &mut Asm) -> Result { + // collect globals + let mut globals = HashMap::new(); + for section in self.iter() { + let section = if let SectionDef::Data(d) = section { + d + } else { + continue; + }; + let names = asm.gather_names(section)?; + for export in section.exports() { + let addr = *names + .get(export) + .ok_or_else(|| AssembleError::UnknownExport { + name: export.to_string(), + })?; + if globals.contains_key(export) { + return Err(AssembleError::DuplicateExport { + name: export.to_string(), + })?; + } + globals.insert(export.to_string(), addr); + } + } + // TODO : detect section overlap + // TODO : single meta section + asm.names.clear(); + asm.names.push(globals); + + let sections = self.iter() + .map(|section| section.assemble(asm)) + .collect::>()?; + Ok(Object { version: OBJ_VERSION, sections, }) + } +} + +impl Assemble for SectionDef { + type Out = Section; + + fn assemble(&self, asm: &mut Asm) -> Result { + match self { + SectionDef::Data(section) => section.assemble(asm), + SectionDef::Meta(section) => section.assemble(asm), + } + } +} + +impl Assemble for DataSection { + type Out = Section; + + fn assemble(&self, asm: &mut Asm) -> Result { + let names = asm.gather_names(self)?; + asm.names.push(names); + asm.pos = Addr(0); + let section_len = self.len() as u64; + let (start, end) = match self.org { + SectionOrg::Start(start) => (start, start + (section_len as u64)), + SectionOrg::StartEnd(start, end) => (start, end), + }; + if start > end { + return Err(AssembleError::StartGreaterThanEnd { start, end, }); + } + let len = end - start - 1; + if len > section_len { + return Err(AssembleError::SectionTooShort { + section_end: end, + section_size: start + section_len, + }); + } + + let mut contents = Vec::with_capacity(section_len as usize); + for line in self.lines.iter() { + contents.extend(line.assemble(asm)?); + asm.pos += line.len(); + } + assert_eq!(contents.len() as u64, section_len, "in section {}", self.name); + asm.names.pop(); + Ok(Section::Data { + start, + len: section_len, + contents, + }) + } +} + +impl Assemble for MetaSection { + type Out = Section; + + fn assemble(&self, asm: &mut Asm) -> Result { + let mut entries = HashMap::new(); + for line in self.lines.iter() { + if entries.contains_key(&line.name) { + return Err(AssembleError::DuplicateMetaName { name: line.name.to_string() }); + } + let value = match &line.value { + Value::Int(i) => *i, + Value::Name(s) => asm.lookup_name(s.as_str())?.0, + Value::Reg(_) | Value::Here => return Err(AssembleError::IllegalMetaValue { + name: line.name.to_string(), + value: line.value.clone(), + }), + }; + entries.insert(line.name.to_string(), value); + } + Ok(Section::Meta { entries }) + } +} + +impl Assemble for DataLine { + type Out = Vec; + + fn assemble(&self, asm: &mut Asm) -> Result { + match self { + DataLine::ValueDef(v) => v.assemble(asm), + DataLine::Inst(i) => i.assemble(asm), + DataLine::Export(_) | DataLine::Label(_) => Ok(Vec::new()), + } + } +} + +impl Assemble for ValueDef { + type Out = Vec; + + fn assemble(&self, _: &mut Asm) -> Result { + match self { + ValueDef::Int(x) => Ok(x.to_le_bytes().to_vec()), + ValueDef::String(s) => { + let bytes = s.bytes(); + let mut out = s.len().to_le_bytes().to_vec(); + out.extend(bytes); + Ok(out) + } + ValueDef::ZString(z) => { + let bytes = z.bytes(); + let mut out = z.len().to_le_bytes().to_vec(); + out.extend(bytes); + Ok(out) + } + } + } +} + +impl Assemble for Inst { + type Out = Vec; + + fn assemble(&self, asm: &mut Asm) -> Result { + let len = self.len(); + + macro_rules! map_inst { + ($op:expr, $dest:expr, $source:expr) => {{ + let mut bytes = Vec::with_capacity(len); + bytes.write_u16::($op).unwrap(); + let dest = $dest; + let dest_encoding = dest.dest_encoding() + .ok_or_else(|| AssembleError::IllegalDestValue { value: dest.clone(), })?; + let source = $source; + let source_encoding = source.source_encoding(); + bytes.write_u8((dest_encoding << 4) | source_encoding).unwrap(); + bytes.extend(dest.assemble(asm)?); + bytes.extend(source.assemble(asm)?); + assert_eq!( + self.len(), bytes.len(), + "instruction size mismatch in {} instruction - {:?} produces these bytes {:?}", + stringify!($op), self, bytes + ); + Ok(bytes) + }}; + + ($op:expr, $source:expr) => {{ + let mut bytes = Vec::with_capacity(len); + bytes.write_u16::($op).unwrap(); + let source = $source; + let source_encoding = source.source_encoding(); + bytes.write_u8(source_encoding).unwrap(); + bytes.extend(source.assemble(asm)?); + assert_eq!( + self.len(), bytes.len(), + "instruction size mismatch in {} instruction - {:?} produces these bytes {:?}", + stringify!($op), self, bytes + ); + Ok(bytes) + }}; + + ($op:expr) => {{ + let mut bytes = Vec::with_capacity(len); + bytes.write_u16::($op).unwrap(); + assert_eq!( + self.len(), bytes.len(), + "instruction size mismatch in {} instruction - {:?} produces these bytes {:?}", + stringify!($op), self, bytes + ); + Ok(bytes) + }}; + } + match self { + Inst::Add(v1, v2) => map_inst!(inst::ADD, v1, v2), + Inst::Sub(v1, v2) => map_inst!(inst::SUB, v1, v2), + Inst::Mul(v1, v2) => map_inst!(inst::MUL, v1, v2), + Inst::Div(v1, v2) => map_inst!(inst::DIV, v1, v2), + Inst::Mod(v1, v2) => map_inst!(inst::MOD, v1, v2), + Inst::And(v1, v2) => map_inst!(inst::AND, v1, v2), + Inst::Or(v1, v2) => map_inst!(inst::OR, v1, v2), + Inst::Xor(v1, v2) => map_inst!(inst::XOR, v1, v2), + Inst::Shl(v1, v2) => map_inst!(inst::SHL, v1, v2), + Inst::Shr(v1, v2) => map_inst!(inst::SHR, v1, v2), + Inst::INeg(v1, v2) => map_inst!(inst::INEG, v1, v2), + Inst::Inv(v1, v2) => map_inst!(inst::INV, v1, v2), + Inst::Not(v1, v2) => map_inst!(inst::NOT, v1, v2), + // TODO/BUG: CmpEq and CmpLt both take two sources instead of a source and destination + Inst::CmpEq(v1, v2) => map_inst!(inst::CMPEQ, v1, v2), + Inst::CmpLt(v1, v2) => map_inst!(inst::CMPLT, v1, v2), + Inst::Mov(v1, v2) => map_inst!(inst::MOV, v1, v2), + Inst::Jmp(v) => map_inst!(inst::JMP, v), + Inst::Jz(v) => map_inst!(inst::JZ, v), + Inst::Jnz(v) => map_inst!(inst::JNZ, v), + Inst::Halt => map_inst!(inst::HALT), + Inst::Nop => map_inst!(inst::NOP), + Inst::Dump => map_inst!(inst::DUMP), + } + } +} + +impl Assemble for Value { + type Out = Vec; + + fn assemble(&self, asm: &mut Asm) -> Result { + match self { + Value::Int(i) => Ok(i.to_le_bytes().to_vec()), + Value::Reg(r) => Ok(vec![*r]), + Value::Name(name) => { + let value = asm.lookup_name(name.as_str())?; + Ok(value.0.to_le_bytes().to_vec()) + } + Value::Here => Ok(asm.pos.0.to_le_bytes().to_vec()), + } + } +} + +#[derive(Debug, Snafu)] +pub enum AssembleError { + #[snafu(display("unknown name: {}", name))] + UnknownName { name: String }, + + #[snafu(display("unknown export name: {}", name))] + UnknownExport { name: String }, + + #[snafu(display("duplicate label definition: {}", name))] + DuplicateLabel { name: String }, + + #[snafu(display("duplicate meta entry name: {}", name))] + DuplicateMetaName { name: String }, + + #[snafu(display("illegal meta value for entry name {}: {:?}", name, value))] + IllegalMetaValue { name: String, value: Value }, + + #[snafu(display("duplicate exported name: {}", name))] + DuplicateExport { name: String }, + + #[snafu(display("section start ({:#x}) is greater than end ({:#x})", start, end))] + StartGreaterThanEnd { start: u64, end: u64 }, + + #[snafu(display("section end ({:#x}) too short for section content size ({:#x})", section_end, section_size))] + SectionTooShort { section_end: u64, section_size: u64 }, + + #[snafu(display("illegal instruction destination value: {:?}", value))] + IllegalDestValue { value: Value, }, +} + +pub type Result = std::result::Result; diff --git a/src/vm/obj/mod.rs b/src/vm/obj/mod.rs index 4f04478..b7c4760 100644 --- a/src/vm/obj/mod.rs +++ b/src/vm/obj/mod.rs @@ -1,3 +1,4 @@ +pub mod assemble; pub mod error; pub mod obj; pub mod syn; diff --git a/src/vm/obj/obj.rs b/src/vm/obj/obj.rs index 05a8ba8..b8138f1 100644 --- a/src/vm/obj/obj.rs +++ b/src/vm/obj/obj.rs @@ -8,6 +8,7 @@ use std::{ }; pub const MAGIC: u64 = 0xDEAD_BEA7_BA5E_BA11; +pub const OBJ_VERSION: u32 = 0; const OBJECT_HEADER_LEN: usize = 16; // 8 + 4 + 4 #[derive(Debug)] @@ -20,7 +21,7 @@ impl Object { pub fn from_bytes(bytes: &[u8]) -> Result { let mut cursor = Cursor::new(bytes); let magic = cursor.read_u64::()?; - if magic != magic { + if magic != MAGIC { return Err(ParseError::WrongMagic); } let version = cursor.read_u32::()?; @@ -33,6 +34,14 @@ impl Object { } Ok(Object { version, sections }) } + + pub fn virtual_len(&self) -> usize { + self.sections.iter() + .map(|s| match s { + Section::Data { start, len, .. } => { (start + len) as usize } + Section::Meta { .. } => { 0 } + }).max().unwrap_or(0) + } } macro_rules! section_kind { @@ -66,7 +75,6 @@ macro_rules! section_kind { section_kind! { pub enum SectionKind { Data = 0x00, - Code = 0x10, Meta = 0xFF, } } @@ -75,12 +83,7 @@ section_kind! { pub enum Section { Data { start: u64, - end: u64, - contents: Vec, - }, - Code { - start: u64, - end: u64, + len: u64, contents: Vec, }, Meta { @@ -98,7 +101,6 @@ impl Section { let kind: SectionKind = cursor.read_u8()?.try_into()?; match kind { SectionKind::Data => Section::data_section_from_bytes(bytes), - SectionKind::Code => Section::code_section_from_bytes(bytes), SectionKind::Meta => Section::meta_section_from_bytes(bytes), } } @@ -106,23 +108,11 @@ impl Section { fn data_section_from_bytes(bytes: &[u8]) -> Result { let mut cursor = Cursor::new(bytes); let start = cursor.read_u64::()?; - let end = cursor.read_u64::()?; + let len = cursor.read_u64::()?; let contents = &bytes[cursor.position() as usize..]; Ok(Section::Data { start, - end, - contents: From::from(contents), - }) - } - - fn code_section_from_bytes(bytes: &[u8]) -> Result { - let mut cursor = Cursor::new(bytes); - let start = cursor.read_u64::()?; - let end = cursor.read_u64::()?; - let contents = &bytes[cursor.position() as usize..]; - Ok(Section::Code { - start, - end, + len, contents: From::from(contents), }) } diff --git a/src/vm/obj/syn/ast.rs b/src/vm/obj/syn/ast.rs index cef91f7..0a39693 100644 --- a/src/vm/obj/syn/ast.rs +++ b/src/vm/obj/syn/ast.rs @@ -1,4 +1,7 @@ -use crate::vm::reg::Reg; +use crate::vm::{ + inst, + reg::Reg, +}; #[derive(Debug, Clone)] pub enum SectionDef { @@ -8,7 +11,7 @@ pub enum SectionDef { #[derive(Debug, Clone)] pub struct MetaSection { - pub values: Vec, + pub lines: Vec, } #[derive(Debug, Clone)] @@ -20,10 +23,27 @@ pub struct MetaLine { #[derive(Debug, Clone)] pub struct DataSection { pub name: String, - pub org: Option, + pub org: SectionOrg, pub lines: Vec, } +impl DataSection { + pub fn exports(&self) -> impl Iterator { + self.lines.iter() + .filter_map(|line| if let DataLine::Export(s) = line { + Some(s.as_str()) + } else { + None + }) + } + + pub fn len(&self) -> usize { + self.lines.iter() + .map(DataLine::len) + .sum() + } +} + #[derive(Debug, Clone)] pub enum SectionOrg { Start(u64), @@ -38,6 +58,16 @@ pub enum DataLine { Label(String), } +impl DataLine { + pub fn len(&self) -> usize { + match self { + DataLine::ValueDef(v) => v.len(), + DataLine::Inst(i) => i.len(), + DataLine::Export(_) | DataLine::Label(_) => 0, + } + } +} + #[derive(Debug, Clone)] pub enum ValueDef { Int(u64), @@ -45,6 +75,16 @@ pub enum ValueDef { ZString(String), } +impl ValueDef { + pub fn len(&self) -> usize { + match self { + ValueDef::Int(_) => 8, + ValueDef::String(s) => 8 + s.as_bytes().len(), + ValueDef::ZString(s) => s.as_bytes().len() + 1, + } + } +} + #[derive(Debug, Clone)] pub enum Value { Int(u64), @@ -52,7 +92,34 @@ pub enum Value { Name(String), Here, //Array(Vec), - //Deref(Value, Size + //Deref(Value, IntSize), +} + +impl Value { + pub fn len(&self) -> usize { + match self { + // TODO : immediate int sizes + Value::Int(_) => 8, + Value::Reg(_) => 1, + Value::Name(_) => 8, + Value::Here => 8, + } + } + + pub fn dest_encoding(&self) -> Option { + match self { + Value::Int(_) | Value::Name(_) | Value::Here => None, + Value::Reg(_) => Some(inst::DEST_REG), + } + } + + pub fn source_encoding(&self) -> u8 { + match self { + Value::Int(_) => inst::SOURCE_IMM64, + Value::Reg(_) => inst::SOURCE_REG, + Value::Name(_) | Value::Here => inst::SOURCE_IMM64, + } + } } #[derive(Debug, Clone)] @@ -80,3 +147,32 @@ pub enum Inst { Nop, Dump, } + +impl Inst { + pub fn len(&self) -> usize { + match self { + Inst::Add(v1, v2) + | Inst::Sub(v1, v2) + | Inst::Mul(v1, v2) + | Inst::Div(v1, v2) + | Inst::Mod(v1, v2) + | Inst::And(v1, v2) + | Inst::Or(v1, v2) + | Inst::Xor(v1, v2) + | Inst::Shl(v1, v2) + | Inst::Shr(v1, v2) + | Inst::INeg(v1, v2) + | Inst::Inv(v1, v2) + | Inst::Not(v1, v2) + | Inst::CmpEq(v1, v2) + | Inst::CmpLt(v1, v2) + | Inst::Mov(v1, v2) => { 3 + v1.len() + v2.len() } + Inst::Jmp(v) + | Inst::Jz(v) + | Inst::Jnz(v) => { 3 + v.len() } + Inst::Halt + | Inst::Nop + | Inst::Dump => { 2 } + } + } +} diff --git a/src/vm/obj/syn/parser.y b/src/vm/obj/syn/parser.y index fd011dd..a151209 100644 --- a/src/vm/obj/syn/parser.y +++ b/src/vm/obj/syn/parser.y @@ -7,8 +7,8 @@ SectionDefs -> Vec: ; SectionDef -> SectionDef: - 'DIR_META' MetaBlock { SectionDef::Meta(MetaSection { values: $2 }) } - | 'DIR_SECTION' Name MaybeSectionOrg DataBlock { + 'DIR_META' MetaBlock { SectionDef::Meta(MetaSection { lines: $2 }) } + | 'DIR_SECTION' Name SectionOrg DataBlock { SectionDef::Data(DataSection { name: $2, org: $3, @@ -27,11 +27,6 @@ MetaLines -> Vec: MetaLine -> MetaLine: Name 'COLON' Value { MetaLine { name: $1, value: $3 } }; -MaybeSectionOrg -> Option: - SectionOrg { Some($1) } - | { None } - ; - SectionOrg -> SectionOrg: Int { SectionOrg::Start($1) } | Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) } diff --git a/src/vm/state.rs b/src/vm/state.rs index c147b1f..0ce5cde 100644 --- a/src/vm/state.rs +++ b/src/vm/state.rs @@ -1,4 +1,4 @@ -use crate::vm::{addr::*, error::*, flags::*, mem::*, reg::*}; +use crate::vm::{addr::*, error::*, flags::*, inst::*, mem::*, obj::obj::*, reg::*}; pub struct State { regs: [u64; NUM_REGS], @@ -13,15 +13,49 @@ impl State { } } + pub fn load_object(&mut self, object: Object, max_mem: usize) -> Result<()> { + // TODO : detecting section overlap + let mem_len = object.virtual_len(); + if mem_len > max_mem { + return Err(VmError::ObjectTooLarge { + object_size: mem_len, + max_mem, + }); + } + let mut mem = vec![0u8; max_mem]; + for section in object.sections { + match section { + Section::Data { + start, + len, + contents, + } => { + for offset in 0..len { + mem[(start + offset) as usize] = contents[offset as usize]; + } + } + Section::Meta { entries } => { + if let Some(addr) = entries.get("entry") { + self.set_reg_unchecked(IP, *addr); + } + } + } + } + self.mem = mem; + + Ok(()) + } + pub fn mem_cursor(&self, addr: Addr) -> MemCursor<&[u8]> { let mut cursor = MemCursor::new(self.mem.as_slice()); cursor.set_position(addr); cursor } - pub fn run(&mut self) -> Result { - - Ok(self.get_reg_unchecked(STATUS)) + pub fn mem_cursor_mut(&mut self, addr: Addr) -> MemCursor<&mut [u8]> { + let mut cursor = MemCursor::new(self.mem.as_mut_slice()); + cursor.set_position(addr); + cursor } //////////////////////////////////////////////////////////////////////////////// @@ -51,6 +85,10 @@ impl State { } } + pub fn ip(&self) -> u64 { + self.get_reg_unchecked(IP) + } + //////////////////////////////////////////////////////////////////////////////// // Flags //////////////////////////////////////////////////////////////////////////////// @@ -60,6 +98,10 @@ impl State { unsafe { Flags::from_bits_unchecked(self.get_reg_unchecked(FLAGS)) } } + pub fn contains_flags(&self, flags: Flags) -> bool { + self.flags().contains(flags) + } + pub fn insert_flags(&mut self, flags: Flags) { let mut new_flags = self.flags(); new_flags.insert(flags); @@ -75,4 +117,157 @@ impl State { pub fn set_flags(&mut self, flags: Flags) { self.set_reg_unchecked(FLAGS, flags.bits()); } + + //////////////////////////////////////////////////////////////////////////////// + // Execution + //////////////////////////////////////////////////////////////////////////////// + pub fn is_halted(&self) -> bool { + self.contains_flags(Flags::HALT) + } + + pub fn exec(&mut self) -> Result { + while !self.is_halted() { + self.tick()?; + } + + Ok(self.get_reg_unchecked(STATUS)) + } + + fn tick(&mut self) -> Result<()> { + let mut cursor = self.mem_cursor(Addr(self.ip())); + let inst = cursor.next_inst()?; + let mut next_ip = self.ip() + (inst.len() as u64); + match inst { + Inst::Add(d, s) => { + let value = self.load_dest(d)?.wrapping_add(self.load_source(s)?); + self.store_dest(d, value)?; + } + Inst::Sub(d, s) => { + let value = self.load_dest(d)?.wrapping_sub(self.load_source(s)?); + self.store_dest(d, value)?; + } + Inst::Mul(d, s) => { + let value = self.load_dest(d)?.wrapping_mul(self.load_source(s)?); + self.store_dest(d, value)?; + } + Inst::Div(d, s) => { + // TODO : catch divide by zero + let value = self.load_dest(d)?.wrapping_div(self.load_source(s)?); + self.store_dest(d, value)?; + } + Inst::Mod(d, s) => { + let value = self.load_dest(d)? % self.load_source(s)?; + self.store_dest(d, value)?; + } + Inst::And(d, s) => { + let value = self.load_dest(d)? & self.load_source(s)?; + self.store_dest(d, value)?; + } + Inst::Or(d, s) => { + let value = self.load_dest(d)? | self.load_source(s)?; + self.store_dest(d, value)?; + } + Inst::Xor(d, s) => { + let value = self.load_dest(d)? ^ self.load_source(s)?; + self.store_dest(d, value)?; + } + Inst::Shl(d, s) => { + let value = self.load_dest(d)? << self.load_source(s)?; + self.store_dest(d, value)?; + } + Inst::Shr(d, s) => { + let value = self.load_dest(d)? >> self.load_source(s)?; + self.store_dest(d, value)?; + } + Inst::INeg(d, s) => { + let value = (!self.load_source(s)?).wrapping_add(1); + self.store_dest(d, value)?; + } + Inst::Inv(d, s) => { + let value = !self.load_source(s)?; + self.store_dest(d, value)?; + } + Inst::Not(d, s) => { + let value = (self.load_source(s)? == 0) as u64; + self.store_dest(d, value)?; + } + Inst::CmpEq(s1, s2) => { + let cmp = self.load_source(s1)? == self.load_source(s2)?; + if cmp { + self.insert_flags(Flags::COMPARE); + } else { + self.remove_flags(Flags::COMPARE); + } + } + Inst::CmpLt(s1, s2) => { + let cmp = self.load_source(s1)? < self.load_source(s2)?; + if cmp { + self.insert_flags(Flags::COMPARE); + } else { + self.remove_flags(Flags::COMPARE); + } + } + Inst::Jmp(s) => { + next_ip = self.load_source(s)?; + } + Inst::Jz(s) => { + if !self.contains_flags(Flags::COMPARE) { + next_ip = self.load_source(s)?; + } + } + Inst::Jnz(s) => { + if self.contains_flags(Flags::COMPARE) { + next_ip = self.load_source(s)?; + } + } + Inst::Mov(d, s) => { + let value = self.load_source(s)?; + self.store_dest(d, value)?; + } + Inst::Halt => { + self.insert_flags(Flags::HALT); + } + Inst::Nop => {} + Inst::Dump => { + // TODO - dump + } + } + self.set_reg_unchecked(IP, next_ip); + Ok(()) + } + + fn store_dest(&mut self, dest: Dest, value: u64) -> Result<()> { + match dest { + Dest::Addr64(a) => self.mem_cursor_mut(a).write_u64(value), + Dest::Addr32(a) => self + .mem_cursor_mut(a) + .write_u32((value & 0xffff_ffff) as u32), + Dest::Addr16(a) => self.mem_cursor_mut(a).write_u16((value & 0xffff) as u16), + Dest::Addr8(a) => self.mem_cursor_mut(a).write_u8((value & 0xff) as u8), + Dest::Reg(reg) => self.set_reg(reg, value), + } + } + + fn load_source(&self, source: Source) -> Result { + let value = match source { + Source::Addr64(a) => self.mem_cursor(a).next_u64()?, + Source::Addr32(a) => self.mem_cursor(a).next_u32()? as u64, + Source::Addr16(a) => self.mem_cursor(a).next_u16()? as u64, + Source::Addr8(a) => self.mem_cursor(a).next_u8()? as u64, + Source::Reg(reg) => self.get_reg(reg)?, + Source::Imm(u) => u, + }; + Ok(value) + } + + fn load_dest(&self, dest: Dest) -> Result { + let value = match dest { + Dest::Addr64(a) => self.mem_cursor(a).next_u64()?, + Dest::Addr32(a) => self.mem_cursor(a).next_u32()? as u64, + Dest::Addr16(a) => self.mem_cursor(a).next_u16()? as u64, + Dest::Addr8(a) => self.mem_cursor(a).next_u8()? as u64, + Dest::Reg(reg) => self.get_reg(reg)?, + }; + Ok(value) + } }