From e198da58250f3f53c5f76a18206b13f5f9141e4f Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Sun, 9 Feb 2020 13:04:56 -0500 Subject: [PATCH] Finish up parser and assembler with more-or-less complete syntax Major changes include: * Bit the bullet and now instructions have their length hard-coded * Move from_utf8 object parsing to be done by their objects (instead of a Parser god object) * A list of AST sections is assembled into an Object using the new vm::obj::assemble module. * Changed the object layout some in the spec, and adjusted code to match this. Signed-off-by: Alek Ratzloff --- src/main.rs | 8 +- src/vm/inst.rs | 37 ++- src/vm/mod.rs | 1 - src/vm/obj/assemble/error.rs | 19 ++ src/vm/obj/assemble/flatten.rs | 0 src/vm/obj/assemble/mod.rs | 344 ++++++++++++++++++++++++++++ src/vm/obj/error.rs | 42 ---- src/vm/obj/mod.rs | 4 +- src/vm/obj/obj.rs | 152 +++++++----- src/vm/obj/parser.rs | 108 --------- src/vm/obj/syn/ast.rs | 114 +++++++++ src/vm/obj/syn/convert.rs | 2 + src/vm/obj/syn/error.rs | 47 ++++ src/vm/obj/syn/mod.rs | 5 + src/vm/{ => obj}/syn/parser.lalrpop | 47 ++-- src/vm/syn/ast.rs | 49 ---- src/vm/syn/mod.rs | 4 - test.asm | 43 ++-- vm.md | 24 +- 19 files changed, 739 insertions(+), 311 deletions(-) create mode 100644 src/vm/obj/assemble/error.rs create mode 100644 src/vm/obj/assemble/flatten.rs create mode 100644 src/vm/obj/assemble/mod.rs delete mode 100644 src/vm/obj/parser.rs create mode 100644 src/vm/obj/syn/ast.rs create mode 100644 src/vm/obj/syn/convert.rs create mode 100644 src/vm/obj/syn/error.rs create mode 100644 src/vm/obj/syn/mod.rs rename src/vm/{ => obj}/syn/parser.lalrpop (64%) delete mode 100644 src/vm/syn/ast.rs delete mode 100644 src/vm/syn/mod.rs diff --git a/src/main.rs b/src/main.rs index 2d9d174..a31b315 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ mod common; mod vm; use std::{ + convert::TryFrom, io::{self, Read}, process, }; @@ -18,15 +19,16 @@ fn get_input_string() -> io::Result { } fn main() ->
Result<()> { - use vm::syn::parser::ProgramParser; + use vm::obj::syn::parser::SectionsParser; let contents = get_input_string()?; - let ast = match ProgramParser::new().parse(&contents) { + let ast = match SectionsParser::new().parse(&contents) { Ok(ast) => ast, Err(err) => { eprintln!("{}", err); process::exit(1); }, }; - println!("{:#?}", ast); + let obj = vm::obj::obj::Object::try_from(&ast)?; + println!("{:#?}", obj); Ok(()) } diff --git a/src/vm/inst.rs b/src/vm/inst.rs index 8f6f515..f2e246a 100644 --- a/src/vm/inst.rs +++ b/src/vm/inst.rs @@ -2,13 +2,14 @@ macro_rules! instructions { { $($variant:ident = $value:expr),* $(,)? } => { - pub type InstOp = u16; $( pub const $variant: InstOp = $value; )* }; } +pub type InstOp = u16; + instructions! { ADD = 0x0000, @@ -37,3 +38,37 @@ instructions! { HALT = 0xF000, NOP = 0xF001, } + +pub fn inst_len(op: InstOp) -> usize { + match op { + // 2 bytes + INEG + | INV + | NOT + | HALT + | NOP => 2, + // 4 bytes + ADD + | MUL + | DIV + | MOD + | AND + | OR + | XOR + | SHL + | SHR + | CMPEQ + | CMPLT + | JMP + | JZ + | JNZ + | LOAD + | REGCOPY + | MEMCOPY + | STORE => 4, + // Immediates - 4+ bytes + STOREIMM64 => 16, + STOREIMM32 => 8, + _ => panic!("unknown instruction op 0x{:04x}", op), + } +} diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 3cfc6c4..425f2f9 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -4,6 +4,5 @@ pub mod inst; pub mod mem; pub mod obj; pub mod reg; -pub mod syn; mod tick; pub mod vm; diff --git a/src/vm/obj/assemble/error.rs b/src/vm/obj/assemble/error.rs new file mode 100644 index 0000000..58a1737 --- /dev/null +++ b/src/vm/obj/assemble/error.rs @@ -0,0 +1,19 @@ +use snafu::Snafu; +use std::{ + fmt::Debug, + io, +}; + +#[derive(Debug, Snafu)] +pub enum AssembleError { + #[snafu(display("IO error: {}", source))] + Io { source: io::Error }, + + #[snafu(display("duplicate symbol name: {}", name))] + DuplicateName { name: String }, + + #[snafu(display("duplicate exported symbol name: {}", name))] + 
DuplicateExportName { name: String }, +} + +pub type Result = std::result::Result; diff --git a/src/vm/obj/assemble/flatten.rs b/src/vm/obj/assemble/flatten.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/vm/obj/assemble/mod.rs b/src/vm/obj/assemble/mod.rs new file mode 100644 index 0000000..48d6687 --- /dev/null +++ b/src/vm/obj/assemble/mod.rs @@ -0,0 +1,344 @@ +pub mod error; + +use crate::vm::{ + inst::*, + obj::{assemble::error::*, obj::*, syn::ast::*}, + reg::Reg, +}; +use byteorder::{WriteBytesExt, LE}; +use std::{ + collections::{HashMap, HashSet}, + convert::TryFrom, + io::Cursor, + mem, +}; + +pub const LAYOUT_VERSION: u32 = 0; + +impl TryFrom<&'_ Vec> for Object { + type Error = AssembleError; + + fn try_from(other: &Vec) -> Result { + // Assemble an AST to an object + Assemble::new(&other).assemble() + } +} + +pub struct Assemble<'a> { + ast: &'a Vec, + symbols: SymbolTable, +} + +impl<'a> Assemble<'a> { + pub fn new(ast: &'a Vec) -> Self { + Assemble { + ast, + symbols: Default::default(), + } + } + + pub fn assemble(&mut self) -> Result { + let mut pos = 0; + let mut sections = Vec::new(); + + // gather global symbols + for block in self.ast.iter() { + let exports = Self::gather_symbols(block, true)?; + // check if there are any duplicated exports + { + let export_keys = exports.keys().collect::>(); + let global_keys = self.symbols.globals().keys().collect::>(); + if let Some(key) = export_keys.intersection(&global_keys).next() { + return Err(AssembleError::DuplicateExportName { + name: key.to_string(), + }); + } + } + + self.symbols.globals_mut().extend(exports); + } + + for block in self.ast.iter() { + let locals = Self::gather_symbols(block, false)?; + self.symbols.replace_locals(locals); + + match block { + SectionBlock::Data { org, body } | SectionBlock::Code { org, body } => { + let mut bytes = Vec::new(); + for line in body { + match line { + Line::Inst(inst) => { + bytes.extend(self.assemble_inst(inst)); + } + Line::LabelDef(_) 
=> { /* no-op */ } + Line::ImmValue(value) => { + let value = + self.get_value(value).expect("TODO : value label not found"); + bytes.extend(&value.to_le_bytes()); + } + Line::Export(_) => { /* no-op */ } + } + } + let (start, end) = match org { + Some(SectionOrg::Start(start)) => (*start, start + bytes.len() as u64), + Some(SectionOrg::Range(start, end)) => (*start, *end), + None => (pos, pos + bytes.len() as u64), + }; + pos = end; + + let section = match block { + SectionBlock::Data { .. } => Section::Data { + start, + end, + contents: bytes, + }, + SectionBlock::Code { .. } => Section::Code { + start, + end, + contents: bytes, + }, + SectionBlock::Meta { .. } => unreachable!(), + }; + sections.push(section); + } + SectionBlock::Meta { entries } => { + let entries = entries + .iter() + .map(|(name, value)| { + ( + name.to_string(), + self.get_value(value).expect("TODO : value label not found"), + ) + }) + .collect(); + sections.push(Section::Meta { entries }); + } + } + } + Ok(Object { + version: LAYOUT_VERSION, + sections, + }) + } + + fn gather_symbols(block: &SectionBlock, export: bool) -> Result> { + match block { + SectionBlock::Data { body, .. } | SectionBlock::Code { body, .. } => { + let mut exports = HashSet::new(); + let mut labels = HashMap::new(); + let mut pos = 0; + for line in body.iter() { + match line { + Line::Inst(inst) => { + pos += inst.len(); + } + Line::LabelDef(label) => { + if labels.contains_key(label) { + return Err(AssembleError::DuplicateName { + name: label.to_string(), + }); + } else { + labels.insert(label.to_string(), pos as u64); + } + } + Line::ImmValue(_) => { + pos += 8; + } + Line::Export(name) => { + if export { + exports.insert(name); + } + } + } + } + // only return exports if specified + if export { + labels.retain(|k, _| exports.contains(k)); + } + Ok(labels) + } + SectionBlock::Meta { .. 
} => Ok(Default::default()), + } + } + + fn get_value(&self, value: &ImmValue) -> Option { + match value { + ImmValue::Number(n) => Some(*n), + ImmValue::Label(s) => self.symbols.get(s), + } + } + + fn assemble_inst(&self, inst: &Inst) -> Vec { + let mut builder = InstBuilder::default(); + builder = match inst { + Inst::Add(r1, r2) => builder.op(ADD).r1(*r1).r2(*r2), + Inst::Mul(r1, r2) => builder.op(MUL).r1(*r1).r2(*r2), + Inst::Div(r1, r2) => builder.op(DIV).r1(*r1).r2(*r2), + Inst::Mod(r1, r2) => builder.op(MOD).r1(*r1).r2(*r2), + Inst::INeg(r1) => builder.op(INEG).r1(*r1), + Inst::And(r1, r2) => builder.op(AND).r1(*r1).r2(*r2), + Inst::Or(r1, r2) => builder.op(OR).r1(*r1).r2(*r2), + Inst::Inv(r1) => builder.op(INV).r1(*r1), + Inst::Not(r1) => builder.op(NOT).r1(*r1), + Inst::Xor(r1, r2) => builder.op(XOR).r1(*r1).r2(*r2), + Inst::Shl(r1, r2) => builder.op(SHL).r1(*r1).r2(*r2), + Inst::Shr(r1, r2) => builder.op(SHR).r1(*r1).r2(*r2), + Inst::CmpEq(r1, r2) => builder.op(CMPEQ).r1(*r1).r2(*r2), + Inst::CmpLt(r1, r2) => builder.op(CMPLT).r1(*r1).r2(*r2), + Inst::Jmp(r1) => builder.op(JMP).r1(*r1), + Inst::Jz(r1) => builder.op(JZ).r1(*r1), + Inst::Jnz(r1) => builder.op(JNZ).r1(*r1), + Inst::Load(r1, r2) => builder.op(LOAD).r1(*r1).r2(*r2), + Inst::Store(r1, r2) => builder.op(STORE).r1(*r1).r2(*r2), + Inst::StoreImm(r1, imm) => { + let imm = match imm { + ImmValue::Number(num) => *num, + ImmValue::Label(name) => { + self.symbols.get(name).expect("TODO: value label not found") + } + }; + if imm <= (u32::max_value() as u64) { + builder.op(STOREIMM32).r1(*r1).imm32(imm as u32) + } else { + builder.op(STOREIMM64).r1(*r1).imm64(imm) + } + } + Inst::MemCopy(r1, r2) => builder.op(MEMCOPY).r1(*r1).r2(*r2), + Inst::RegCopy(r1, r2) => builder.op(REGCOPY).r1(*r1).r2(*r2), + Inst::Nop => builder.op(NOP), + Inst::Halt => builder.op(HALT), + }; + + builder.finish() + } +} + +#[derive(Debug, Default)] +struct InstBuilder { + op: Option, + r1: Option, + r2: Option, + imm32: Option, + 
imm64: Option, +} + +impl InstBuilder { + fn op(mut self, op: InstOp) -> Self { + self.op = Some(op); + self + } + + fn r1(mut self, r1: Reg) -> Self { + self.r1 = Some(r1); + self + } + + fn r2(mut self, r2: Reg) -> Self { + self.r2 = Some(r2); + self + } + + fn imm32(mut self, imm32: u32) -> Self { + self.imm32 = Some(imm32); + self + } + + fn imm64(mut self, imm64: u64) -> Self { + self.imm64 = Some(imm64); + self + } + + fn finish(self) -> Vec { + let mut cursor = Cursor::new(Vec::new()); + let InstBuilder { + op, + r1, + r2, + imm32, + imm64, + } = self; + + let op = op.expect("no op specified"); + cursor.write_u16::(op).unwrap(); + match (r1, r2, imm32, imm64) { + (Some(r1), Some(r2), None, None) => { + let tail = ((r1 as u16) << 10) | ((r2 as u16) << 4); + cursor.write_u16::(tail).unwrap(); + } + (Some(r1), None, None, None) => { + let tail = (r1 as u16) << 10; + cursor.write_u16::(tail).unwrap(); + } + (Some(r1), None, Some(imm32), None) => { + let tail = (r1 as u16) << 10; + cursor.write_u16::(tail).unwrap(); + cursor.write_u32::(imm32).unwrap(); + } + (Some(r1), None, None, Some(imm64)) => { + let tail = (r1 as u16) << 10; + cursor.write_u16::(tail).unwrap(); + cursor.write_u32::(0).unwrap(); + cursor.write_u64::(imm64).unwrap(); + } + (_, _, _, _) if op == HALT || op == NOP => {} + (_, _, _, _) => { + panic!( + r#"invalid instruction combo for opcode 0x{:04x}: +r1 : {:?} +r2 : {:?} +imm32 : {:?} +imm64 : {:?}"#, + op, r1, r2, imm32, imm64 + ); + } + } + cursor.into_inner() + } +} + +#[derive(Debug, Clone, Default)] +struct SymbolTable { + globals: HashMap, + locals: HashMap, +} + +impl SymbolTable { + pub fn new() -> Self { + Default::default() + } + + pub fn globals(&self) -> &HashMap { + &self.globals + } + + pub fn locals(&self) -> &HashMap { + &self.locals + } + + pub fn globals_mut(&mut self) -> &mut HashMap { + &mut self.globals + } + + pub fn locals_mut(&mut self) -> &mut HashMap { + &mut self.locals + } + + pub fn insert_global(&mut self, name: 
String, value: u64) -> Option { + self.globals_mut().insert(name, value) + } + + pub fn insert_local(&mut self, name: String, value: u64) -> Option { + self.locals_mut().insert(name, value) + } + + pub fn replace_locals(&mut self, locals: HashMap) -> HashMap { + mem::replace(self.locals_mut(), locals) + } + + pub fn get(&self, name: &String) -> Option { + self.locals + .get(name) + .or_else(|| self.globals.get(name)) + .copied() + } +} diff --git a/src/vm/obj/error.rs b/src/vm/obj/error.rs index fcec7a9..e69de29 100644 --- a/src/vm/obj/error.rs +++ b/src/vm/obj/error.rs @@ -1,42 +0,0 @@ -use snafu::Snafu; -use std::{ - fmt::Debug, - io, -}; - -#[derive(Debug, Snafu)] -pub enum ParseError { - #[snafu(display("IO error: {}", source))] - Io { source: io::Error }, - - #[snafu(display("wrong magic number"))] - WrongMagic, - - #[snafu(display("unknown section kind: 0x{:02x}", kind))] - UnknownSectionKind { kind: u8 }, - - #[snafu(display("invalid UTF-8 string: {}", source))] - InvalidUtf8String { source: std::string::FromUtf8Error }, -} - -macro_rules! into_parse_error { - ( - $($type:ty : $variant:ident),* $(,)? - ) => { - $( - impl From<$type> for ParseError { - fn from(other: $type) -> Self { - ParseError::$variant { source: other } - } - } - )* - } -} - -into_parse_error! 
{ - io::Error: Io, - std::string::FromUtf8Error: InvalidUtf8String, -} - -pub type Result = std::result::Result; - diff --git a/src/vm/obj/mod.rs b/src/vm/obj/mod.rs index 4c5ad61..2daefad 100644 --- a/src/vm/obj/mod.rs +++ b/src/vm/obj/mod.rs @@ -1,3 +1,3 @@ +pub mod assemble; +pub mod syn; pub mod obj; -pub mod parser; -pub mod error; diff --git a/src/vm/obj/obj.rs b/src/vm/obj/obj.rs index 49253c2..5860ee9 100644 --- a/src/vm/obj/obj.rs +++ b/src/vm/obj/obj.rs @@ -1,21 +1,38 @@ -use crate::vm::obj::error::ParseError; +use byteorder::{ReadBytesExt, LE}; +use crate::vm::obj::syn::error::{Result, ParseError}; use std::{ - convert::TryFrom, + collections::HashMap, + convert::{TryFrom, TryInto}, fmt::Debug, + io::{Cursor, Read}, }; pub const MAGIC: u64 = 0xDEAD_BEA7_BA5E_BA11; +const OBJECT_HEADER_LEN: usize = 16; // 8 + 4 + 4 #[derive(Debug)] pub struct Object { - pub header: Header, - pub sections: Vec>, + pub version: u32, + pub sections: Vec
, } -#[derive(Debug, Clone, Copy)] -pub struct Header { - pub version: u16, - pub sections: u16, +impl Object { + pub fn from_bytes(bytes: &[u8]) -> Result { + let mut cursor = Cursor::new(bytes); + let magic = cursor.read_u64::()?; + if magic != magic { + return Err(ParseError::WrongMagic); + } + let version = cursor.read_u32::()?; + let section_count = cursor.read_u32::()?; + + let mut sections = Vec::new(); + for _ in 0 .. section_count { + let section = Section::from_bytes(&mut cursor)?; + sections.push(section); + } + Ok(Object { version, sections }) + } } macro_rules! section_kind { @@ -25,7 +42,7 @@ macro_rules! section_kind { } ) => { - #[derive(Debug, Clone, Copy)] + #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)] pub enum $enum_name { $($name = $value),* @@ -54,54 +71,79 @@ section_kind! { } } -pub trait Section: Debug { - fn header(&self) -> SectionHeader; -} - -#[derive(Debug, Clone, Copy)] -pub struct SectionHeader { - pub kind: SectionKind, - pub checksum: u32, - pub len: u64, -} - #[derive(Debug, Clone)] -pub struct DataSection { - pub header: SectionHeader, - pub load_location: u64, - pub contents: Vec, +pub enum Section { + Data { + start: u64, + end: u64, + contents: Vec, + }, + Code { + start: u64, + end: u64, + contents: Vec, + }, + Meta { + entries: HashMap, + }, } -impl Section for DataSection { - fn header(&self) -> SectionHeader { - self.header +impl Section { + fn from_bytes(cursor: &mut Cursor<&[u8]>) -> Result { + let len = cursor.read_u64::()?; + let start = cursor.position() as usize; + let end = start + len as usize; + + let bytes = &cursor.get_ref()[start .. 
end]; + let kind: SectionKind = cursor.read_u8()?.try_into()?; + match kind { + SectionKind::Data => Section::data_section_from_bytes(bytes), + SectionKind::Code => Section::code_section_from_bytes(bytes), + SectionKind::Meta => Section::meta_section_from_bytes(bytes), + } + } + + fn data_section_from_bytes(bytes: &[u8]) -> Result { + let mut cursor = Cursor::new(bytes); + let start = cursor.read_u64::()?; + let end = cursor.read_u64::()?; + let contents = &bytes[cursor.position() as usize..]; + Ok(Section::Data { + start, + end, + contents: From::from(contents), + }) + } + + fn code_section_from_bytes(bytes: &[u8]) -> Result { + let mut cursor = Cursor::new(bytes); + let start = cursor.read_u64::()?; + let end = cursor.read_u64::()?; + let contents = &bytes[cursor.position() as usize..]; + Ok(Section::Code { + start, + end, + contents: From::from(contents), + }) + } + + fn meta_section_from_bytes(bytes: &[u8]) -> Result { + let mut cursor = Cursor::new(bytes); + let entry_count = cursor.read_u64::()?; + let mut entries = HashMap::new(); + for _ in 0 .. 
entry_count { + // key + let key_len = cursor.read_u64::()?; + let mut key_bytes = vec![0u8; key_len as usize]; + cursor.read_exact(&mut key_bytes)?; + let key = String::from_utf8(key_bytes)?; + + // value + let value = cursor.read_u64::()?; + entries.insert(key, value); + } + Ok(Section::Meta { + entries + }) } } - -#[derive(Debug, Clone)] -pub struct CodeSection { - pub header: SectionHeader, - pub load_location: u64, - pub contents: Vec, -} - -impl Section for CodeSection { - fn header(&self) -> SectionHeader { - self.header - } -} - -#[derive(Debug, Clone)] -pub struct MetaSection { - pub header: SectionHeader, - pub entry_count: u64, - pub entries: Vec<(String, Vec)>, -} - -impl Section for MetaSection { - fn header(&self) -> SectionHeader { - self.header - } -} - - diff --git a/src/vm/obj/parser.rs b/src/vm/obj/parser.rs deleted file mode 100644 index ccfa90b..0000000 --- a/src/vm/obj/parser.rs +++ /dev/null @@ -1,108 +0,0 @@ -use byteorder::{ReadBytesExt, LE}; -use crate::vm::obj::{ - obj::*, - error::*, -}; -use std::{ - convert::TryInto, - fmt::Debug, - io::{Cursor, Read}, -}; - -#[derive(Debug, Clone)] -pub struct ObjectParser { - bytes: Cursor>, -} - -impl ObjectParser { - pub fn parse(&mut self) -> Result { - let header = self.parse_header()?; - let sections = self.parse_sections(header)?; - Ok(Object { header, sections }) - } - - fn parse_header(&mut self) -> Result
{ - let magic = self.bytes.read_u64::()?; - if magic != magic { - return Err(ParseError::WrongMagic); - } - let version = self.bytes.read_u16::()?; - let sections = self.bytes.read_u16::()?; - Ok(Header { version, sections }) - } - - fn parse_sections(&mut self, header: Header) -> Result>> { - (0..header.sections) - .map(|_| self.parse_section()) - .collect() - } - - fn parse_section(&mut self) -> Result> { - let header = self.parse_section_header()?; - let section: Box = match header.kind { - SectionKind::Data => self.parse_data_section(header).map(Box::new)?, - SectionKind::Code => self.parse_code_section(header).map(Box::new)?, - SectionKind::Meta => self.parse_meta_section(header).map(Box::new)?, - }; - Ok(section) - } - - fn parse_section_header(&mut self) -> Result { - let kind: SectionKind = self.bytes.read_u8()?.try_into()?; - let checksum = self.bytes.read_u32::()?; - let len = self.bytes.read_u64::()?; - Ok(SectionHeader { - kind, - checksum, - len, - }) - } - - fn parse_data_section(&mut self, header: SectionHeader) -> Result { - let load_location = self.bytes.read_u64::()?; - let contents = self.take_bytes(header.len)?; - Ok(DataSection { - header, - load_location, - contents, - }) - } - - fn parse_code_section(&mut self, header: SectionHeader) -> Result { - let load_location = self.bytes.read_u64::()?; - let contents = self.take_bytes(header.len)?; - Ok(CodeSection { - header, - load_location, - contents, - }) - } - - fn parse_meta_section(&mut self, header: SectionHeader) -> Result { - let entry_count = self.bytes.read_u64::()?; - let mut entries = Vec::with_capacity(entry_count as usize); - for _ in 0 .. 
entry_count { - let key_bytes = self.parse_sized_data()?; - let key = String::from_utf8(key_bytes)?; - let value = self.parse_sized_data()?; - entries.push((key, value)); - } - Ok(MetaSection { - header, - entry_count, - entries, - }) - } - - fn parse_sized_data(&mut self) -> Result> { - let size = self.bytes.read_u64::()?; - self.take_bytes(size) - } - - fn take_bytes(&mut self, count: u64) -> Result> { - let mut contents = vec!(0u8; count as usize); - self.bytes.read_exact(&mut contents)?; - Ok(contents) - } -} - diff --git a/src/vm/obj/syn/ast.rs b/src/vm/obj/syn/ast.rs new file mode 100644 index 0000000..27fd306 --- /dev/null +++ b/src/vm/obj/syn/ast.rs @@ -0,0 +1,114 @@ +use crate::vm::{reg::Reg, inst::*}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SectionBlock { + Data { + org: Option, + body: Vec, + }, + Code { + org: Option, + body: Vec, + }, + Meta { + entries: Vec<(String, ImmValue)>, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SectionOrg { + Start(u64), + Range(u64, u64), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Line { + Inst(Inst), + LabelDef(String), + ImmValue(ImmValue), + Export(String), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ImmValue { + Number(u64), + Label(String), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Inst { + Add(Reg, Reg), + Mul(Reg, Reg), + Div(Reg, Reg), + Mod(Reg, Reg), + INeg(Reg), + And(Reg, Reg), + Or(Reg, Reg), + Inv(Reg), + Not(Reg), + Xor(Reg, Reg), + Shl(Reg, Reg), + Shr(Reg, Reg), + + CmpEq(Reg, Reg), + CmpLt(Reg, Reg), + Jmp(Reg), + Jz(Reg), + Jnz(Reg), + + Load(Reg, Reg), + Store(Reg, Reg), + StoreImm(Reg, ImmValue), + MemCopy(Reg, Reg), + RegCopy(Reg, Reg), + + Nop, + Halt, +} + +impl Inst { + pub fn op(&self) -> InstOp { + match self { + Inst::Add(_, _) => { ADD } + Inst::Mul(_, _) => { MUL } + Inst::Div(_, _) => { DIV } + Inst::Mod(_, _) => { MOD } + Inst::INeg(_) => { INEG } + Inst::And(_, _) => { AND } + Inst::Or(_, _) => { OR } + Inst::Inv(_) => { 
INV } + Inst::Not(_) => { NOT } + Inst::Xor(_, _) => { XOR } + Inst::Shl(_, _) => { SHL } + Inst::Shr(_, _) => { SHR } + + Inst::CmpEq(_, _) => { CMPEQ } + Inst::CmpLt(_, _) => { CMPLT } + Inst::Jmp(_) => { JMP } + Inst::Jz(_) => { JZ } + Inst::Jnz(_) => { JNZ } + + Inst::Load(_, _) => { LOAD } + Inst::Store(_, _) => { STORE } + Inst::StoreImm(_, imm) => { + if let ImmValue::Number(imm) = imm { + if *imm > (u32::max_value() as u64) { + STOREIMM64 + } else { + STOREIMM32 + } + } else { + STOREIMM64 + } + } + Inst::MemCopy(_, _) => { MEMCOPY } + Inst::RegCopy(_, _) => { REGCOPY } + + Inst::Nop => { NOP } + Inst::Halt => { HALT } + } + } + pub fn len(&self) -> usize { + inst_len(self.op()) + } +} diff --git a/src/vm/obj/syn/convert.rs b/src/vm/obj/syn/convert.rs new file mode 100644 index 0000000..ad913b7 --- /dev/null +++ b/src/vm/obj/syn/convert.rs @@ -0,0 +1,2 @@ +struct GetLayout { +} diff --git a/src/vm/obj/syn/error.rs b/src/vm/obj/syn/error.rs new file mode 100644 index 0000000..3fdcb22 --- /dev/null +++ b/src/vm/obj/syn/error.rs @@ -0,0 +1,47 @@ +use snafu::Snafu; +use std::{ + fmt::Debug, + io, +}; + +#[derive(Debug, Snafu)] +pub enum ParseError { + #[snafu(display("IO error: {}", source))] + Io { source: io::Error }, + + #[snafu(display("wrong magic number"))] + WrongMagic, + + #[snafu(display("unknown section kind: 0x{:02x}", kind))] + UnknownSectionKind { kind: u8 }, + + #[snafu(display("invalid UTF-8 string: {}", source))] + InvalidUtf8String { source: std::string::FromUtf8Error }, + + #[snafu(display("duplicate symbol name: {}", name))] + DuplicateName { name: String }, + + #[snafu(display("duplicate exported symbol name: {}", name))] + DuplicateExportName { name: String }, +} + +macro_rules! into_parse_error { + ( + $($type:ty : $variant:ident),* $(,)? + ) => { + $( + impl From<$type> for ParseError { + fn from(other: $type) -> Self { + ParseError::$variant { source: other } + } + } + )* + } +} + +into_parse_error! 
{ + io::Error: Io, + std::string::FromUtf8Error: InvalidUtf8String, +} + +pub type Result = std::result::Result; diff --git a/src/vm/obj/syn/mod.rs b/src/vm/obj/syn/mod.rs new file mode 100644 index 0000000..e4b69f8 --- /dev/null +++ b/src/vm/obj/syn/mod.rs @@ -0,0 +1,5 @@ +use lalrpop_util::lalrpop_mod; + +lalrpop_mod!(pub parser, "/vm/obj/syn/parser.rs"); +pub mod ast; +pub mod error; diff --git a/src/vm/syn/parser.lalrpop b/src/vm/obj/syn/parser.lalrpop similarity index 64% rename from src/vm/syn/parser.lalrpop rename to src/vm/obj/syn/parser.lalrpop index 7a0931c..76fcd95 100644 --- a/src/vm/syn/parser.lalrpop +++ b/src/vm/obj/syn/parser.lalrpop @@ -1,6 +1,6 @@ use std::str::FromStr; use crate::vm::{ - syn::ast::*, + obj::syn::ast::*, reg::*, }; @@ -10,9 +10,11 @@ LabelDef: String = {