diff --git a/build.rs b/build.rs index c05769f..efc0667 100644 --- a/build.rs +++ b/build.rs @@ -10,8 +10,6 @@ fn main() -> Result<(), Box> { LexerBuilder::new() .rule_ids_map(lex_rule_ids_map) .process_file_in_src("vm/obj/syn/lexer.l")?; - LexerBuilder::::new() - .process_file_in_src("vm/obj/syn/preprocessor.l")?; rerun_except(&[ "examples/*.asm", "tests/*.asm", diff --git a/examples/constants.asm b/examples/constants.asm new file mode 100644 index 0000000..b27b870 --- /dev/null +++ b/examples/constants.asm @@ -0,0 +1,7 @@ +.section data $0x1000 { + dead: .u16 $0xDEAD + beef: .u16 $0xBEEF + + .export dead + .export beef +} diff --git a/examples/deadbeef.asm b/examples/deadbeef.asm index f5e32a9..54027be 100644 --- a/examples/deadbeef.asm +++ b/examples/deadbeef.asm @@ -1,14 +1,4 @@ -.section data $0x1000 { - beef: .u16 $0xBEEF - ; TODO(syntax) - ; bytes: .u8 [ - ; $0xEF, - ; $0xBE, - ; $0xAD, - ; $0xDE, - ; ] - .export beef -} +.include "constants.asm" .section code $0x0 { main: @@ -20,7 +10,7 @@ or %r0, %r01 cmpeq %r0, $0xDEADBEEF ; jump to the address 'end' - jz end + jnz end mov %status, $1 end: halt diff --git a/src/main.rs b/src/main.rs index bc08f77..7922937 100644 --- a/src/main.rs +++ b/src/main.rs @@ -107,54 +107,13 @@ fn get_writer(path: impl AsRef) -> Result> { fn main() -> Result<()> { use vm::{ state::State, - obj::{ - assemble::{Asm, Assemble}, - syn::{lexer, parser, preprocessor}, - } + obj::assemble::AsmSession, }; let opt = Options::from_args(); - - // Load assembler - let text = { - let mut reader = get_reader(&opt.input)?; - let mut text = String::new(); - reader.read_to_string(&mut text)?; - text - }; - - // Preprocess - let text = { - preprocessor::preprocess(&text)? - }; - - // Preprocess only - exit early - if opt.preprocess_only { - let outfile = opt.out.clone().unwrap_or_else(|| { - let mut outfile = opt.input.clone(); - assert!(outfile.set_extension("pasm")); - outfile - }); - let mut writer = get_writer(&outfile)?; - writer.write(&text.as_bytes())?; - return Ok(()); - } - - let lexerdef = lexer::lexerdef(); - let lexer = lexerdef.lexer(&text); - let (res, errors) = parser::parse(&lexer); - - // print errors - for err in errors.iter() { - println!("{}", err.pp(&lexer, &parser::token_epp)); - } - if !errors.is_empty() { - process::exit(1); - } - - let res = res.unwrap(); - let mut asm = Asm::default(); - let object = res.assemble(&mut asm)?; + let mut asm_session = AsmSession::default(); + asm_session.include(&opt.input)?; + let object = asm_session.assemble()?; if opt.compile_only { let outfile = opt.out.clone().unwrap_or_else(|| { diff --git a/src/vm/obj/assemble.rs b/src/vm/obj/assemble.rs index 017a4e2..46d5e7f 100644 --- a/src/vm/obj/assemble.rs +++ b/src/vm/obj/assemble.rs @@ -1,17 +1,119 @@ use crate::vm::{ addr::*, inst, - obj::{obj, syn::ast::*}, + obj::{ + obj, + syn::{ast::*, lexer, parser}, + }, }; use byteorder::{WriteBytesExt, LE}; use snafu::Snafu; -use std::collections::HashMap; +use std::{ + collections::{BTreeSet, HashMap}, + path::{Path, PathBuf}, +}; + +pub type LexError = lrpar::LexError; +pub type ParseError = lrpar::ParseError; +pub type LexParseError = lrpar::LexParseError; pub trait Assemble { type Out; fn assemble(&self, asm: &mut Asm) -> Result; } +/// A shared session for the assembler. +#[derive(Debug, Default)] +pub struct AsmSession { + include_paths: Vec, + included_files: BTreeSet, + include_stack: Vec, + directives: Vec, +} + +impl AsmSession { + pub fn assemble(self) -> Result { + let mut asm = Asm::default(); + self.directives.assemble(&mut asm) + } + + pub fn include(&mut self, path: impl AsRef) -> Result<()> { + let path = path.as_ref(); + let path = path.canonicalize() + .map_err(|_| AssembleError::BadPath { path: path.to_path_buf() })?; + if self.include_paths.contains(&path) { + return Ok(()); + } + + let text = std::fs::read_to_string(&path) + .map_err(|_| AssembleError::BadPath { path: path.to_path_buf() })?; + + // Add file to included paths and the path stack + self.included_files.insert(path.clone()); + self.include_stack.push(path); + + self.include_text(&text)?; + + self.include_stack.pop(); + + Ok(()) + } + + pub fn include_stack(&self) -> &Vec { + &self.include_stack + } + + pub fn current_include_path(&self) -> Option<&Path> { + self.include_stack.last().map(PathBuf::as_path) + } + + fn include_text(&mut self, text: &str) -> Result<()> { + let lexerdef = lexer::lexerdef(); + let lexer = lexerdef.lexer(&text); + let (res, errors) = parser::parse(&lexer); + + if !errors.is_empty() { + return Err(AssembleError::Syntax { + source: errors.into(), + }); + } + + let ast = res.unwrap(); + self.include_ast(ast) + } + + fn include_ast(&mut self, ast: Vec) -> Result<()> { + for dir in ast.iter() { + if let Directive::Include(path) = dir { + let path = self.resolve_include_path(path) + .ok_or_else(|| AssembleError::BadPath { path: path.into() })?; + self.include(path)?; + } + } + self.directives.extend(ast); + Ok(()) + } + + pub fn include_paths(&self) -> &Vec { + &self.include_paths + } + + pub fn include_paths_mut(&mut self) -> &mut Vec { + &mut self.include_paths + } + + fn resolve_include_path(&self, path: impl AsRef) -> Option { + let path = path.as_ref(); + self.current_include_path() + .and_then(|last_path| last_path.parent()) + .map(|last_dir| last_dir.join(path)) + .or_else(|| self.include_paths() + .iter() + .filter_map(|include| include.join(path).canonicalize().ok()) + .next()) + } +} + #[derive(Debug, Default)] pub struct Asm { names: Vec>, @@ -19,6 +121,18 @@ pub struct Asm { } impl Asm { + /// Gets an address value from a name, if it exists. Searches local -> global. + fn lookup_name(&self, name: &str) -> Result { + self.names + .iter() + .rev() + .filter_map(|names| names.get(name).copied()) + .next() + .ok_or_else(|| AssembleError::UnknownName { + name: name.to_string(), + }) + } + /// Gets all names defined in a data section, their positions, and puts them into a hashmap. fn gather_names(&self, section: &DataSection) -> Result> { let mut names = HashMap::new(); @@ -40,27 +154,15 @@ impl Asm { assert_eq!(addr, Addr(section.org.start() + (section.len() as u64))); Ok(names) } - - /// Gets an address value from a name, if it exists. Searches local -> global. - fn lookup_name(&self, name: &str) -> Result { - self.names - .iter() - .rev() - .filter_map(|names| names.get(name).copied()) - .next() - .ok_or_else(|| AssembleError::UnknownName { - name: name.to_string(), - }) - } } -impl Assemble for Vec { +impl Assemble for Vec { type Out = obj::Object; fn assemble(&self, asm: &mut Asm) -> Result { // collect globals let mut globals = HashMap::new(); for section in self.iter() { - let section = if let SectionDef::Data(d) = section { + let section = if let Directive::Data(d) = section { d } else { continue; @@ -87,7 +189,7 @@ impl Assemble for Vec { let sections = self .iter() - .map(|section| section.assemble(asm)) + .filter_map(|section| section.assemble(asm).transpose()) .collect::>()?; Ok(obj::Object { version: obj::OBJ_VERSION, @@ -96,13 +198,14 @@ impl Assemble for Vec { } } -impl Assemble for SectionDef { - type Out = obj::Section; +impl Assemble for Directive { + type Out = Option; fn assemble(&self, asm: &mut Asm) -> Result { match self { - SectionDef::Data(section) => Ok(obj::Section::Data(section.assemble(asm)?)), - SectionDef::Meta(section) => section.assemble(asm), + Directive::Data(section) => Ok(Some(obj::Section::Data(section.assemble(asm)?))), + Directive::Meta(section) => section.assemble(asm).map(Some), + Directive::Include(_) => { Ok(None) } } } } @@ -339,11 +442,13 @@ impl Assemble for Value { Ok(value.0.to_le_bytes().to_vec()) } Value::Here => Ok(asm.pos.0.to_le_bytes().to_vec()), - Value::Addr(v, _) => if let Value::Addr(_, _) = &**v { - // double deref is not allowed - todo!() - } else { - v.assemble(asm) + Value::Addr(v, _) => { + if let Value::Addr(_, _) = &**v { + // double deref is not allowed + todo!() + } else { + v.assemble(asm) + } } } } @@ -352,38 +457,104 @@ impl Assemble for Value { #[derive(Debug, Snafu)] pub enum AssembleError { #[snafu(display("unknown name: {}", name))] - UnknownName { name: String }, + UnknownName { + name: String, + }, #[snafu(display("unknown export name: {}", name))] - UnknownExport { name: String }, + UnknownExport { + name: String, + }, #[snafu(display("duplicate label definition: {}", name))] - DuplicateLabel { name: String }, + DuplicateLabel { + name: String, + }, #[snafu(display("duplicate meta entry name: {}", name))] - DuplicateMetaName { name: String }, + DuplicateMetaName { + name: String, + }, #[snafu(display("illegal meta value for entry name {}: {:?}", name, value))] - IllegalMetaValue { name: String, value: Value }, + IllegalMetaValue { + name: String, + value: Value, + }, #[snafu(display("duplicate exported name: {}", name))] - DuplicateExport { name: String }, + DuplicateExport { + name: String, + }, #[snafu(display("section start ({:#x}) is greater than end ({:#x})", start, end))] - StartGreaterThanEnd { start: u64, end: u64 }, + StartGreaterThanEnd { + start: u64, + end: u64, + }, #[snafu(display( "section end ({:#x}) too short for section content size ({:#x})", section_end, section_size ))] - SectionTooShort { section_end: u64, section_size: u64 }, + SectionTooShort { + section_end: u64, + section_size: u64, + }, #[snafu(display("illegal instruction destination value: {:?}", value))] - IllegalDestValue { value: Value }, + IllegalDestValue { + value: Value, + }, #[snafu(display("deref of a deref value is not allowed"))] - DoubleDeref { value: Value }, + DoubleDeref { + value: Value, + }, + + #[snafu(display("could not read path: {}", path.display()))] + BadPath { + path: PathBuf, + }, + + Syntax { + source: SyntaxError, + }, +} + +#[derive(Debug, Snafu)] +pub enum SyntaxError { + Lex { source: LexError }, + Parse { source: ParseError }, + Multi { errors: Vec }, +} + +impl From for SyntaxError { + fn from(source: LexError) -> Self { + SyntaxError::Lex { source } + } +} + +impl From for SyntaxError { + fn from(source: ParseError) -> Self { + SyntaxError::Parse { source } + } +} + +impl From> for SyntaxError { + fn from(errors: Vec) -> Self { + SyntaxError::Multi { errors } + } +} + +impl From for SyntaxError { + fn from(source: LexParseError) -> Self { + match source { + LexParseError::LexError(e) => SyntaxError::Lex { source: e }, + LexParseError::ParseError(e) => SyntaxError::Parse { source: e }, + } + } } pub type Result = std::result::Result; @@ -395,7 +566,8 @@ mod test { #[test] fn test_inst_len() { let mut asm = Asm::default(); - asm.names.push(vec![("test".to_string(), Addr(0u64))].into_iter().collect()); + asm.names + .push(vec![("test".to_string(), Addr(0u64))].into_iter().collect()); macro_rules! assert_len { ($inst:expr) => {{ diff --git a/src/vm/obj/syn/ast.rs b/src/vm/obj/syn/ast.rs index 6cc4781..3617c95 100644 --- a/src/vm/obj/syn/ast.rs +++ b/src/vm/obj/syn/ast.rs @@ -4,9 +4,10 @@ use crate::vm::{ }; #[derive(Debug, Clone)] -pub enum SectionDef { +pub enum Directive { Meta(MetaSection), Data(DataSection), + Include(String), } #[derive(Debug, Clone)] diff --git a/src/vm/obj/syn/error.rs b/src/vm/obj/syn/error.rs deleted file mode 100644 index 0c53f62..0000000 --- a/src/vm/obj/syn/error.rs +++ /dev/null @@ -1,16 +0,0 @@ -use snafu::Snafu; -//use std::{fmt::Debug, io}; - -#[derive(Debug, Snafu)] -pub enum SyntaxError { - //#[snafu(display("IO error: {}", source))] - //Io { source: io::Error }, - - #[snafu(display("unexpected {}", what))] - Unexpected { what: String }, - - #[snafu(display("expected {}, but got {} instead", expected, got))] - ExpectedGot { expected: String, got: String }, -} - -pub type Result = std::result::Result; diff --git a/src/vm/obj/syn/lexer.l b/src/vm/obj/syn/lexer.l index 21633ca..1f1c497 100644 --- a/src/vm/obj/syn/lexer.l +++ b/src/vm/obj/syn/lexer.l @@ -1,11 +1,11 @@ %% -#define "DEFINE" \$[0-9]+ "DEC_INT" \$0[Xx][0-9a-fA-F]+ "HEX_INT" \$0[Bb][01]+ "BIN_INT" \.meta "DIR_META" \.section "DIR_SECTION" \.export "DIR_EXPORT" +\.include "DIR_INCLUDE" \( "LPAREN" \) "RPAREN" \{ "LBRACE" diff --git a/src/vm/obj/syn/mod.rs b/src/vm/obj/syn/mod.rs index 1957d45..033263a 100644 --- a/src/vm/obj/syn/mod.rs +++ b/src/vm/obj/syn/mod.rs @@ -1,6 +1,4 @@ pub mod ast; -pub mod error; -pub mod preprocessor; pub mod parser { use lrpar::lrpar_mod; diff --git a/src/vm/obj/syn/parser.y b/src/vm/obj/syn/parser.y index fc7491e..eb313c3 100644 --- a/src/vm/obj/syn/parser.y +++ b/src/vm/obj/syn/parser.y @@ -1,20 +1,21 @@ -%start SectionDefs +%start Top %% -SectionDefs -> Vec: - SectionDefs SectionDef { $1.push($2); $1 } - | { Vec::new() } +Top -> Vec: + Top Directive { $1.push($2); $1 } + | { Vec::new() } ; -SectionDef -> SectionDef: - 'DIR_META' MetaBlock { SectionDef::Meta(MetaSection { lines: $2 }) } +Directive -> Directive: + 'DIR_META' MetaBlock { Directive::Meta(MetaSection { lines: $2 }) } | 'DIR_SECTION' Name SectionOrg DataBlock { - SectionDef::Data(DataSection { + Directive::Data(DataSection { name: $2, org: $3, lines: $4, }) } + | 'DIR_INCLUDE' String { Directive::Include($2) } ; MetaBlock -> Vec: 'LBRACE' MetaLines 'RBRACE' { $2 }; @@ -158,7 +159,7 @@ use crate::vm::{ fn parse_string(input: &str) -> String { let mut s = String::new(); - let input = &input[1..input.bytes().len() - 2]; + let input = &input[1..input.bytes().len() - 1]; let mut chars = input.chars(); while let Some(c) = chars.next() { if c == '\\' { diff --git a/src/vm/obj/syn/preprocessor.l b/src/vm/obj/syn/preprocessor.l deleted file mode 100644 index 60e9f90..0000000 --- a/src/vm/obj/syn/preprocessor.l +++ /dev/null @@ -1,13 +0,0 @@ -%% -\$[0-9]+ "DEC_INT" -\$0[Xx][0-9a-fA-F]+ "HEX_INT" -\$0[Bb][01]+ "BIN_INT" -#define "DEFINE" -%[a-z0-9]+ "REG" -\.[a-zA-Z0-9]+ "DIRECTIVE" -[a-zA-Z_][a-zA-Z0-9_]* "NAME" -;[^\n]* "COMMENT" -[ \t]+ "WHITESPACE" -\n "EOL" -"([^"]|\\[\\nt0"'])*" "STRING" -. "OTHER" diff --git a/src/vm/obj/syn/preprocessor.rs b/src/vm/obj/syn/preprocessor.rs deleted file mode 100644 index 51fe005..0000000 --- a/src/vm/obj/syn/preprocessor.rs +++ /dev/null @@ -1,142 +0,0 @@ -mod lexer { - use lrlex::lrlex_mod; - lrlex_mod!("vm/obj/syn/preprocessor.l"); - - pub use self::preprocessor_l::*; -} - -use lrlex; -use lrpar::{self, LexError, Lexer, Span}; -use std::{collections::HashMap, mem}; - -type StorageT = u32; -type Lexeme = lrpar::Lexeme; -type LexerDef = lrlex::LexerDef; -type Result = std::result::Result; - -pub fn preprocess(text: &str) -> Result { - let lexerdef = lexer::lexerdef(); - let lexer = lexerdef.lexer(text); - Preprocess::new(&lexer).preprocess() -} - -struct Preprocess<'t> { - names: HashMap, - lexer: &'t dyn Lexer, - tokens: Box> + 't>, - curr: Option, - span: Span, - out: String, - lexer_def: LexerDef, -} - -impl<'t> Preprocess<'t> { - pub fn new(lexer: &'t dyn Lexer) -> Self { - Preprocess { - names: Default::default(), - lexer, - tokens: lexer.iter(), - curr: None, - span: Span::new(0, 0), - out: String::new(), - lexer_def: lexer::lexerdef(), - } - } - - pub fn preprocess(mut self) -> Result { - // load the first token in, start parsing - self.adv_token()?; - - while let Some(curr) = self.curr.clone() { - let rule = self.lexer_def.get_rule_by_id(curr.tok_id()); - let token_text = self.lexer.span_str(curr.span()); - match rule.name.as_ref().map(|s| s.as_str()) { - Some("DEFINE") => { - self.next_define()?; - } - Some("NAME") => { - if let Some(value) = self.names.get(token_text) { - self.out += value; - } else { - self.out += token_text; - } - self.adv_token()?; - } - Some("COMMENT") => { - // ignore comments - self.adv_token()?; - } - _ => { - // Preserve everything else - self.out += token_text; - self.adv_token()?; - } - } - } - Ok(self.out) - } - - fn next_define(&mut self) -> Result<()> { - self.expect_token("DEFINE")?; - // skip whitespace and comments - while self.skip_token("COMMENT")? || self.skip_token("WHITESPACE")? { } - - let name_token = self.expect_token("NAME")?; - - // skip whitespace and comments - while self.skip_token("COMMENT")? || self.skip_token("WHITESPACE")? { } - - let start = self.span.start(); - let mut end = self.span.end(); - - while let Some(curr) = self.adv_token()? { - if self.rule_name(curr) == Some("EOL") { - self.out += "\n"; - break; - } - end = self.span.end(); - } - - let value = self.lexer.span_str(Span::new(start, end)).trim().to_string(); - let name = self.lexer.span_str(name_token.span()).to_string(); - self.names.insert(name, value); - - Ok(()) - } - - fn rule_name(&self, token: Lexeme) -> Option<&str> { - let rule = self.lexer_def.get_rule_by_id(token.tok_id()); - rule.name.as_ref().map(String::as_str) - } - - fn expect_token(&mut self, rule_name: &str) -> Result { - self.match_token(rule_name)? - .ok_or_else(|| LexError::new(self.span)) - } - - fn skip_token(&mut self, rule_name: &str) -> Result { - self.match_token(rule_name) - .map(|t| t.is_some()) - } - - fn match_token(&mut self, rule_name: &str) -> Result> { - let curr = if let Some(curr) = self.curr.clone() { - curr - } else { - return Ok(None); - }; - if self.rule_name(curr) == Some(rule_name) { - self.adv_token() - } else { - Ok(None) - } - } - - fn adv_token(&mut self) -> Result> { - let curr = self.tokens.next().transpose()?; - if let Some(curr) = curr.as_ref() { - self.span = curr.span(); - } - Ok(mem::replace(&mut self.curr, curr)) - } -}