diff --git a/src/main.rs b/src/main.rs
index a2197f9..bc08f77 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,7 +5,7 @@ mod vm;
 
 use structopt::StructOpt;
 use snafu::Snafu;
-use std::{fs, io::{stdin, stdout, Write, Read}, process, path::PathBuf};
+use std::{fs, io::{stdin, stdout, Write, Read}, path::{Path, PathBuf}, process};
 
 const DEFAULT_MAX_MEM: usize = 64 * 1024 * 1024;
 
@@ -69,39 +69,77 @@ struct Options {
     #[structopt(long, parse(try_from_str = from_bytes_size))]
     max_mem: Option<usize>,
 
-    /// Whether to compile the input file to an object.
-    #[structopt(short, long)]
-    compile: bool,
-
-    /// The output file for compiled files.
+    /// The output file for generated files (preprocessed, compiled, etc).
     ///
     /// If not supplied, a name will be inferred from the input file. Supplying - for the path will
     /// output to STDOUT.
     #[structopt(short = "o", long)]
-    compile_out: Option<PathBuf>
+    out: Option<PathBuf>,
+
+    /// Only run the preprocessor.
+    #[structopt(short = "E", long)]
+    preprocess_only: bool,
+
+    /// Only compile the input file to an object.
+    #[structopt(short = "c", long)]
+    compile_only: bool,
+
 }
 
 type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
 
+fn get_reader(path: impl AsRef<Path>) -> Result<Box<dyn Read>> {
+    if let Some("-") = path.as_ref().to_str() {
+        Ok(Box::new(stdin()))
+    } else {
+        Ok(Box::new(fs::File::open(path.as_ref())?))
+    }
+}
+
+fn get_writer(path: impl AsRef<Path>) -> Result<Box<dyn Write>> {
+    if let Some("-") = path.as_ref().to_str() {
+        Ok(Box::new(stdout()))
+    } else {
+        Ok(Box::new(fs::File::create(path.as_ref())?))
+    }
+}
+
 fn main() -> Result<()> {
     use vm::{
         state::State,
         obj::{
             assemble::{Asm, Assemble},
-            syn::{lexer, parser},
+            syn::{lexer, parser, preprocessor},
         }
     };
 
-    // get the object either from reading it, or from parsing an assembly file
     let opt = Options::from_args();
-    let text = if Some("-") == opt.input.to_str() {
+
+    // Load assembler
+    let text = {
+        let mut reader = get_reader(&opt.input)?;
         let mut text = String::new();
-        let stdin = stdin();
-        stdin.lock().read_to_string(&mut text)?;
+        reader.read_to_string(&mut text)?;
         text
-    } else {
-        fs::read_to_string(&opt.input)?
     };
+
+    // Preprocess
+    let text = {
+        preprocessor::preprocess(&text)?
+    };
+
+    // Preprocess only - exit early
+    if opt.preprocess_only {
+        let outfile = opt.out.clone().unwrap_or_else(|| {
+            let mut outfile = opt.input.clone();
+            assert!(outfile.set_extension("pasm"));
+            outfile
+        });
+        let mut writer = get_writer(&outfile)?;
+        writer.write_all(text.as_bytes())?;
+        return Ok(());
+    }
+
     let lexerdef = lexer::lexerdef();
     let lexer = lexerdef.lexer(&text);
     let (res, errors) = parser::parse(&lexer);
@@ -113,23 +151,20 @@ fn main() -> Result<()> {
     if !errors.is_empty() {
         process::exit(1);
     }
+
     let res = res.unwrap();
     let mut asm = Asm::default();
     let object = res.assemble(&mut asm)?;
-    if opt.compile {
-        let outfile = opt.compile_out.clone().unwrap_or_else(|| {
+
+    if opt.compile_only {
+        let outfile = opt.out.clone().unwrap_or_else(|| {
            let mut outfile = opt.input.clone();
            assert!(outfile.set_extension("obj"));
            outfile
        });
         let bytes = object.to_bytes();
-        if Some("-") == outfile.to_str() {
-            let stdout = stdout();
-            stdout.lock().write(&bytes)?;
-        } else {
-            // write compiled file here
-            fs::write(outfile, &bytes)?;
-        }
+        let mut writer = get_writer(&outfile)?;
+        writer.write_all(&bytes)?;
         Ok(())
     } else {
         let mut state = State::new();
diff --git a/src/vm/obj/syn/lexer.l b/src/vm/obj/syn/lexer.l
index 25d7618..21633ca 100644
--- a/src/vm/obj/syn/lexer.l
+++ b/src/vm/obj/syn/lexer.l
@@ -1,4 +1,5 @@
 %%
+#define "DEFINE"
 \$[0-9]+ "DEC_INT"
 \$0[Xx][0-9a-fA-F]+ "HEX_INT"
 \$0[Bb][01]+ "BIN_INT"
diff --git a/src/vm/obj/syn/mod.rs b/src/vm/obj/syn/mod.rs
index 3c9eb9c..1957d45 100644
--- a/src/vm/obj/syn/mod.rs
+++ b/src/vm/obj/syn/mod.rs
@@ -1,16 +1,17 @@
 pub mod ast;
 pub mod error;
-
-use lrlex::lrlex_mod;
-use lrpar::lrpar_mod;
-
-lrlex_mod!("vm/obj/syn/lexer.l");
-lrpar_mod!("vm/obj/syn/parser.y");
+pub mod preprocessor;
 
 pub mod parser {
-    pub use super::parser_y::*;
+    use lrpar::lrpar_mod;
+    lrpar_mod!("vm/obj/syn/parser.y");
+
+    pub use self::parser_y::*;
 }
 
 pub mod lexer {
-    pub use super::lexer_l::*;
+    use lrlex::lrlex_mod;
+    lrlex_mod!("vm/obj/syn/lexer.l");
+
+    pub use self::lexer_l::*;
 }
diff --git a/src/vm/obj/syn/preprocessor.l b/src/vm/obj/syn/preprocessor.l
new file mode 100644
index 0000000..60e9f90
--- /dev/null
+++ b/src/vm/obj/syn/preprocessor.l
@@ -0,0 +1,13 @@
+%%
+\$[0-9]+ "DEC_INT"
+\$0[Xx][0-9a-fA-F]+ "HEX_INT"
+\$0[Bb][01]+ "BIN_INT"
+#define "DEFINE"
+%[a-z0-9]+ "REG"
+\.[a-zA-Z0-9]+ "DIRECTIVE"
+[a-zA-Z_][a-zA-Z0-9_]* "NAME"
+;[^\n]* "COMMENT"
+[ \t]+ "WHITESPACE"
+\n "EOL"
+"([^"]|\\[\\nt0"'])*" "STRING"
+. "OTHER"
diff --git a/src/vm/obj/syn/preprocessor.rs b/src/vm/obj/syn/preprocessor.rs
new file mode 100644
index 0000000..51fe005
--- /dev/null
+++ b/src/vm/obj/syn/preprocessor.rs
@@ -0,0 +1,142 @@
+mod lexer {
+    use lrlex::lrlex_mod;
+    lrlex_mod!("vm/obj/syn/preprocessor.l");
+
+    pub use self::preprocessor_l::*;
+}
+
+use lrlex;
+use lrpar::{self, LexError, Lexer, Span};
+use std::{collections::HashMap, mem};
+
+type StorageT = u32;
+type Lexeme = lrpar::Lexeme<StorageT>;
+type LexerDef = lrlex::LexerDef<StorageT>;
+type Result<T> = std::result::Result<T, LexError>;
+
+pub fn preprocess(text: &str) -> Result<String> {
+    let lexerdef = lexer::lexerdef();
+    let lexer = lexerdef.lexer(text);
+    Preprocess::new(&lexer).preprocess()
+}
+
+struct Preprocess<'t> {
+    names: HashMap<String, String>,
+    lexer: &'t dyn Lexer<StorageT>,
+    tokens: Box<dyn Iterator<Item = Result<Lexeme>> + 't>,
+    curr: Option<Lexeme>,
+    span: Span,
+    out: String,
+    lexer_def: LexerDef,
+}
+
+impl<'t> Preprocess<'t> {
+    pub fn new(lexer: &'t dyn Lexer<StorageT>) -> Self {
+        Preprocess {
+            names: Default::default(),
+            lexer,
+            tokens: lexer.iter(),
+            curr: None,
+            span: Span::new(0, 0),
+            out: String::new(),
+            lexer_def: lexer::lexerdef(),
+        }
+    }
+
+    pub fn preprocess(mut self) -> Result<String> {
+        // load the first token in, start parsing
+        self.adv_token()?;
+
+        while let Some(curr) = self.curr.clone() {
+            let rule = self.lexer_def.get_rule_by_id(curr.tok_id());
+            let token_text = self.lexer.span_str(curr.span());
+            match rule.name.as_ref().map(|s| s.as_str()) {
+                Some("DEFINE") => {
+                    self.next_define()?;
+                }
+                Some("NAME") => {
+                    if let Some(value) = self.names.get(token_text) {
+                        self.out += value;
+                    } else {
+                        self.out += token_text;
+                    }
+                    self.adv_token()?;
+                }
+                Some("COMMENT") => {
+                    // ignore comments
+                    self.adv_token()?;
+                }
+                _ => {
+                    // Preserve everything else
+                    self.out += token_text;
+                    self.adv_token()?;
+                }
+            }
+        }
+        Ok(self.out)
+    }
+
+    fn next_define(&mut self) -> Result<()> {
+        self.expect_token("DEFINE")?;
+        // skip whitespace and comments
+        while self.skip_token("COMMENT")? || self.skip_token("WHITESPACE")? { }
+
+        let name_token = self.expect_token("NAME")?;
+
+        // skip whitespace and comments
+        while self.skip_token("COMMENT")? || self.skip_token("WHITESPACE")? { }
+
+        let start = self.span.start();
+        let mut end = self.span.end();
+
+        while let Some(curr) = self.adv_token()? {
+            if self.rule_name(curr) == Some("EOL") {
+                self.out += "\n";
+                break;
+            }
+            end = self.span.end();
+        }
+
+        let value = self.lexer.span_str(Span::new(start, end)).trim().to_string();
+        let name = self.lexer.span_str(name_token.span()).to_string();
+        self.names.insert(name, value);
+
+        Ok(())
+    }
+
+    fn rule_name(&self, token: Lexeme) -> Option<&str> {
+        let rule = self.lexer_def.get_rule_by_id(token.tok_id());
+        rule.name.as_ref().map(String::as_str)
+    }
+
+    fn expect_token(&mut self, rule_name: &str) -> Result<Lexeme> {
+        self.match_token(rule_name)?
+            .ok_or_else(|| LexError::new(self.span))
+    }
+
+    fn skip_token(&mut self, rule_name: &str) -> Result<bool> {
+        self.match_token(rule_name)
+            .map(|t| t.is_some())
+    }
+
+    fn match_token(&mut self, rule_name: &str) -> Result<Option<Lexeme>> {
+        let curr = if let Some(curr) = self.curr.clone() {
+            curr
+        } else {
+            return Ok(None);
+        };
+        if self.rule_name(curr) == Some(rule_name) {
+            self.adv_token()
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn adv_token(&mut self) -> Result<Option<Lexeme>> {
+        let curr = self.tokens.next().transpose()?;
+        if let Some(curr) = curr.as_ref() {
+            self.span = curr.span();
+        }
+        Ok(mem::replace(&mut self.curr, curr))
+    }
+}