Clean up main.rs some, add preprocessor

* Preprocessor uses an LRPAR-generated lexer and a custom parser to filter comments and set defines. Includes and conditional compilation will come next. Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
2020-02-26 14:24:30 -05:00
parent 79d37a4e7b
commit 381a59f5fe
5 changed files with 223 additions and 31 deletions
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,7 +5,7 @@ mod vm;
 use structopt::StructOpt;
 use snafu::Snafu;
-use std::{fs, io::{stdin, stdout, Write, Read}, process, path::PathBuf};
+use std::{fs, io::{stdin, stdout, Write, Read}, path::{Path, PathBuf}, process};
 const DEFAULT_MAX_MEM: usize = 64 * 1024 * 1024;
@@ -69,39 +69,77 @@ struct Options {
    #[structopt(long, parse(try_from_str = from_bytes_size))]
    max_mem: Option<usize>,
-    /// Whether to compile the input file to an object.
+    /// The output file for generated files (preprocessed, compiled, etc).
    #[structopt(short, long)]
    compile: bool,
    /// The output file for compiled files.
    ///
    /// If not supplied, a name will be inferred from the input file. Supplying - for the path will
    /// output to STDOUT.
    #[structopt(short = "o", long)]
-    compile_out: Option<PathBuf>
+    out: Option<PathBuf>,
    /// Only run the preprocessor.
    #[structopt(short = "E", long)]
    preprocess_only: bool,
    /// Only compile the input file to an object.
    #[structopt(short = "c", long)]
    compile_only: bool,
 }
 type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
 /// Opens the input source named by `path`.
 ///
 /// A path that is exactly `-` selects STDIN; any other path is opened as a file for reading.
 /// Errors from opening the file are propagated to the caller.
 fn get_reader(path: impl AsRef<Path>) -> Result<Box<dyn Read>> {
    let path = path.as_ref();
    let reader: Box<dyn Read> = match path.to_str() {
        Some("-") => Box::new(stdin()),
        _ => Box::new(fs::File::open(path)?),
    };
    Ok(reader)
 }
 /// Opens the output sink named by `path`.
 ///
 /// A path that is exactly `-` selects STDOUT; any other path is created (or truncated) as a
 /// file. Errors from creating the file are propagated to the caller.
 fn get_writer(path: impl AsRef<Path>) -> Result<Box<dyn Write>> {
    let path = path.as_ref();
    let writer: Box<dyn Write> = match path.to_str() {
        Some("-") => Box::new(stdout()),
        _ => Box::new(fs::File::create(path)?),
    };
    Ok(writer)
 }
 fn main() -> Result<()> {
    use vm::{
        state::State,
        obj::{
            assemble::{Asm, Assemble},
-            syn::{lexer, parser},
+            syn::{lexer, parser, preprocessor},
        }
    };
    // get the object either from reading it, or from parsing an assembly file
    let opt = Options::from_args();
-    let text = if Some("-") == opt.input.to_str() {
+
    // Load assembler
    let text = {
        let mut reader = get_reader(&opt.input)?;
        let mut text = String::new();
-        let stdin = stdin();
+        reader.read_to_string(&mut text)?;
        stdin.lock().read_to_string(&mut text)?;
        text
    } else {
        fs::read_to_string(&opt.input)?
    };
    // Preprocess
    let text = {
        preprocessor::preprocess(&text)?
    };
    // Preprocess only - exit early
    if opt.preprocess_only {
        let outfile = opt.out.clone().unwrap_or_else(|| {
            let mut outfile = opt.input.clone();
            assert!(outfile.set_extension("pasm"));
            outfile
        });
        let mut writer = get_writer(&outfile)?;
        writer.write(&text.as_bytes())?;
        return Ok(());
    }
    let lexerdef = lexer::lexerdef();
    let lexer = lexerdef.lexer(&text);
    let (res, errors) = parser::parse(&lexer);
@@ -113,23 +151,20 @@ fn main() -> Result<()> {
    if !errors.is_empty() {
        process::exit(1);
    }
    let res = res.unwrap();
    let mut asm = Asm::default();
    let object = res.assemble(&mut asm)?;
-    if opt.compile {
+
-        let outfile = opt.compile_out.clone().unwrap_or_else(|| {
+    if opt.compile_only {
        let outfile = opt.out.clone().unwrap_or_else(|| {
            let mut outfile = opt.input.clone();
            assert!(outfile.set_extension("obj"));
            outfile
        });
        let bytes = object.to_bytes();
-        if Some("-") == outfile.to_str() {
+        let mut writer = get_writer(&outfile)?;
-            let stdout = stdout();
+        writer.write(&bytes)?;
            stdout.lock().write(&bytes)?;
        } else {
            // write compiled file here
            fs::write(outfile, &bytes)?;
        }
        Ok(())
    } else {
        let mut state = State::new();
--- a/src/vm/obj/syn/lexer.l
+++ b/src/vm/obj/syn/lexer.l
@@ -1,4 +1,5 @@
 %%
 #define                             "DEFINE"
 \$[0-9]+                            "DEC_INT"
 \$0[Xx][0-9a-fA-F]+                 "HEX_INT"
 \$0[Bb][01]+                        "BIN_INT"
--- a/src/vm/obj/syn/mod.rs
+++ b/src/vm/obj/syn/mod.rs
@@ -1,16 +1,17 @@
 pub mod ast;
 pub mod error;
-
+pub mod preprocessor;
 use lrlex::lrlex_mod;
 use lrpar::lrpar_mod;
 lrlex_mod!("vm/obj/syn/lexer.l");
 lrpar_mod!("vm/obj/syn/parser.y");
 pub mod parser {
-    pub use super::parser_y::*;
+    use lrpar::lrpar_mod;
    lrpar_mod!("vm/obj/syn/parser.y");
    pub use self::parser_y::*;
 }
 pub mod lexer {
-    pub use super::lexer_l::*;
+    use lrlex::lrlex_mod;
    lrlex_mod!("vm/obj/syn/lexer.l");
    pub use self::lexer_l::*;
 }
--- a/src/vm/obj/syn/preprocessor.l
+++ b/src/vm/obj/syn/preprocessor.l
@@ -0,0 +1,13 @@
 %%
 \$[0-9]+                            "DEC_INT"
 \$0[Xx][0-9a-fA-F]+                 "HEX_INT"
 \$0[Bb][01]+                        "BIN_INT"
 #define                             "DEFINE"
 %[a-z0-9]+                          "REG"
 \.[a-zA-Z0-9]+                      "DIRECTIVE"
 [a-zA-Z_][a-zA-Z0-9_]*              "NAME"
 ;[^\n]*                             "COMMENT"
 [ \t]+                              "WHITESPACE"
 \n                                  "EOL"
 "(\\[\\nt0"']|[^"])*"               "STRING"
 .                                   "OTHER"
--- a/src/vm/obj/syn/preprocessor.rs
+++ b/src/vm/obj/syn/preprocessor.rs
@@ -0,0 +1,142 @@
 /// Private wrapper around the lexer that `lrlex_mod!` brings in for `preprocessor.l`.
 ///
 /// Everything from the macro-provided `preprocessor_l` module is re-exported, so the rest of
 /// this file can simply call `lexer::lexerdef()`.
 mod lexer {
    use lrlex::lrlex_mod;
    lrlex_mod!("vm/obj/syn/preprocessor.l");
    pub use self::preprocessor_l::*;
 }
 use lrlex;
 use lrpar::{self, LexError, Lexer, Span};
 use std::{collections::HashMap, mem};
 /// Storage type parameter shared by the lrpar/lrlex types used in this file.
 type StorageT = u32;
 /// A lexeme as produced by the preprocessor's token iterator.
 type Lexeme = lrpar::Lexeme<StorageT>;
 /// Lexer definition type; used in this file to look up a rule from a token id.
 type LexerDef = lrlex::LexerDef<StorageT>;
 /// File-local result alias whose error type defaults to `LexError`.
 type Result<T, E = LexError> = std::result::Result<T, E>;
 /// Runs the preprocessor over `text` and returns the rewritten source.
 ///
 /// A lexer is built for `text` from this module's lexer definition and handed to a fresh
 /// `Preprocess`, which does the actual work. Any `LexError` raised along the way is returned.
 pub fn preprocess(text: &str) -> Result<String> {
    let definition = lexer::lexerdef();
    let source_lexer = definition.lexer(text);
    let preprocessor = Preprocess::new(&source_lexer);
    preprocessor.preprocess()
 }
 /// State for a single preprocessing pass over one lexed input.
 struct Preprocess<'t> {
    /// Names introduced by `#define`, mapped to their replacement text.
    names: HashMap<String, String>,
    /// Lexer for the input; used to recover the source text behind a span.
    lexer: &'t dyn Lexer<StorageT>,
    /// Token stream obtained from `lexer.iter()`.
    tokens: Box<dyn Iterator<Item = Result<Lexeme>> + 't>,
    /// The current (not yet consumed) token; `None` before the first advance and once the
    /// token stream is exhausted.
    curr: Option<Lexeme>,
    /// Span of the token most recently pulled from `tokens`.
    span: Span,
    /// Preprocessed output accumulated so far.
    out: String,
    /// Lexer definition, used to map a token id back to its rule (and the rule's name).
    lexer_def: LexerDef,
 }
 impl<'t> Preprocess<'t> {
    pub fn new(lexer: &'t dyn Lexer<StorageT>) -> Self {
        Preprocess {
            names: Default::default(),
            lexer,
            tokens: lexer.iter(),
            curr: None,
            span: Span::new(0, 0),
            out: String::new(),
            lexer_def: lexer::lexerdef(),
        }
    }
    pub fn preprocess(mut self) -> Result<String> {
        // load the first token in, start parsing
        self.adv_token()?;
        while let Some(curr) = self.curr.clone() {
            let rule = self.lexer_def.get_rule_by_id(curr.tok_id());
            let token_text = self.lexer.span_str(curr.span());
            match rule.name.as_ref().map(|s| s.as_str()) {
                Some("DEFINE") => {
                    self.next_define()?;
                }
                Some("NAME") => {
                    if let Some(value) = self.names.get(token_text) {
                        self.out += value;
                    } else {
                        self.out += token_text;
                    }
                    self.adv_token()?;
                }
                Some("COMMENT") => {
                    // ignore comments
                    self.adv_token()?;
                }
                _ => {
                    // Preserve everything else
                    self.out += token_text;
                    self.adv_token()?;
                }
            }
        }
        Ok(self.out)
    }
    fn next_define(&mut self) -> Result<()> {
        self.expect_token("DEFINE")?;
        // skip whitespace and comments
        while self.skip_token("COMMENT")? || self.skip_token("WHITESPACE")? { }
        let name_token = self.expect_token("NAME")?;
        // skip whitespace and comments
        while self.skip_token("COMMENT")? || self.skip_token("WHITESPACE")? { }
        let start = self.span.start();
        let mut end = self.span.end();
        while let Some(curr) = self.adv_token()? {
            if self.rule_name(curr) == Some("EOL") {
                self.out += "\n";
                break;
            }
            end = self.span.end();
        }
        let value = self.lexer.span_str(Span::new(start, end)).trim().to_string();
        let name = self.lexer.span_str(name_token.span()).to_string();
        self.names.insert(name, value);
        Ok(())
    }
    fn rule_name(&self, token: Lexeme) -> Option<&str> {
        let rule = self.lexer_def.get_rule_by_id(token.tok_id());
        rule.name.as_ref().map(String::as_str)
    }
    fn expect_token(&mut self, rule_name: &str) -> Result<Lexeme> {
        self.match_token(rule_name)?
            .ok_or_else(|| LexError::new(self.span))
    }
    fn skip_token(&mut self, rule_name: &str) -> Result<bool> {
        self.match_token(rule_name)
            .map(|t| t.is_some())
    }
    fn match_token(&mut self, rule_name: &str) -> Result<Option<Lexeme>> {
        let curr = if let Some(curr) = self.curr.clone() {
            curr
        } else {
            return Ok(None);
        };
        if self.rule_name(curr) == Some(rule_name) {
            self.adv_token()
        } else {
            Ok(None)
        }
    }
    fn adv_token(&mut self) -> Result<Option<Lexeme>> {
        let curr = self.tokens.next().transpose()?;
        if let Some(curr) = curr.as_ref() {
            self.span = curr.span();
        }
        Ok(mem::replace(&mut self.curr, curr))
    }
 }