Clean up main.rs some, add preprocessor
* Preprocessor uses an LRPAR generated lexer and a custom parser to filter comments and set defines. Includes and conditional compilation will come next. Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
81
src/main.rs
81
src/main.rs
@@ -5,7 +5,7 @@ mod vm;
|
|||||||
|
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
use snafu::Snafu;
|
use snafu::Snafu;
|
||||||
use std::{fs, io::{stdin, stdout, Write, Read}, process, path::PathBuf};
|
use std::{fs, io::{stdin, stdout, Write, Read}, path::{Path, PathBuf}, process};
|
||||||
|
|
||||||
const DEFAULT_MAX_MEM: usize = 64 * 1024 * 1024;
|
const DEFAULT_MAX_MEM: usize = 64 * 1024 * 1024;
|
||||||
|
|
||||||
@@ -69,39 +69,77 @@ struct Options {
|
|||||||
#[structopt(long, parse(try_from_str = from_bytes_size))]
|
#[structopt(long, parse(try_from_str = from_bytes_size))]
|
||||||
max_mem: Option<usize>,
|
max_mem: Option<usize>,
|
||||||
|
|
||||||
/// Whether to compile the input file to an object.
|
/// The output file for generated files (preprocessed, compiled, etc).
|
||||||
#[structopt(short, long)]
|
|
||||||
compile: bool,
|
|
||||||
|
|
||||||
/// The output file for compiled files.
|
|
||||||
///
|
///
|
||||||
/// If not supplied, a name will be inferred from the input file. Supplying - for the path will
|
/// If not supplied, a name will be inferred from the input file. Supplying - for the path will
|
||||||
/// output to STDOUT.
|
/// output to STDOUT.
|
||||||
#[structopt(short = "o", long)]
|
#[structopt(short = "o", long)]
|
||||||
compile_out: Option<PathBuf>
|
out: Option<PathBuf>,
|
||||||
|
|
||||||
|
/// Only run the preprocessor.
|
||||||
|
#[structopt(short = "E", long)]
|
||||||
|
preprocess_only: bool,
|
||||||
|
|
||||||
|
/// Only compile the input file to an object.
|
||||||
|
#[structopt(short = "c", long)]
|
||||||
|
compile_only: bool,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
|
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
|
||||||
|
|
||||||
|
fn get_reader(path: impl AsRef<Path>) -> Result<Box<dyn Read>> {
|
||||||
|
if let Some("-") = path.as_ref().to_str() {
|
||||||
|
Ok(Box::new(stdin()))
|
||||||
|
} else {
|
||||||
|
Ok(Box::new(fs::File::open(path.as_ref())?))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_writer(path: impl AsRef<Path>) -> Result<Box<dyn Write>> {
|
||||||
|
if let Some("-") = path.as_ref().to_str() {
|
||||||
|
Ok(Box::new(stdout()))
|
||||||
|
} else {
|
||||||
|
Ok(Box::new(fs::File::create(path.as_ref())?))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
use vm::{
|
use vm::{
|
||||||
state::State,
|
state::State,
|
||||||
obj::{
|
obj::{
|
||||||
assemble::{Asm, Assemble},
|
assemble::{Asm, Assemble},
|
||||||
syn::{lexer, parser},
|
syn::{lexer, parser, preprocessor},
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// get the object either from reading it, or from parsing an assembly file
|
|
||||||
let opt = Options::from_args();
|
let opt = Options::from_args();
|
||||||
let text = if Some("-") == opt.input.to_str() {
|
|
||||||
|
// Load assembler
|
||||||
|
let text = {
|
||||||
|
let mut reader = get_reader(&opt.input)?;
|
||||||
let mut text = String::new();
|
let mut text = String::new();
|
||||||
let stdin = stdin();
|
reader.read_to_string(&mut text)?;
|
||||||
stdin.lock().read_to_string(&mut text)?;
|
|
||||||
text
|
text
|
||||||
} else {
|
|
||||||
fs::read_to_string(&opt.input)?
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Preprocess
|
||||||
|
let text = {
|
||||||
|
preprocessor::preprocess(&text)?
|
||||||
|
};
|
||||||
|
|
||||||
|
// Preprocess only - exit early
|
||||||
|
if opt.preprocess_only {
|
||||||
|
let outfile = opt.out.clone().unwrap_or_else(|| {
|
||||||
|
let mut outfile = opt.input.clone();
|
||||||
|
assert!(outfile.set_extension("pasm"));
|
||||||
|
outfile
|
||||||
|
});
|
||||||
|
let mut writer = get_writer(&outfile)?;
|
||||||
|
writer.write(&text.as_bytes())?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
let lexerdef = lexer::lexerdef();
|
let lexerdef = lexer::lexerdef();
|
||||||
let lexer = lexerdef.lexer(&text);
|
let lexer = lexerdef.lexer(&text);
|
||||||
let (res, errors) = parser::parse(&lexer);
|
let (res, errors) = parser::parse(&lexer);
|
||||||
@@ -113,23 +151,20 @@ fn main() -> Result<()> {
|
|||||||
if !errors.is_empty() {
|
if !errors.is_empty() {
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let res = res.unwrap();
|
let res = res.unwrap();
|
||||||
let mut asm = Asm::default();
|
let mut asm = Asm::default();
|
||||||
let object = res.assemble(&mut asm)?;
|
let object = res.assemble(&mut asm)?;
|
||||||
if opt.compile {
|
|
||||||
let outfile = opt.compile_out.clone().unwrap_or_else(|| {
|
if opt.compile_only {
|
||||||
|
let outfile = opt.out.clone().unwrap_or_else(|| {
|
||||||
let mut outfile = opt.input.clone();
|
let mut outfile = opt.input.clone();
|
||||||
assert!(outfile.set_extension("obj"));
|
assert!(outfile.set_extension("obj"));
|
||||||
outfile
|
outfile
|
||||||
});
|
});
|
||||||
let bytes = object.to_bytes();
|
let bytes = object.to_bytes();
|
||||||
if Some("-") == outfile.to_str() {
|
let mut writer = get_writer(&outfile)?;
|
||||||
let stdout = stdout();
|
writer.write(&bytes)?;
|
||||||
stdout.lock().write(&bytes)?;
|
|
||||||
} else {
|
|
||||||
// write compiled file here
|
|
||||||
fs::write(outfile, &bytes)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
let mut state = State::new();
|
let mut state = State::new();
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
%%
|
%%
|
||||||
|
#define "DEFINE"
|
||||||
\$[0-9]+ "DEC_INT"
|
\$[0-9]+ "DEC_INT"
|
||||||
\$0[Xx][0-9a-fA-F]+ "HEX_INT"
|
\$0[Xx][0-9a-fA-F]+ "HEX_INT"
|
||||||
\$0[Bb][01]+ "BIN_INT"
|
\$0[Bb][01]+ "BIN_INT"
|
||||||
|
|||||||
@@ -1,16 +1,17 @@
|
|||||||
pub mod ast;
|
pub mod ast;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
|
pub mod preprocessor;
|
||||||
use lrlex::lrlex_mod;
|
|
||||||
use lrpar::lrpar_mod;
|
|
||||||
|
|
||||||
lrlex_mod!("vm/obj/syn/lexer.l");
|
|
||||||
lrpar_mod!("vm/obj/syn/parser.y");
|
|
||||||
|
|
||||||
pub mod parser {
|
pub mod parser {
|
||||||
pub use super::parser_y::*;
|
use lrpar::lrpar_mod;
|
||||||
|
lrpar_mod!("vm/obj/syn/parser.y");
|
||||||
|
|
||||||
|
pub use self::parser_y::*;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod lexer {
|
pub mod lexer {
|
||||||
pub use super::lexer_l::*;
|
use lrlex::lrlex_mod;
|
||||||
|
lrlex_mod!("vm/obj/syn/lexer.l");
|
||||||
|
|
||||||
|
pub use self::lexer_l::*;
|
||||||
}
|
}
|
||||||
|
|||||||
13
src/vm/obj/syn/preprocessor.l
Normal file
13
src/vm/obj/syn/preprocessor.l
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
%%
\$[0-9]+ "DEC_INT"
\$0[Xx][0-9a-fA-F]+ "HEX_INT"
\$0[Bb][01]+ "BIN_INT"
#define "DEFINE"
%[a-z0-9]+ "REG"
\.[a-zA-Z0-9]+ "DIRECTIVE"
[a-zA-Z_][a-zA-Z0-9_]* "NAME"
;[^\n]* "COMMENT"
[ \t]+ "WHITESPACE"
\n "EOL"
"([^"\\]|\\(.|\n))*" "STRING"
. "OTHER"
|
||||||
142
src/vm/obj/syn/preprocessor.rs
Normal file
142
src/vm/obj/syn/preprocessor.rs
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
mod lexer {
|
||||||
|
use lrlex::lrlex_mod;
|
||||||
|
lrlex_mod!("vm/obj/syn/preprocessor.l");
|
||||||
|
|
||||||
|
pub use self::preprocessor_l::*;
|
||||||
|
}
|
||||||
|
|
||||||
|
use lrlex;
|
||||||
|
use lrpar::{self, LexError, Lexer, Span};
|
||||||
|
use std::{collections::HashMap, mem};
|
||||||
|
|
||||||
|
type StorageT = u32;
|
||||||
|
type Lexeme = lrpar::Lexeme<StorageT>;
|
||||||
|
type LexerDef = lrlex::LexerDef<StorageT>;
|
||||||
|
type Result<T, E = LexError> = std::result::Result<T, E>;
|
||||||
|
|
||||||
|
pub fn preprocess(text: &str) -> Result<String> {
|
||||||
|
let lexerdef = lexer::lexerdef();
|
||||||
|
let lexer = lexerdef.lexer(text);
|
||||||
|
Preprocess::new(&lexer).preprocess()
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Preprocess<'t> {
|
||||||
|
names: HashMap<String, String>,
|
||||||
|
lexer: &'t dyn Lexer<StorageT>,
|
||||||
|
tokens: Box<dyn Iterator<Item = Result<Lexeme>> + 't>,
|
||||||
|
curr: Option<Lexeme>,
|
||||||
|
span: Span,
|
||||||
|
out: String,
|
||||||
|
lexer_def: LexerDef,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t> Preprocess<'t> {
|
||||||
|
pub fn new(lexer: &'t dyn Lexer<StorageT>) -> Self {
|
||||||
|
Preprocess {
|
||||||
|
names: Default::default(),
|
||||||
|
lexer,
|
||||||
|
tokens: lexer.iter(),
|
||||||
|
curr: None,
|
||||||
|
span: Span::new(0, 0),
|
||||||
|
out: String::new(),
|
||||||
|
lexer_def: lexer::lexerdef(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn preprocess(mut self) -> Result<String> {
|
||||||
|
// load the first token in, start parsing
|
||||||
|
self.adv_token()?;
|
||||||
|
|
||||||
|
while let Some(curr) = self.curr.clone() {
|
||||||
|
let rule = self.lexer_def.get_rule_by_id(curr.tok_id());
|
||||||
|
let token_text = self.lexer.span_str(curr.span());
|
||||||
|
match rule.name.as_ref().map(|s| s.as_str()) {
|
||||||
|
Some("DEFINE") => {
|
||||||
|
self.next_define()?;
|
||||||
|
}
|
||||||
|
Some("NAME") => {
|
||||||
|
if let Some(value) = self.names.get(token_text) {
|
||||||
|
self.out += value;
|
||||||
|
} else {
|
||||||
|
self.out += token_text;
|
||||||
|
}
|
||||||
|
self.adv_token()?;
|
||||||
|
}
|
||||||
|
Some("COMMENT") => {
|
||||||
|
// ignore comments
|
||||||
|
self.adv_token()?;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// Preserve everything else
|
||||||
|
self.out += token_text;
|
||||||
|
self.adv_token()?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(self.out)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_define(&mut self) -> Result<()> {
|
||||||
|
self.expect_token("DEFINE")?;
|
||||||
|
// skip whitespace and comments
|
||||||
|
while self.skip_token("COMMENT")? || self.skip_token("WHITESPACE")? { }
|
||||||
|
|
||||||
|
let name_token = self.expect_token("NAME")?;
|
||||||
|
|
||||||
|
// skip whitespace and comments
|
||||||
|
while self.skip_token("COMMENT")? || self.skip_token("WHITESPACE")? { }
|
||||||
|
|
||||||
|
let start = self.span.start();
|
||||||
|
let mut end = self.span.end();
|
||||||
|
|
||||||
|
while let Some(curr) = self.adv_token()? {
|
||||||
|
if self.rule_name(curr) == Some("EOL") {
|
||||||
|
self.out += "\n";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
end = self.span.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = self.lexer.span_str(Span::new(start, end)).trim().to_string();
|
||||||
|
let name = self.lexer.span_str(name_token.span()).to_string();
|
||||||
|
self.names.insert(name, value);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rule_name(&self, token: Lexeme) -> Option<&str> {
|
||||||
|
let rule = self.lexer_def.get_rule_by_id(token.tok_id());
|
||||||
|
rule.name.as_ref().map(String::as_str)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn expect_token(&mut self, rule_name: &str) -> Result<Lexeme> {
|
||||||
|
self.match_token(rule_name)?
|
||||||
|
.ok_or_else(|| LexError::new(self.span))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip_token(&mut self, rule_name: &str) -> Result<bool> {
|
||||||
|
self.match_token(rule_name)
|
||||||
|
.map(|t| t.is_some())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn match_token(&mut self, rule_name: &str) -> Result<Option<Lexeme>> {
|
||||||
|
let curr = if let Some(curr) = self.curr.clone() {
|
||||||
|
curr
|
||||||
|
} else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
if self.rule_name(curr) == Some(rule_name) {
|
||||||
|
self.adv_token()
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn adv_token(&mut self) -> Result<Option<Lexeme>> {
|
||||||
|
let curr = self.tokens.next().transpose()?;
|
||||||
|
if let Some(curr) = curr.as_ref() {
|
||||||
|
self.span = curr.span();
|
||||||
|
}
|
||||||
|
Ok(mem::replace(&mut self.curr, curr))
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user