Use lrpar for parsing, big 'ol syntax overhaul

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-02-17 16:15:06 -05:00
parent cf9ba376aa
commit 2c4b56e362
23 changed files with 1394 additions and 1494 deletions

View File

@@ -1,165 +1,82 @@
use crate::vm::{inst::*, reg::Reg};
use crate::vm::reg::Reg;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SectionBlock {
Data {
org: SectionOrg,
body: Vec<Line>,
},
Code {
org: SectionOrg,
body: Vec<Line>,
},
Meta {
entries: Vec<(String, ImmValue)>,
},
#[derive(Debug, Clone)]
pub enum SectionDef {
Meta(MetaSection),
Data(DataSection),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone)]
pub struct MetaSection {
pub values: Vec<MetaLine>,
}
#[derive(Debug, Clone)]
pub struct MetaLine {
pub name: String,
pub value: Value,
}
#[derive(Debug, Clone)]
pub struct DataSection {
pub name: String,
pub org: Option<SectionOrg>,
pub lines: Vec<DataLine>,
}
#[derive(Debug, Clone)]
pub enum SectionOrg {
Start(u64),
Range(u64, u64),
StartEnd(u64, u64),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Line {
#[derive(Debug, Clone)]
pub enum DataLine {
ValueDef(ValueDef),
Inst(Inst),
LabelDef(String),
ValueDecl(ValueDecl),
Export(String),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ImmValue {
Number(u64),
Label(String),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValueDecl {
U64(u64),
U32(u64),
U16(u64),
U8(u64),
#[derive(Debug, Clone)]
pub enum ValueDef {
Int(u64),
String(String),
ZString(String),
}
impl ValueDecl {
pub fn len(&self) -> usize {
match self {
ValueDecl::U64(_) => 8,
ValueDecl::U32(_) => 4,
ValueDecl::U16(_) => 2,
ValueDecl::U8(_) => 1,
ValueDecl::String(s) => s.as_bytes().len() + 8,
ValueDecl::ZString(s) => s.as_bytes().len() + 1,
}
}
pub fn to_bytes(&self) -> Vec<u8> {
let len = self.len();
let bytes = match self {
ValueDecl::U64(v) => v.to_le_bytes().to_vec(),
ValueDecl::U32(v) => v.to_le_bytes()[0..4].to_vec(),
ValueDecl::U16(v) => v.to_le_bytes()[0..2].to_vec(),
ValueDecl::U8(v) => vec![(v & 0xff) as u8],
ValueDecl::String(s) => {
let mut bytes = Vec::with_capacity(self.len());
bytes.extend(&(s.len() as u64).to_le_bytes());
bytes.extend(s.as_bytes());
bytes
}
ValueDecl::ZString(s) => {
let mut bytes = Vec::with_capacity(self.len());
bytes.extend(s.as_bytes());
bytes.push(0);
bytes
}
};
assert_eq!(bytes.len(), len);
bytes
}
#[derive(Debug, Clone)]
pub enum Value {
Int(u64),
Reg(Reg),
Name(String),
Here,
//Array(Vec<Value>),
//Deref(Value, Size
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone)]
pub enum Inst {
Add(Reg, Reg),
Mul(Reg, Reg),
Div(Reg, Reg),
Mod(Reg, Reg),
INeg(Reg),
And(Reg, Reg),
Or(Reg, Reg),
Inv(Reg),
Not(Reg),
Xor(Reg, Reg),
Shl(Reg, Reg),
Shr(Reg, Reg),
CmpEq(Reg, Reg),
CmpLt(Reg, Reg),
Jmp(Reg),
Jz(Reg),
Jnz(Reg),
Load(Reg, Reg),
Store(Reg, Reg),
StoreImm(Reg, ImmValue),
StoreImm32(Reg, ImmValue),
StoreImm64(Reg, ImmValue),
MemCopy(Reg, Reg),
RegCopy(Reg, Reg),
Nop,
Add(Value, Value),
Sub(Value, Value),
Mul(Value, Value),
Div(Value, Value),
Mod(Value, Value),
And(Value, Value),
Or(Value, Value),
Xor(Value, Value),
Shl(Value, Value),
Shr(Value, Value),
INeg(Value, Value),
Inv(Value, Value),
Not(Value, Value),
CmpEq(Value, Value),
CmpLt(Value, Value),
Jmp(Value),
Jz(Value),
Jnz(Value),
Mov(Value, Value),
Halt,
}
impl Inst {
pub fn op(&self) -> InstOp {
match self {
Inst::Add(_, _) => ADD,
Inst::Mul(_, _) => MUL,
Inst::Div(_, _) => DIV,
Inst::Mod(_, _) => MOD,
Inst::INeg(_) => INEG,
Inst::And(_, _) => AND,
Inst::Or(_, _) => OR,
Inst::Inv(_) => INV,
Inst::Not(_) => NOT,
Inst::Xor(_, _) => XOR,
Inst::Shl(_, _) => SHL,
Inst::Shr(_, _) => SHR,
Inst::CmpEq(_, _) => CMPEQ,
Inst::CmpLt(_, _) => CMPLT,
Inst::Jmp(_) => JMP,
Inst::Jz(_) => JZ,
Inst::Jnz(_) => JNZ,
Inst::Load(_, _) => LOAD,
Inst::Store(_, _) => STORE,
Inst::StoreImm(_, imm) => {
if let ImmValue::Number(imm) = imm {
if *imm > (u32::max_value() as u64) {
STOREIMM64
} else {
STOREIMM32
}
} else {
STOREIMM64
}
}
Inst::StoreImm32(_, _) => STOREIMM32,
Inst::StoreImm64(_, _) => STOREIMM64,
Inst::MemCopy(_, _) => MEMCOPY,
Inst::RegCopy(_, _) => REGCOPY,
Inst::Nop => NOP,
Inst::Halt => HALT,
}
}
pub fn len(&self) -> usize {
inst_len(self.op())
}
Nop,
Dump,
}

View File

@@ -1,44 +1,16 @@
use snafu::Snafu;
use std::{fmt::Debug, io};
//use std::{fmt::Debug, io};
#[derive(Debug, Snafu)]
pub enum ParseError {
#[snafu(display("IO error: {}", source))]
Io { source: io::Error },
pub enum SyntaxError {
//#[snafu(display("IO error: {}", source))]
//Io { source: io::Error },
#[snafu(display("wrong magic number"))]
WrongMagic,
#[snafu(display("unexpected {}", what))]
Unexpected { what: String },
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
UnknownSectionKind { kind: u8 },
#[snafu(display("invalid UTF-8 string: {}", source))]
InvalidUtf8String { source: std::string::FromUtf8Error },
#[snafu(display("duplicate symbol name: {}", name))]
DuplicateName { name: String },
#[snafu(display("duplicate exported symbol name: {}", name))]
DuplicateExportName { name: String },
#[snafu(display("expected {}, but got {} instead", expected, got))]
ExpectedGot { expected: String, got: String },
}
macro_rules! into_parse_error {
(
$($type:ty : $variant:ident),* $(,)?
) => {
$(
impl From<$type> for ParseError {
fn from(other: $type) -> Self {
ParseError::$variant { source: other }
}
}
)*
}
}
into_parse_error! {
io::Error: Io,
std::string::FromUtf8Error: InvalidUtf8String,
}
pub type Result<T, E = ParseError> = std::result::Result<T, E>;
pub type Result<T, E = SyntaxError> = std::result::Result<T, E>;

50
src/vm/obj/syn/lexer.l Normal file
View File

@@ -0,0 +1,50 @@
%%
\$[0-9]+ "DEC_INT"
\$0[Xx][0-9a-fA-F]+ "HEX_INT"
\$0[Bb][01]+ "BIN_INT"
\.meta "DIR_META"
\.section "DIR_SECTION"
\.export "DIR_EXPORT"
\{ "LBRACE"
\} "RBRACE"
\.\. "DOTDOT"
: "COLON"
, "COMMA"
\$\$ "BUCKBUCK"
[iu](8|16|32|64) "INT_TYPE"
\.[iu](8|16|32|64) "INT_DEF"
\.string "STR_DEF"
\.zstring "ZSTR_DEF"
"([^"]|\\[\\nt0"'])*" "STRING"
add "ADD"
sub "SUB"
mul "MUL"
div "DIV"
mod "MOD"
and "AND"
or "OR"
xor "XOR"
shl "SHL"
shr "SHR"
ineg "INEG"
inv "INV"
not "NOT"
cmpeq "CMPEQ"
cmplt "CMPLT"
jmp "JMP"
jz "JZ"
jnz "JNZ"
mov "MOV"
halt "HALT"
nop "NOP"
dump "DUMP"
%ip "REG_IP"
%sp "REG_SP"
%fp "REG_FP"
%flags "REG_FLAGS"
%null "REG_NULL"
%status "REG_STATUS"
%r[0-9]{1,2} "REG_GENERAL"
[a-zA-Z_][a-zA-Z0-9_]* "NAME"
;[^\n]* ;
[ \n\t]+ ;

View File

@@ -1,16 +1,16 @@
use lalrpop_util::lalrpop_mod;
lalrpop_mod!(pub parser, "/vm/obj/syn/parser.rs");
pub mod ast;
pub mod error;
pub fn unescape_string(s: impl AsRef<str>) -> String {
let s = s.as_ref();
s.replace(r"\\", "\\")
.replace("\\n", "\n")
.replace("\\r", "\r")
.replace("\\t", "\t")
.replace("\\t", "\t")
.replace("\\0", "\0")
.replace("\\\"", "\"")
use lrlex::lrlex_mod;
use lrpar::lrpar_mod;
lrlex_mod!("vm/obj/syn/lexer.l");
lrpar_mod!("vm/obj/syn/parser.y");
pub mod parser {
pub use super::parser_y::*;
}
pub mod lexer {
pub use super::lexer_l::*;
}

View File

@@ -1,20 +1,13 @@
use std::str::FromStr;
use crate::vm::{
common::Addr,
inst::*,
obj::syn::{unescape_string, ast::*},
reg::*,
};
grammar;
LabelDef: String = {
<Label> ":" => <>
}
ImmValue: ImmValue = {
<Label> => ImmValue::Label(<>),
<Number> => ImmValue::Number(<>),
}
Label: String = {
r"[a-zA-Z_][a-zA-Z0-9_]*" => String::from(<>),
}
@@ -33,45 +26,13 @@ String: String = {
}
Reg: Reg = {
r"%ip" => IP,
r"%sp" => SP,
r"%fp" => FP,
r"%flags" => FLAGS,
r"%null" => NULL,
r"%status" => STATUS,
r"%r[0-9]{2}" => {
let offset = (&<>[2..]).parse::<u8>().unwrap();
let reg = R00 + offset;
assert!(reg < LAST_REG, "invalid register");
reg
}
}
Inst: Inst = {
"add" <d:Reg> "," <s:Reg> => Inst::Add(d, s),
"mul" <d:Reg> "," <s:Reg> => Inst::Mul(d, s),
"div" <d:Reg> "," <s:Reg> =>Inst::Div(d, s),
"mod" <d:Reg> "," <s:Reg> => Inst::Mod(d, s),
"ineg" <d:Reg> => Inst::INeg(d),
"and" <d:Reg> "," <s:Reg> => Inst::And(d, s),
"or" <d:Reg> "," <s:Reg> => Inst::Or(d, s),
"xor" <d:Reg> "," <s:Reg> => Inst::Xor(d, s),
"shl" <d:Reg> "," <s:Reg> => Inst::Shl(d, s),
"shr" <d:Reg> "," <s:Reg> => Inst::Shr(d, s),
"cmpeq" <d:Reg> "," <s:Reg> => Inst::CmpEq(d, s),
"cmplt" <d:Reg> "," <s:Reg> => Inst::CmpLt(d, s),
"jmp" <d:Reg> => Inst::Jmp(d),
"jz" <d:Reg> => Inst::Jz(d),
"jnz" <d:Reg> => Inst::Jnz(d),
"load" <d:Reg> "," <s:Reg> => Inst::Load(d, s),
"store" <d:Reg> "," <s:Reg> => Inst::Store(d, s),
"storeimm" <d:Reg> "," <s:ImmValue> => Inst::StoreImm(d, s),
"storeimm32" <d:Reg> "," <s:ImmValue> => Inst::StoreImm32(d, s),
"storeimm64" <d:Reg> "," <s:ImmValue> => Inst::StoreImm64(d, s),
"memcopy" <d:Reg> "," <s:Reg> => Inst::MemCopy(d, s),
"regcopy" <d:Reg> "," <s:Reg> => Inst::RegCopy(d, s),
"nop" => Inst::Nop,
"halt" => Inst::Halt,
r"%ip" => todo!(),
r"%sp" => todo!(),
r"%fp" => todo!(),
r"%flags" => todo!(),
r"%nil" => todo!(),
r"%status" => todo!(),
r"%r[0-9]{1,2}" => todo!(),
}
ValueDecl: ValueDecl = {
@@ -83,34 +44,13 @@ ValueDecl: ValueDecl = {
r"\.zstring" <String> => ValueDecl::ZString(<>),
}
Line: Line = {
<Inst> => Line::Inst(<>),
<LabelDef> => Line::LabelDef(<>),
<ValueDecl> => Line::ValueDecl(<>),
r"\.export" <Label> => Line::Export(<>),
}
MetaLine: (String, ImmValue) = {
<name:Label> ":" <value:ImmValue> => (name, value),
}
SectionOrg: SectionOrg = {
<start:Number> => SectionOrg::Start(start),
<start:Number> r"\.\." <end:Number> => SectionOrg::Range(start, end),
}
Section: SectionBlock = {
"data" <org:SectionOrg> "{" <body:Line*> "}" => {
SectionBlock::Data { org, body }
},
"code" <org:SectionOrg> "{" <body:Line*> "}" => {
SectionBlock::Code { org, body }
},
"meta" "{" <entries:MetaLine*> "}" => {
SectionBlock::Meta { entries, }
}
}
pub Sections: Vec<SectionBlock> = {

204
src/vm/obj/syn/parser.y Normal file
View File

@@ -0,0 +1,204 @@
%start SectionDefs
%%
SectionDefs -> Vec<SectionDef>:
SectionDefs SectionDef { $1.push($2); $1 }
| { Vec::new() }
;
SectionDef -> SectionDef:
'DIR_META' MetaBlock { SectionDef::Meta(MetaSection { values: $2 }) }
| 'DIR_SECTION' Name MaybeSectionOrg DataBlock {
SectionDef::Data(DataSection {
name: $2,
org: $3,
lines: $4,
})
}
;
MetaBlock -> Vec<MetaLine>: 'LBRACE' MetaLines 'RBRACE' { $2 };
MetaLines -> Vec<MetaLine>:
MetaLines MetaLine { $1.push($2); $1 }
| { Vec::new() }
;
MetaLine -> MetaLine: Name 'COLON' Value { MetaLine { name: $1, value: $3 } };
MaybeSectionOrg -> Option<SectionOrg>:
SectionOrg { Some($1) }
| { None }
;
SectionOrg -> SectionOrg:
Int { SectionOrg::Start($1) }
| Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) }
;
DataBlock -> Vec<DataLine>: 'LBRACE' DataLines 'RBRACE' { $2 };
DataLines -> Vec<DataLine>:
DataLines DataLine { $1.push($2); $1 }
| { Vec::new() }
;
DataLine -> DataLine:
ValueDef { DataLine::ValueDef($1) }
| Inst { DataLine::Inst($1) }
| 'DIR_EXPORT' Name { DataLine::Export($2) }
| Name 'COLON' { DataLine::Label($1) }
;
ValueDef -> ValueDef:
'INT_DEF' Int { ValueDef::Int($2) }
| 'STR_DEF' String { ValueDef::String($2) }
| 'ZSTR_DEF' String { ValueDef::ZString($2) }
;
Value -> Value:
Int { Value::Int($1) }
| Reg { Value::Reg($1) }
| Name { Value::Name($1) }
| 'BUCKBUCK' { Value::Here }
;
Inst -> Inst:
'ADD' Value 'COMMA' Value { Inst::Add($2, $4) }
| 'SUB' Value 'COMMA' Value { Inst::Sub($2, $4) }
| 'MUL' Value 'COMMA' Value { Inst::Mul($2, $4) }
| 'DIV' Value 'COMMA' Value { Inst::Div($2, $4) }
| 'MOD' Value 'COMMA' Value { Inst::Mod($2, $4) }
| 'AND' Value 'COMMA' Value { Inst::And($2, $4) }
| 'OR' Value 'COMMA' Value { Inst::Or($2, $4) }
| 'XOR' Value 'COMMA' Value { Inst::Xor($2, $4) }
| 'SHL' Value 'COMMA' Value { Inst::Shl($2, $4) }
| 'SHR' Value 'COMMA' Value { Inst::Shr($2, $4) }
| 'INEG' Value 'COMMA' Value { Inst::INeg($2, $4) }
| 'INV' Value 'COMMA' Value { Inst::Inv($2, $4) }
| 'NOT' Value 'COMMA' Value { Inst::Not($2, $4) }
| 'CMPEQ' Value 'COMMA' Value { Inst::CmpEq($2, $4) }
| 'CMPLT' Value 'COMMA' Value { Inst::CmpLt($2, $4) }
| 'JMP' Value { Inst::Jmp($2) }
| 'JZ' Value { Inst::Jz($2) }
| 'JNZ' Value { Inst::Jnz($2) }
| 'MOV' Value 'COMMA' Value { Inst::Mov($2, $4) }
| 'HALT' { Inst::Halt }
| 'NOP' { Inst::Nop }
| 'DUMP' { Inst::Dump }
;
Name -> String:
'NAME' {
let v = $1.expect("could not parse name");
$lexer.span_str(v.span()).to_string()
}
;
Int -> u64:
'DEC_INT' {
let span = $1.expect("could not parse dec_int").span();
let s = &$lexer.span_str(span)[1..];
s.parse().unwrap()
}
| 'HEX_INT' {
let span = $1.expect("could not parse hex_int").span();
let s = &$lexer.span_str(span)[3..];
u64::from_str_radix(s, 16).unwrap()
}
| 'BIN_INT' {
let span = $1.expect("could not parse bin_int").span();
let s = &$lexer.span_str(span)[3..];
u64::from_str_radix(s, 2).unwrap()
}
;
Reg -> Reg:
'REG_IP' { IP }
| 'REG_SP' { SP }
| 'REG_FP' { FP }
| 'REG_FLAGS' { FLAGS }
| 'REG_NULL' { NULL }
| 'REG_STATUS' { STATUS }
| 'REG_GENERAL' {
let v = $1.expect("could not parse reg");
parse_reg($lexer.span_str(v.span())).unwrap()
}
;
String -> String:
'STRING' {
let v = $1.expect("could not parse string");
parse_string($lexer.span_str(v.span()))
}
;
%%
use crate::vm::{
obj::syn::ast::*,
reg::*,
};
fn parse_string(input: &str) -> String {
let mut s = String::new();
let input = &input[1..input.bytes().len() - 2];
let mut chars = input.chars();
while let Some(c) = chars.next() {
if c == '\\' {
let next = chars.next().unwrap();
let c = match next {
'\\' => '\\',
'n' => '\n',
't' => '\t',
'"' => '"',
'\'' => '\'',
'0' => '\0',
_ => unreachable!(),
};
s.push(c);
} else {
s.push(c);
}
}
s
}
fn parse_reg(input: &str) -> Option<Reg> {
use regex::Regex;
use lazy_static::lazy_static;
lazy_static! {
static ref REG_RE: Regex = Regex::new(r"^%r([0-9]{1,2})$").unwrap();
}
let captures = REG_RE.captures(input)?;
let reg_no: Reg = captures.get(1)?
.as_str()
.parse()
.unwrap();
let reg = R00 + reg_no;
if reg > R31 {
None
} else {
Some(reg)
}
}
#[cfg(test)]
mod test {
use crate::vm::reg::*;
use super::parse_reg;
#[test]
fn test_parse_reg() {
assert_eq!(parse_reg("%r00"), Some(R00));
assert_eq!(parse_reg("%r0"), Some(R00));
assert_eq!(parse_reg("%r1"), Some(R01));
assert_eq!(parse_reg("%r01"), Some(R01));
assert_eq!(parse_reg("%r31"), Some(R31));
assert_eq!(parse_reg("%r32"), None);
assert_eq!(parse_reg("%r0000"), None);
assert_eq!(parse_reg("%r9"), Some(R09));
assert_eq!(parse_reg("%r"), None);
assert_eq!(parse_reg("%r12"), Some(R12));
}
}