Add expression parsing

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-05-02 18:42:01 -04:00
parent 28d29c2270
commit d9edf21d16
9 changed files with 542 additions and 101 deletions

View File

@@ -1,8 +1,7 @@
#![allow(dead_code)]
use crate::syn::{ast::*, error::*, lexer::Lexer, span::*, token::*};
use crate::syn::{ast::*, error::*, lexer::Lexer, op::*, span::*, token::*};
use std::{convert::TryFrom, mem};
const EXPR_START: &'static [TokenKind] = &[
const BASE_EXPR_START: &[TokenKind] = &[
TokenKind::Ident,
TokenKind::Num,
TokenKind::Str,
@@ -10,19 +9,14 @@ const EXPR_START: &'static [TokenKind] = &[
TokenKind::LParen,
TokenKind::LBracket,
TokenKind::LBrace,
// TODO unary tokens
];
const VALUE_EXPR_START: &'static [TokenKind] = &[
TokenKind::Ident,
TokenKind::Num,
TokenKind::Str,
TokenKind::Sym,
];
/// Token kinds that introduce a prefix (unary) operator: `Plus`, `Minus` and `Bang`.
/// `next_un_expr` tests the current token against this table before falling back to a base
/// expression.
const UN_EXPR_START: &[TokenKind] = &[TokenKind::Plus, TokenKind::Minus, TokenKind::Bang];
/// Parser state: a lexer together with the single token currently being inspected.
pub struct Parser<'t> {
// Lexer handed to the constructor; presumably the source `adv_token` pulls tokens from
// (that method is not visible in this view -- confirm).
lexer: Lexer<'t>,
// The token the `is_token_*` / `match_token_*` helpers test. `None` is treated by
// `is_token_match` as "nothing matches".
curr_token: Option<Token>,
// Initialised to `false` by the constructor. NOTE(review): its consumers are not visible
// here; presumably controls whether newline tokens are skipped -- confirm.
skip_newlines: bool,
}
impl<'t> Parser<'t> {
@@ -30,6 +24,7 @@ impl<'t> Parser<'t> {
let mut parser = Parser {
lexer,
curr_token: None,
skip_newlines: false,
};
parser.adv_token()?;
Ok(parser)
@@ -42,27 +37,63 @@ impl<'t> Parser<'t> {
pub fn pos(&self) -> Pos {
self.span().start
}
}
////////////////////////////////////////////////////////////////////////////////
// Parsing functions
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Parsing functions
////////////////////////////////////////////////////////////////////////////////
// Generates a parser method for one level of binary-operator precedence.
//
// * `$name`      - name of the method to generate.
// * `$op_tokens` - slice of `TokenKind`s accepted as operators at this level.
// * `$next`      - method that parses the operands of the next-tighter level.
//
// The generated method parses a left operand with `$next`. If the current token is one of
// `$op_tokens`, it consumes it and parses the right operand by calling `$name` again. If no
// operator follows, the left operand is returned unchanged.
//
// NOTE(review): because the right operand recurses into `$name`, a chain of same-level
// operators nests to the right (`a - b - c` is built as `a - (b - c)`). Confirm this grouping
// is intended for `-` and `/`.
macro_rules! bin_expr {
    ($name:ident, $op_tokens:expr, $next:ident) => {
        fn $name(&mut self) -> Result<Expr> {
            let lhs = self.$next()?;
            let matched = self.match_token_where(|t| $op_tokens.contains(&t.kind()))?;
            match matched {
                // No operator at this level: the operand stands on its own.
                None => Ok(lhs),
                Some(token) => {
                    let op = BinOp::from(token);
                    let rhs = self.$name()?;
                    // The node spans from the start of the left operand to the end of the right.
                    let span = lhs.span().union(rhs.span());
                    Ok(BinExpr { lhs, op, rhs, span }.into())
                }
            }
        }
    };
}
impl<'t> Parser<'t> {
pub fn next_expr(&mut self) -> Result<Expr> {
self.next_bin_expr()
self.next_bin_add_expr()
}
fn next_bin_expr(&mut self) -> Result<Expr> {
let lhs = self.next_un_expr()?;
todo!()
}
// Additive level: operands are multiplicative expressions, operators are `Plus` / `Minus`.
// This is the loosest-binding level and the entry point used by `next_expr`.
bin_expr!(
next_bin_add_expr,
&[TokenKind::Plus, TokenKind::Minus],
next_bin_mul_expr
);
// Multiplicative level: operands are unary expressions, operators are `FSlash` / `Splat`.
// Binds tighter than the additive level because the additive level parses its operands
// through this method.
bin_expr!(
next_bin_mul_expr,
&[TokenKind::FSlash, TokenKind::Splat],
next_un_expr
);
fn next_un_expr(&mut self) -> Result<Expr> {
todo!()
if let Some(un_op) = self.match_token_where(|t| UN_EXPR_START.contains(&t.kind()))? {
let start = un_op.span();
let expr = self.next_un_expr()?;
let end = expr.span();
Ok(UnExpr {
op: un_op.kind().into(),
expr,
span: start.union(end),
}
.into())
} else {
self.next_base_expr()
}
}
fn next_base_expr(&mut self) -> Result<Expr> {
let token =
self.expect_token_where(|t| VALUE_EXPR_START.contains(&t.kind()), "base expression")?;
self.expect_token_where(|t| BASE_EXPR_START.contains(&t.kind()), "base expression")?;
let expr: Expr = match token.kind() {
TokenKind::Ident => BaseExpr {
kind: BaseExprKind::Ident,
@@ -84,30 +115,57 @@ impl<'t> Parser<'t> {
span: token.span(),
}
.into(),
TokenKind::LBracket => {
let expr_list = self.next_expr_list(TokenKind::RBracket)?;
let end_token =
self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?;
let span = token.span().union(end_token.span());
BaseExpr {
kind: BaseExprKind::List(expr_list),
span,
}
.into()
}
TokenKind::LBrace => todo!(),
TokenKind::LParen => {
let first = self.next_expr()?;
if let Some(_) = self.match_token_kind(TokenKind::Comma)? {
let mut list = self.next_expr_list(TokenKind::RParen)?;
let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?;
let span = first.span().union(end_token.span());
list.insert(0, first);
Expr::Base(
BaseExpr {
kind: BaseExprKind::Tuple(list),
span,
}
.into(),
)
} else {
self.expect_token_kind(TokenKind::RParen, "end of expression")?;
first
}
}
_ => unreachable!(),
};
Ok(expr)
}
fn next_list(&mut self) -> Result<Expr> {
let start_token = self.expect_token_where(|t| t.kind() == TokenKind::LBracket, "start of list (left bracket)")?;
/// Gets a list of expressions separated by commas, stopping when EOF or specified end token is
/// reached.
fn next_expr_list(&mut self, stop_before: TokenKind) -> Result<Vec<Expr>> {
let mut list_items = Vec::new();
while ! matches!(self.curr_token.map(|t| t.kind()), Some(TokenKind::RBrace) | None) {
let expr = self.next_expr()?;
while !self.is_token_kind(stop_before) {
list_items.push(self.next_expr()?);
// match a comma, otherwise exit the loop
if self.match_token_kind(TokenKind::Comma)?.is_none() {
break;
}
}
let end_token = self.expect_token_where(
|t| t.kind() == TokenKind::RBracket,
"end of list (right bracket)",
)?;
let expr = BaseExpr {
kind: BaseExprKind::List(list_items),
span: start_token.span().union(end_token.span()),
};
Ok(expr.into())
Ok(list_items)
}
////////////////////////////////////////////////////////////////////////////////
@@ -123,12 +181,35 @@ impl<'t> Parser<'t> {
where
P: Fn(Token) -> bool,
{
match self.curr_token {
Some(curr) if (pred)(curr) => self.adv_token(),
_ => Ok(None),
if self.is_token_match(pred) {
self.adv_token()
} else {
Ok(None)
}
}
fn match_token_kind(&mut self, kind: TokenKind) -> Result<Option<Token>> {
if self.is_token_kind(kind) {
self.adv_token()
} else {
Ok(None)
}
}
fn is_token_match<P>(&self, pred: P) -> bool
where
P: Fn(Token) -> bool,
{
match self.curr_token {
Some(token) => (pred)(token),
None => false,
}
}
fn is_token_kind(&self, kind: TokenKind) -> bool {
self.is_token_match(|t| t.kind() == kind)
}
fn expect_token_where<P>(&mut self, pred: P, expected: impl ToString) -> Result<Token>
where
P: Fn(Token) -> bool,
@@ -143,6 +224,10 @@ impl<'t> Parser<'t> {
pos: self.pos(),
})
}
fn expect_token_kind(&mut self, kind: TokenKind, expected: impl ToString) -> Result<Token> {
self.expect_token_where(|t| t.kind() == kind, expected)
}
}
impl<'t> Spanned for Parser<'t> {
@@ -231,19 +316,177 @@ mod test {
assert!(parser.is_eof());
}
macro_rules! test_parser {
($text:expr, $($tail:tt)+) => {{
let mut parser = Parser::try_from($text).unwrap();
test_parser!(@TAIL, &mut parser, $($tail)+);
}};
(@TAIL, $parser:expr, $method:ident, $expected:expr) => {{
assert_eq!($parser.$method().unwrap(), $expected);
}};
(@TAIL, $parser:expr, $method:ident, $expected:expr, $($tail:tt)+) => {{
test_parser!(@TAIL, $parser, $method, $expected);
test_parser!(@TAIL, $parser, $($tail)*);
}};
(@TAIL, $parser:expr) => {};
(@TAIL, $parser:expr,) => {};
}
fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
Expr::Bin(
BinExpr {
lhs,
op,
rhs,
span: Default::default(),
}
.into(),
)
}
fn un_expr(op: UnOp, expr: Expr) -> Expr {
Expr::Un(
UnExpr {
op,
expr,
span: Default::default(),
}
.into(),
)
}
fn base_expr(kind: BaseExprKind) -> Expr {
Expr::Base(BaseExpr {
kind,
span: Default::default(),
})
}
#[test]
fn test_base_expr() {
let mut parser = Parser::try_from("1").unwrap();
assert!(matches!(
parser.next_base_expr(),
Ok(
Expr::Base(
BaseExpr {
kind: BaseExprKind::Num,
..
}
test_parser!(
"1 x 'value' :sym",
// 1
next_expr,
base_expr(BaseExprKind::Num),
// x
next_expr,
base_expr(BaseExprKind::Ident),
// 'value'
next_expr,
base_expr(BaseExprKind::Str),
// :sym
next_expr,
base_expr(BaseExprKind::Sym)
);
test_parser!(
"[1, x, 'value', :sym]",
next_expr,
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Ident),
base_expr(BaseExprKind::Str),
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
"[1, x, 'value', :sym,]",
next_expr,
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Ident),
base_expr(BaseExprKind::Str),
base_expr(BaseExprKind::Sym),
]))
);
}
#[test]
fn test_compound_expr() {
test_parser!(
r#"
x + 2
1 + 2 * 3 - 4 / 5
1+2*3-4/5
1 - -1 * 8
1--1*8
"#,
// x + 2
next_expr,
bin_expr(
base_expr(BaseExprKind::Ident),
BinOp::Plus,
base_expr(BaseExprKind::Num)
),
// 1 + 2 * 3 - 4 / 5
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
bin_expr(
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Times,
base_expr(BaseExprKind::Num)
),
BinOp::Minus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Div,
base_expr(BaseExprKind::Num)
)
)
),
//
// 1+2*3-4/5
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
bin_expr(
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Times,
base_expr(BaseExprKind::Num)
),
BinOp::Minus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Div,
base_expr(BaseExprKind::Num)
)
)
),
// 1 - -1 * 8
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Minus,
bin_expr(
un_expr(UnOp::Minus, base_expr(BaseExprKind::Num)),
BinOp::Times,
base_expr(BaseExprKind::Num)
)
),
// 1--1*8
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Minus,
bin_expr(
un_expr(UnOp::Minus, base_expr(BaseExprKind::Num)),
BinOp::Times,
base_expr(BaseExprKind::Num)
)
)
));
);
//
}
}