From a4f289fb535a583e171749af9b30048834f40e08 Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Tue, 5 May 2020 16:38:07 -0400 Subject: [PATCH] Add more expressions to parser, add EOL and newline tokens * Lexer recognizes semicolons as EOL tokens and line breaks as newline tokens * Parser can be configured to ignore newlines (which is also used internally) * Newlines are allowed in lists, tuples, and parenthesized expressions * Add a bunch of tests for the new stuff Signed-off-by: Alek Ratzloff --- src/syn/ast.rs | 15 ++- src/syn/lexer.rs | 16 ++- src/syn/op.rs | 10 ++ src/syn/parser.rs | 249 ++++++++++++++++++++++++++++++++++++++++++---- src/syn/span.rs | 2 + src/syn/token.rs | 4 + 6 files changed, 274 insertions(+), 22 deletions(-) diff --git a/src/syn/ast.rs b/src/syn/ast.rs index 9dd7337..9ad1fdb 100644 --- a/src/syn/ast.rs +++ b/src/syn/ast.rs @@ -3,10 +3,23 @@ use derivative::Derivative; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Stmt { - Assign(Expr, Expr), + Assign(AssignStmt), Expr(Expr), } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AssignStmt { + pub lhs: Expr, + pub rhs: Expr, + pub span: Span, +} + +impl Spanned for AssignStmt { + fn span(&self) -> Span { + self.span + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum Expr { Base(BaseExpr), diff --git a/src/syn/lexer.rs b/src/syn/lexer.rs index 335404b..ca34870 100644 --- a/src/syn/lexer.rs +++ b/src/syn/lexer.rs @@ -46,7 +46,7 @@ impl<'t> Lexer<'t> { fn skip_whitespace(&mut self) { while let Some(c) = self.curr_char() { - if !c.is_whitespace() { + if !c.is_whitespace() || c == '\n' { break; } else { self.adv_char(); @@ -96,6 +96,8 @@ impl<'t> Lexer<'t> { |(?P\{) |(?P\}) |(?P,) + |(?P;) + |(?P\n) "#).ignore_whitespace(true) .build() .unwrap(); @@ -133,6 +135,8 @@ impl<'t> Lexer<'t> { ("gt", TokenKind::Gt), ("eq", TokenKind::Eq), + ("eol", TokenKind::Eol), + ("newline", TokenKind::Newline), ]; self.skip_whitespace(); @@ -206,7 +210,7 @@ mod test { assert!(matches!(lexer.next_token(), Ok(None))); 
assert!(lexer.is_eof()); - let mut lexer = Lexer::new(" \n \n \n\r\n\t "); + let mut lexer = Lexer::new(" \t \r \r\r\t\t "); assert!(matches!(lexer.next_token(), Ok(None))); assert!(lexer.is_eof()); } @@ -263,6 +267,14 @@ mod test { ); } + #[test] + fn test_eol() { + test_token!("\n;", + TokenKind::Newline, "\n", + TokenKind::Eol, ";" + ); + } + #[test] fn test_symbols() { test_token!("(", TokenKind::LParen); diff --git a/src/syn/op.rs b/src/syn/op.rs index 6f53a28..0283e85 100644 --- a/src/syn/op.rs +++ b/src/syn/op.rs @@ -30,6 +30,11 @@ pub enum BinOp { Minus, Times, Div, + EqEq, + Lt, + Gt, + LtEq, + GtEq, } impl From for BinOp { @@ -39,6 +44,11 @@ impl From for BinOp { TokenKind::Minus => BinOp::Minus, TokenKind::Splat => BinOp::Times, TokenKind::FSlash => BinOp::Div, + TokenKind::EqEq => BinOp::EqEq, + TokenKind::LtEq => BinOp::LtEq, + TokenKind::GtEq => BinOp::GtEq, + TokenKind::Lt => BinOp::Lt, + TokenKind::Gt => BinOp::Gt, _ => panic!("{:?} cannot be converted to a binop", other), } } diff --git a/src/syn/parser.rs b/src/syn/parser.rs index da4a511..59d1041 100644 --- a/src/syn/parser.rs +++ b/src/syn/parser.rs @@ -37,6 +37,28 @@ impl<'t> Parser<'t> { pub fn pos(&self) -> Pos { self.span().start } + + pub fn set_skip_newlines(&mut self, skip: bool) -> Result { + let prev = mem::replace(&mut self.skip_newlines, skip); + match self.skip_newlines() { + Ok(()) => Ok(prev), + Err(e) => { + self.skip_newlines = prev; + Err(e) + } + } + } + + pub fn is_skip_newlines(&self) -> bool { + self.skip_newlines + } + + fn skip_newlines(&mut self) -> Result<()> { + if self.is_skip_newlines() && self.is_token_kind(TokenKind::Newline) { + self.adv_token()?; + } + Ok(()) + } } //////////////////////////////////////////////////////////////////////////////// @@ -61,14 +83,28 @@ macro_rules! 
bin_expr { impl<'t> Parser<'t> { pub fn next_expr(&mut self) -> Result { - self.next_bin_add_expr() + self.next_bin_cmp_expr() } + // == < > <= >= + bin_expr!( + next_bin_cmp_expr, + &[ + TokenKind::EqEq, + TokenKind::Lt, + TokenKind::Gt, + TokenKind::LtEq, + TokenKind::GtEq + ], + next_bin_add_expr + ); + // + - bin_expr!( next_bin_add_expr, &[TokenKind::Plus, TokenKind::Minus], next_bin_mul_expr ); + // * / bin_expr!( next_bin_mul_expr, &[TokenKind::FSlash, TokenKind::Splat], @@ -80,6 +116,7 @@ impl<'t> Parser<'t> { let start = un_op.span(); let expr = self.next_un_expr()?; let end = expr.span(); + Ok(UnExpr { op: un_op.kind().into(), expr, @@ -116,7 +153,9 @@ impl<'t> Parser<'t> { } .into(), TokenKind::LBracket => { + let prev_skip = self.set_skip_newlines(true)?; let expr_list = self.next_expr_list(TokenKind::RBracket)?; + self.set_skip_newlines(prev_skip)?; let end_token = self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?; let span = token.span().union(end_token.span()); @@ -128,10 +167,14 @@ impl<'t> Parser<'t> { } TokenKind::LBrace => todo!(), TokenKind::LParen => { + let prev_skip = self.set_skip_newlines(true)?; let first = self.next_expr()?; if let Some(_) = self.match_token_kind(TokenKind::Comma)? 
{ let mut list = self.next_expr_list(TokenKind::RParen)?; + + self.set_skip_newlines(prev_skip)?; let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?; + let span = first.span().union(end_token.span()); list.insert(0, first); Expr::Base( @@ -142,6 +185,7 @@ impl<'t> Parser<'t> { .into(), ) } else { + self.set_skip_newlines(prev_skip)?; self.expect_token_kind(TokenKind::RParen, "end of expression")?; first } @@ -173,7 +217,21 @@ impl<'t> Parser<'t> { //////////////////////////////////////////////////////////////////////////////// fn adv_token(&mut self) -> Result> { - let next_token = self.lexer.next_token()?; + let mut next_token = self.lexer.next_token()?; + + if self.is_skip_newlines() { + while next_token.map(|t| t.kind() == TokenKind::Newline).unwrap_or(false) { + next_token = self.lexer.next_token()?; + } + + if self.is_token_kind(TokenKind::Newline) { + self.curr_token = next_token; + while next_token.map(|t| t.kind() == TokenKind::Newline).unwrap_or(false) { + next_token = self.lexer.next_token()?; + } + } + } + Ok(mem::replace(&mut self.curr_token, next_token)) } @@ -317,9 +375,8 @@ mod test { } macro_rules! 
test_parser { - ($text:expr, $($tail:tt)+) => {{ - let mut parser = Parser::try_from($text).unwrap(); - + ($parser:expr, $($tail:tt)+) => {{ + let mut parser = $parser; test_parser!(@TAIL, &mut parser, $($tail)+); }}; @@ -332,8 +389,12 @@ mod test { test_parser!(@TAIL, $parser, $($tail)*); }}; - (@TAIL, $parser:expr) => {}; - (@TAIL, $parser:expr,) => {}; + (@TAIL, $parser:expr) => { + assert!($parser.is_eof()); + }; + (@TAIL, $parser:expr,) => { + assert!($parser.is_eof()); + }; } fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr { @@ -369,7 +430,7 @@ mod test { #[test] fn test_base_expr() { test_parser!( - "1 x 'value' :sym", + Parser::try_from("1 x 'value' :sym").unwrap(), // 1 next_expr, base_expr(BaseExprKind::Num), @@ -383,9 +444,12 @@ mod test { next_expr, base_expr(BaseExprKind::Sym) ); + } + #[test] + fn test_list_expr() { test_parser!( - "[1, x, 'value', :sym]", + Parser::try_from("[1, x, 'value', :sym]").unwrap(), next_expr, base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Num), @@ -396,7 +460,7 @@ mod test { ); test_parser!( - "[1, x, 'value', :sym,]", + Parser::try_from("[1, x, 'value', :sym,]").unwrap(), next_expr, base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Num), @@ -408,15 +472,49 @@ mod test { } #[test] - fn test_compound_expr() { + fn test_tuple_expr() { test_parser!( - r#" - x + 2 - 1 + 2 * 3 - 4 / 5 - 1+2*3-4/5 - 1 - -1 * 8 - 1--1*8 - "#, + Parser::try_from("(1, :sym)").unwrap(), + next_expr, + base_expr(BaseExprKind::Tuple(vec![ + base_expr(BaseExprKind::Num), + base_expr(BaseExprKind::Sym), + ])) + ); + + test_parser!( + Parser::try_from("(:sym,)").unwrap(), + next_expr, + base_expr(BaseExprKind::Tuple(vec![ + base_expr(BaseExprKind::Sym), + ])) + ); + + test_parser!( + Parser::try_from("(:sym)").unwrap(), + next_expr, + base_expr(BaseExprKind::Sym) + ); + } + + #[test] + fn test_bin_expr() { + test_parser!( + { + let mut parser = Parser::try_from( + r#"x + 2 + 1 + 2 * 3 - 4 / 5 + 1+2*3-4/5 + 1 - -1 * 8 + 1--1*8 + 1 
+ 1 == 2 + 3 >= 1 + 2 + "#, + ) + .unwrap(); + parser.set_skip_newlines(true).unwrap(); + parser + }, // x + 2 next_expr, bin_expr( @@ -484,9 +582,122 @@ mod test { BinOp::Times, base_expr(BaseExprKind::Num) ) + ), + // 1 + 1 == 2 + next_expr, + bin_expr( + bin_expr( + base_expr(BaseExprKind::Num), + BinOp::Plus, + base_expr(BaseExprKind::Num) + ), + BinOp::EqEq, + base_expr(BaseExprKind::Num) + ), + // 3 >= 1 + 2 + next_expr, + bin_expr( + base_expr(BaseExprKind::Num), + BinOp::GtEq, + bin_expr( + base_expr(BaseExprKind::Num), + BinOp::Plus, + base_expr(BaseExprKind::Num) + ) + ) + ); + } + + #[test] + fn test_multiline_exprs() { + test_parser!( + Parser::try_from(r"[ + 1, + x, + ['value', :value], + :sym + ]").unwrap(), + next_expr, + base_expr(BaseExprKind::List(vec![ + base_expr(BaseExprKind::Num), + base_expr(BaseExprKind::Ident), + base_expr(BaseExprKind::List(vec![ + base_expr(BaseExprKind::Str), + base_expr(BaseExprKind::Sym), + ])), + base_expr(BaseExprKind::Sym), + ])) + ); + + test_parser!( + Parser::try_from(r"( + 1, + x, + [ + 'value', + :value, + ], + :sym + )").unwrap(), + next_expr, + base_expr(BaseExprKind::Tuple(vec![ + base_expr(BaseExprKind::Num), + base_expr(BaseExprKind::Ident), + base_expr(BaseExprKind::List(vec![ + base_expr(BaseExprKind::Str), + base_expr(BaseExprKind::Sym), + ])), + base_expr(BaseExprKind::Sym), + ])) + ); + + test_parser!( + Parser::try_from(r"( + + 1 + + 2 + + 3 + + 4 + )").unwrap(), + next_expr, + bin_expr( + un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)), + BinOp::Plus, + bin_expr( + base_expr(BaseExprKind::Num), + BinOp::Plus, + bin_expr( + base_expr(BaseExprKind::Num), + BinOp::Plus, + base_expr(BaseExprKind::Num) + ) + ) ) ); - // + test_parser!( + Parser::try_from(r"( + + 1 + + 2 + + 3 + + 4 + , + )").unwrap(), + next_expr, + base_expr(BaseExprKind::Tuple(vec![ + bin_expr( + un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)), + BinOp::Plus, + bin_expr( + base_expr(BaseExprKind::Num), + BinOp::Plus, + bin_expr( + 
base_expr(BaseExprKind::Num), + BinOp::Plus, + base_expr(BaseExprKind::Num) + ) + ) + ) + ])) + ); } } diff --git a/src/syn/span.rs b/src/syn/span.rs index 23c4c00..a024da3 100644 --- a/src/syn/span.rs +++ b/src/syn/span.rs @@ -120,6 +120,7 @@ impl Spanned for Span { } } +/* pub struct Sourced<'t, T: Spanned> { text: &'t str, inner: T, @@ -136,6 +137,7 @@ impl Spanned for Sourced<'_, T> { self.inner.span() } } +*/ #[cfg(test)] mod test { diff --git a/src/syn/token.rs b/src/syn/token.rs index 866a994..3fcc26d 100644 --- a/src/syn/token.rs +++ b/src/syn/token.rs @@ -31,6 +31,8 @@ pub enum TokenKind { Splat, FSlash, Bang, + Eol, + Newline, } impl Display for TokenKind { @@ -65,6 +67,8 @@ impl Display for TokenKind { Splat => "splat (or times)", FSlash => "fslash (or divide)", Bang => "not", + Eol => "line end", + Newline => "newline", }; Display::fmt(s, fmt) }