Add more expressions to parser, add EOL and newline tokens

* Lexer recognizes semicolons as EOL tokens and newline characters as newline tokens
* Parser can be configured to ignore newlines (which is also used
  internally)
* Newlines are allowed in lists, tuples, and parenthesized expressions
* Add a bunch of tests for the new stuff

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-05-05 16:38:07 -04:00
parent d9edf21d16
commit a4f289fb53
6 changed files with 274 additions and 22 deletions

View File

@@ -3,10 +3,23 @@ use derivative::Derivative;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Stmt { pub enum Stmt {
Assign(Expr, Expr), Assign(AssignStmt),
Expr(Expr), Expr(Expr),
} }
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AssignStmt {
pub lhs: Expr,
pub rhs: Expr,
pub span: Span,
}
impl Spanned for AssignStmt {
fn span(&self) -> Span {
self.span
}
}
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Expr { pub enum Expr {
Base(BaseExpr), Base(BaseExpr),

View File

@@ -46,7 +46,7 @@ impl<'t> Lexer<'t> {
fn skip_whitespace(&mut self) { fn skip_whitespace(&mut self) {
while let Some(c) = self.curr_char() { while let Some(c) = self.curr_char() {
if !c.is_whitespace() { if !c.is_whitespace() || c == '\n' {
break; break;
} else { } else {
self.adv_char(); self.adv_char();
@@ -96,6 +96,8 @@ impl<'t> Lexer<'t> {
|(?P<lbrace>\{) |(?P<lbrace>\{)
|(?P<rbrace>\}) |(?P<rbrace>\})
|(?P<comma>,) |(?P<comma>,)
|(?P<eol>;)
|(?P<newline>\n)
"#).ignore_whitespace(true) "#).ignore_whitespace(true)
.build() .build()
.unwrap(); .unwrap();
@@ -133,6 +135,8 @@ impl<'t> Lexer<'t> {
("gt", TokenKind::Gt), ("gt", TokenKind::Gt),
("eq", TokenKind::Eq), ("eq", TokenKind::Eq),
("eol", TokenKind::Eol),
("newline", TokenKind::Newline),
]; ];
self.skip_whitespace(); self.skip_whitespace();
@@ -206,7 +210,7 @@ mod test {
assert!(matches!(lexer.next_token(), Ok(None))); assert!(matches!(lexer.next_token(), Ok(None)));
assert!(lexer.is_eof()); assert!(lexer.is_eof());
let mut lexer = Lexer::new(" \n \n \n\r\n\t "); let mut lexer = Lexer::new(" \t \r \r\r\t\t ");
assert!(matches!(lexer.next_token(), Ok(None))); assert!(matches!(lexer.next_token(), Ok(None)));
assert!(lexer.is_eof()); assert!(lexer.is_eof());
} }
@@ -263,6 +267,14 @@ mod test {
); );
} }
#[test]
fn test_eol() {
test_token!("\n;",
TokenKind::Newline, "\n",
TokenKind::Eol, ";"
);
}
#[test] #[test]
fn test_symbols() { fn test_symbols() {
test_token!("(", TokenKind::LParen); test_token!("(", TokenKind::LParen);

View File

@@ -30,6 +30,11 @@ pub enum BinOp {
Minus, Minus,
Times, Times,
Div, Div,
EqEq,
Lt,
Gt,
LtEq,
GtEq,
} }
impl From<TokenKind> for BinOp { impl From<TokenKind> for BinOp {
@@ -39,6 +44,11 @@ impl From<TokenKind> for BinOp {
TokenKind::Minus => BinOp::Minus, TokenKind::Minus => BinOp::Minus,
TokenKind::Splat => BinOp::Times, TokenKind::Splat => BinOp::Times,
TokenKind::FSlash => BinOp::Div, TokenKind::FSlash => BinOp::Div,
TokenKind::EqEq => BinOp::EqEq,
TokenKind::LtEq => BinOp::LtEq,
TokenKind::GtEq => BinOp::GtEq,
TokenKind::Lt => BinOp::Lt,
TokenKind::Gt => BinOp::Gt,
_ => panic!("{:?} cannot be converted to a binop", other), _ => panic!("{:?} cannot be converted to a binop", other),
} }
} }

View File

@@ -37,6 +37,28 @@ impl<'t> Parser<'t> {
pub fn pos(&self) -> Pos { pub fn pos(&self) -> Pos {
self.span().start self.span().start
} }
pub fn set_skip_newlines(&mut self, skip: bool) -> Result<bool> {
let prev = mem::replace(&mut self.skip_newlines, skip);
match self.skip_newlines() {
Ok(()) => Ok(prev),
Err(e) => {
self.skip_newlines = prev;
Err(e)
}
}
}
pub fn is_skip_newlines(&self) -> bool {
self.skip_newlines
}
fn skip_newlines(&mut self) -> Result<()> {
if self.is_skip_newlines() && self.is_token_kind(TokenKind::Newline) {
self.adv_token()?;
}
Ok(())
}
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@@ -61,14 +83,28 @@ macro_rules! bin_expr {
impl<'t> Parser<'t> { impl<'t> Parser<'t> {
pub fn next_expr(&mut self) -> Result<Expr> { pub fn next_expr(&mut self) -> Result<Expr> {
self.next_bin_add_expr() self.next_bin_cmp_expr()
} }
// == < > <= >=
bin_expr!(
next_bin_cmp_expr,
&[
TokenKind::EqEq,
TokenKind::Lt,
TokenKind::Gt,
TokenKind::LtEq,
TokenKind::GtEq
],
next_bin_add_expr
);
// + -
bin_expr!( bin_expr!(
next_bin_add_expr, next_bin_add_expr,
&[TokenKind::Plus, TokenKind::Minus], &[TokenKind::Plus, TokenKind::Minus],
next_bin_mul_expr next_bin_mul_expr
); );
// * /
bin_expr!( bin_expr!(
next_bin_mul_expr, next_bin_mul_expr,
&[TokenKind::FSlash, TokenKind::Splat], &[TokenKind::FSlash, TokenKind::Splat],
@@ -80,6 +116,7 @@ impl<'t> Parser<'t> {
let start = un_op.span(); let start = un_op.span();
let expr = self.next_un_expr()?; let expr = self.next_un_expr()?;
let end = expr.span(); let end = expr.span();
Ok(UnExpr { Ok(UnExpr {
op: un_op.kind().into(), op: un_op.kind().into(),
expr, expr,
@@ -116,7 +153,9 @@ impl<'t> Parser<'t> {
} }
.into(), .into(),
TokenKind::LBracket => { TokenKind::LBracket => {
let prev_skip = self.set_skip_newlines(true)?;
let expr_list = self.next_expr_list(TokenKind::RBracket)?; let expr_list = self.next_expr_list(TokenKind::RBracket)?;
self.set_skip_newlines(prev_skip)?;
let end_token = let end_token =
self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?; self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?;
let span = token.span().union(end_token.span()); let span = token.span().union(end_token.span());
@@ -128,10 +167,14 @@ impl<'t> Parser<'t> {
} }
TokenKind::LBrace => todo!(), TokenKind::LBrace => todo!(),
TokenKind::LParen => { TokenKind::LParen => {
let prev_skip = self.set_skip_newlines(true)?;
let first = self.next_expr()?; let first = self.next_expr()?;
if let Some(_) = self.match_token_kind(TokenKind::Comma)? { if let Some(_) = self.match_token_kind(TokenKind::Comma)? {
let mut list = self.next_expr_list(TokenKind::RParen)?; let mut list = self.next_expr_list(TokenKind::RParen)?;
self.set_skip_newlines(prev_skip)?;
let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?; let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?;
let span = first.span().union(end_token.span()); let span = first.span().union(end_token.span());
list.insert(0, first); list.insert(0, first);
Expr::Base( Expr::Base(
@@ -142,6 +185,7 @@ impl<'t> Parser<'t> {
.into(), .into(),
) )
} else { } else {
self.set_skip_newlines(prev_skip)?;
self.expect_token_kind(TokenKind::RParen, "end of expression")?; self.expect_token_kind(TokenKind::RParen, "end of expression")?;
first first
} }
@@ -173,7 +217,21 @@ impl<'t> Parser<'t> {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
fn adv_token(&mut self) -> Result<Option<Token>> { fn adv_token(&mut self) -> Result<Option<Token>> {
let next_token = self.lexer.next_token()?; let mut next_token = self.lexer.next_token()?;
if self.is_skip_newlines() {
while next_token.map(|t| t.kind() == TokenKind::Newline).unwrap_or(false) {
next_token = self.lexer.next_token()?;
}
if self.is_token_kind(TokenKind::Newline) {
self.curr_token = next_token;
while next_token.map(|t| t.kind() == TokenKind::Newline).unwrap_or(false) {
next_token = self.lexer.next_token()?;
}
}
}
Ok(mem::replace(&mut self.curr_token, next_token)) Ok(mem::replace(&mut self.curr_token, next_token))
} }
@@ -317,9 +375,8 @@ mod test {
} }
macro_rules! test_parser { macro_rules! test_parser {
($text:expr, $($tail:tt)+) => {{ ($parser:expr, $($tail:tt)+) => {{
let mut parser = Parser::try_from($text).unwrap(); let mut parser = $parser;
test_parser!(@TAIL, &mut parser, $($tail)+); test_parser!(@TAIL, &mut parser, $($tail)+);
}}; }};
@@ -332,8 +389,12 @@ mod test {
test_parser!(@TAIL, $parser, $($tail)*); test_parser!(@TAIL, $parser, $($tail)*);
}}; }};
(@TAIL, $parser:expr) => {}; (@TAIL, $parser:expr) => {
(@TAIL, $parser:expr,) => {}; assert!($parser.is_eof());
};
(@TAIL, $parser:expr,) => {
assert!($parser.is_eof());
};
} }
fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr { fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
@@ -369,7 +430,7 @@ mod test {
#[test] #[test]
fn test_base_expr() { fn test_base_expr() {
test_parser!( test_parser!(
"1 x 'value' :sym", Parser::try_from("1 x 'value' :sym").unwrap(),
// 1 // 1
next_expr, next_expr,
base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Num),
@@ -383,9 +444,12 @@ mod test {
next_expr, next_expr,
base_expr(BaseExprKind::Sym) base_expr(BaseExprKind::Sym)
); );
}
#[test]
fn test_list_expr() {
test_parser!( test_parser!(
"[1, x, 'value', :sym]", Parser::try_from("[1, x, 'value', :sym]").unwrap(),
next_expr, next_expr,
base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Num),
@@ -396,7 +460,7 @@ mod test {
); );
test_parser!( test_parser!(
"[1, x, 'value', :sym,]", Parser::try_from("[1, x, 'value', :sym,]").unwrap(),
next_expr, next_expr,
base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Num),
@@ -408,15 +472,49 @@ mod test {
} }
#[test] #[test]
fn test_compound_expr() { fn test_tuple_expr() {
test_parser!( test_parser!(
r#" Parser::try_from("(1, :sym)").unwrap(),
x + 2 next_expr,
base_expr(BaseExprKind::Tuple(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
Parser::try_from("(:sym,)").unwrap(),
next_expr,
base_expr(BaseExprKind::Tuple(vec![
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
Parser::try_from("(:sym)").unwrap(),
next_expr,
base_expr(BaseExprKind::Sym)
);
}
#[test]
fn test_bin_expr() {
test_parser!(
{
let mut parser = Parser::try_from(
r#"x + 2
1 + 2 * 3 - 4 / 5 1 + 2 * 3 - 4 / 5
1+2*3-4/5 1+2*3-4/5
1 - -1 * 8 1 - -1 * 8
1--1*8 1--1*8
1 + 1 == 2
3 >= 1 + 2
"#, "#,
)
.unwrap();
parser.set_skip_newlines(true).unwrap();
parser
},
// x + 2 // x + 2
next_expr, next_expr,
bin_expr( bin_expr(
@@ -484,9 +582,122 @@ mod test {
BinOp::Times, BinOp::Times,
base_expr(BaseExprKind::Num) base_expr(BaseExprKind::Num)
) )
),
// 1 + 1 == 2
next_expr,
bin_expr(
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
base_expr(BaseExprKind::Num)
),
BinOp::EqEq,
base_expr(BaseExprKind::Num)
),
// 3 >= 1 + 2
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::GtEq,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
base_expr(BaseExprKind::Num)
)
)
);
}
#[test]
fn test_multiline_exprs() {
test_parser!(
Parser::try_from(r"[
1,
x,
['value', :value],
:sym
]").unwrap(),
next_expr,
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Ident),
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Str),
base_expr(BaseExprKind::Sym),
])),
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
Parser::try_from(r"(
1,
x,
[
'value',
:value,
],
:sym
)").unwrap(),
next_expr,
base_expr(BaseExprKind::Tuple(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Ident),
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Str),
base_expr(BaseExprKind::Sym),
])),
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
Parser::try_from(r"(
+ 1
+ 2
+ 3
+ 4
)").unwrap(),
next_expr,
bin_expr(
un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)),
BinOp::Plus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
base_expr(BaseExprKind::Num)
)
)
) )
); );
test_parser!(
Parser::try_from(r"(
+ 1
+ 2
+ 3
+ 4
,
)").unwrap(),
next_expr,
base_expr(BaseExprKind::Tuple(vec![
bin_expr(
un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)),
BinOp::Plus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
base_expr(BaseExprKind::Num)
)
)
)
]))
);
} }
} }

View File

@@ -120,6 +120,7 @@ impl Spanned for Span {
} }
} }
/*
pub struct Sourced<'t, T: Spanned> { pub struct Sourced<'t, T: Spanned> {
text: &'t str, text: &'t str,
inner: T, inner: T,
@@ -136,6 +137,7 @@ impl<T: Spanned> Spanned for Sourced<'_, T> {
self.inner.span() self.inner.span()
} }
} }
*/
#[cfg(test)] #[cfg(test)]
mod test { mod test {

View File

@@ -31,6 +31,8 @@ pub enum TokenKind {
Splat, Splat,
FSlash, FSlash,
Bang, Bang,
Eol,
Newline,
} }
impl Display for TokenKind { impl Display for TokenKind {
@@ -65,6 +67,8 @@ impl Display for TokenKind {
Splat => "splat (or times)", Splat => "splat (or times)",
FSlash => "fslash (or divide)", FSlash => "fslash (or divide)",
Bang => "not", Bang => "not",
Eol => "line end",
Newline => "newline",
}; };
Display::fmt(s, fmt) Display::fmt(s, fmt)
} }