use crate::syn::{ast::*, error::*, lexer::Lexer, op::*, span::*, token::*}; use std::{convert::TryFrom, mem}; const BASE_EXPR_START: &[TokenKind] = &[ TokenKind::Ident, TokenKind::Num, TokenKind::Str, TokenKind::Sym, TokenKind::LParen, TokenKind::LBracket, TokenKind::LBrace, TokenKind::ObjBrace, ]; const UN_EXPR_START: &[TokenKind] = &[TokenKind::Plus, TokenKind::Minus, TokenKind::Bang]; pub struct Parser<'t> { lexer: Lexer<'t>, curr_token: Option, skip_newlines: bool, } impl<'t> Parser<'t> { pub fn new(lexer: Lexer<'t>) -> Result { let mut parser = Parser { lexer, curr_token: None, skip_newlines: false, }; parser.adv_token()?; Ok(parser) } pub fn is_eof(&self) -> bool { self.lexer.is_eof() } pub fn pos(&self) -> Pos { self.span().start } pub fn set_skip_newlines(&mut self, skip: bool) -> Result { let prev = mem::replace(&mut self.skip_newlines, skip); match self.skip_newlines() { Ok(()) => Ok(prev), Err(e) => { self.skip_newlines = prev; Err(e) } } } pub fn is_skip_newlines(&self) -> bool { self.skip_newlines } fn skip_newlines(&mut self) -> Result<()> { if self.is_skip_newlines() && self.is_token_kind(TokenKind::Newline) { self.adv_token()?; } Ok(()) } } //////////////////////////////////////////////////////////////////////////////// // Statement parsing //////////////////////////////////////////////////////////////////////////////// impl<'t> Parser<'t> { pub fn next_stmt(&mut self) -> Result { todo!() } } //////////////////////////////////////////////////////////////////////////////// // Expression parsing //////////////////////////////////////////////////////////////////////////////// macro_rules! bin_expr { ($name:ident, $op_tokens:expr, $next:ident) => { fn $name(&mut self) -> Result { let lhs = self.$next()?; if let Some(token) = self.match_token_where(|t| $op_tokens.contains(&t.kind()))? { let op = BinOp::from(token); let rhs = self.$name()?; let span = lhs.span().union(rhs.span()); Ok(BinExpr { lhs, op, rhs, span }.into()) } else { Ok(lhs) } } }; } impl<'t> Parser<'t> { pub fn next_expr(&mut self) -> Result { self.next_bin_cmp_expr() } // == < > <= >= bin_expr!( next_bin_cmp_expr, &[ TokenKind::EqEq, TokenKind::Lt, TokenKind::Gt, TokenKind::LtEq, TokenKind::GtEq ], next_bin_add_expr ); // + - bin_expr!( next_bin_add_expr, &[TokenKind::Plus, TokenKind::Minus], next_bin_mul_expr ); // * / bin_expr!( next_bin_mul_expr, &[TokenKind::FSlash, TokenKind::Splat], next_un_expr ); fn next_un_expr(&mut self) -> Result { if let Some(un_op) = self.match_token_where(|t| UN_EXPR_START.contains(&t.kind()))? { let start = un_op.span(); let expr = self.next_un_expr()?; let end = expr.span(); Ok(UnExpr { op: un_op.kind().into(), expr, span: start.union(end), } .into()) } else { self.next_base_expr() } } fn next_base_expr(&mut self) -> Result { let token = self.expect_token_where(|t| BASE_EXPR_START.contains(&t.kind()), "base expression")?; let expr: Expr = match token.kind() { TokenKind::Ident => BaseExpr { kind: BaseExprKind::Ident, span: token.span(), } .into(), TokenKind::Num => BaseExpr { kind: BaseExprKind::Num, span: token.span(), } .into(), TokenKind::Str => BaseExpr { kind: BaseExprKind::Str, span: token.span(), } .into(), TokenKind::Sym => BaseExpr { kind: BaseExprKind::Sym, span: token.span(), } .into(), TokenKind::LBracket => { let prev_skip = self.set_skip_newlines(true)?; let expr_list = self.next_expr_list(TokenKind::RBracket)?; self.set_skip_newlines(prev_skip)?; let end_token = self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?; let span = token.span().union(end_token.span()); BaseExpr { kind: BaseExprKind::List(expr_list), span, } .into() } TokenKind::LBrace => todo!("TODO body expressions"), TokenKind::ObjBrace => { let prev_skip = self.set_skip_newlines(true)?; let object = self.next_obj_list()?; let end_token = self.expect_token_kind(TokenKind::RBrace, "end of object (right curly brace)")?; let span = token.span().union(end_token.span()); Expr::Base(BaseExpr { kind: BaseExprKind::Object(object), span, }) } TokenKind::LParen => { let prev_skip = self.set_skip_newlines(true)?; let first = self.next_expr()?; if let Some(_) = self.match_token_kind(TokenKind::Comma)? { let mut list = self.next_expr_list(TokenKind::RParen)?; self.set_skip_newlines(prev_skip)?; let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?; let span = token.span().union(end_token.span()); list.insert(0, first); Expr::Base( BaseExpr { kind: BaseExprKind::Tuple(list), span, } .into(), ) } else { self.set_skip_newlines(prev_skip)?; self.expect_token_kind(TokenKind::RParen, "end of expression")?; first } } _ => unreachable!(), }; Ok(expr) } /// Gets a list of expressions separated by commas, stopping when EOF or specified end token is /// reached. fn next_expr_list(&mut self, stop_before: TokenKind) -> Result> { let mut list_items = Vec::new(); while !self.is_token_kind(stop_before) { list_items.push(self.next_expr()?); // match a comma, otherwise exit the loop if self.match_token_kind(TokenKind::Comma)?.is_none() { break; } } Ok(list_items) } /// Parses the tail of a `BaseExprKind::Object` expression. /// /// This function expects that the initial object brace `%{` has already been eaten. fn next_obj_list(&mut self) -> Result> { let mut object = Vec::new(); // Parses this pattern: // ( key EQ value ( COMMA key = value )* COMMA? )? while !self.is_token_kind(TokenKind::RBrace) { let key = self.next_expr()?; self.expect_token_kind(TokenKind::Eq, "equals sign for object item")?; let value = self.next_expr()?; object.push((key, value)); // match a comma, otherwise exit the loop if self.match_token_kind(TokenKind::Comma)?.is_none() { break; } } Ok(object) } //////////////////////////////////////////////////////////////////////////////// // Token matching functions //////////////////////////////////////////////////////////////////////////////// fn adv_token(&mut self) -> Result> { let mut next_token = self.lexer.next_token()?; if self.is_skip_newlines() { while next_token .map(|t| t.kind() == TokenKind::Newline) .unwrap_or(false) { next_token = self.lexer.next_token()?; } if self.is_token_kind(TokenKind::Newline) { self.curr_token = next_token; while next_token .map(|t| t.kind() == TokenKind::Newline) .unwrap_or(false) { next_token = self.lexer.next_token()?; } } } Ok(mem::replace(&mut self.curr_token, next_token)) } fn match_token_where

(&mut self, pred: P) -> Result> where P: Fn(Token) -> bool, { if self.is_token_match(pred) { self.adv_token() } else { Ok(None) } } fn match_token_kind(&mut self, kind: TokenKind) -> Result> { if self.is_token_kind(kind) { self.adv_token() } else { Ok(None) } } fn is_token_match

(&self, pred: P) -> bool where P: Fn(Token) -> bool, { match self.curr_token { Some(token) => (pred)(token), None => false, } } fn is_token_kind(&self, kind: TokenKind) -> bool { self.is_token_match(|t| t.kind() == kind) } fn expect_token_where

(&mut self, pred: P, expected: impl ToString) -> Result where P: Fn(Token) -> bool, { self.match_token_where(pred)? .ok_or_else(|| Error::ExpectedGot { expected: expected.to_string(), got: self .curr_token .map(|token| token.kind().to_string()) .unwrap_or_else(|| "EOF".to_string()), pos: self.pos(), }) } fn expect_token_kind(&mut self, kind: TokenKind, expected: impl ToString) -> Result { self.expect_token_where(|t| t.kind() == kind, expected) } } impl<'t> Spanned for Parser<'t> { fn span(&self) -> Span { self.curr_token .as_ref() .map(Spanned::span) .unwrap_or(Span::default()) } } impl<'t> TryFrom> for Parser<'t> { type Error = Error; fn try_from(lexer: Lexer<'t>) -> Result { Parser::new(lexer) } } impl<'t> TryFrom<&'t str> for Parser<'t> { type Error = Error; fn try_from(text: &'t str) -> Result { Parser::new(Lexer::new(text)) } } #[cfg(test)] mod test { use super::*; #[test] fn test_adv_token() { const EXPECTED: &'static [TokenKind] = &[ TokenKind::Num, TokenKind::Ident, TokenKind::Sym, TokenKind::Str, ]; let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap(); for expected in EXPECTED.iter().copied() { let token = parser.adv_token().unwrap(); let kind = token.unwrap().kind(); assert_eq!(kind, expected); } assert!(parser.is_eof()); } #[test] fn test_match_token_where() { let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap(); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Num), Ok(Some(_)) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Num), Ok(None) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Ident), Ok(Some(_)) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Ident), Ok(None) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Sym), Ok(Some(_)) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Sym), Ok(None) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Str), Ok(Some(_)) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Str), Ok(None) )); assert!(parser.is_eof()); } macro_rules! test_parser { ($parser:expr, $($tail:tt)+) => {{ let mut parser = $parser; test_parser!(@TAIL, &mut parser, $($tail)+); }}; (@TAIL, $parser:expr, $method:ident, $expected:expr) => {{ assert_eq!($parser.$method().unwrap(), $expected); }}; (@TAIL, $parser:expr, $method:ident, $expected:expr, $($tail:tt)+) => {{ test_parser!(@TAIL, $parser, $method, $expected); test_parser!(@TAIL, $parser, $($tail)*); }}; (@TAIL, $parser:expr) => { assert!($parser.is_eof()); }; (@TAIL, $parser:expr,) => { assert!($parser.is_eof()); }; } fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr { Expr::Bin( BinExpr { lhs, op, rhs, span: Default::default(), } .into(), ) } fn un_expr(op: UnOp, expr: Expr) -> Expr { Expr::Un( UnExpr { op, expr, span: Default::default(), } .into(), ) } fn base_expr(kind: BaseExprKind) -> Expr { Expr::Base(BaseExpr { kind, span: Default::default(), }) } #[test] fn test_base_expr() { test_parser!( Parser::try_from("1 x 'value' :sym").unwrap(), // 1 next_expr, base_expr(BaseExprKind::Num), // x next_expr, base_expr(BaseExprKind::Ident), // 'value' next_expr, base_expr(BaseExprKind::Str), // :sym next_expr, base_expr(BaseExprKind::Sym) ); } #[test] fn test_list_expr() { test_parser!( Parser::try_from("[1, x, 'value', :sym]").unwrap(), next_expr, base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Ident), base_expr(BaseExprKind::Str), base_expr(BaseExprKind::Sym), ])) ); test_parser!( Parser::try_from("[1, x, 'value', :sym,]").unwrap(), next_expr, base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Ident), base_expr(BaseExprKind::Str), base_expr(BaseExprKind::Sym), ])) ); } #[test] fn test_tuple_expr() { test_parser!( Parser::try_from("(1, :sym)").unwrap(), next_expr, base_expr(BaseExprKind::Tuple(vec![ base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Sym), ])) ); test_parser!( Parser::try_from("(:sym,)").unwrap(), next_expr, base_expr(BaseExprKind::Tuple(vec![base_expr(BaseExprKind::Sym),])) ); test_parser!( Parser::try_from("(:sym)").unwrap(), next_expr, base_expr(BaseExprKind::Sym) ); } #[test] fn test_object_expr() { test_parser!( Parser::try_from( r#"%{ :sym = value, key = "value", "lit" = 1, }"# ) .unwrap(), next_expr, base_expr(BaseExprKind::Object(vec![ (base_expr(BaseExprKind::Sym), base_expr(BaseExprKind::Ident)), (base_expr(BaseExprKind::Ident), base_expr(BaseExprKind::Str)), (base_expr(BaseExprKind::Str), base_expr(BaseExprKind::Num)), ])) ); } #[test] fn test_bin_expr() { test_parser!( Parser::try_from("x + 2").unwrap(), next_expr, bin_expr( base_expr(BaseExprKind::Ident), BinOp::Plus, base_expr(BaseExprKind::Num) ) ); test_parser!( Parser::try_from("1 + 2 * 3 - 4 / 5").unwrap(), next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, bin_expr( bin_expr( base_expr(BaseExprKind::Num), BinOp::Times, base_expr(BaseExprKind::Num) ), BinOp::Minus, bin_expr( base_expr(BaseExprKind::Num), BinOp::Div, base_expr(BaseExprKind::Num) ) ) ) ); test_parser!( Parser::try_from("1+2*3-4/5").unwrap(), next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, bin_expr( bin_expr( base_expr(BaseExprKind::Num), BinOp::Times, base_expr(BaseExprKind::Num) ), BinOp::Minus, bin_expr( base_expr(BaseExprKind::Num), BinOp::Div, base_expr(BaseExprKind::Num) ) ) ) ); test_parser!( Parser::try_from("1 - -1 * 8").unwrap(), next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::Minus, bin_expr( un_expr(UnOp::Minus, base_expr(BaseExprKind::Num)), BinOp::Times, base_expr(BaseExprKind::Num) ) ) ); test_parser!( Parser::try_from("1--1*8").unwrap(), next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::Minus, bin_expr( un_expr(UnOp::Minus, base_expr(BaseExprKind::Num)), BinOp::Times, base_expr(BaseExprKind::Num) ) ) ); test_parser!( Parser::try_from("1 + 1 == 2").unwrap(), next_expr, bin_expr( bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, base_expr(BaseExprKind::Num) ), BinOp::EqEq, base_expr(BaseExprKind::Num) ) ); test_parser!( Parser::try_from("3 >= 1 + 2").unwrap(), next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::GtEq, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, base_expr(BaseExprKind::Num) ) ) ); } #[test] fn test_multiline_exprs() { test_parser!( Parser::try_from( r"[ 1, x, ['value', :value], :sym ]" ) .unwrap(), next_expr, base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Ident), base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Str), base_expr(BaseExprKind::Sym), ])), base_expr(BaseExprKind::Sym), ])) ); test_parser!( Parser::try_from( r"( 1, x, [ :key, 'value', ], :sym )" ) .unwrap(), next_expr, base_expr(BaseExprKind::Tuple(vec![ base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Ident), base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Sym), base_expr(BaseExprKind::Str), ])), base_expr(BaseExprKind::Sym), ])) ); test_parser!( Parser::try_from( r"( + 1 + 2 + 3 + 4 )" ) .unwrap(), next_expr, bin_expr( un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)), BinOp::Plus, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, base_expr(BaseExprKind::Num) ) ) ) ); test_parser!( Parser::try_from( r"( + 1 + 2 + 3 + 4 , )" ) .unwrap(), next_expr, base_expr(BaseExprKind::Tuple(vec![bin_expr( un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)), BinOp::Plus, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, base_expr(BaseExprKind::Num) ) ) )])) ); } }