use crate::syn::{ast::*, error::*, lexer::Lexer, op::*, span::*, token::*}; use std::{convert::TryFrom, mem}; const BASE_EXPR_START: &[TokenKind] = &[ TokenKind::Ident, TokenKind::Num, TokenKind::Str, TokenKind::Sym, TokenKind::LParen, TokenKind::LBracket, TokenKind::LBrace, ]; const UN_EXPR_START: &[TokenKind] = &[TokenKind::Plus, TokenKind::Minus, TokenKind::Bang]; pub struct Parser<'t> { lexer: Lexer<'t>, curr_token: Option, skip_newlines: bool, } impl<'t> Parser<'t> { pub fn new(lexer: Lexer<'t>) -> Result { let mut parser = Parser { lexer, curr_token: None, skip_newlines: false, }; parser.adv_token()?; Ok(parser) } pub fn is_eof(&self) -> bool { self.lexer.is_eof() } pub fn pos(&self) -> Pos { self.span().start } } //////////////////////////////////////////////////////////////////////////////// // Parsing functions //////////////////////////////////////////////////////////////////////////////// macro_rules! bin_expr { ($name:ident, $op_tokens:expr, $next:ident) => { fn $name(&mut self) -> Result { let lhs = self.$next()?; if let Some(token) = self.match_token_where(|t| $op_tokens.contains(&t.kind()))? { let op = BinOp::from(token); let rhs = self.$name()?; let span = lhs.span().union(rhs.span()); Ok(BinExpr { lhs, op, rhs, span }.into()) } else { Ok(lhs) } } }; } impl<'t> Parser<'t> { pub fn next_expr(&mut self) -> Result { self.next_bin_add_expr() } bin_expr!( next_bin_add_expr, &[TokenKind::Plus, TokenKind::Minus], next_bin_mul_expr ); bin_expr!( next_bin_mul_expr, &[TokenKind::FSlash, TokenKind::Splat], next_un_expr ); fn next_un_expr(&mut self) -> Result { if let Some(un_op) = self.match_token_where(|t| UN_EXPR_START.contains(&t.kind()))? { let start = un_op.span(); let expr = self.next_un_expr()?; let end = expr.span(); Ok(UnExpr { op: un_op.kind().into(), expr, span: start.union(end), } .into()) } else { self.next_base_expr() } } fn next_base_expr(&mut self) -> Result { let token = self.expect_token_where(|t| BASE_EXPR_START.contains(&t.kind()), "base expression")?; let expr: Expr = match token.kind() { TokenKind::Ident => BaseExpr { kind: BaseExprKind::Ident, span: token.span(), } .into(), TokenKind::Num => BaseExpr { kind: BaseExprKind::Num, span: token.span(), } .into(), TokenKind::Str => BaseExpr { kind: BaseExprKind::Str, span: token.span(), } .into(), TokenKind::Sym => BaseExpr { kind: BaseExprKind::Sym, span: token.span(), } .into(), TokenKind::LBracket => { let expr_list = self.next_expr_list(TokenKind::RBracket)?; let end_token = self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?; let span = token.span().union(end_token.span()); BaseExpr { kind: BaseExprKind::List(expr_list), span, } .into() } TokenKind::LBrace => todo!(), TokenKind::LParen => { let first = self.next_expr()?; if let Some(_) = self.match_token_kind(TokenKind::Comma)? { let mut list = self.next_expr_list(TokenKind::RParen)?; let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?; let span = first.span().union(end_token.span()); list.insert(0, first); Expr::Base( BaseExpr { kind: BaseExprKind::Tuple(list), span, } .into(), ) } else { self.expect_token_kind(TokenKind::RParen, "end of expression")?; first } } _ => unreachable!(), }; Ok(expr) } /// Gets a list of expressions separated by commas, stopping when EOF or specified end token is /// reached. fn next_expr_list(&mut self, stop_before: TokenKind) -> Result> { let mut list_items = Vec::new(); while !self.is_token_kind(stop_before) { list_items.push(self.next_expr()?); // match a comma, otherwise exit the loop if self.match_token_kind(TokenKind::Comma)?.is_none() { break; } } Ok(list_items) } //////////////////////////////////////////////////////////////////////////////// // Token matching functions //////////////////////////////////////////////////////////////////////////////// fn adv_token(&mut self) -> Result> { let next_token = self.lexer.next_token()?; Ok(mem::replace(&mut self.curr_token, next_token)) } fn match_token_where

(&mut self, pred: P) -> Result> where P: Fn(Token) -> bool, { if self.is_token_match(pred) { self.adv_token() } else { Ok(None) } } fn match_token_kind(&mut self, kind: TokenKind) -> Result> { if self.is_token_kind(kind) { self.adv_token() } else { Ok(None) } } fn is_token_match

(&self, pred: P) -> bool where P: Fn(Token) -> bool, { match self.curr_token { Some(token) => (pred)(token), None => false, } } fn is_token_kind(&self, kind: TokenKind) -> bool { self.is_token_match(|t| t.kind() == kind) } fn expect_token_where

(&mut self, pred: P, expected: impl ToString) -> Result where P: Fn(Token) -> bool, { self.match_token_where(pred)? .ok_or_else(|| Error::ExpectedGot { expected: expected.to_string(), got: self .curr_token .map(|token| token.kind().to_string()) .unwrap_or_else(|| "EOF".to_string()), pos: self.pos(), }) } fn expect_token_kind(&mut self, kind: TokenKind, expected: impl ToString) -> Result { self.expect_token_where(|t| t.kind() == kind, expected) } } impl<'t> Spanned for Parser<'t> { fn span(&self) -> Span { self.curr_token .as_ref() .map(Spanned::span) .unwrap_or(Span::default()) } } impl<'t> TryFrom> for Parser<'t> { type Error = Error; fn try_from(lexer: Lexer<'t>) -> Result { Parser::new(lexer) } } impl<'t> TryFrom<&'t str> for Parser<'t> { type Error = Error; fn try_from(text: &'t str) -> Result { Parser::new(Lexer::new(text)) } } #[cfg(test)] mod test { use super::*; #[test] fn test_adv_token() { const EXPECTED: &'static [TokenKind] = &[ TokenKind::Num, TokenKind::Ident, TokenKind::Sym, TokenKind::Str, ]; let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap(); for expected in EXPECTED.iter().copied() { let token = parser.adv_token().unwrap(); let kind = token.unwrap().kind(); assert_eq!(kind, expected); } assert!(parser.is_eof()); } #[test] fn test_match_token_where() { let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap(); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Num), Ok(Some(_)) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Num), Ok(None) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Ident), Ok(Some(_)) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Ident), Ok(None) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Sym), Ok(Some(_)) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Sym), Ok(None) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Str), Ok(Some(_)) )); assert!(matches!( parser.match_token_where(|token| token.kind() == TokenKind::Str), Ok(None) )); assert!(parser.is_eof()); } macro_rules! test_parser { ($text:expr, $($tail:tt)+) => {{ let mut parser = Parser::try_from($text).unwrap(); test_parser!(@TAIL, &mut parser, $($tail)+); }}; (@TAIL, $parser:expr, $method:ident, $expected:expr) => {{ assert_eq!($parser.$method().unwrap(), $expected); }}; (@TAIL, $parser:expr, $method:ident, $expected:expr, $($tail:tt)+) => {{ test_parser!(@TAIL, $parser, $method, $expected); test_parser!(@TAIL, $parser, $($tail)*); }}; (@TAIL, $parser:expr) => {}; (@TAIL, $parser:expr,) => {}; } fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr { Expr::Bin( BinExpr { lhs, op, rhs, span: Default::default(), } .into(), ) } fn un_expr(op: UnOp, expr: Expr) -> Expr { Expr::Un( UnExpr { op, expr, span: Default::default(), } .into(), ) } fn base_expr(kind: BaseExprKind) -> Expr { Expr::Base(BaseExpr { kind, span: Default::default(), }) } #[test] fn test_base_expr() { test_parser!( "1 x 'value' :sym", // 1 next_expr, base_expr(BaseExprKind::Num), // x next_expr, base_expr(BaseExprKind::Ident), // 'value' next_expr, base_expr(BaseExprKind::Str), // :sym next_expr, base_expr(BaseExprKind::Sym) ); test_parser!( "[1, x, 'value', :sym]", next_expr, base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Ident), base_expr(BaseExprKind::Str), base_expr(BaseExprKind::Sym), ])) ); test_parser!( "[1, x, 'value', :sym,]", next_expr, base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Ident), base_expr(BaseExprKind::Str), base_expr(BaseExprKind::Sym), ])) ); } #[test] fn test_compound_expr() { test_parser!( r#" x + 2 1 + 2 * 3 - 4 / 5 1+2*3-4/5 1 - -1 * 8 1--1*8 "#, // x + 2 next_expr, bin_expr( base_expr(BaseExprKind::Ident), BinOp::Plus, base_expr(BaseExprKind::Num) ), // 1 + 2 * 3 - 4 / 5 next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, bin_expr( bin_expr( base_expr(BaseExprKind::Num), BinOp::Times, base_expr(BaseExprKind::Num) ), BinOp::Minus, bin_expr( base_expr(BaseExprKind::Num), BinOp::Div, base_expr(BaseExprKind::Num) ) ) ), // // 1+2*3-4/5 next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::Plus, bin_expr( bin_expr( base_expr(BaseExprKind::Num), BinOp::Times, base_expr(BaseExprKind::Num) ), BinOp::Minus, bin_expr( base_expr(BaseExprKind::Num), BinOp::Div, base_expr(BaseExprKind::Num) ) ) ), // 1 - -1 * 8 next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::Minus, bin_expr( un_expr(UnOp::Minus, base_expr(BaseExprKind::Num)), BinOp::Times, base_expr(BaseExprKind::Num) ) ), // 1--1*8 next_expr, bin_expr( base_expr(BaseExprKind::Num), BinOp::Minus, bin_expr( un_expr(UnOp::Minus, base_expr(BaseExprKind::Num)), BinOp::Times, base_expr(BaseExprKind::Num) ) ) ); // } }