use crate::syn::{ast::*, error::*, lexer::*, token::*}; // ///////////////////////////////////////////////////////////////////////////// // Parser // ///////////////////////////////////////////////////////////////////////////// #[derive(Debug)] pub struct Parser<'t> { lexer: Lexer<'t>, token: Result>, } impl<'t> From<&'t str> for Parser<'t> { fn from(text: &'t str) -> Self { Parser::new(Lexer::new(text)) } } impl<'t> Parser<'t> { pub fn new(mut lexer: Lexer<'t>) -> Self { let token = lexer.next(); Self { lexer, token } } pub fn is_eof(&self) -> bool { self.lexer.is_eof() } fn peek(&self) -> Result> { self.token.clone() } fn adv(&mut self) -> Result> { let next = self.lexer.next(); std::mem::replace(&mut self.token, next) } /// Checks if the next token is part of the list, returning it if so. fn expect_any_token(&mut self, expected: &[Token]) -> Result { let token = self.peek()?; match (token, expected) { // Token matches (Some(token), expected) if expected.contains(token.inner()) => { self.adv()?; Ok(token) } // Token does not match, only one token expected (token, &[expected]) => { // get the string version of whether this is a token or EOF let got = token .map(|t| format!("{} token", t.inner().name())) .unwrap_or_else(|| "EOF".to_string()); Err(SyntaxError::ExpectedGot { expected: format!("{} token", expected.name()), got, }) } // Token does not match, any of N tokens expected (token, expected) => { // make the comma-separated list of everything except for the last item let expected_str = expected .iter() .take(expected.len() - 1) .map(Token::name) .collect::>() .join(", "); // get the string version of whether this is a token or EOF let got = token .map(|t| format!("{} token", t.inner().name())) .unwrap_or_else(|| "EOF".to_string()); Err(SyntaxError::ExpectedGot { expected: format!( "{} or {} token", expected_str, expected.last().unwrap().name() ), got, }) } } } /// Gets all expressions until EOF is reached, or until a quote end is reached. pub fn next_expr_list(&mut self) -> Result> { let mut exprs = Vec::new(); while let Some(peek) = self.peek()? { match peek.inner() { Token::RQuote => break, _ => exprs.push(self.next_expr()?), } } Ok(exprs) } pub fn next_expr(&mut self) -> Result { // peek ahead and see if we need to handle a quote match self.peek()? { Some(peek) if *peek.inner() == Token::LQuote => self.next_quote(), _ => { let atom = self.next_atom()?; let span = atom.span(); Ok(SpExpr::new(span, Expr::Atom(atom))) } } } pub fn next_quote(&mut self) -> Result { let start = self.expect_any_token(&[Token::LQuote])?; let exprs = self.next_expr_list()?; let end = self.expect_any_token(&[Token::RQuote])?; let span = start.span().union(end.span()); Ok(SpExpr::new(span, Expr::Quote(exprs))) } pub fn next_atom(&mut self) -> Result { use Token::*; let token = self.expect_any_token(&[Assign, Word, Float, Int, Str, Apply])?; Ok(self.token_to_atom(token)) } fn token_to_atom(&self, token: SpToken) -> SpAtom { // NOTE - self is required for this because we get the text let (span, token) = token.into_split(); let text = span.text_at(self.lexer.text()); let atom = match token { Token::Assign => Atom::Assign(text[1..].to_string()), Token::Word => Atom::Word(text.to_string()), Token::Float => Atom::Float(text.parse().unwrap()), Token::Int => Atom::Int(text.parse().unwrap()), Token::Str => Atom::Str(unescape_string(text)), Token::Apply => Atom::Apply, _ => panic!("invalid token specified for token_to_atom, it should be an atom"), }; SpAtom::new(span, atom) } } fn unescape_string(text: &str) -> String { let mut string = String::with_capacity(text.len() - 2); let mut chars = text.chars().skip(1).take(text.len() - 2); while let Some(c) = chars.next() { if c == '\\' { let c = match chars .next() .expect("reached end of string literal before escape") { '"' => '"', '\'' => '\'', '\\' => '\\', 'n' => '\n', 'r' => '\r', 't' => '\t', u => panic!( "unexpected character escape that made it through the lexer: {:?}", u ), }; string.push(c); } else { string.push(c); } } string } // ///////////////////////////////////////////////////////////////////////////// // Tests // ///////////////////////////////////////////////////////////////////////////// #[cfg(test)] macro_rules! expect_atom { ($parser:expr, $expected:expr) => {{ let expr_result = $parser.next_expr(); assert!( expr_result.is_ok(), "expected {:?} but got {:?} instead", $expected, expr_result ); let expr = expr_result.unwrap(); let (span, expr) = expr.into_split(); assert_eq!(expr, Expr::Atom(SpAtom::new(span, $expected))); }}; } #[cfg(test)] macro_rules! expect_expr { ($parser:expr, $expected:expr) => {{ let expr_result = $parser.next_expr(); assert!( expr_result.is_ok(), "expected {:?} but got {:?} instead", $expected, expr_result ); let expr = expr_result.unwrap(); assert_eq!(expr, $expected); }}; } /// Makes an SpExpr Quote value using the given SpExpr values #[cfg(test)] macro_rules! make_quote { ($($expr:expr),+ $(,)?) => {{ SpExpr::new( Default::default(), Expr::Quote(make_quote_vec!($($expr),+)) ) }}; } /// Makes a vec appropriate for an Expr::Quote #[cfg(test)] macro_rules! make_quote_vec { ($($expr:expr),+ $(,)?) => {{ vec![$( SpExpr::new(Default::default(), $expr) ),+] }}; } /// Makes an SpAtom from an Atom type. #[cfg(test)] macro_rules! make_atom { ($atom:expr) => {{ SpAtom::new(Default::default(), $atom) }}; } #[test] fn test_parser_atoms() { let mut parser = Parser::from( r#" a ab bcd dcefg foo bar baz 1 2 3 4 5 1.2 3.4 5.6 7.8 9.10 "this is a string" "this\nis\na\nstring\nwith\nnewlines" "this\tis\ta\tstring\twith\ttabs" "#, ); expect_atom!(parser, Atom::Word("a".to_string())); expect_atom!(parser, Atom::Word("ab".to_string())); expect_atom!(parser, Atom::Word("bcd".to_string())); expect_atom!(parser, Atom::Word("dcefg".to_string())); expect_atom!(parser, Atom::Word("foo".to_string())); expect_atom!(parser, Atom::Word("bar".to_string())); expect_atom!(parser, Atom::Word("baz".to_string())); expect_atom!(parser, Atom::Int(1)); expect_atom!(parser, Atom::Int(2)); expect_atom!(parser, Atom::Int(3)); expect_atom!(parser, Atom::Int(4)); expect_atom!(parser, Atom::Int(5)); expect_atom!(parser, Atom::Float(1.2)); expect_atom!(parser, Atom::Float(3.4)); expect_atom!(parser, Atom::Float(5.6)); expect_atom!(parser, Atom::Float(7.8)); expect_atom!(parser, Atom::Float(9.1)); expect_atom!(parser, Atom::Str("this is a string".to_string())); expect_atom!( parser, Atom::Str("this\nis\na\nstring\nwith\nnewlines".to_string()) ); expect_atom!( parser, Atom::Str("this\tis\ta\tstring\twith\ttabs".to_string()) ); assert!(parser.is_eof()); } #[test] fn test_parser_quotes() { let mut parser = Parser::from( r#" [ a ab bcd dcefg foo bar baz ] [1 2 3 4 5 [1.2 3.4 5.6 7.8 9.10] ] "#, ); expect_expr!( parser, make_quote![ Expr::Atom(make_atom!(Atom::Word("a".to_string()))), Expr::Atom(make_atom!(Atom::Word("ab".to_string()))), Expr::Atom(make_atom!(Atom::Word("bcd".to_string()))), Expr::Atom(make_atom!(Atom::Word("dcefg".to_string()))), Expr::Atom(make_atom!(Atom::Word("foo".to_string()))), Expr::Atom(make_atom!(Atom::Word("bar".to_string()))), Expr::Atom(make_atom!(Atom::Word("baz".to_string()))), ] ); expect_expr!( parser, make_quote![ Expr::Atom(make_atom!(Atom::Int(1))), Expr::Atom(make_atom!(Atom::Int(2))), Expr::Atom(make_atom!(Atom::Int(3))), Expr::Atom(make_atom!(Atom::Int(4))), Expr::Atom(make_atom!(Atom::Int(5))), Expr::Quote(make_quote_vec![ Expr::Atom(make_atom!(Atom::Float(1.2))), Expr::Atom(make_atom!(Atom::Float(3.4))), Expr::Atom(make_atom!(Atom::Float(5.6))), Expr::Atom(make_atom!(Atom::Float(7.8))), Expr::Atom(make_atom!(Atom::Float(9.10))), ]), ] ); }