Add more expressions to parser, add EOL and newline tokens

* Lexer recognizes semicolons as EOL tokens and newline characters as newline tokens
* Parser can be configured to ignore newlines (which is also used
  internally)
* Newlines are allowed in lists, tuples, and parenthesized expressions
* Add a bunch of tests for the new stuff

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-05-05 16:38:07 -04:00
parent d9edf21d16
commit a4f289fb53
6 changed files with 274 additions and 22 deletions

View File

@@ -3,10 +3,23 @@ use derivative::Derivative;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Stmt { pub enum Stmt {
Assign(Expr, Expr), Assign(AssignStmt),
Expr(Expr), Expr(Expr),
} }
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AssignStmt {
pub lhs: Expr,
pub rhs: Expr,
pub span: Span,
}
impl Spanned for AssignStmt {
fn span(&self) -> Span {
self.span
}
}
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Expr { pub enum Expr {
Base(BaseExpr), Base(BaseExpr),

View File

@@ -46,7 +46,7 @@ impl<'t> Lexer<'t> {
fn skip_whitespace(&mut self) { fn skip_whitespace(&mut self) {
while let Some(c) = self.curr_char() { while let Some(c) = self.curr_char() {
if !c.is_whitespace() { if !c.is_whitespace() || c == '\n' {
break; break;
} else { } else {
self.adv_char(); self.adv_char();
@@ -96,6 +96,8 @@ impl<'t> Lexer<'t> {
|(?P<lbrace>\{) |(?P<lbrace>\{)
|(?P<rbrace>\}) |(?P<rbrace>\})
|(?P<comma>,) |(?P<comma>,)
|(?P<eol>;)
|(?P<newline>\n)
"#).ignore_whitespace(true) "#).ignore_whitespace(true)
.build() .build()
.unwrap(); .unwrap();
@@ -133,6 +135,8 @@ impl<'t> Lexer<'t> {
("gt", TokenKind::Gt), ("gt", TokenKind::Gt),
("eq", TokenKind::Eq), ("eq", TokenKind::Eq),
("eol", TokenKind::Eol),
("newline", TokenKind::Newline),
]; ];
self.skip_whitespace(); self.skip_whitespace();
@@ -206,7 +210,7 @@ mod test {
assert!(matches!(lexer.next_token(), Ok(None))); assert!(matches!(lexer.next_token(), Ok(None)));
assert!(lexer.is_eof()); assert!(lexer.is_eof());
let mut lexer = Lexer::new(" \n \n \n\r\n\t "); let mut lexer = Lexer::new(" \t \r \r\r\t\t ");
assert!(matches!(lexer.next_token(), Ok(None))); assert!(matches!(lexer.next_token(), Ok(None)));
assert!(lexer.is_eof()); assert!(lexer.is_eof());
} }
@@ -263,6 +267,14 @@ mod test {
); );
} }
#[test]
fn test_eol() {
test_token!("\n;",
TokenKind::Newline, "\n",
TokenKind::Eol, ";"
);
}
#[test] #[test]
fn test_symbols() { fn test_symbols() {
test_token!("(", TokenKind::LParen); test_token!("(", TokenKind::LParen);

View File

@@ -30,6 +30,11 @@ pub enum BinOp {
Minus, Minus,
Times, Times,
Div, Div,
EqEq,
Lt,
Gt,
LtEq,
GtEq,
} }
impl From<TokenKind> for BinOp { impl From<TokenKind> for BinOp {
@@ -39,6 +44,11 @@ impl From<TokenKind> for BinOp {
TokenKind::Minus => BinOp::Minus, TokenKind::Minus => BinOp::Minus,
TokenKind::Splat => BinOp::Times, TokenKind::Splat => BinOp::Times,
TokenKind::FSlash => BinOp::Div, TokenKind::FSlash => BinOp::Div,
TokenKind::EqEq => BinOp::EqEq,
TokenKind::LtEq => BinOp::LtEq,
TokenKind::GtEq => BinOp::GtEq,
TokenKind::Lt => BinOp::Lt,
TokenKind::Gt => BinOp::Gt,
_ => panic!("{:?} cannot be converted to a binop", other), _ => panic!("{:?} cannot be converted to a binop", other),
} }
} }

View File

@@ -37,6 +37,28 @@ impl<'t> Parser<'t> {
pub fn pos(&self) -> Pos { pub fn pos(&self) -> Pos {
self.span().start self.span().start
} }
pub fn set_skip_newlines(&mut self, skip: bool) -> Result<bool> {
let prev = mem::replace(&mut self.skip_newlines, skip);
match self.skip_newlines() {
Ok(()) => Ok(prev),
Err(e) => {
self.skip_newlines = prev;
Err(e)
}
}
}
pub fn is_skip_newlines(&self) -> bool {
self.skip_newlines
}
fn skip_newlines(&mut self) -> Result<()> {
if self.is_skip_newlines() && self.is_token_kind(TokenKind::Newline) {
self.adv_token()?;
}
Ok(())
}
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@@ -61,14 +83,28 @@ macro_rules! bin_expr {
impl<'t> Parser<'t> { impl<'t> Parser<'t> {
pub fn next_expr(&mut self) -> Result<Expr> { pub fn next_expr(&mut self) -> Result<Expr> {
self.next_bin_add_expr() self.next_bin_cmp_expr()
} }
// == < > <= >=
bin_expr!(
next_bin_cmp_expr,
&[
TokenKind::EqEq,
TokenKind::Lt,
TokenKind::Gt,
TokenKind::LtEq,
TokenKind::GtEq
],
next_bin_add_expr
);
// + -
bin_expr!( bin_expr!(
next_bin_add_expr, next_bin_add_expr,
&[TokenKind::Plus, TokenKind::Minus], &[TokenKind::Plus, TokenKind::Minus],
next_bin_mul_expr next_bin_mul_expr
); );
// * /
bin_expr!( bin_expr!(
next_bin_mul_expr, next_bin_mul_expr,
&[TokenKind::FSlash, TokenKind::Splat], &[TokenKind::FSlash, TokenKind::Splat],
@@ -80,6 +116,7 @@ impl<'t> Parser<'t> {
let start = un_op.span(); let start = un_op.span();
let expr = self.next_un_expr()?; let expr = self.next_un_expr()?;
let end = expr.span(); let end = expr.span();
Ok(UnExpr { Ok(UnExpr {
op: un_op.kind().into(), op: un_op.kind().into(),
expr, expr,
@@ -116,7 +153,9 @@ impl<'t> Parser<'t> {
} }
.into(), .into(),
TokenKind::LBracket => { TokenKind::LBracket => {
let prev_skip = self.set_skip_newlines(true)?;
let expr_list = self.next_expr_list(TokenKind::RBracket)?; let expr_list = self.next_expr_list(TokenKind::RBracket)?;
self.set_skip_newlines(prev_skip)?;
let end_token = let end_token =
self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?; self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?;
let span = token.span().union(end_token.span()); let span = token.span().union(end_token.span());
@@ -128,10 +167,14 @@ impl<'t> Parser<'t> {
} }
TokenKind::LBrace => todo!(), TokenKind::LBrace => todo!(),
TokenKind::LParen => { TokenKind::LParen => {
let prev_skip = self.set_skip_newlines(true)?;
let first = self.next_expr()?; let first = self.next_expr()?;
if let Some(_) = self.match_token_kind(TokenKind::Comma)? { if let Some(_) = self.match_token_kind(TokenKind::Comma)? {
let mut list = self.next_expr_list(TokenKind::RParen)?; let mut list = self.next_expr_list(TokenKind::RParen)?;
self.set_skip_newlines(prev_skip)?;
let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?; let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?;
let span = first.span().union(end_token.span()); let span = first.span().union(end_token.span());
list.insert(0, first); list.insert(0, first);
Expr::Base( Expr::Base(
@@ -142,6 +185,7 @@ impl<'t> Parser<'t> {
.into(), .into(),
) )
} else { } else {
self.set_skip_newlines(prev_skip)?;
self.expect_token_kind(TokenKind::RParen, "end of expression")?; self.expect_token_kind(TokenKind::RParen, "end of expression")?;
first first
} }
@@ -173,7 +217,21 @@ impl<'t> Parser<'t> {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
fn adv_token(&mut self) -> Result<Option<Token>> { fn adv_token(&mut self) -> Result<Option<Token>> {
let next_token = self.lexer.next_token()?; let mut next_token = self.lexer.next_token()?;
if self.is_skip_newlines() {
while next_token.map(|t| t.kind() == TokenKind::Newline).unwrap_or(false) {
next_token = self.lexer.next_token()?;
}
if self.is_token_kind(TokenKind::Newline) {
self.curr_token = next_token;
while next_token.map(|t| t.kind() == TokenKind::Newline).unwrap_or(false) {
next_token = self.lexer.next_token()?;
}
}
}
Ok(mem::replace(&mut self.curr_token, next_token)) Ok(mem::replace(&mut self.curr_token, next_token))
} }
@@ -317,9 +375,8 @@ mod test {
} }
macro_rules! test_parser { macro_rules! test_parser {
($text:expr, $($tail:tt)+) => {{ ($parser:expr, $($tail:tt)+) => {{
let mut parser = Parser::try_from($text).unwrap(); let mut parser = $parser;
test_parser!(@TAIL, &mut parser, $($tail)+); test_parser!(@TAIL, &mut parser, $($tail)+);
}}; }};
@@ -332,8 +389,12 @@ mod test {
test_parser!(@TAIL, $parser, $($tail)*); test_parser!(@TAIL, $parser, $($tail)*);
}}; }};
(@TAIL, $parser:expr) => {}; (@TAIL, $parser:expr) => {
(@TAIL, $parser:expr,) => {}; assert!($parser.is_eof());
};
(@TAIL, $parser:expr,) => {
assert!($parser.is_eof());
};
} }
fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr { fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
@@ -369,7 +430,7 @@ mod test {
#[test] #[test]
fn test_base_expr() { fn test_base_expr() {
test_parser!( test_parser!(
"1 x 'value' :sym", Parser::try_from("1 x 'value' :sym").unwrap(),
// 1 // 1
next_expr, next_expr,
base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Num),
@@ -383,9 +444,12 @@ mod test {
next_expr, next_expr,
base_expr(BaseExprKind::Sym) base_expr(BaseExprKind::Sym)
); );
}
#[test]
fn test_list_expr() {
test_parser!( test_parser!(
"[1, x, 'value', :sym]", Parser::try_from("[1, x, 'value', :sym]").unwrap(),
next_expr, next_expr,
base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Num),
@@ -396,7 +460,7 @@ mod test {
); );
test_parser!( test_parser!(
"[1, x, 'value', :sym,]", Parser::try_from("[1, x, 'value', :sym,]").unwrap(),
next_expr, next_expr,
base_expr(BaseExprKind::List(vec![ base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num), base_expr(BaseExprKind::Num),
@@ -408,15 +472,49 @@ mod test {
} }
#[test] #[test]
fn test_compound_expr() { fn test_tuple_expr() {
test_parser!( test_parser!(
r#" Parser::try_from("(1, :sym)").unwrap(),
x + 2 next_expr,
base_expr(BaseExprKind::Tuple(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
Parser::try_from("(:sym,)").unwrap(),
next_expr,
base_expr(BaseExprKind::Tuple(vec![
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
Parser::try_from("(:sym)").unwrap(),
next_expr,
base_expr(BaseExprKind::Sym)
);
}
#[test]
fn test_bin_expr() {
test_parser!(
{
let mut parser = Parser::try_from(
r#"x + 2
1 + 2 * 3 - 4 / 5 1 + 2 * 3 - 4 / 5
1+2*3-4/5 1+2*3-4/5
1 - -1 * 8 1 - -1 * 8
1--1*8 1--1*8
1 + 1 == 2
3 >= 1 + 2
"#, "#,
)
.unwrap();
parser.set_skip_newlines(true).unwrap();
parser
},
// x + 2 // x + 2
next_expr, next_expr,
bin_expr( bin_expr(
@@ -484,9 +582,122 @@ mod test {
BinOp::Times, BinOp::Times,
base_expr(BaseExprKind::Num) base_expr(BaseExprKind::Num)
) )
),
// 1 + 1 == 2
next_expr,
bin_expr(
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
base_expr(BaseExprKind::Num)
),
BinOp::EqEq,
base_expr(BaseExprKind::Num)
),
// 3 >= 1 + 2
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::GtEq,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
base_expr(BaseExprKind::Num)
)
)
);
}
#[test]
fn test_multiline_exprs() {
test_parser!(
Parser::try_from(r"[
1,
x,
['value', :value],
:sym
]").unwrap(),
next_expr,
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Ident),
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Str),
base_expr(BaseExprKind::Sym),
])),
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
Parser::try_from(r"(
1,
x,
[
'value',
:value,
],
:sym
)").unwrap(),
next_expr,
base_expr(BaseExprKind::Tuple(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Ident),
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Str),
base_expr(BaseExprKind::Sym),
])),
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
Parser::try_from(r"(
+ 1
+ 2
+ 3
+ 4
)").unwrap(),
next_expr,
bin_expr(
un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)),
BinOp::Plus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
base_expr(BaseExprKind::Num)
)
)
) )
); );
test_parser!(
Parser::try_from(r"(
+ 1
+ 2
+ 3
+ 4
,
)").unwrap(),
next_expr,
base_expr(BaseExprKind::Tuple(vec![
bin_expr(
un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)),
BinOp::Plus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
base_expr(BaseExprKind::Num)
)
)
)
]))
);
} }
} }

View File

@@ -120,6 +120,7 @@ impl Spanned for Span {
} }
} }
/*
pub struct Sourced<'t, T: Spanned> { pub struct Sourced<'t, T: Spanned> {
text: &'t str, text: &'t str,
inner: T, inner: T,
@@ -136,6 +137,7 @@ impl<T: Spanned> Spanned for Sourced<'_, T> {
self.inner.span() self.inner.span()
} }
} }
*/
#[cfg(test)] #[cfg(test)]
mod test { mod test {

View File

@@ -31,6 +31,8 @@ pub enum TokenKind {
Splat, Splat,
FSlash, FSlash,
Bang, Bang,
Eol,
Newline,
} }
impl Display for TokenKind { impl Display for TokenKind {
@@ -65,6 +67,8 @@ impl Display for TokenKind {
Splat => "splat (or times)", Splat => "splat (or times)",
FSlash => "fslash (or divide)", FSlash => "fslash (or divide)",
Bang => "not", Bang => "not",
Eol => "line end",
Newline => "newline",
}; };
Display::fmt(s, fmt) Display::fmt(s, fmt)
} }