Add more expressions to parser, add EOL and newline tokens
* Lexer recognizes semicolons as EOL tokens and line feeds as newline tokens
* Parser can be configured to ignore newlines (this is also used internally)
* Newlines are allowed in lists, tuples, and parenthesized expressions
* Add a bunch of tests for the new behavior

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -3,10 +3,23 @@ use derivative::Derivative;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Stmt {
|
||||
Assign(Expr, Expr),
|
||||
Assign(AssignStmt),
|
||||
Expr(Expr),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct AssignStmt {
|
||||
pub lhs: Expr,
|
||||
pub rhs: Expr,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl Spanned for AssignStmt {
|
||||
fn span(&self) -> Span {
|
||||
self.span
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Expr {
|
||||
Base(BaseExpr),
|
||||
|
||||
@@ -46,7 +46,7 @@ impl<'t> Lexer<'t> {
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(c) = self.curr_char() {
|
||||
if !c.is_whitespace() {
|
||||
if !c.is_whitespace() || c == '\n' {
|
||||
break;
|
||||
} else {
|
||||
self.adv_char();
|
||||
@@ -96,6 +96,8 @@ impl<'t> Lexer<'t> {
|
||||
|(?P<lbrace>\{)
|
||||
|(?P<rbrace>\})
|
||||
|(?P<comma>,)
|
||||
|(?P<eol>;)
|
||||
|(?P<newline>\n)
|
||||
"#).ignore_whitespace(true)
|
||||
.build()
|
||||
.unwrap();
|
||||
@@ -133,6 +135,8 @@ impl<'t> Lexer<'t> {
|
||||
("gt", TokenKind::Gt),
|
||||
|
||||
("eq", TokenKind::Eq),
|
||||
("eol", TokenKind::Eol),
|
||||
("newline", TokenKind::Newline),
|
||||
];
|
||||
|
||||
self.skip_whitespace();
|
||||
@@ -206,7 +210,7 @@ mod test {
|
||||
assert!(matches!(lexer.next_token(), Ok(None)));
|
||||
assert!(lexer.is_eof());
|
||||
|
||||
let mut lexer = Lexer::new(" \n \n \n\r\n\t ");
|
||||
let mut lexer = Lexer::new(" \t \r \r\r\t\t ");
|
||||
assert!(matches!(lexer.next_token(), Ok(None)));
|
||||
assert!(lexer.is_eof());
|
||||
}
|
||||
@@ -263,6 +267,14 @@ mod test {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_eol() {
|
||||
test_token!("\n;",
|
||||
TokenKind::Newline, "\n",
|
||||
TokenKind::Eol, ";"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_symbols() {
|
||||
test_token!("(", TokenKind::LParen);
|
||||
|
||||
@@ -30,6 +30,11 @@ pub enum BinOp {
|
||||
Minus,
|
||||
Times,
|
||||
Div,
|
||||
EqEq,
|
||||
Lt,
|
||||
Gt,
|
||||
LtEq,
|
||||
GtEq,
|
||||
}
|
||||
|
||||
impl From<TokenKind> for BinOp {
|
||||
@@ -39,6 +44,11 @@ impl From<TokenKind> for BinOp {
|
||||
TokenKind::Minus => BinOp::Minus,
|
||||
TokenKind::Splat => BinOp::Times,
|
||||
TokenKind::FSlash => BinOp::Div,
|
||||
TokenKind::EqEq => BinOp::EqEq,
|
||||
TokenKind::LtEq => BinOp::LtEq,
|
||||
TokenKind::GtEq => BinOp::GtEq,
|
||||
TokenKind::Lt => BinOp::Lt,
|
||||
TokenKind::Gt => BinOp::Gt,
|
||||
_ => panic!("{:?} cannot be converted to a binop", other),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +37,28 @@ impl<'t> Parser<'t> {
|
||||
pub fn pos(&self) -> Pos {
|
||||
self.span().start
|
||||
}
|
||||
|
||||
pub fn set_skip_newlines(&mut self, skip: bool) -> Result<bool> {
|
||||
let prev = mem::replace(&mut self.skip_newlines, skip);
|
||||
match self.skip_newlines() {
|
||||
Ok(()) => Ok(prev),
|
||||
Err(e) => {
|
||||
self.skip_newlines = prev;
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_skip_newlines(&self) -> bool {
|
||||
self.skip_newlines
|
||||
}
|
||||
|
||||
fn skip_newlines(&mut self) -> Result<()> {
|
||||
if self.is_skip_newlines() && self.is_token_kind(TokenKind::Newline) {
|
||||
self.adv_token()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -61,14 +83,28 @@ macro_rules! bin_expr {
|
||||
|
||||
impl<'t> Parser<'t> {
|
||||
pub fn next_expr(&mut self) -> Result<Expr> {
|
||||
self.next_bin_add_expr()
|
||||
self.next_bin_cmp_expr()
|
||||
}
|
||||
|
||||
// == < > <= >=
|
||||
bin_expr!(
|
||||
next_bin_cmp_expr,
|
||||
&[
|
||||
TokenKind::EqEq,
|
||||
TokenKind::Lt,
|
||||
TokenKind::Gt,
|
||||
TokenKind::LtEq,
|
||||
TokenKind::GtEq
|
||||
],
|
||||
next_bin_add_expr
|
||||
);
|
||||
// + -
|
||||
bin_expr!(
|
||||
next_bin_add_expr,
|
||||
&[TokenKind::Plus, TokenKind::Minus],
|
||||
next_bin_mul_expr
|
||||
);
|
||||
// * /
|
||||
bin_expr!(
|
||||
next_bin_mul_expr,
|
||||
&[TokenKind::FSlash, TokenKind::Splat],
|
||||
@@ -80,6 +116,7 @@ impl<'t> Parser<'t> {
|
||||
let start = un_op.span();
|
||||
let expr = self.next_un_expr()?;
|
||||
let end = expr.span();
|
||||
|
||||
Ok(UnExpr {
|
||||
op: un_op.kind().into(),
|
||||
expr,
|
||||
@@ -116,7 +153,9 @@ impl<'t> Parser<'t> {
|
||||
}
|
||||
.into(),
|
||||
TokenKind::LBracket => {
|
||||
let prev_skip = self.set_skip_newlines(true)?;
|
||||
let expr_list = self.next_expr_list(TokenKind::RBracket)?;
|
||||
self.set_skip_newlines(prev_skip)?;
|
||||
let end_token =
|
||||
self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?;
|
||||
let span = token.span().union(end_token.span());
|
||||
@@ -128,10 +167,14 @@ impl<'t> Parser<'t> {
|
||||
}
|
||||
TokenKind::LBrace => todo!(),
|
||||
TokenKind::LParen => {
|
||||
let prev_skip = self.set_skip_newlines(true)?;
|
||||
let first = self.next_expr()?;
|
||||
if let Some(_) = self.match_token_kind(TokenKind::Comma)? {
|
||||
let mut list = self.next_expr_list(TokenKind::RParen)?;
|
||||
|
||||
self.set_skip_newlines(prev_skip)?;
|
||||
let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?;
|
||||
|
||||
let span = first.span().union(end_token.span());
|
||||
list.insert(0, first);
|
||||
Expr::Base(
|
||||
@@ -142,6 +185,7 @@ impl<'t> Parser<'t> {
|
||||
.into(),
|
||||
)
|
||||
} else {
|
||||
self.set_skip_newlines(prev_skip)?;
|
||||
self.expect_token_kind(TokenKind::RParen, "end of expression")?;
|
||||
first
|
||||
}
|
||||
@@ -173,7 +217,21 @@ impl<'t> Parser<'t> {
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
fn adv_token(&mut self) -> Result<Option<Token>> {
|
||||
let next_token = self.lexer.next_token()?;
|
||||
let mut next_token = self.lexer.next_token()?;
|
||||
|
||||
if self.is_skip_newlines() {
|
||||
while next_token.map(|t| t.kind() == TokenKind::Newline).unwrap_or(false) {
|
||||
next_token = self.lexer.next_token()?;
|
||||
}
|
||||
|
||||
if self.is_token_kind(TokenKind::Newline) {
|
||||
self.curr_token = next_token;
|
||||
while next_token.map(|t| t.kind() == TokenKind::Newline).unwrap_or(false) {
|
||||
next_token = self.lexer.next_token()?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(mem::replace(&mut self.curr_token, next_token))
|
||||
}
|
||||
|
||||
@@ -317,9 +375,8 @@ mod test {
|
||||
}
|
||||
|
||||
macro_rules! test_parser {
|
||||
($text:expr, $($tail:tt)+) => {{
|
||||
let mut parser = Parser::try_from($text).unwrap();
|
||||
|
||||
($parser:expr, $($tail:tt)+) => {{
|
||||
let mut parser = $parser;
|
||||
test_parser!(@TAIL, &mut parser, $($tail)+);
|
||||
}};
|
||||
|
||||
@@ -332,8 +389,12 @@ mod test {
|
||||
test_parser!(@TAIL, $parser, $($tail)*);
|
||||
}};
|
||||
|
||||
(@TAIL, $parser:expr) => {};
|
||||
(@TAIL, $parser:expr,) => {};
|
||||
(@TAIL, $parser:expr) => {
|
||||
assert!($parser.is_eof());
|
||||
};
|
||||
(@TAIL, $parser:expr,) => {
|
||||
assert!($parser.is_eof());
|
||||
};
|
||||
}
|
||||
|
||||
fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
|
||||
@@ -369,7 +430,7 @@ mod test {
|
||||
#[test]
|
||||
fn test_base_expr() {
|
||||
test_parser!(
|
||||
"1 x 'value' :sym",
|
||||
Parser::try_from("1 x 'value' :sym").unwrap(),
|
||||
// 1
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::Num),
|
||||
@@ -383,9 +444,12 @@ mod test {
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::Sym)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_expr() {
|
||||
test_parser!(
|
||||
"[1, x, 'value', :sym]",
|
||||
Parser::try_from("[1, x, 'value', :sym]").unwrap(),
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::List(vec![
|
||||
base_expr(BaseExprKind::Num),
|
||||
@@ -396,7 +460,7 @@ mod test {
|
||||
);
|
||||
|
||||
test_parser!(
|
||||
"[1, x, 'value', :sym,]",
|
||||
Parser::try_from("[1, x, 'value', :sym,]").unwrap(),
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::List(vec![
|
||||
base_expr(BaseExprKind::Num),
|
||||
@@ -408,15 +472,49 @@ mod test {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compound_expr() {
|
||||
fn test_tuple_expr() {
|
||||
test_parser!(
|
||||
r#"
|
||||
x + 2
|
||||
1 + 2 * 3 - 4 / 5
|
||||
1+2*3-4/5
|
||||
1 - -1 * 8
|
||||
1--1*8
|
||||
"#,
|
||||
Parser::try_from("(1, :sym)").unwrap(),
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::Tuple(vec![
|
||||
base_expr(BaseExprKind::Num),
|
||||
base_expr(BaseExprKind::Sym),
|
||||
]))
|
||||
);
|
||||
|
||||
test_parser!(
|
||||
Parser::try_from("(:sym,)").unwrap(),
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::Tuple(vec![
|
||||
base_expr(BaseExprKind::Sym),
|
||||
]))
|
||||
);
|
||||
|
||||
test_parser!(
|
||||
Parser::try_from("(:sym)").unwrap(),
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::Sym)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bin_expr() {
|
||||
test_parser!(
|
||||
{
|
||||
let mut parser = Parser::try_from(
|
||||
r#"x + 2
|
||||
1 + 2 * 3 - 4 / 5
|
||||
1+2*3-4/5
|
||||
1 - -1 * 8
|
||||
1--1*8
|
||||
1 + 1 == 2
|
||||
3 >= 1 + 2
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
parser.set_skip_newlines(true).unwrap();
|
||||
parser
|
||||
},
|
||||
// x + 2
|
||||
next_expr,
|
||||
bin_expr(
|
||||
@@ -484,9 +582,122 @@ mod test {
|
||||
BinOp::Times,
|
||||
base_expr(BaseExprKind::Num)
|
||||
)
|
||||
),
|
||||
// 1 + 1 == 2
|
||||
next_expr,
|
||||
bin_expr(
|
||||
bin_expr(
|
||||
base_expr(BaseExprKind::Num),
|
||||
BinOp::Plus,
|
||||
base_expr(BaseExprKind::Num)
|
||||
),
|
||||
BinOp::EqEq,
|
||||
base_expr(BaseExprKind::Num)
|
||||
),
|
||||
// 3 >= 1 + 2
|
||||
next_expr,
|
||||
bin_expr(
|
||||
base_expr(BaseExprKind::Num),
|
||||
BinOp::GtEq,
|
||||
bin_expr(
|
||||
base_expr(BaseExprKind::Num),
|
||||
BinOp::Plus,
|
||||
base_expr(BaseExprKind::Num)
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiline_exprs() {
|
||||
test_parser!(
|
||||
Parser::try_from(r"[
|
||||
1,
|
||||
x,
|
||||
['value', :value],
|
||||
:sym
|
||||
]").unwrap(),
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::List(vec![
|
||||
base_expr(BaseExprKind::Num),
|
||||
base_expr(BaseExprKind::Ident),
|
||||
base_expr(BaseExprKind::List(vec![
|
||||
base_expr(BaseExprKind::Str),
|
||||
base_expr(BaseExprKind::Sym),
|
||||
])),
|
||||
base_expr(BaseExprKind::Sym),
|
||||
]))
|
||||
);
|
||||
|
||||
test_parser!(
|
||||
Parser::try_from(r"(
|
||||
1,
|
||||
x,
|
||||
[
|
||||
'value',
|
||||
:value,
|
||||
],
|
||||
:sym
|
||||
)").unwrap(),
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::Tuple(vec![
|
||||
base_expr(BaseExprKind::Num),
|
||||
base_expr(BaseExprKind::Ident),
|
||||
base_expr(BaseExprKind::List(vec![
|
||||
base_expr(BaseExprKind::Str),
|
||||
base_expr(BaseExprKind::Sym),
|
||||
])),
|
||||
base_expr(BaseExprKind::Sym),
|
||||
]))
|
||||
);
|
||||
|
||||
test_parser!(
|
||||
Parser::try_from(r"(
|
||||
+ 1
|
||||
+ 2
|
||||
+ 3
|
||||
+ 4
|
||||
)").unwrap(),
|
||||
next_expr,
|
||||
bin_expr(
|
||||
un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)),
|
||||
BinOp::Plus,
|
||||
bin_expr(
|
||||
base_expr(BaseExprKind::Num),
|
||||
BinOp::Plus,
|
||||
bin_expr(
|
||||
base_expr(BaseExprKind::Num),
|
||||
BinOp::Plus,
|
||||
base_expr(BaseExprKind::Num)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
//
|
||||
test_parser!(
|
||||
Parser::try_from(r"(
|
||||
+ 1
|
||||
+ 2
|
||||
+ 3
|
||||
+ 4
|
||||
,
|
||||
)").unwrap(),
|
||||
next_expr,
|
||||
base_expr(BaseExprKind::Tuple(vec![
|
||||
bin_expr(
|
||||
un_expr(UnOp::Plus, base_expr(BaseExprKind::Num)),
|
||||
BinOp::Plus,
|
||||
bin_expr(
|
||||
base_expr(BaseExprKind::Num),
|
||||
BinOp::Plus,
|
||||
bin_expr(
|
||||
base_expr(BaseExprKind::Num),
|
||||
BinOp::Plus,
|
||||
base_expr(BaseExprKind::Num)
|
||||
)
|
||||
)
|
||||
)
|
||||
]))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,6 +120,7 @@ impl Spanned for Span {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
pub struct Sourced<'t, T: Spanned> {
|
||||
text: &'t str,
|
||||
inner: T,
|
||||
@@ -136,6 +137,7 @@ impl<T: Spanned> Spanned for Sourced<'_, T> {
|
||||
self.inner.span()
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
@@ -31,6 +31,8 @@ pub enum TokenKind {
|
||||
Splat,
|
||||
FSlash,
|
||||
Bang,
|
||||
Eol,
|
||||
Newline,
|
||||
}
|
||||
|
||||
impl Display for TokenKind {
|
||||
@@ -65,6 +67,8 @@ impl Display for TokenKind {
|
||||
Splat => "splat (or times)",
|
||||
FSlash => "fslash (or divide)",
|
||||
Bang => "not",
|
||||
Eol => "line end",
|
||||
Newline => "newline",
|
||||
};
|
||||
Display::fmt(s, fmt)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user