This repository was archived on 2020-09-15. You can view files and clone it, but cannot push or open issues or pull requests.
Files
not-python-old.2020-08-27/src/syn/parser.rs

782 lines
22 KiB
Rust
Raw Normal View History

use crate::syn::{ast::*, error::*, lexer::Lexer, op::*, span::*, token::*};
use std::{convert::TryFrom, mem};
/// Token kinds that may begin a base expression.
///
/// `next_base_expr` rejects the current token unless its kind appears in this list, and then
/// dispatches on the kind.
const BASE_EXPR_START: &[TokenKind] = &[
// Literal-like tokens, each turned into a single-token `BaseExpr`.
TokenKind::Ident,
TokenKind::Num,
TokenKind::Str,
TokenKind::Sym,
// Opens a parenthesized expression or a tuple.
TokenKind::LParen,
// Opens a list.
TokenKind::LBracket,
// Opens a body expression (currently a `todo!` in `next_base_expr`).
TokenKind::LBrace,
// Opens an object.
TokenKind::ObjBrace,
];
/// Token kinds that `next_un_expr` accepts as a prefix (unary) operator.
const UN_EXPR_START: &[TokenKind] = &[TokenKind::Plus, TokenKind::Minus, TokenKind::Bang];
/// Parser that pulls tokens from a [`Lexer`] and keeps a single token of lookahead.
pub struct Parser<'t> {
/// Source of tokens.
lexer: Lexer<'t>,
/// The current, not yet consumed token; `None` when the lexer returned no token.
curr_token: Option<Token>,
/// When `true`, `adv_token` discards `Newline` tokens instead of surfacing them.
skip_newlines: bool,
}
impl<'t> Parser<'t> {
pub fn new(lexer: Lexer<'t>) -> Result<Self> {
let mut parser = Parser {
lexer,
curr_token: None,
skip_newlines: false,
};
parser.adv_token()?;
Ok(parser)
}
/// Returns `true` once every token has been consumed: the lookahead slot is empty and the
/// lexer reports end of input.
///
/// The parser always buffers one token in `curr_token`, so the lexer can reach the end of its
/// input while the final token is still waiting to be parsed. Asking only the lexer would
/// therefore report end-of-input one token too early.
pub fn is_eof(&self) -> bool {
    self.curr_token.is_none() && self.lexer.is_eof()
}
/// Start position of the parser's current span (see the `Spanned` impl for `Parser`).
pub fn pos(&self) -> Pos {
    let current = self.span();
    current.start
}
pub fn set_skip_newlines(&mut self, skip: bool) -> Result<bool> {
let prev = mem::replace(&mut self.skip_newlines, skip);
match self.skip_newlines() {
Ok(()) => Ok(prev),
Err(e) => {
self.skip_newlines = prev;
Err(e)
}
}
}
/// Reports whether newline skipping is currently enabled.
pub fn is_skip_newlines(&self) -> bool {
self.skip_newlines
}
/// Steps past a `Newline` held in the lookahead slot, but only while newline skipping is
/// enabled; otherwise does nothing.
///
/// # Errors
/// Propagates any error from advancing the token stream.
fn skip_newlines(&mut self) -> Result<()> {
    if !self.is_skip_newlines() {
        return Ok(());
    }
    if self.is_token_kind(TokenKind::Newline) {
        self.adv_token()?;
    }
    Ok(())
}
}
////////////////////////////////////////////////////////////////////////////////
// Statement parsing
////////////////////////////////////////////////////////////////////////////////
impl<'t> Parser<'t> {
/// Parses the next statement.
///
/// # Panics
/// Not implemented yet: the body is `todo!()`, so every call panics.
pub fn next_stmt(&mut self) -> Result<Stmt> {
todo!()
}
}
////////////////////////////////////////////////////////////////////////////////
// Expression parsing
////////////////////////////////////////////////////////////////////////////////
/// Generates a parser method `$name` for one level of binary-operator precedence.
///
/// The generated method parses its left operand with `$next` (the next tighter level). If the
/// current token's kind is listed in `$op_tokens`, that token is consumed as the operator and
/// the right operand is parsed by calling `$name` again; otherwise the left operand is
/// returned unchanged.
///
/// NOTE(review): because the right operand recurses into `$name`, a chain such as `a - b - c`
/// groups as `a - (b - c)`. The tests in this file expect that grouping; confirm it is the
/// intended associativity.
macro_rules! bin_expr {
    ($name:ident, $op_tokens:expr, $next:ident) => {
        fn $name(&mut self) -> Result<Expr> {
            let lhs = self.$next()?;
            let op = match self.match_token_where(|t| $op_tokens.contains(&t.kind()))? {
                Some(token) => BinOp::from(token),
                // No operator at this level: the operand stands on its own.
                None => return Ok(lhs),
            };
            let rhs = self.$name()?;
            let span = lhs.span().union(rhs.span());
            Ok(BinExpr { lhs, op, rhs, span }.into())
        }
    };
}
impl<'t> Parser<'t> {
/// Parses a full expression by entering the outermost precedence level,
/// `next_bin_cmp_expr`.
pub fn next_expr(&mut self) -> Result<Expr> {
self.next_bin_cmp_expr()
}
// Comparison level: == < > <= >=
// This is the level `next_expr` enters first; its operands are parsed by
// `next_bin_add_expr`.
bin_expr!(
next_bin_cmp_expr,
&[
TokenKind::EqEq,
TokenKind::Lt,
TokenKind::Gt,
TokenKind::LtEq,
TokenKind::GtEq
],
next_bin_add_expr
);
// Additive level: + -
// Operands are parsed by `next_bin_mul_expr`.
bin_expr!(
next_bin_add_expr,
&[TokenKind::Plus, TokenKind::Minus],
next_bin_mul_expr
);
// Multiplicative level: * /
// (token kinds `Splat` and `FSlash` respectively, presumably). Operands are parsed by
// `next_un_expr`.
bin_expr!(
next_bin_mul_expr,
&[TokenKind::FSlash, TokenKind::Splat],
next_un_expr
);
/// Parses a unary expression: zero or more prefix operators from `UN_EXPR_START` followed by
/// a base expression.
///
/// The resulting span runs from the operator token to the end of its operand.
fn next_un_expr(&mut self) -> Result<Expr> {
    let op_token = match self.match_token_where(|t| UN_EXPR_START.contains(&t.kind()))? {
        Some(token) => token,
        // No prefix operator: this is a plain base expression.
        None => return self.next_base_expr(),
    };
    // Recurse so that stacked prefix operators nest around the same operand.
    let operand = self.next_un_expr()?;
    let span = op_token.span().union(operand.span());
    Ok(UnExpr {
        op: op_token.kind().into(),
        expr: operand,
        span,
    }
    .into())
}
fn next_base_expr(&mut self) -> Result<Expr> {
let token =
self.expect_token_where(|t| BASE_EXPR_START.contains(&t.kind()), "base expression")?;
let expr: Expr = match token.kind() {
TokenKind::Ident => BaseExpr {
kind: BaseExprKind::Ident,
span: token.span(),
}
.into(),
TokenKind::Num => BaseExpr {
kind: BaseExprKind::Num,
span: token.span(),
}
.into(),
TokenKind::Str => BaseExpr {
kind: BaseExprKind::Str,
span: token.span(),
}
.into(),
TokenKind::Sym => BaseExpr {
kind: BaseExprKind::Sym,
span: token.span(),
}
.into(),
TokenKind::LBracket => {
let prev_skip = self.set_skip_newlines(true)?;
let expr_list = self.next_expr_list(TokenKind::RBracket)?;
self.set_skip_newlines(prev_skip)?;
let end_token =
self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?;
let span = token.span().union(end_token.span());
BaseExpr {
kind: BaseExprKind::List(expr_list),
span,
}
.into()
}
TokenKind::LBrace => todo!("TODO body expressions"),
TokenKind::ObjBrace => {
let prev_skip = self.set_skip_newlines(true)?;
let object = self.next_obj_list()?;
let end_token = self.expect_token_kind(TokenKind::RBrace, "end of object (right curly brace)")?;
let span = token.span().union(end_token.span());
Expr::Base(BaseExpr {
kind: BaseExprKind::Object(object),
span,
})
}
TokenKind::LParen => {
let prev_skip = self.set_skip_newlines(true)?;
let first = self.next_expr()?;
if let Some(_) = self.match_token_kind(TokenKind::Comma)? {
let mut list = self.next_expr_list(TokenKind::RParen)?;
self.set_skip_newlines(prev_skip)?;
let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?;
let span = token.span().union(end_token.span());
list.insert(0, first);
Expr::Base(
BaseExpr {
kind: BaseExprKind::Tuple(list),
span,
}
.into(),
)
} else {
self.set_skip_newlines(prev_skip)?;
self.expect_token_kind(TokenKind::RParen, "end of expression")?;
first
}
}
_ => unreachable!(),
};
Ok(expr)
}
/// Parses comma-separated expressions until the current token is `stop_before` or an item is
/// not followed by a comma.
///
/// The stop token itself is left unconsumed, and a trailing comma before it is accepted. If
/// the input ends before the stop token, the attempt to parse the next item fails and that
/// error is returned.
fn next_expr_list(&mut self, stop_before: TokenKind) -> Result<Vec<Expr>> {
    let mut items = Vec::new();
    loop {
        if self.is_token_kind(stop_before) {
            break;
        }
        items.push(self.next_expr()?);
        // Without a separating comma the list cannot continue.
        if self.match_token_kind(TokenKind::Comma)?.is_none() {
            break;
        }
    }
    Ok(items)
}
/// Parses the `key = value` entries of a `BaseExprKind::Object` expression.
///
/// The opening object brace `%{` must already have been consumed by the caller; the closing
/// right brace is left in place for the caller to consume.
///
/// Grammar handled here:
///
/// ```text
/// ( key EQ value ( COMMA key EQ value )* COMMA? )?
/// ```
fn next_obj_list(&mut self) -> Result<Vec<(Expr, Expr)>> {
    let mut entries = Vec::new();
    loop {
        if self.is_token_kind(TokenKind::RBrace) {
            break;
        }
        let key = self.next_expr()?;
        self.expect_token_kind(TokenKind::Eq, "equals sign for object item")?;
        let value = self.next_expr()?;
        entries.push((key, value));
        // Without a separating comma the entry list cannot continue.
        if self.match_token_kind(TokenKind::Comma)?.is_none() {
            break;
        }
    }
    Ok(entries)
}
////////////////////////////////////////////////////////////////////////////////
// Token matching functions
////////////////////////////////////////////////////////////////////////////////
/// Advances to the next token and returns the token that was current before the call.
///
/// While newline skipping is enabled, `Newline` tokens coming from the lexer are discarded.
/// If, in that mode, the current token is itself a `Newline` (skipping was switched on while
/// one was buffered), that newline is dropped as well; in that case the value returned is the
/// *new* current token rather than the newline.
///
/// # Errors
/// Propagates any error from the lexer.
fn adv_token(&mut self) -> Result<Option<Token>> {
    let mut next_token = self.lexer.next_token()?;
    if self.is_skip_newlines() {
        while next_token.map_or(false, |t| t.kind() == TokenKind::Newline) {
            next_token = self.lexer.next_token()?;
        }
        // `next_token` is already past every newline here, so overwriting the buffered
        // newline is all that is needed; no second skipping pass is required.
        if self.is_token_kind(TokenKind::Newline) {
            self.curr_token = next_token;
        }
    }
    Ok(mem::replace(&mut self.curr_token, next_token))
}
/// Consumes and returns the current token if `pred` accepts it; otherwise leaves the token
/// stream untouched and returns `Ok(None)`.
fn match_token_where<P>(&mut self, pred: P) -> Result<Option<Token>>
where
    P: Fn(Token) -> bool,
{
    if !self.is_token_match(pred) {
        return Ok(None);
    }
    self.adv_token()
}
/// Consumes and returns the current token if it has the given `kind`; otherwise leaves the
/// token stream untouched and returns `Ok(None)`.
fn match_token_kind(&mut self, kind: TokenKind) -> Result<Option<Token>> {
    self.match_token_where(|token| token.kind() == kind)
}
/// Tests the current token against `pred` without consuming it. Returns `false` when there
/// is no current token.
fn is_token_match<P>(&self, pred: P) -> bool
where
    P: Fn(Token) -> bool,
{
    self.curr_token.map_or(false, pred)
}
/// Reports whether the current token exists and has the given `kind`; nothing is consumed.
fn is_token_kind(&self, kind: TokenKind) -> bool {
    matches!(self.curr_token, Some(token) if token.kind() == kind)
}
fn expect_token_where<P>(&mut self, pred: P, expected: impl ToString) -> Result<Token>
where
P: Fn(Token) -> bool,
{
self.match_token_where(pred)?
.ok_or_else(|| Error::ExpectedGot {
expected: expected.to_string(),
got: self
.curr_token
.map(|token| token.kind().to_string())
.unwrap_or_else(|| "EOF".to_string()),
pos: self.pos(),
})
}
/// Consumes and returns the current token if it has the given `kind`; otherwise fails with
/// the error built by `expect_token_where`, using `expected` as the description.
fn expect_token_kind(&mut self, kind: TokenKind, expected: impl ToString) -> Result<Token> {
    let has_kind = |token: Token| token.kind() == kind;
    self.expect_token_where(has_kind, expected)
}
}
impl<'t> Spanned for Parser<'t> {
    /// Span of the current token, or the default span when no token is buffered.
    fn span(&self) -> Span {
        match self.curr_token.as_ref() {
            Some(token) => Spanned::span(token),
            None => Span::default(),
        }
    }
}
/// Wraps an existing lexer in a parser; equivalent to calling `Parser::new`.
impl<'t> TryFrom<Lexer<'t>> for Parser<'t> {
    type Error = Error;

    fn try_from(lexer: Lexer<'t>) -> Result<Self> {
        Self::new(lexer)
    }
}
/// Builds a parser directly from source text by first creating a lexer over it.
impl<'t> TryFrom<&'t str> for Parser<'t> {
    type Error = Error;

    fn try_from(text: &'t str) -> Result<Self> {
        let lexer = Lexer::new(text);
        Self::new(lexer)
    }
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_adv_token() {
    // Each call to `adv_token` hands back the token that was current before the call.
    let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap();
    let expected_kinds = [TokenKind::Num, TokenKind::Ident, TokenKind::Sym, TokenKind::Str];
    for &expected in expected_kinds.iter() {
        let kind = parser.adv_token().unwrap().unwrap().kind();
        assert_eq!(kind, expected);
    }
    assert!(parser.is_eof());
}
#[test]
fn test_match_token_where() {
    let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap();
    let kinds = [TokenKind::Num, TokenKind::Ident, TokenKind::Sym, TokenKind::Str];
    for kind in kinds.iter().copied() {
        // The first attempt consumes the token of this kind...
        assert!(matches!(
            parser.match_token_where(|token| token.kind() == kind),
            Ok(Some(_))
        ));
        // ...so an immediate second attempt finds a different token (or none) and declines.
        assert!(matches!(
            parser.match_token_where(|token| token.kind() == kind),
            Ok(None)
        ));
    }
    assert!(parser.is_eof());
}
// Test helper: runs a sequence of parser methods and compares each result with an expected
// value.
//
// Usage: `test_parser!(parser_expr, method, expected, method, expected, ...)`.
// Each `method` is called on the parser with no arguments, unwrapped, and compared with the
// matching `expected` via `assert_eq!`.
macro_rules! test_parser {
// Entry point: binds the parser to a local and hands the rest to the `@TAIL` arms.
($parser:expr, $($tail:tt)+) => {{
let mut parser = $parser;
test_parser!(@TAIL, &mut parser, $($tail)+);
}};
// Final `method, expected` pair.
(@TAIL, $parser:expr, $method:ident, $expected:expr) => {{
assert_eq!($parser.$method().unwrap(), $expected);
}};
// A `method, expected` pair followed by more pairs: check this one, then recurse.
(@TAIL, $parser:expr, $method:ident, $expected:expr, $($tail:tt)+) => {{
test_parser!(@TAIL, $parser, $method, $expected);
test_parser!(@TAIL, $parser, $($tail)*);
}};
// Nothing left to check: assert the parser is at end of input.
// NOTE(review): the recursion above always stops at the single-pair arm, so no invocation in
// this module appears to reach the two EOF arms below.
(@TAIL, $parser:expr) => {
assert!($parser.is_eof());
};
(@TAIL, $parser:expr,) => {
assert!($parser.is_eof());
};
}
/// Test helper: builds a binary expression node with a default span.
fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
    let span = Default::default();
    let node = BinExpr { lhs, op, rhs, span };
    Expr::Bin(node.into())
}
/// Test helper: builds a unary expression node with a default span.
fn un_expr(op: UnOp, expr: Expr) -> Expr {
    let span = Default::default();
    let node = UnExpr { op, expr, span };
    Expr::Un(node.into())
}
/// Test helper: builds a base expression node of the given kind with a default span.
fn base_expr(kind: BaseExprKind) -> Expr {
    let span = Default::default();
    Expr::Base(BaseExpr { kind, span })
}
#[test]
fn test_base_expr() {
    // Four bare tokens in a row: 1, x, 'value', :sym. Each `next_expr` call yields one.
    let mut parser = Parser::try_from("1 x 'value' :sym").unwrap();
    let expected_kinds = vec![
        BaseExprKind::Num,
        BaseExprKind::Ident,
        BaseExprKind::Str,
        BaseExprKind::Sym,
    ];
    for kind in expected_kinds {
        assert_eq!(parser.next_expr().unwrap(), base_expr(kind));
    }
}
#[test]
fn test_list_expr() {
    let expected = || {
        base_expr(BaseExprKind::List(vec![
            base_expr(BaseExprKind::Num),
            base_expr(BaseExprKind::Ident),
            base_expr(BaseExprKind::Str),
            base_expr(BaseExprKind::Sym),
        ]))
    };
    // The same list without and with a trailing comma must parse to the same tree.
    let sources = ["[1, x, 'value', :sym]", "[1, x, 'value', :sym,]"];
    for source in sources.iter().copied() {
        let mut parser = Parser::try_from(source).unwrap();
        assert_eq!(parser.next_expr().unwrap(), expected());
    }
}
#[test]
fn test_tuple_expr() {
    let num = || base_expr(BaseExprKind::Num);
    let sym = || base_expr(BaseExprKind::Sym);
    let cases = vec![
        // Two items.
        ("(1, :sym)", base_expr(BaseExprKind::Tuple(vec![num(), sym()]))),
        // One item plus a trailing comma is still a tuple.
        ("(:sym,)", base_expr(BaseExprKind::Tuple(vec![sym()]))),
        // No comma: just a parenthesized expression.
        ("(:sym)", sym()),
    ];
    for (source, expected) in cases {
        let mut parser = Parser::try_from(source).unwrap();
        assert_eq!(parser.next_expr().unwrap(), expected);
    }
}
#[test]
fn test_object_expr() {
    // A multi-line object with symbol, identifier and string keys and a trailing comma.
    let source = r#"%{
:sym = value,
key = "value",
"lit" = 1,
}"#;
    let entry = |key, value| (base_expr(key), base_expr(value));
    let expected = base_expr(BaseExprKind::Object(vec![
        entry(BaseExprKind::Sym, BaseExprKind::Ident),
        entry(BaseExprKind::Ident, BaseExprKind::Str),
        entry(BaseExprKind::Str, BaseExprKind::Num),
    ]));
    let mut parser = Parser::try_from(source).unwrap();
    assert_eq!(parser.next_expr().unwrap(), expected);
}
#[test]
fn test_bin_expr() {
    let num = || base_expr(BaseExprKind::Num);
    let ident = || base_expr(BaseExprKind::Ident);
    // Tree expected for `1 + 2 * 3 - 4 / 5`: 1 + ((2 * 3) - (4 / 5)).
    let mixed = || {
        bin_expr(
            num(),
            BinOp::Plus,
            bin_expr(
                bin_expr(num(), BinOp::Times, num()),
                BinOp::Minus,
                bin_expr(num(), BinOp::Div, num()),
            ),
        )
    };
    // Tree expected for `1 - -1 * 8`: 1 - ((-1) * 8).
    let negated = || {
        bin_expr(
            num(),
            BinOp::Minus,
            bin_expr(un_expr(UnOp::Minus, num()), BinOp::Times, num()),
        )
    };
    let cases = vec![
        ("x + 2", bin_expr(ident(), BinOp::Plus, num())),
        // Spacing between tokens must not affect the result.
        ("1 + 2 * 3 - 4 / 5", mixed()),
        ("1+2*3-4/5", mixed()),
        ("1 - -1 * 8", negated()),
        ("1--1*8", negated()),
        // Comparison binds more loosely than addition on either side.
        (
            "1 + 1 == 2",
            bin_expr(bin_expr(num(), BinOp::Plus, num()), BinOp::EqEq, num()),
        ),
        (
            "3 >= 1 + 2",
            bin_expr(num(), BinOp::GtEq, bin_expr(num(), BinOp::Plus, num())),
        ),
    ];
    for (source, expected) in cases {
        let mut parser = Parser::try_from(source).unwrap();
        assert_eq!(parser.next_expr().unwrap(), expected);
    }
}
#[test]
fn test_multiline_exprs() {
    let num = || base_expr(BaseExprKind::Num);
    let ident = || base_expr(BaseExprKind::Ident);
    let string = || base_expr(BaseExprKind::Str);
    let sym = || base_expr(BaseExprKind::Sym);
    // Tree expected for `+ 1 + 2 + 3 + 4`: (+1) + (2 + (3 + 4)).
    let sum = || {
        bin_expr(
            un_expr(UnOp::Plus, num()),
            BinOp::Plus,
            bin_expr(num(), BinOp::Plus, bin_expr(num(), BinOp::Plus, num())),
        )
    };

    // A list spread over several lines, with a nested single-line list.
    let list_source = r"[
1,
x,
['value', :value],
:sym
]";
    // A tuple spread over several lines, with a nested multi-line list.
    let tuple_source = r"(
1,
x,
[
:key,
'value',
],
:sym
)";
    // A parenthesized sum with each term on its own line.
    let paren_sum_source = r"(
+ 1
+ 2
+ 3
+ 4
)";
    // The same sum followed by a comma on its own line, which makes it a one-item tuple.
    let tuple_sum_source = r"(
+ 1
+ 2
+ 3
+ 4
,
)";

    let cases = vec![
        (
            list_source,
            base_expr(BaseExprKind::List(vec![
                num(),
                ident(),
                base_expr(BaseExprKind::List(vec![string(), sym()])),
                sym(),
            ])),
        ),
        (
            tuple_source,
            base_expr(BaseExprKind::Tuple(vec![
                num(),
                ident(),
                base_expr(BaseExprKind::List(vec![sym(), string()])),
                sym(),
            ])),
        ),
        (paren_sum_source, sum()),
        (tuple_sum_source, base_expr(BaseExprKind::Tuple(vec![sum()]))),
    ];
    for (source, expected) in cases {
        let mut parser = Parser::try_from(source).unwrap();
        assert_eq!(parser.next_expr().unwrap(), expected);
    }
}
}