This repository has been archived on 2020-09-15. You can view files and clone it, but cannot push or open issues or pull requests.
Files
not-python-old.2020-08-27/src/syn/parser.rs

493 lines
14 KiB
Rust
Raw Normal View History

use crate::syn::{ast::*, error::*, lexer::Lexer, op::*, span::*, token::*};
use std::{convert::TryFrom, mem};
/// Token kinds that `next_base_expr` accepts as the first token of a base expression:
/// the four atom kinds plus the three opening delimiters.
const BASE_EXPR_START: &[TokenKind] = &[
TokenKind::Ident,
TokenKind::Num,
TokenKind::Str,
TokenKind::Sym,
TokenKind::LParen,
TokenKind::LBracket,
TokenKind::LBrace,
];
/// Token kinds that `next_un_expr` treats as a prefix (unary) operator.
const UN_EXPR_START: &[TokenKind] = &[TokenKind::Plus, TokenKind::Minus, TokenKind::Bang];
/// Expression parser that pulls tokens from a [`Lexer`] with one token of lookahead.
pub struct Parser<'t> {
/// Source of tokens.
lexer: Lexer<'t>,
/// The lookahead token: already read from the lexer but not yet consumed by the parser.
/// `None` once the lexer has no more tokens to give (reported as "EOF" in errors).
curr_token: Option<Token>,
/// Initialised to `false` by `Parser::new`.
/// NOTE(review): nothing in the visible code reads this flag — confirm whether it is still
/// needed.
skip_newlines: bool,
}
impl<'t> Parser<'t> {
    /// Creates a parser over `lexer` and reads the first token so that the lookahead slot is
    /// populated before any parsing method runs.
    ///
    /// # Errors
    ///
    /// Returns the lexer's error if reading the first token fails.
    pub fn new(lexer: Lexer<'t>) -> Result<Self> {
        let mut parser = Parser {
            lexer,
            curr_token: None,
            skip_newlines: false,
        };
        parser.adv_token()?;
        Ok(parser)
    }

    /// Returns `true` once there is nothing left to parse.
    ///
    /// The parser always holds one token of lookahead, so the lexer reaches the end of its
    /// input one token before the parser does. Consulting only the lexer would report EOF
    /// while the final token is still waiting in `curr_token` (for single-token input that
    /// would be immediately after construction), so both conditions are required.
    pub fn is_eof(&self) -> bool {
        self.curr_token.is_none() && self.lexer.is_eof()
    }

    /// Returns the start position of the current lookahead token.
    ///
    /// When there is no current token this is the start of `Span::default()`.
    pub fn pos(&self) -> Pos {
        self.span().start
    }
}
////////////////////////////////////////////////////////////////////////////////
// Parsing functions
////////////////////////////////////////////////////////////////////////////////
/// Generates a parser method `$name` for one binary-operator precedence level.
///
/// * `$name` - name of the generated method.
/// * `$op_tokens` - slice of `TokenKind`s accepted as operators at this level.
/// * `$next` - method that parses the operands (the next-tighter precedence level).
///
/// The generated method parses the right-hand side by calling itself, so a chain of
/// operators at the same level nests to the right: `a - b - c` becomes `a - (b - c)`.
macro_rules! bin_expr {
    ($name:ident, $op_tokens:expr, $next:ident) => {
        fn $name(&mut self) -> Result<Expr> {
            let lhs = self.$next()?;
            let op_token = self.match_token_where(|t| $op_tokens.contains(&t.kind()))?;
            match op_token {
                // No operator at this level: the operand is the whole expression.
                None => Ok(lhs),
                Some(token) => {
                    let op = BinOp::from(token);
                    // Recurse into the same level for the remainder of the chain.
                    let rhs = self.$name()?;
                    let span = lhs.span().union(rhs.span());
                    Ok(BinExpr { lhs, op, rhs, span }.into())
                }
            }
        }
    };
}
impl<'t> Parser<'t> {
/// Parses one complete expression starting at the current token.
///
/// This is the entry point of the expression grammar; it starts at the additive level,
/// which is the loosest-binding level defined here.
pub fn next_expr(&mut self) -> Result<Expr> {
    let expr = self.next_bin_add_expr()?;
    Ok(expr)
}
// Additive level: operands are multiplicative expressions joined by `Plus` / `Minus`.
bin_expr!(
next_bin_add_expr,
&[TokenKind::Plus, TokenKind::Minus],
next_bin_mul_expr
);
// Multiplicative level: operands are unary expressions joined by `FSlash` / `Splat`.
bin_expr!(
next_bin_mul_expr,
&[TokenKind::FSlash, TokenKind::Splat],
next_un_expr
);
/// Parses a unary expression: any number of prefix operators from `UN_EXPR_START`
/// followed by a base expression.
///
/// The span of the result runs from the operator token to the end of its operand.
fn next_un_expr(&mut self) -> Result<Expr> {
    let op_token = match self.match_token_where(|t| UN_EXPR_START.contains(&t.kind()))? {
        Some(token) => token,
        // No prefix operator here, so this is a plain base expression.
        None => return self.next_base_expr(),
    };
    // Prefix operators stack (`--x`), hence the recursion into this same method.
    let operand = self.next_un_expr()?;
    let span = op_token.span().union(operand.span());
    Ok(UnExpr {
        op: op_token.kind().into(),
        expr: operand,
        span,
    }
    .into())
}
fn next_base_expr(&mut self) -> Result<Expr> {
let token =
self.expect_token_where(|t| BASE_EXPR_START.contains(&t.kind()), "base expression")?;
let expr: Expr = match token.kind() {
TokenKind::Ident => BaseExpr {
kind: BaseExprKind::Ident,
span: token.span(),
}
.into(),
TokenKind::Num => BaseExpr {
kind: BaseExprKind::Num,
span: token.span(),
}
.into(),
TokenKind::Str => BaseExpr {
kind: BaseExprKind::Str,
span: token.span(),
}
.into(),
TokenKind::Sym => BaseExpr {
kind: BaseExprKind::Sym,
span: token.span(),
}
.into(),
TokenKind::LBracket => {
let expr_list = self.next_expr_list(TokenKind::RBracket)?;
let end_token =
self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?;
let span = token.span().union(end_token.span());
BaseExpr {
kind: BaseExprKind::List(expr_list),
span,
}
.into()
}
TokenKind::LBrace => todo!(),
TokenKind::LParen => {
let first = self.next_expr()?;
if let Some(_) = self.match_token_kind(TokenKind::Comma)? {
let mut list = self.next_expr_list(TokenKind::RParen)?;
let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?;
let span = first.span().union(end_token.span());
list.insert(0, first);
Expr::Base(
BaseExpr {
kind: BaseExprKind::Tuple(list),
span,
}
.into(),
)
} else {
self.expect_token_kind(TokenKind::RParen, "end of expression")?;
first
}
}
_ => unreachable!(),
};
Ok(expr)
}
/// Parses a comma-separated list of expressions, stopping when EOF or the `stop_before`
/// token is reached. A trailing comma before the stop token is accepted.
///
/// The stop token itself is not consumed; the caller is expected to match it, which also
/// lets the caller report a missing closing delimiter in its own terms.
///
/// # Errors
///
/// Propagates any error from parsing an element or from advancing the lexer.
fn next_expr_list(&mut self, stop_before: TokenKind) -> Result<Vec<Expr>> {
    let mut list_items = Vec::new();
    // `curr_token` is `None` at EOF. Without this check the loop would try to parse an
    // element from nothing and fail with "expected base expression" instead of letting the
    // caller report the missing closing delimiter.
    while self.curr_token.is_some() && !self.is_token_kind(stop_before) {
        list_items.push(self.next_expr()?);
        // Elements must be separated by commas; anything else ends the list.
        if self.match_token_kind(TokenKind::Comma)?.is_none() {
            break;
        }
    }
    Ok(list_items)
}
////////////////////////////////////////////////////////////////////////////////
// Token matching functions
////////////////////////////////////////////////////////////////////////////////
/// Reads the next token from the lexer into the lookahead slot and returns the token that
/// was there before.
///
/// # Errors
///
/// Returns the lexer's error; in that case the lookahead slot is left untouched.
fn adv_token(&mut self) -> Result<Option<Token>> {
    let upcoming = self.lexer.next_token()?;
    let previous = mem::replace(&mut self.curr_token, upcoming);
    Ok(previous)
}
/// Consumes and returns the current token if `pred` accepts it.
///
/// Returns `Ok(None)` without advancing when the predicate rejects the token or when
/// there is no current token.
fn match_token_where<P>(&mut self, pred: P) -> Result<Option<Token>>
where
    P: Fn(Token) -> bool,
{
    if !self.is_token_match(pred) {
        return Ok(None);
    }
    self.adv_token()
}
/// Consumes and returns the current token if it is of the given `kind`; otherwise leaves
/// the parser where it is and returns `Ok(None)`.
fn match_token_kind(&mut self, kind: TokenKind) -> Result<Option<Token>> {
    self.match_token_where(|token| token.kind() == kind)
}
/// Tests the current token against `pred` without consuming it.
///
/// Returns `false` when there is no current token.
fn is_token_match<P>(&self, pred: P) -> bool
where
    P: Fn(Token) -> bool,
{
    self.curr_token.map_or(false, pred)
}
/// Reports whether the current token exists and is of the given `kind`, without
/// consuming it.
fn is_token_kind(&self, kind: TokenKind) -> bool {
    matches!(self.curr_token, Some(token) if token.kind() == kind)
}
fn expect_token_where<P>(&mut self, pred: P, expected: impl ToString) -> Result<Token>
where
P: Fn(Token) -> bool,
{
self.match_token_where(pred)?
.ok_or_else(|| Error::ExpectedGot {
expected: expected.to_string(),
got: self
.curr_token
.map(|token| token.kind().to_string())
.unwrap_or_else(|| "EOF".to_string()),
pos: self.pos(),
})
}
/// Consumes and returns the current token, which must be of the given `kind`.
///
/// # Errors
///
/// Returns `Error::ExpectedGot`, described by `expected`, when the current token has a
/// different kind or there is no current token.
fn expect_token_kind(&mut self, kind: TokenKind, expected: impl ToString) -> Result<Token> {
    let has_kind = |token: Token| token.kind() == kind;
    self.expect_token_where(has_kind, expected)
}
}
impl<'t> Spanned for Parser<'t> {
    /// Returns the span of the current lookahead token, or `Span::default()` when there is
    /// no current token.
    fn span(&self) -> Span {
        match self.curr_token.as_ref() {
            Some(token) => Spanned::span(token),
            None => Span::default(),
        }
    }
}
impl<'t> TryFrom<Lexer<'t>> for Parser<'t> {
    type Error = Error;

    /// Builds a parser from an existing lexer; equivalent to `Parser::new`.
    fn try_from(lexer: Lexer<'t>) -> Result<Self> {
        Self::new(lexer)
    }
}
impl<'t> TryFrom<&'t str> for Parser<'t> {
    type Error = Error;

    /// Builds a parser directly from source text by wrapping it in a fresh `Lexer`.
    fn try_from(text: &'t str) -> Result<Self> {
        let lexer = Lexer::new(text);
        Self::new(lexer)
    }
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_adv_token() {
    // Token kinds in the order they appear in the input below.
    let kinds = [
        TokenKind::Num,
        TokenKind::Ident,
        TokenKind::Sym,
        TokenKind::Str,
    ];
    let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap();
    for &expected in kinds.iter() {
        // Each advance hands back the token that was the lookahead until now.
        let advanced = parser.adv_token().unwrap();
        assert_eq!(advanced.unwrap().kind(), expected);
    }
    assert!(parser.is_eof());
}
#[test]
fn test_match_token_where() {
    let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap();
    let kinds = [
        TokenKind::Num,
        TokenKind::Ident,
        TokenKind::Sym,
        TokenKind::Str,
    ];
    for &kind in kinds.iter() {
        // The first attempt consumes the token of this kind...
        assert!(matches!(
            parser.match_token_where(|token| token.kind() == kind),
            Ok(Some(_))
        ));
        // ...so an immediate second attempt must find nothing and consume nothing.
        assert!(matches!(
            parser.match_token_where(|token| token.kind() == kind),
            Ok(None)
        ));
    }
    assert!(parser.is_eof());
}
// Builds a parser from `$text`, then runs a sequence of `method, expected` pairs against it,
// asserting that each method call (unwrapped) equals the expected value. The pairs are
// evaluated in order on the same parser, so each one continues where the previous stopped.
macro_rules! test_parser {
// Entry point: source text followed by one or more `method, expected` pairs.
($text:expr, $($tail:tt)+) => {{
let mut parser = Parser::try_from($text).unwrap();
test_parser!(@TAIL, &mut parser, $($tail)+);
}};
// Internal: exactly one pair left.
(@TAIL, $parser:expr, $method:ident, $expected:expr) => {{
assert_eq!($parser.$method().unwrap(), $expected);
}};
// Internal: check the first pair, then recurse on the rest.
(@TAIL, $parser:expr, $method:ident, $expected:expr, $($tail:tt)+) => {{
test_parser!(@TAIL, $parser, $method, $expected);
test_parser!(@TAIL, $parser, $($tail)*);
}};
// Internal: nothing left (with or without a trailing comma).
(@TAIL, $parser:expr) => {};
(@TAIL, $parser:expr,) => {};
}
/// Builds the expected binary-expression node `lhs op rhs` with a default span.
fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
    let span = Default::default();
    let node = BinExpr { lhs, op, rhs, span };
    Expr::Bin(node.into())
}
/// Builds the expected unary-expression node `op expr` with a default span.
fn un_expr(op: UnOp, expr: Expr) -> Expr {
    let span = Default::default();
    let node = UnExpr { op, expr, span };
    Expr::Un(node.into())
}
/// Builds the expected base-expression node of the given `kind` with a default span.
fn base_expr(kind: BaseExprKind) -> Expr {
    let span = Default::default();
    let node = BaseExpr { kind, span };
    Expr::Base(node)
}
#[test]
fn test_base_expr() {
    /// Expected tree for a list holding a number, an identifier, a string and a symbol.
    fn four_atom_list() -> Expr {
        base_expr(BaseExprKind::List(vec![
            base_expr(BaseExprKind::Num),
            base_expr(BaseExprKind::Ident),
            base_expr(BaseExprKind::Str),
            base_expr(BaseExprKind::Sym),
        ]))
    }

    // Four atoms in a row, each parsed by its own `next_expr` call.
    test_parser!(
        "1 x 'value' :sym",
        next_expr,
        base_expr(BaseExprKind::Num),
        next_expr,
        base_expr(BaseExprKind::Ident),
        next_expr,
        base_expr(BaseExprKind::Str),
        next_expr,
        base_expr(BaseExprKind::Sym)
    );
    // The same atoms as a list literal.
    test_parser!("[1, x, 'value', :sym]", next_expr, four_atom_list());
    // A trailing comma must not change the result.
    test_parser!("[1, x, 'value', :sym,]", next_expr, four_atom_list());
}
#[test]
fn test_compound_expr() {
    fn num() -> Expr {
        base_expr(BaseExprKind::Num)
    }

    /// Expected tree for `1 + 2 * 3 - 4 / 5`, grouped as `1 + ((2 * 3) - (4 / 5))`:
    /// operators on the same precedence level nest to the right.
    fn mixed_arith() -> Expr {
        let product = bin_expr(num(), BinOp::Times, num());
        let quotient = bin_expr(num(), BinOp::Div, num());
        bin_expr(
            num(),
            BinOp::Plus,
            bin_expr(product, BinOp::Minus, quotient),
        )
    }

    /// Expected tree for `1 - -1 * 8`, grouped as `1 - ((-1) * 8)`.
    fn minus_negated_product() -> Expr {
        let negated = un_expr(UnOp::Minus, num());
        bin_expr(
            num(),
            BinOp::Minus,
            bin_expr(negated, BinOp::Times, num()),
        )
    }

    // Each source line is parsed by one `next_expr` call; the spaced and unspaced variants
    // of the same expression must produce the same tree.
    test_parser!(
        r#"
x + 2
1 + 2 * 3 - 4 / 5
1+2*3-4/5
1 - -1 * 8
1--1*8
"#,
        // x + 2
        next_expr,
        bin_expr(base_expr(BaseExprKind::Ident), BinOp::Plus, num()),
        // 1 + 2 * 3 - 4 / 5
        next_expr,
        mixed_arith(),
        // 1+2*3-4/5
        next_expr,
        mixed_arith(),
        // 1 - -1 * 8
        next_expr,
        minus_negated_product(),
        // 1--1*8
        next_expr,
        minus_negated_product()
    );
}
}