Add expression parsing

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-05-02 18:42:01 -04:00
parent 28d29c2270
commit d9edf21d16
9 changed files with 542 additions and 101 deletions

View File

@@ -1,5 +1,5 @@
mod syn;
mod util;
//mod util;
fn main() {
println!("Hello, world!");

View File

@@ -1,11 +1,29 @@
use crate::syn::{op::*, span::*};
use derivative::Derivative;
/// A statement node of the syntax tree.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Stmt {
/// An assignment built from two expressions.
// NOTE(review): presumably (target, value) — the operand order is not established in this view; confirm against the statement parser.
Assign(Expr, Expr),
/// A bare expression used as a statement.
Expr(Expr),
}
/// An expression node of the syntax tree.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Expr {
/// A base expression, stored inline.
Base(BaseExpr),
/// A binary operation; boxed because `BinExpr` itself contains `Expr` operands.
Bin(Box<BinExpr>),
/// A unary operation; boxed because `UnExpr` itself contains an `Expr` operand.
Un(Box<UnExpr>),
}
impl Spanned for Expr {
    /// Reports the span of the wrapped node, whichever variant this expression is.
    fn span(&self) -> Span {
        match self {
            Self::Base(base) => base.span(),
            Self::Bin(binary) => binary.span(),
            Self::Un(unary) => unary.span(),
        }
    }
}
impl From<UnExpr> for Expr {
fn from(un: UnExpr) -> Self {
Expr::Un(Box::new(un))
@@ -24,18 +42,38 @@ impl From<BaseExpr> for Expr {
}
}
#[derive(Derivative, Clone, PartialEq, Eq)]
#[derivative(Debug)]
pub struct BinExpr {
pub lhs: Expr,
pub op: BinOp,
pub rhs: Expr,
}
pub struct UnExpr {
pub op: UnOp,
pub expr: Expr,
#[derivative(Debug = "ignore")]
pub span: Span,
}
impl Spanned for BinExpr {
/// Returns the span stored on this binary expression.
fn span(&self) -> Span {
self.span
}
}
/// A unary operation applied to a single operand expression.
#[derive(Derivative, Clone, PartialEq, Eq)]
#[derivative(Debug)]
pub struct UnExpr {
/// The unary operator.
pub op: UnOp,
/// The operand the operator applies to.
pub expr: Expr,
/// Source span of this node; left out of the derived `Debug` output.
#[derivative(Debug = "ignore")]
pub span: Span,
}
impl Spanned for UnExpr {
/// Returns the span stored on this unary expression.
fn span(&self) -> Span {
self.span
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BaseExprKind {
Ident,
Num,
@@ -46,7 +84,16 @@ pub enum BaseExprKind {
Tuple(Vec<Expr>),
}
/// A base expression: a `BaseExprKind` tagged with the span it came from.
#[derive(Derivative, Clone, PartialEq, Eq)]
#[derivative(Debug)]
pub struct BaseExpr {
/// Which kind of base expression this is.
pub kind: BaseExprKind,
/// Source span of this node; left out of the derived `Debug` output.
#[derivative(Debug = "ignore")]
pub span: Span,
}
impl Spanned for BaseExpr {
/// Returns the span stored on this base expression.
fn span(&self) -> Span {
self.span
}
}

View File

@@ -72,6 +72,23 @@ impl<'t> Lexer<'t> {
|(?P<sym>:[a-zA-Z_][a-zA-Z0-9_]*)
|(?P<hex_num>0[xX][0-9a-fA-F]+)
|(?P<dec_num>[0-9]+)
|(?P<dq_str>"([^\\"]|\\[ntr0"'])*")
|(?P<sq_str>'([^\\"]|\\[ntr0"'])*')
|(?P<arrow>->)
|(?P<eqeq>==)
|(?P<bangeq>!=)
|(?P<lteq><=)
|(?P<gteq>>=)
|(?P<lt><)
|(?P<gt>>)
|(?P<eq>=)
|(?P<plus>\+)
|(?P<minus>-)
|(?P<splat>\*)
|(?P<fslash>/)
|(?P<bang>!)
|(?P<lparen>\()
|(?P<rparen>\))
|(?P<lbracket>\[)
@@ -79,14 +96,6 @@ impl<'t> Lexer<'t> {
|(?P<lbrace>\{)
|(?P<rbrace>\})
|(?P<comma>,)
|(?P<arrow>->)
|(?P<eq>=)
|(?P<plus>\+)
|(?P<minus>-)
|(?P<splat>\*)
|(?P<fslash>/)
|(?P<dq_str>"([^\\"]|\\[ntr0"'])*")
|(?P<sq_str>'([^\\"]|\\[ntr0"'])*')
"#).ignore_whitespace(true)
.build()
.unwrap();
@@ -113,8 +122,16 @@ impl<'t> Lexer<'t> {
("minus", TokenKind::Minus),
("splat", TokenKind::Splat),
("fslash", TokenKind::FSlash),
("bang", TokenKind::Bang),
("arrow", TokenKind::Arrow),
("eqeq", TokenKind::EqEq),
("bangeq", TokenKind::BangEq),
("lteq", TokenKind::LtEq),
("gteq", TokenKind::GtEq),
("lt", TokenKind::Lt),
("gt", TokenKind::Gt),
("eq", TokenKind::Eq),
];
@@ -160,6 +177,25 @@ impl<'t> Lexer<'t> {
mod test {
use super::*;
// Test helper: lexes `$text` and checks the tokens it produces.
macro_rules! test_token {
// Multi-token form: after the text comes one `kind, text` pair per expected token, in order.
// Each pair is compared against the next token's kind and the slice of the input it covers.
($text:expr, $($token_kind:expr, $token_text:expr),+ $(,)?) => {{
let text = $text;
let mut lexer = Lexer::new(text);
$(
let token = lexer.next_token().expect("token").expect("token");
assert_eq!(token.kind(), $token_kind);
assert_eq!(token.text_at(text), $token_text);
)+
// Once every listed token has been read, the lexer must report end of input.
assert!(lexer.is_eof());
}};
// Single-token shorthand: the whole input is expected to be exactly one token of `$token_kind`.
($text:expr, $token_kind:expr) => {{
test_token!($text, $token_kind, $text);
}};
}
#[test]
fn test_next_token_eof() {
let mut lexer = Lexer::new("");
@@ -175,27 +211,16 @@ mod test {
assert!(lexer.is_eof());
}
macro_rules! test_token {
($text:expr, $token_kind:expr, $token_text:expr) => {{
let text = $text;
let mut lexer = Lexer::new(text);
let token = lexer.next_token().expect("token").expect("token");
assert_eq!(token.kind(), $token_kind);
assert_eq!(token.text_at(text), $token_text);
}};
($text:expr, $token_kind:expr) => {{
test_token!($text, $token_kind, $text);
}};
}
#[test]
fn test_ident_token() {
test_token!("ident", TokenKind::Ident);
test_token!("OtherIdent", TokenKind::Ident);
test_token!("other_ident", TokenKind::Ident);
test_token!("ident1234", TokenKind::Ident);
test_token!("RETURN", TokenKind::Ident);
test_token!(
"ident OtherIdent other_ident ident1234 RETURN",
TokenKind::Ident, "ident",
TokenKind::Ident, "OtherIdent",
TokenKind::Ident, "other_ident",
TokenKind::Ident, "ident1234",
TokenKind::Ident, "RETURN",
);
}
#[test]
@@ -205,16 +230,19 @@ mod test {
#[test]
fn test_num_token() {
test_token!("1234", TokenKind::Num);
test_token!("4321", TokenKind::Num);
test_token!("123498765", TokenKind::Num);
test_token!("432156789", TokenKind::Num);
test_token!("0xdcbaBEEF", TokenKind::Num);
test_token!("0xabcdFEED", TokenKind::Num);
test_token!("0XdcbaBEEF", TokenKind::Num);
test_token!("0XabcdFEED", TokenKind::Num);
test_token!("0X123456789DCBAbeef", TokenKind::Num);
test_token!("0xABCDfeed192837465", TokenKind::Num);
test_token!(
"1234 4321 123498765 432156789 0xdcbaBEEF 0xabcdFEED 0XdcbaBEEF 0XabcdFEED 0X123456789DCBAbeef 0xABCDfeed192837465",
TokenKind::Num, "1234",
TokenKind::Num, "4321",
TokenKind::Num, "123498765",
TokenKind::Num, "432156789",
TokenKind::Num, "0xdcbaBEEF",
TokenKind::Num, "0xabcdFEED",
TokenKind::Num, "0XdcbaBEEF",
TokenKind::Num, "0XabcdFEED",
TokenKind::Num, "0X123456789DCBAbeef",
TokenKind::Num, "0xABCDfeed192837465",
);
}
#[test]
@@ -227,14 +255,16 @@ mod test {
#[test]
fn test_sym_token() {
test_token!(":symbol", TokenKind::Sym);
test_token!(":OtherSymbol", TokenKind::Sym);
test_token!(":other_symbol", TokenKind::Sym);
test_token!(":symbol1234", TokenKind::Sym);
test_token!(":symbol :OtherSymbol :other_symbol :symbol1234",
TokenKind::Sym, ":symbol",
TokenKind::Sym, ":OtherSymbol",
TokenKind::Sym, ":other_symbol",
TokenKind::Sym, ":symbol1234",
);
}
#[test]
fn test_single_char_symbols() {
fn test_symbols() {
test_token!("(", TokenKind::LParen);
test_token!(")", TokenKind::RParen);
test_token!("{", TokenKind::LBrace);
@@ -246,11 +276,15 @@ mod test {
test_token!("-", TokenKind::Minus);
test_token!("*", TokenKind::Splat);
test_token!("/", TokenKind::FSlash);
}
test_token!("!", TokenKind::Bang);
#[test]
fn test_op_tokens() {
test_token!("=", TokenKind::Eq);
test_token!("!=", TokenKind::BangEq);
test_token!("==", TokenKind::EqEq);
test_token!("<=", TokenKind::LtEq);
test_token!(">=", TokenKind::GtEq);
test_token!("<", TokenKind::Lt);
test_token!(">", TokenKind::Gt);
test_token!("->", TokenKind::Arrow);
}
}

View File

@@ -1,6 +1,51 @@
use crate::syn::token::*;
/// Unary (prefix) operators.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnOp {
/// Converted from `TokenKind::Plus`.
Plus,
/// Converted from `TokenKind::Minus`.
Minus,
/// Converted from `TokenKind::Bang`.
Not,
}
impl From<TokenKind> for UnOp {
    /// Maps an operator token kind onto the unary operator it spells.
    ///
    /// # Panics
    ///
    /// Panics when `other` is anything except `Plus`, `Minus` or `Bang`.
    fn from(other: TokenKind) -> Self {
        match other {
            TokenKind::Bang => Self::Not,
            TokenKind::Minus => Self::Minus,
            TokenKind::Plus => Self::Plus,
            unsupported => panic!("{:?} cannot be converted to a unop", unsupported),
        }
    }
}
impl From<Token> for UnOp {
    /// Converts a token by its kind alone; panics exactly when the kind conversion does.
    fn from(other: Token) -> Self {
        let kind = other.kind();
        Self::from(kind)
    }
}
/// Binary (infix) arithmetic operators.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinOp {
/// Converted from `TokenKind::Plus`.
Plus,
/// Converted from `TokenKind::Minus`.
Minus,
/// Converted from `TokenKind::Splat`.
Times,
/// Converted from `TokenKind::FSlash`.
Div,
}
impl From<TokenKind> for BinOp {
    /// Maps an operator token kind onto the binary operator it spells.
    ///
    /// # Panics
    ///
    /// Panics when `other` is anything except `Plus`, `Minus`, `Splat` or `FSlash`.
    fn from(other: TokenKind) -> Self {
        match other {
            TokenKind::Splat => Self::Times,
            TokenKind::FSlash => Self::Div,
            TokenKind::Plus => Self::Plus,
            TokenKind::Minus => Self::Minus,
            unsupported => panic!("{:?} cannot be converted to a binop", unsupported),
        }
    }
}
impl From<Token> for BinOp {
    /// Converts a token by its kind alone; panics exactly when the kind conversion does.
    fn from(other: Token) -> Self {
        let kind = other.kind();
        Self::from(kind)
    }
}

View File

@@ -1,8 +1,7 @@
#![allow(dead_code)]
use crate::syn::{ast::*, error::*, lexer::Lexer, span::*, token::*};
use crate::syn::{ast::*, error::*, lexer::Lexer, op::*, span::*, token::*};
use std::{convert::TryFrom, mem};
const EXPR_START: &'static [TokenKind] = &[
const BASE_EXPR_START: &[TokenKind] = &[
TokenKind::Ident,
TokenKind::Num,
TokenKind::Str,
@@ -10,19 +9,14 @@ const EXPR_START: &'static [TokenKind] = &[
TokenKind::LParen,
TokenKind::LBracket,
TokenKind::LBrace,
// TODO unary tokens
];
const VALUE_EXPR_START: &'static [TokenKind] = &[
TokenKind::Ident,
TokenKind::Num,
TokenKind::Str,
TokenKind::Sym,
];
/// Token kinds that can open a unary expression (the prefix operators `+`, `-` and `!`).
const UN_EXPR_START: &[TokenKind] = &[TokenKind::Plus, TokenKind::Minus, TokenKind::Bang];
/// Parser state layered on top of a `Lexer`.
pub struct Parser<'t> {
// Source of tokens.
lexer: Lexer<'t>,
// The current token, which the `is_token_*` / `match_token_*` helpers inspect.
// NOTE(review): presumably `None` once the lexer is exhausted — confirm in `adv_token`.
curr_token: Option<Token>,
// NOTE(review): no reader of this flag is visible in this chunk; presumably controls newline handling — confirm.
skip_newlines: bool,
}
impl<'t> Parser<'t> {
@@ -30,6 +24,7 @@ impl<'t> Parser<'t> {
let mut parser = Parser {
lexer,
curr_token: None,
skip_newlines: false,
};
parser.adv_token()?;
Ok(parser)
@@ -42,27 +37,63 @@ impl<'t> Parser<'t> {
pub fn pos(&self) -> Pos {
self.span().start
}
}
////////////////////////////////////////////////////////////////////////////////
// Parsing functions
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Parsing functions
////////////////////////////////////////////////////////////////////////////////
// Generates one precedence level of the binary-expression parser as a method named `$name`.
//
// * `$op_tokens` - the token kinds accepted as operators at this level.
// * `$next`      - the method that parses the next (tighter-binding) level.
//
// NOTE(review): the right operand is parsed by calling `$name` again, so a chain of operators
// at the same level nests to the right: `1 - 2 - 3` is built as `1 - (2 - 3)`. The expected
// trees in `test_compound_expr` encode this shape. If conventional left associativity is
// wanted, this needs a loop that folds into `lhs`, and those expectations need updating.
macro_rules! bin_expr {
($name:ident, $op_tokens:expr, $next:ident) => {
fn $name(&mut self) -> Result<Expr> {
// The left operand always comes from the tighter-binding level.
let lhs = self.$next()?;
if let Some(token) = self.match_token_where(|t| $op_tokens.contains(&t.kind()))? {
let op = BinOp::from(token);
// Recurse at the same level for the right operand (see the associativity note above).
let rhs = self.$name()?;
// The node covers everything from the left operand through the right operand.
let span = lhs.span().union(rhs.span());
Ok(BinExpr { lhs, op, rhs, span }.into())
} else {
// No operator at this level: the left operand is the whole expression.
Ok(lhs)
}
}
};
}
impl<'t> Parser<'t> {
pub fn next_expr(&mut self) -> Result<Expr> {
self.next_bin_expr()
self.next_bin_add_expr()
}
fn next_bin_expr(&mut self) -> Result<Expr> {
let lhs = self.next_un_expr()?;
todo!()
}
// Additive level (`+`, `-`): operands are parsed at the multiplicative level, so `*` and `/`
// bind tighter than `+` and `-`.
bin_expr!(
next_bin_add_expr,
&[TokenKind::Plus, TokenKind::Minus],
next_bin_mul_expr
);
// Multiplicative level (`/`, `*`): operands are unary expressions.
bin_expr!(
next_bin_mul_expr,
&[TokenKind::FSlash, TokenKind::Splat],
next_un_expr
);
fn next_un_expr(&mut self) -> Result<Expr> {
todo!()
if let Some(un_op) = self.match_token_where(|t| UN_EXPR_START.contains(&t.kind()))? {
let start = un_op.span();
let expr = self.next_un_expr()?;
let end = expr.span();
Ok(UnExpr {
op: un_op.kind().into(),
expr,
span: start.union(end),
}
.into())
} else {
self.next_base_expr()
}
}
fn next_base_expr(&mut self) -> Result<Expr> {
let token =
self.expect_token_where(|t| VALUE_EXPR_START.contains(&t.kind()), "base expression")?;
self.expect_token_where(|t| BASE_EXPR_START.contains(&t.kind()), "base expression")?;
let expr: Expr = match token.kind() {
TokenKind::Ident => BaseExpr {
kind: BaseExprKind::Ident,
@@ -84,30 +115,57 @@ impl<'t> Parser<'t> {
span: token.span(),
}
.into(),
TokenKind::LBracket => {
let expr_list = self.next_expr_list(TokenKind::RBracket)?;
let end_token =
self.expect_token_kind(TokenKind::RBracket, "end of list (right bracket)")?;
let span = token.span().union(end_token.span());
BaseExpr {
kind: BaseExprKind::List(expr_list),
span,
}
.into()
}
TokenKind::LBrace => todo!(),
// A left parenthesis opens either a grouped expression or a tuple; a comma after the
// first expression is what makes it a tuple.
TokenKind::LParen => {
    let first = self.next_expr()?;
    if self.match_token_kind(TokenKind::Comma)?.is_some() {
        // Tuple: `first` followed by zero or more comma-separated expressions.
        let mut items = vec![first];
        items.extend(self.next_expr_list(TokenKind::RParen)?);
        let end_token = self.expect_token_kind(TokenKind::RParen, "end of tuple")?;
        // Span the whole literal, opening parenthesis included, the same way the list
        // arm spans from its opening bracket.
        let span = token.span().union(end_token.span());
        Expr::Base(BaseExpr {
            kind: BaseExprKind::Tuple(items),
            span,
        })
    } else {
        // Grouped expression: the inner expression is returned as-is, keeping its own span.
        self.expect_token_kind(TokenKind::RParen, "end of expression")?;
        first
    }
}
_ => unreachable!(),
};
Ok(expr)
}
fn next_list(&mut self) -> Result<Expr> {
let start_token = self.expect_token_where(|t| t.kind() == TokenKind::LBracket, "start of list (left bracket)")?;
/// Gets a list of expressions separated by commas, stopping when EOF or specified end token is
/// reached.
fn next_expr_list(&mut self, stop_before: TokenKind) -> Result<Vec<Expr>> {
let mut list_items = Vec::new();
while ! matches!(self.curr_token.map(|t| t.kind()), Some(TokenKind::RBrace) | None) {
let expr = self.next_expr()?;
while !self.is_token_kind(stop_before) {
list_items.push(self.next_expr()?);
// match a comma, otherwise exit the loop
if self.match_token_kind(TokenKind::Comma)?.is_none() {
break;
}
}
let end_token = self.expect_token_where(
|t| t.kind() == TokenKind::RBracket,
"end of list (right bracket)",
)?;
let expr = BaseExpr {
kind: BaseExprKind::List(list_items),
span: start_token.span().union(end_token.span()),
};
Ok(expr.into())
Ok(list_items)
}
////////////////////////////////////////////////////////////////////////////////
@@ -123,12 +181,35 @@ impl<'t> Parser<'t> {
where
P: Fn(Token) -> bool,
{
match self.curr_token {
Some(curr) if (pred)(curr) => self.adv_token(),
_ => Ok(None),
if self.is_token_match(pred) {
self.adv_token()
} else {
Ok(None)
}
}
/// Advances past the current token when it has the given kind and returns whatever
/// `adv_token` yields; otherwise leaves the parser untouched and returns `Ok(None)`.
fn match_token_kind(&mut self, kind: TokenKind) -> Result<Option<Token>> {
    if !self.is_token_kind(kind) {
        return Ok(None);
    }
    self.adv_token()
}
/// Tests the current token against `pred` without consuming it.
///
/// Returns `false` when there is no current token.
fn is_token_match<P>(&self, pred: P) -> bool
where
    P: Fn(Token) -> bool,
{
    self.curr_token.map_or(false, pred)
}
/// Reports whether there is a current token and it has the given kind; consumes nothing.
fn is_token_kind(&self, kind: TokenKind) -> bool {
    matches!(self.curr_token, Some(tok) if tok.kind() == kind)
}
fn expect_token_where<P>(&mut self, pred: P, expected: impl ToString) -> Result<Token>
where
P: Fn(Token) -> bool,
@@ -143,6 +224,10 @@ impl<'t> Parser<'t> {
pos: self.pos(),
})
}
/// Like `expect_token_where`, with the predicate fixed to "has exactly this kind".
///
/// `expected` is passed through unchanged as the description of what was expected.
fn expect_token_kind(&mut self, kind: TokenKind, expected: impl ToString) -> Result<Token> {
    let has_kind = |tok: Token| tok.kind() == kind;
    self.expect_token_where(has_kind, expected)
}
}
impl<'t> Spanned for Parser<'t> {
@@ -231,19 +316,177 @@ mod test {
assert!(parser.is_eof());
}
// Test helper: builds a parser over `$text`, then runs a sequence of `method, expected` pairs
// against it, asserting that each parser method call returns `Ok` with the expected value.
macro_rules! test_parser {
// Entry point: create the parser and hand the remaining tokens to the `@TAIL` rules.
($text:expr, $($tail:tt)+) => {{
let mut parser = Parser::try_from($text).unwrap();
test_parser!(@TAIL, &mut parser, $($tail)+);
}};
// Final `method, expected` pair: call the method and compare its unwrapped result.
(@TAIL, $parser:expr, $method:ident, $expected:expr) => {{
assert_eq!($parser.$method().unwrap(), $expected);
}};
// A pair followed by more input: check this pair, then recurse on what is left.
(@TAIL, $parser:expr, $method:ident, $expected:expr, $($tail:tt)+) => {{
test_parser!(@TAIL, $parser, $method, $expected);
test_parser!(@TAIL, $parser, $($tail)*);
}};
// Nothing left to check, with or without a trailing comma.
(@TAIL, $parser:expr) => {};
(@TAIL, $parser:expr,) => {};
}
/// Builds a binary expression node with a default span, for use as an expected value.
fn bin_expr(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
    let node = BinExpr {
        lhs,
        op,
        rhs,
        span: Default::default(),
    };
    Expr::Bin(Box::new(node))
}
/// Builds a unary expression node with a default span, for use as an expected value.
fn un_expr(op: UnOp, expr: Expr) -> Expr {
    let node = UnExpr {
        op,
        expr,
        span: Default::default(),
    };
    Expr::Un(Box::new(node))
}
/// Builds a base expression node of the given kind with a default span, for use as an
/// expected value.
fn base_expr(kind: BaseExprKind) -> Expr {
    let node = BaseExpr {
        kind,
        span: Default::default(),
    };
    Expr::Base(node)
}
#[test]
fn test_base_expr() {
let mut parser = Parser::try_from("1").unwrap();
assert!(matches!(
parser.next_base_expr(),
Ok(
Expr::Base(
BaseExpr {
kind: BaseExprKind::Num,
..
}
test_parser!(
"1 x 'value' :sym",
// 1
next_expr,
base_expr(BaseExprKind::Num),
// x
next_expr,
base_expr(BaseExprKind::Ident),
// 'value'
next_expr,
base_expr(BaseExprKind::Str),
// :sym
next_expr,
base_expr(BaseExprKind::Sym)
);
test_parser!(
"[1, x, 'value', :sym]",
next_expr,
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Ident),
base_expr(BaseExprKind::Str),
base_expr(BaseExprKind::Sym),
]))
);
test_parser!(
"[1, x, 'value', :sym,]",
next_expr,
base_expr(BaseExprKind::List(vec![
base_expr(BaseExprKind::Num),
base_expr(BaseExprKind::Ident),
base_expr(BaseExprKind::Str),
base_expr(BaseExprKind::Sym),
]))
);
}
#[test]
fn test_compound_expr() {
test_parser!(
r#"
x + 2
1 + 2 * 3 - 4 / 5
1+2*3-4/5
1 - -1 * 8
1--1*8
"#,
// x + 2
next_expr,
bin_expr(
base_expr(BaseExprKind::Ident),
BinOp::Plus,
base_expr(BaseExprKind::Num)
),
// 1 + 2 * 3 - 4 / 5
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
bin_expr(
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Times,
base_expr(BaseExprKind::Num)
),
BinOp::Minus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Div,
base_expr(BaseExprKind::Num)
)
)
),
//
// 1+2*3-4/5
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Plus,
bin_expr(
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Times,
base_expr(BaseExprKind::Num)
),
BinOp::Minus,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Div,
base_expr(BaseExprKind::Num)
)
)
),
// 1 - -1 * 8
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Minus,
bin_expr(
un_expr(UnOp::Minus, base_expr(BaseExprKind::Num)),
BinOp::Times,
base_expr(BaseExprKind::Num)
)
),
// 1--1*8
next_expr,
bin_expr(
base_expr(BaseExprKind::Num),
BinOp::Minus,
bin_expr(
un_expr(UnOp::Minus, base_expr(BaseExprKind::Num)),
BinOp::Times,
base_expr(BaseExprKind::Num)
)
)
));
);
//
}
}

View File

@@ -1,4 +1,7 @@
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
use std::fmt::{Display, Formatter, self};
#[derive(Debug, Clone, Copy, Eq)]
#[cfg_attr(not(test), derive(PartialEq))]
pub struct Pos {
pub source: usize,
pub line: usize,
@@ -7,6 +10,12 @@ pub struct Pos {
pub len: usize,
}
impl Display for Pos {
    /// Writes the position as `line L at C`, with both numbers shifted up by one from the
    /// stored `line` and `col`.
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        let (line, col) = (self.line + 1, self.col + 1);
        write!(fmt, "line {} at {}", line, col)
    }
}
impl Default for Pos {
fn default() -> Self {
Pos {
@@ -19,6 +28,13 @@ impl Default for Pos {
}
}
// Test-only equality: in test builds every `Pos` compares equal to every other `Pos`.
// NOTE(review): presumably so that syntax trees can be compared in tests without spelling out
// exact positions — confirm. Any test that relies on positions actually differing (or on
// `Span`'s `start == end` check) will see them as equal.
#[cfg(test)]
impl PartialEq for Pos {
fn eq(&self, _other: &Pos) -> bool {
true
}
}
impl Pos {
pub fn from_char(c: char, source: usize, line: usize, col: usize, byte: usize) -> Self {
Pos {
@@ -77,6 +93,18 @@ impl Span {
}
}
impl Display for Span {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
if self.start == self.end {
Display::fmt(&self.start, fmt)
} else if self.start.line == self.end.line {
write!(fmt, "line {} at {}-{}", self.start.line + 1, self.start.col + 1, self.end.col + 1)
} else {
write!(fmt, "lines {} to {}", self.start.line + 1, self.end.line + 1)
}
}
}
pub trait Spanned {
fn span(&self) -> Span;
@@ -92,6 +120,23 @@ impl Spanned for Span {
}
}
/// Pairs a spanned value with a borrowed string.
// NOTE(review): presumably `text` is the full source the span indexes into — confirm at the construction site.
pub struct Sourced<'t, T: Spanned> {
// Borrowed text handed to `text_at` by `Sourced::text`.
text: &'t str,
// The wrapped value; its span is reported as this wrapper's span.
inner: T,
}
impl<'t, T: Spanned> Sourced<'t, T> {
/// Returns the result of `text_at` applied to the stored text.
// NOTE(review): `text_at` is not defined in this view; presumably a `Spanned` helper that slices
// the given source by `self.span()` — confirm.
fn text(&self) -> &'t str {
self.text_at(self.text)
}
}
impl<T: Spanned> Spanned for Sourced<'_, T> {
/// Delegates to the wrapped value's span.
fn span(&self) -> Span {
self.inner.span()
}
}
#[cfg(test)]
mod test {
use super::*;

View File

@@ -19,11 +19,18 @@ pub enum TokenKind {
Comma,
Eq,
BangEq,
EqEq,
LtEq,
GtEq,
Lt,
Gt,
Arrow,
Plus,
Minus,
Splat,
FSlash,
Bang,
}
impl Display for TokenKind {
@@ -46,11 +53,18 @@ impl Display for TokenKind {
Comma => "comma",
Eq => "equals",
BangEq => "not equals",
EqEq => "double equals",
LtEq => "less than or equal to",
GtEq => "greater than or equal to",
Lt => "less than",
Gt => "greater than",
Arrow => "arrow",
Plus => "plus",
Minus => "minus",
Splat => "splat (or times)",
FSlash => "fslash (or divide)",
Bang => "not",
};
Display::fmt(s, fmt)
}