diff --git a/Cargo.lock b/Cargo.lock
index a158ba5..0e1d1b3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -21,12 +21,6 @@ version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
-[[package]]
-name = "maplit"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
-
 [[package]]
 name = "memchr"
 version = "2.3.3"
@@ -38,7 +32,6 @@ name = "not-python"
 version = "0.1.0"
 dependencies = [
  "lazy_static",
- "maplit",
  "regex",
  "snafu",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 653beca..55a7885 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,5 +9,4 @@ edition = "2018"
 [dependencies]
 snafu = "0.6.6"
 lazy_static = "1.4.0"
-maplit = "1.0.2"
 regex = "1.3.7"
diff --git a/src/syn/ast.rs b/src/syn/ast.rs
new file mode 100644
index 0000000..804fb99
--- /dev/null
+++ b/src/syn/ast.rs
@@ -0,0 +1,52 @@
+use crate::syn::{op::*, span::*};
+
+pub enum Expr {
+    Base(BaseExpr),
+    Bin(Box<BinExpr>),
+    Un(Box<UnExpr>),
+}
+
+impl From<UnExpr> for Expr {
+    fn from(un: UnExpr) -> Self {
+        Expr::Un(Box::new(un))
+    }
+}
+
+impl From<BinExpr> for Expr {
+    fn from(bin: BinExpr) -> Self {
+        Expr::Bin(Box::new(bin))
+    }
+}
+
+impl From<BaseExpr> for Expr {
+    fn from(base: BaseExpr) -> Self {
+        Expr::Base(base)
+    }
+}
+
+pub struct BinExpr {
+    pub lhs: Expr,
+    pub op: BinOp,
+    pub rhs: Expr,
+}
+
+pub struct UnExpr {
+    pub op: UnOp,
+    pub expr: Expr,
+    pub span: Span,
+}
+
+pub enum BaseExprKind {
+    Ident,
+    Num,
+    Str,
+    Sym,
+    List(Vec<Expr>),
+    Object(Vec<(Expr, Expr)>),
+    Tuple(Vec<Expr>),
+}
+
+pub struct BaseExpr {
+    pub kind: BaseExprKind,
+    pub span: Span,
+}
diff --git a/src/syn/lexer.rs b/src/syn/lexer.rs
index 73de156..869ed60 100644
--- a/src/syn/lexer.rs
+++ b/src/syn/lexer.rs
@@ -1,46 +1,7 @@
-use crate::{
-    syn::{error::*, span::*, token::*},
-    util::LazyString,
-};
+use crate::syn::{error::*, span::*, token::*};
 use 
lazy_static::lazy_static; -use maplit::hashmap; use regex::{Regex, RegexBuilder}; -use std::{collections::HashMap, mem, str::Chars}; - -const IDENT_START_CHARS: &'static [char] = &[ - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', - 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_', -]; -const IDENT_CHARS: &'static [char] = &[ - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', - 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', - 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', - 'V', 'W', 'X', 'Y', 'Z', '_', -]; - -const DEC_NUM_CHARS: &'static [char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; - -const HEX_NUM_CHARS: &'static [char] = &[ - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', - 'D', 'E', 'F', -]; - -const STR_QUOTE_CHARS: &'static [char] = &['"', '\'']; - -const OP_CHARS: &'static [char] = &['=', '+', '*', '-', '/', '>', '<', '~', '!', '%', '^']; - -lazy_static! { - static ref OPS: HashMap<&'static str, TokenKind> = hashmap! { - "=" => TokenKind::Eq, - "->" => TokenKind::Arrow, - }; - - static ref KEYWORDS: HashMap<&'static str, TokenKind> = hashmap! 
{ - "return" => TokenKind::KwReturn, - }; - -} +use std::str::Chars; pub struct Lexer<'t> { text: &'t str, @@ -120,6 +81,10 @@ impl<'t> Lexer<'t> { |(?P,) |(?P->) |(?P=) + |(?P\+) + |(?P-) + |(?P\*) + |(?P/) |(?P"([^\\"]|\\[ntr0"'])*") |(?P'([^\\"]|\\[ntr0"'])*') "#).ignore_whitespace(true) @@ -129,12 +94,14 @@ impl<'t> Lexer<'t> { const CAPTURES: &[(&str, TokenKind)] = &[ ("kw_return", TokenKind::KwReturn), + ("ident", TokenKind::Ident), ("sym", TokenKind::Sym), ("dec_num", TokenKind::Num), ("hex_num", TokenKind::Num), ("dq_str", TokenKind::Str), ("sq_str", TokenKind::Str), + ("lparen", TokenKind::LParen), ("rparen", TokenKind::RParen), ("lbracket", TokenKind::LBracket), @@ -142,15 +109,18 @@ impl<'t> Lexer<'t> { ("lbrace", TokenKind::LBrace), ("rbrace", TokenKind::RBrace), ("comma", TokenKind::Comma), + ("plus", TokenKind::Plus), + ("minus", TokenKind::Minus), + ("splat", TokenKind::Splat), + ("fslash", TokenKind::FSlash), + ("arrow", TokenKind::Arrow), ("eq", TokenKind::Eq), ]; self.skip_whitespace(); - let curr = if let Some(curr) = self.curr_char() { - curr - } else { + if self.curr_char().is_none() { return Ok(None); }; @@ -272,6 +242,10 @@ mod test { test_token!("[", TokenKind::LBracket); test_token!("]", TokenKind::RBracket); test_token!(",", TokenKind::Comma); + test_token!("+", TokenKind::Plus); + test_token!("-", TokenKind::Minus); + test_token!("*", TokenKind::Splat); + test_token!("/", TokenKind::FSlash); } #[test] diff --git a/src/syn/mod.rs b/src/syn/mod.rs index 9959f63..97f51c5 100644 --- a/src/syn/mod.rs +++ b/src/syn/mod.rs @@ -1,4 +1,7 @@ +pub mod ast; pub mod error; pub mod lexer; +pub mod op; +pub mod parser; pub mod span; pub mod token; diff --git a/src/syn/op.rs b/src/syn/op.rs new file mode 100644 index 0000000..e59dc82 --- /dev/null +++ b/src/syn/op.rs @@ -0,0 +1,6 @@ +pub enum UnOp { + +} + +pub enum BinOp { +} diff --git a/src/syn/parser.rs b/src/syn/parser.rs new file mode 100644 index 0000000..4292e93 --- /dev/null +++ 
b/src/syn/parser.rs
@@ -0,0 +1,249 @@
+#![allow(dead_code)]
+use crate::syn::{ast::*, error::*, lexer::Lexer, span::*, token::*};
+use std::{convert::TryFrom, mem};
+
+const EXPR_START: &'static [TokenKind] = &[
+    TokenKind::Ident,
+    TokenKind::Num,
+    TokenKind::Str,
+    TokenKind::Sym,
+    TokenKind::LParen,
+    TokenKind::LBracket,
+    TokenKind::LBrace,
+    // TODO unary tokens
+];
+
+const VALUE_EXPR_START: &'static [TokenKind] = &[
+    TokenKind::Ident,
+    TokenKind::Num,
+    TokenKind::Str,
+    TokenKind::Sym,
+];
+
+pub struct Parser<'t> {
+    lexer: Lexer<'t>,
+    curr_token: Option<Token>,
+}
+
+impl<'t> Parser<'t> {
+    pub fn new(lexer: Lexer<'t>) -> Result<Self> {
+        let mut parser = Parser {
+            lexer,
+            curr_token: None,
+        };
+        parser.adv_token()?;
+        Ok(parser)
+    }
+
+    pub fn is_eof(&self) -> bool {
+        self.lexer.is_eof()
+    }
+
+    pub fn pos(&self) -> Pos {
+        self.span().start
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // Parsing functions
+    ////////////////////////////////////////////////////////////////////////////////
+
+    pub fn next_expr(&mut self) -> Result<Expr> {
+        self.next_bin_expr()
+    }
+
+    fn next_bin_expr(&mut self) -> Result<Expr> {
+        let lhs = self.next_un_expr()?;
+        todo!()
+    }
+
+    fn next_un_expr(&mut self) -> Result<Expr> {
+        todo!()
+    }
+
+    fn next_base_expr(&mut self) -> Result<Expr> {
+        let token =
+            self.expect_token_where(|t| VALUE_EXPR_START.contains(&t.kind()), "base expression")?;
+        let expr: Expr = match token.kind() {
+            TokenKind::Ident => BaseExpr {
+                kind: BaseExprKind::Ident,
+                span: token.span(),
+            }
+            .into(),
+            TokenKind::Num => BaseExpr {
+                kind: BaseExprKind::Num,
+                span: token.span(),
+            }
+            .into(),
+            TokenKind::Str => BaseExpr {
+                kind: BaseExprKind::Str,
+                span: token.span(),
+            }
+            .into(),
+            TokenKind::Sym => BaseExpr {
+                kind: BaseExprKind::Sym,
+                span: token.span(),
+            }
+            .into(),
+            _ => unreachable!(),
+        };
+
+        Ok(expr)
+    }
+
+    fn next_list(&mut self) -> Result<Expr> {
+        let start_token = self.expect_token_where(|t| t.kind() == TokenKind::LBracket, "start of list (left bracket)")?;
+        let mut list_items = Vec::new();
+
+        // NOTE(review): the sentinel must be RBracket (lists are bracket-delimited,
+        // as the expect_token_where below confirms), and the parsed expression must
+        // be pushed — the original dropped it and looped on RBrace.
+        while !matches!(self.curr_token.map(|t| t.kind()), Some(TokenKind::RBracket) | None) {
+            list_items.push(self.next_expr()?);
+        }
+
+        let end_token = self.expect_token_where(
+            |t| t.kind() == TokenKind::RBracket,
+            "end of list (right bracket)",
+        )?;
+        let expr = BaseExpr {
+            kind: BaseExprKind::List(list_items),
+            span: start_token.span().union(end_token.span()),
+        };
+
+        Ok(expr.into())
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // Token matching functions
+    ////////////////////////////////////////////////////////////////////////////////
+
+    fn adv_token(&mut self) -> Result<Option<Token>> {
+        let next_token = self.lexer.next_token()?;
+        Ok(mem::replace(&mut self.curr_token, next_token))
+    }
+
+    fn match_token_where<P>(&mut self, pred: P) -> Result<Option<Token>>
+    where
+        P: Fn(Token) -> bool,
+    {
+        match self.curr_token {
+            Some(curr) if (pred)(curr) => self.adv_token(),
+            _ => Ok(None),
+        }
+    }
+
+    fn expect_token_where

<P>(&mut self, pred: P, expected: impl ToString) -> Result<Token>
+    where
+        P: Fn(Token) -> bool,
+    {
+        self.match_token_where(pred)?
+            .ok_or_else(|| Error::ExpectedGot {
+                expected: expected.to_string(),
+                got: self
+                    .curr_token
+                    .map(|token| token.kind().to_string())
+                    .unwrap_or_else(|| "EOF".to_string()),
+                pos: self.pos(),
+            })
+    }
+}
+
+impl<'t> Spanned for Parser<'t> {
+    fn span(&self) -> Span {
+        self.curr_token
+            .as_ref()
+            .map(Spanned::span)
+            .unwrap_or(Span::default())
+    }
+}
+
+impl<'t> TryFrom<Lexer<'t>> for Parser<'t> {
+    type Error = Error;
+
+    fn try_from(lexer: Lexer<'t>) -> Result<Self, Self::Error> {
+        Parser::new(lexer)
+    }
+}
+
+impl<'t> TryFrom<&'t str> for Parser<'t> {
+    type Error = Error;
+
+    fn try_from(text: &'t str) -> Result<Self, Self::Error> {
+        Parser::new(Lexer::new(text))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_adv_token() {
+        const EXPECTED: &'static [TokenKind] = &[
+            TokenKind::Num,
+            TokenKind::Ident,
+            TokenKind::Sym,
+            TokenKind::Str,
+        ];
+        let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap();
+
+        for expected in EXPECTED.iter().copied() {
+            let token = parser.adv_token().unwrap();
+            let kind = token.unwrap().kind();
+            assert_eq!(kind, expected);
+        }
+        assert!(parser.is_eof());
+    }
+
+    #[test]
+    fn test_match_token_where() {
+        let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap();
+
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Num),
+            Ok(Some(_))
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Num),
+            Ok(None)
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Ident),
+            Ok(Some(_))
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Ident),
+            Ok(None)
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Sym),
+            Ok(Some(_))
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Sym),
+            Ok(None)
+        ));
+
assert!(matches!( + parser.match_token_where(|token| token.kind() == TokenKind::Str), + Ok(Some(_)) + )); + assert!(matches!( + parser.match_token_where(|token| token.kind() == TokenKind::Str), + Ok(None) + )); + assert!(parser.is_eof()); + } + + #[test] + fn test_base_expr() { + let mut parser = Parser::try_from("1").unwrap(); + assert!(matches!( + parser.next_base_expr(), + Ok( + Expr::Base( + BaseExpr { + kind: BaseExprKind::Num, + .. + } + ) + ) + )); + } +} diff --git a/src/syn/token.rs b/src/syn/token.rs index 4e58716..073a1c6 100644 --- a/src/syn/token.rs +++ b/src/syn/token.rs @@ -3,6 +3,8 @@ use std::fmt::{Display, Formatter, self}; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum TokenKind { + KwReturn, + Ident, Num, Str, @@ -18,14 +20,18 @@ pub enum TokenKind { Eq, Arrow, - - KwReturn, + Plus, + Minus, + Splat, + FSlash, } impl Display for TokenKind { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { use TokenKind::*; let s = match self { + KwReturn => "return keyword", + Ident => "identifier", Num => "number", Str => "string", @@ -41,8 +47,10 @@ impl Display for TokenKind { Eq => "equals", Arrow => "arrow", - - KwReturn => "return keyword", + Plus => "plus", + Minus => "minus", + Splat => "splat (or times)", + FSlash => "fslash (or divide)", }; Display::fmt(s, fmt) }