Add parser and AST, remove some stuff from lexer
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
Cargo.lock (generated, 7 lines changed)
@@ -21,12 +21,6 @@ version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.3.3"
|
||||
@@ -38,7 +32,6 @@ name = "not-python"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"maplit",
|
||||
"regex",
|
||||
"snafu",
|
||||
]
|
||||
|
||||
@@ -9,5 +9,4 @@ edition = "2018"
|
||||
[dependencies]
|
||||
snafu = "0.6.6"
|
||||
lazy_static = "1.4.0"
|
||||
maplit = "1.0.2"
|
||||
regex = "1.3.7"
|
||||
|
||||
src/syn/ast.rs (new file, 52 lines)
@@ -0,0 +1,52 @@
+use crate::syn::{op::*, span::*};
+
+pub enum Expr {
+    Base(BaseExpr),
+    Bin(Box<BinExpr>),
+    Un(Box<UnExpr>),
+}
+
+impl From<UnExpr> for Expr {
+    fn from(un: UnExpr) -> Self {
+        Expr::Un(Box::new(un))
+    }
+}
+
+impl From<BinExpr> for Expr {
+    fn from(bin: BinExpr) -> Self {
+        Expr::Bin(Box::new(bin))
+    }
+}
+
+impl From<BaseExpr> for Expr {
+    fn from(base: BaseExpr) -> Self {
+        Expr::Base(base)
+    }
+}
+
+pub struct BinExpr {
+    pub lhs: Expr,
+    pub op: BinOp,
+    pub rhs: Expr,
+}
+
+pub struct UnExpr {
+    pub op: UnOp,
+    pub expr: Expr,
+    pub span: Span,
+}
+
+pub enum BaseExprKind {
+    Ident,
+    Num,
+    Str,
+    Sym,
+    List(Vec<Expr>),
+    Object(Vec<(Expr, Expr)>),
+    Tuple(Vec<Expr>),
+}
+
+pub struct BaseExpr {
+    pub kind: BaseExprKind,
+    pub span: Span,
+}
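Editor's note: the three From impls exist so parser code can build nested Expr values with .into() and let the conversions handle the boxing of the recursive variants. A minimal sketch of the intended usage follows; it is an illustration, not part of the commit. Span::default() stands in for real token spans (the parser file below shows Span implements Default), and since BinOp has no variants yet, the operator slot is stubbed with todo!().

fn example_tree() -> Expr {
    // Two leaf nodes; real code would take `kind` and `span` from tokens.
    let lhs: Expr = BaseExpr { kind: BaseExprKind::Num, span: Span::default() }.into();
    let rhs: Expr = BaseExpr { kind: BaseExprKind::Num, span: Span::default() }.into();
    // From<BinExpr> boxes the node, so nesting stays ergonomic.
    BinExpr { lhs, op: todo!(), rhs }.into()
}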
src/syn/lexer.rs
@@ -1,46 +1,7 @@
-use crate::{
-    syn::{error::*, span::*, token::*},
-    util::LazyString,
-};
+use crate::syn::{error::*, span::*, token::*};
-use lazy_static::lazy_static;
-use maplit::hashmap;
-use regex::{Regex, RegexBuilder};
-use std::{collections::HashMap, mem, str::Chars};
-
-const IDENT_START_CHARS: &'static [char] = &[
-    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
-    't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
-    'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_',
-];
-const IDENT_CHARS: &'static [char] = &[
-    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
-    'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B',
-    'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
-    'V', 'W', 'X', 'Y', 'Z', '_',
-];
-
-const DEC_NUM_CHARS: &'static [char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
-
-const HEX_NUM_CHARS: &'static [char] = &[
-    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C',
-    'D', 'E', 'F',
-];
-
-const STR_QUOTE_CHARS: &'static [char] = &['"', '\''];
-
-const OP_CHARS: &'static [char] = &['=', '+', '*', '-', '/', '>', '<', '~', '!', '%', '^'];
-
-lazy_static! {
-    static ref OPS: HashMap<&'static str, TokenKind> = hashmap! {
-        "=" => TokenKind::Eq,
-        "->" => TokenKind::Arrow,
-    };
-
-    static ref KEYWORDS: HashMap<&'static str, TokenKind> = hashmap! {
-        "return" => TokenKind::KwReturn,
-    };
-
-}
+use std::str::Chars;
 
 pub struct Lexer<'t> {
     text: &'t str,
@@ -120,6 +81,10 @@ impl<'t> Lexer<'t> {
             |(?P<comma>,)
             |(?P<arrow>->)
             |(?P<eq>=)
+            |(?P<plus>\+)
+            |(?P<minus>-)
+            |(?P<splat>\*)
+            |(?P<fslash>/)
             |(?P<dq_str>"([^\\"]|\\[ntr0"'])*")
             |(?P<sq_str>'([^\\"]|\\[ntr0"'])*')
         "#).ignore_whitespace(true)
@@ -129,12 +94,14 @@
 
         const CAPTURES: &[(&str, TokenKind)] = &[
+            ("kw_return", TokenKind::KwReturn),
+
             ("ident", TokenKind::Ident),
             ("sym", TokenKind::Sym),
             ("dec_num", TokenKind::Num),
             ("hex_num", TokenKind::Num),
             ("dq_str", TokenKind::Str),
             ("sq_str", TokenKind::Str),
 
             ("lparen", TokenKind::LParen),
             ("rparen", TokenKind::RParen),
             ("lbracket", TokenKind::LBracket),
@@ -142,15 +109,18 @@
             ("lbrace", TokenKind::LBrace),
             ("rbrace", TokenKind::RBrace),
             ("comma", TokenKind::Comma),
+            ("plus", TokenKind::Plus),
+            ("minus", TokenKind::Minus),
+            ("splat", TokenKind::Splat),
+            ("fslash", TokenKind::FSlash),
 
             ("arrow", TokenKind::Arrow),
             ("eq", TokenKind::Eq),
         ];
 
         self.skip_whitespace();
 
-        let curr = if let Some(curr) = self.curr_char() {
-            curr
-        } else {
+        if self.curr_char().is_none() {
             return Ok(None);
         };
@@ -272,6 +242,10 @@ mod test {
         test_token!("[", TokenKind::LBracket);
         test_token!("]", TokenKind::RBracket);
         test_token!(",", TokenKind::Comma);
+        test_token!("+", TokenKind::Plus);
+        test_token!("-", TokenKind::Minus);
+        test_token!("*", TokenKind::Splat);
+        test_token!("/", TokenKind::FSlash);
     }
 
     #[test]
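Editor's note: with the hashmap-based OPS and KEYWORDS tables gone, the lexer resolves token kinds entirely through the named capture groups of the big regex. The CAPTURES table pairs each capture name with its TokenKind; listing kw_return ahead of ident gives keyword matches priority in the lookup. A sketch of the lookup this table enables (an illustration only; the actual next_token body is not shown in the diff):

// Sketch, not part of the commit: find which named capture group matched
// and translate it to a TokenKind via a CAPTURES-style table.
fn kind_of(caps: &regex::Captures, table: &[(&str, TokenKind)]) -> Option<TokenKind> {
    table
        .iter()
        .copied()
        .find(|&(name, _)| caps.name(name).is_some())
        .map(|(_, kind)| kind)
}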
src/syn/mod.rs
@@ -1,4 +1,7 @@
+pub mod ast;
 pub mod error;
 pub mod lexer;
+pub mod op;
+pub mod parser;
 pub mod span;
 pub mod token;
src/syn/op.rs (new file, 6 lines)
@@ -0,0 +1,6 @@
+pub enum UnOp {
+
+}
+
+pub enum BinOp {
+}
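Editor's note: both operator enums land empty in this commit. Given the Plus, Minus, Splat, and FSlash tokens the lexer just gained, a plausible next step would be variants along these lines; this is a guess, not part of the commit:

// Hypothetical follow-up; variant names are inferred from the new tokens.
pub enum UnOp {
    Neg, // unary `-`
}

pub enum BinOp {
    Add, // `+`
    Sub, // `-`
    Mul, // `*` (splat)
    Div, // `/` (fslash)
}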
src/syn/parser.rs (new file, 249 lines)
@@ -0,0 +1,249 @@
+#![allow(dead_code)]
+use crate::syn::{ast::*, error::*, lexer::Lexer, span::*, token::*};
+use std::{convert::TryFrom, mem};
+
+const EXPR_START: &'static [TokenKind] = &[
+    TokenKind::Ident,
+    TokenKind::Num,
+    TokenKind::Str,
+    TokenKind::Sym,
+    TokenKind::LParen,
+    TokenKind::LBracket,
+    TokenKind::LBrace,
+    // TODO unary tokens
+];
+
+const VALUE_EXPR_START: &'static [TokenKind] = &[
+    TokenKind::Ident,
+    TokenKind::Num,
+    TokenKind::Str,
+    TokenKind::Sym,
+];
+
+pub struct Parser<'t> {
+    lexer: Lexer<'t>,
+    curr_token: Option<Token>,
+}
+
+impl<'t> Parser<'t> {
+    pub fn new(lexer: Lexer<'t>) -> Result<Self> {
+        let mut parser = Parser {
+            lexer,
+            curr_token: None,
+        };
+        parser.adv_token()?;
+        Ok(parser)
+    }
+
+    pub fn is_eof(&self) -> bool {
+        self.lexer.is_eof()
+    }
+
+    pub fn pos(&self) -> Pos {
+        self.span().start
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // Parsing functions
+    ////////////////////////////////////////////////////////////////////////////////
+
+    pub fn next_expr(&mut self) -> Result<Expr> {
+        self.next_bin_expr()
+    }
+
+    fn next_bin_expr(&mut self) -> Result<Expr> {
+        let lhs = self.next_un_expr()?;
+        todo!()
+    }
+
+    fn next_un_expr(&mut self) -> Result<Expr> {
+        todo!()
+    }
+
+    fn next_base_expr(&mut self) -> Result<Expr> {
+        let token =
+            self.expect_token_where(|t| VALUE_EXPR_START.contains(&t.kind()), "base expression")?;
+        let expr: Expr = match token.kind() {
+            TokenKind::Ident => BaseExpr {
+                kind: BaseExprKind::Ident,
+                span: token.span(),
+            }
+            .into(),
+            TokenKind::Num => BaseExpr {
+                kind: BaseExprKind::Num,
+                span: token.span(),
+            }
+            .into(),
+            TokenKind::Str => BaseExpr {
+                kind: BaseExprKind::Str,
+                span: token.span(),
+            }
+            .into(),
+            TokenKind::Sym => BaseExpr {
+                kind: BaseExprKind::Sym,
+                span: token.span(),
+            }
+            .into(),
+            _ => unreachable!(),
+        };
+
+        Ok(expr)
+    }
+
+    fn next_list(&mut self) -> Result<Expr> {
+        let start_token = self
+            .expect_token_where(|t| t.kind() == TokenKind::LBracket, "start of list (left bracket)")?;
+        let mut list_items = Vec::new();
+
+        // Lists are bracket-delimited, so stop on RBracket (not RBrace), and
+        // keep each parsed item rather than dropping it.
+        while !matches!(self.curr_token.map(|t| t.kind()), Some(TokenKind::RBracket) | None) {
+            let expr = self.next_expr()?;
+            list_items.push(expr);
+            // Consume an optional separating comma between items.
+            self.match_token_where(|t| t.kind() == TokenKind::Comma)?;
+        }
+
+        let end_token = self.expect_token_where(
+            |t| t.kind() == TokenKind::RBracket,
+            "end of list (right bracket)",
+        )?;
+        let expr = BaseExpr {
+            kind: BaseExprKind::List(list_items),
+            span: start_token.span().union(end_token.span()),
+        };
+
+        Ok(expr.into())
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // Token matching functions
+    ////////////////////////////////////////////////////////////////////////////////
+
+    fn adv_token(&mut self) -> Result<Option<Token>> {
+        let next_token = self.lexer.next_token()?;
+        Ok(mem::replace(&mut self.curr_token, next_token))
+    }
+
+    fn match_token_where<P>(&mut self, pred: P) -> Result<Option<Token>>
+    where
+        P: Fn(Token) -> bool,
+    {
+        match self.curr_token {
+            Some(curr) if (pred)(curr) => self.adv_token(),
+            _ => Ok(None),
+        }
+    }
+
+    fn expect_token_where<P>(&mut self, pred: P, expected: impl ToString) -> Result<Token>
+    where
+        P: Fn(Token) -> bool,
+    {
+        self.match_token_where(pred)?
+            .ok_or_else(|| Error::ExpectedGot {
+                expected: expected.to_string(),
+                got: self
+                    .curr_token
+                    .map(|token| token.kind().to_string())
+                    .unwrap_or_else(|| "EOF".to_string()),
+                pos: self.pos(),
+            })
+    }
+}
+
+impl<'t> Spanned for Parser<'t> {
+    fn span(&self) -> Span {
+        self.curr_token
+            .as_ref()
+            .map(Spanned::span)
+            .unwrap_or(Span::default())
+    }
+}
+
+impl<'t> TryFrom<Lexer<'t>> for Parser<'t> {
+    type Error = Error;
+
+    fn try_from(lexer: Lexer<'t>) -> Result<Self> {
+        Parser::new(lexer)
+    }
+}
+
+impl<'t> TryFrom<&'t str> for Parser<'t> {
+    type Error = Error;
+
+    fn try_from(text: &'t str) -> Result<Self> {
+        Parser::new(Lexer::new(text))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_adv_token() {
+        const EXPECTED: &'static [TokenKind] = &[
+            TokenKind::Num,
+            TokenKind::Ident,
+            TokenKind::Sym,
+            TokenKind::Str,
+        ];
+        let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap();
+
+        for expected in EXPECTED.iter().copied() {
+            let token = parser.adv_token().unwrap();
+            let kind = token.unwrap().kind();
+            assert_eq!(kind, expected);
+        }
+        assert!(parser.is_eof());
+    }
+
+    #[test]
+    fn test_match_token_where() {
+        let mut parser = Parser::try_from("1 ident :sym 'string'").unwrap();
+
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Num),
+            Ok(Some(_))
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Num),
+            Ok(None)
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Ident),
+            Ok(Some(_))
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Ident),
+            Ok(None)
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Sym),
+            Ok(Some(_))
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Sym),
+            Ok(None)
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Str),
+            Ok(Some(_))
+        ));
+        assert!(matches!(
+            parser.match_token_where(|token| token.kind() == TokenKind::Str),
+            Ok(None)
+        ));
+        assert!(parser.is_eof());
+    }
+
+    #[test]
+    fn test_base_expr() {
+        let mut parser = Parser::try_from("1").unwrap();
+        assert!(matches!(
+            parser.next_base_expr(),
+            Ok(Expr::Base(BaseExpr {
+                kind: BaseExprKind::Num,
+                ..
+            }))
+        ));
+    }
+}
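Editor's note: next_bin_expr and next_un_expr are stubbed with todo!(), which is why next_bin_expr parses a left-hand side and then stops. One conventional way to finish next_bin_expr is precedence climbing. The sketch below is an illustration written as if inside impl<'t> Parser<'t>; it assumes hypothetical BinOp::from_token and BinOp::precedence helpers, neither of which exists in this commit:

impl<'t> Parser<'t> {
    // Sketch, not part of the commit: precedence climbing over BinExpr.
    fn next_bin_expr_with(&mut self, min_prec: u8) -> Result<Expr> {
        let mut lhs = self.next_un_expr()?;
        // Fold in operators whose precedence is at least `min_prec`.
        while let Some(op) = self
            .curr_token
            .and_then(|t| BinOp::from_token(t.kind())) // hypothetical helper
            .filter(|op| op.precedence() >= min_prec)  // hypothetical helper
        {
            self.adv_token()?; // consume the operator token
            // Recurse one level tighter so `1 - 2 - 3` groups as `(1 - 2) - 3`.
            let rhs = self.next_bin_expr_with(op.precedence() + 1)?;
            lhs = BinExpr { lhs, op, rhs }.into();
        }
        Ok(lhs)
    }
}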
src/syn/token.rs
@@ -3,6 +3,8 @@ use std::fmt::{Display, Formatter, self};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum TokenKind {
+    KwReturn,
+
     Ident,
     Num,
     Str,
@@ -18,14 +20,18 @@ pub enum TokenKind {
 
     Eq,
     Arrow,
 
-    KwReturn,
+    Plus,
+    Minus,
+    Splat,
+    FSlash,
 }
 
 impl Display for TokenKind {
     fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
         use TokenKind::*;
         let s = match self {
+            KwReturn => "return keyword",
 
             Ident => "identifier",
             Num => "number",
             Str => "string",
@@ -41,8 +47,10 @@ impl Display for TokenKind {
 
             Eq => "equals",
             Arrow => "arrow",
 
-            KwReturn => "return keyword",
+            Plus => "plus",
+            Minus => "minus",
+            Splat => "splat (or times)",
+            FSlash => "fslash (or divide)",
         };
         Display::fmt(s, fmt)
     }
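Editor's note: these Display strings are what the new parser's expect_token_where splices into Error::ExpectedGot, so each arm is the name users see in error messages. For example:

// Sketch, not part of the commit: the Display impl feeds parser errors.
assert_eq!(TokenKind::Plus.to_string(), "plus");
// e.g. Error::ExpectedGot { expected: "base expression", got: "plus", .. }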