2022-01-07 20:30:55 -08:00
|
|
|
use crate::syn::{ast::*, error::*, lexer::*, token::*};
|
2021-12-21 11:29:59 -08:00
|
|
|
|
2022-01-09 18:04:32 -08:00
|
|
|
// /////////////////////////////////////////////////////////////////////////////
// Parser
// /////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
2022-01-07 20:30:55 -08:00
|
|
|
#[derive(Debug)]
|
2021-12-21 11:29:59 -08:00
|
|
|
pub struct Parser<'t> {
|
|
|
|
|
lexer: Lexer<'t>,
|
2022-01-07 20:30:55 -08:00
|
|
|
token: Result<Option<SpToken>>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'t> From<&'t str> for Parser<'t> {
|
|
|
|
|
fn from(text: &'t str) -> Self {
|
|
|
|
|
Parser::new(Lexer::new(text))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'t> Parser<'t> {
|
|
|
|
|
pub fn new(mut lexer: Lexer<'t>) -> Self {
|
|
|
|
|
let token = lexer.next();
|
|
|
|
|
Self { lexer, token }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn is_eof(&self) -> bool {
|
|
|
|
|
self.lexer.is_eof()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn peek(&self) -> Result<Option<SpToken>> {
|
|
|
|
|
self.token.clone()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn adv(&mut self) -> Result<Option<SpToken>> {
|
|
|
|
|
let next = self.lexer.next();
|
|
|
|
|
std::mem::replace(&mut self.token, next)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Checks if the next token is part of the list, returning it if so.
|
|
|
|
|
fn expect_any_token(&mut self, expected: &[Token]) -> Result<SpToken> {
|
|
|
|
|
let token = self.peek()?;
|
|
|
|
|
|
|
|
|
|
match (token, expected) {
|
|
|
|
|
// Token matches
|
|
|
|
|
(Some(token), expected) if expected.contains(token.inner()) => {
|
|
|
|
|
self.adv()?;
|
|
|
|
|
Ok(token)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Token does not match, only one token expected
|
|
|
|
|
(token, &[expected]) => {
|
|
|
|
|
// get the string version of whether this is a token or EOF
|
|
|
|
|
let got = token
|
|
|
|
|
.map(|t| format!("{} token", t.inner().name()))
|
|
|
|
|
.unwrap_or_else(|| "EOF".to_string());
|
|
|
|
|
Err(SyntaxError::ExpectedGot {
|
|
|
|
|
expected: format!("{} token", expected.name()),
|
|
|
|
|
got,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Token does not match, any of N tokens expected
|
|
|
|
|
(token, expected) => {
|
|
|
|
|
// make the comma-separated list of everything except for the last item
|
|
|
|
|
let expected_str = expected
|
|
|
|
|
.iter()
|
|
|
|
|
.take(expected.len() - 1)
|
|
|
|
|
.map(Token::name)
|
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
|
.join(", ");
|
|
|
|
|
// get the string version of whether this is a token or EOF
|
|
|
|
|
let got = token
|
|
|
|
|
.map(|t| format!("{} token", t.inner().name()))
|
|
|
|
|
.unwrap_or_else(|| "EOF".to_string());
|
|
|
|
|
Err(SyntaxError::ExpectedGot {
|
|
|
|
|
expected: format!(
|
|
|
|
|
"{} or {} token",
|
|
|
|
|
expected_str,
|
|
|
|
|
expected.last().unwrap().name()
|
|
|
|
|
),
|
|
|
|
|
got,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Gets all expressions until EOF is reached, or until a quote end is reached.
|
|
|
|
|
pub fn next_expr_list(&mut self) -> Result<Vec<SpExpr>> {
|
|
|
|
|
let mut exprs = Vec::new();
|
|
|
|
|
while let Some(peek) = self.peek()? {
|
|
|
|
|
match peek.inner() {
|
|
|
|
|
Token::RQuote => break,
|
|
|
|
|
_ => exprs.push(self.next_expr()?),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Ok(exprs)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn next_expr(&mut self) -> Result<SpExpr> {
|
|
|
|
|
// peek ahead and see if we need to handle a quote
|
|
|
|
|
match self.peek()? {
|
|
|
|
|
Some(peek) if *peek.inner() == Token::LQuote => self.next_quote(),
|
|
|
|
|
_ => {
|
|
|
|
|
let atom = self.next_atom()?;
|
|
|
|
|
let span = atom.span();
|
|
|
|
|
Ok(SpExpr::new(span, Expr::Atom(atom)))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn next_quote(&mut self) -> Result<SpExpr> {
|
|
|
|
|
let start = self.expect_any_token(&[Token::LQuote])?;
|
|
|
|
|
let exprs = self.next_expr_list()?;
|
|
|
|
|
let end = self.expect_any_token(&[Token::RQuote])?;
|
|
|
|
|
let span = start.span().union(end.span());
|
|
|
|
|
Ok(SpExpr::new(span, Expr::Quote(exprs)))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn next_atom(&mut self) -> Result<SpAtom> {
|
|
|
|
|
use Token::*;
|
|
|
|
|
let token = self.expect_any_token(&[Word, Float, Int, Str])?;
|
|
|
|
|
Ok(self.token_to_atom(token))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn token_to_atom(&self, token: SpToken) -> SpAtom {
|
|
|
|
|
// NOTE - self is required for this because we get the text
|
|
|
|
|
let (span, token) = token.into_split();
|
|
|
|
|
let text = span.text_at(self.lexer.text());
|
|
|
|
|
let atom = match token {
|
|
|
|
|
Token::Word => Atom::Word(text.to_string()),
|
|
|
|
|
Token::Float => Atom::Float(text.parse().unwrap()),
|
|
|
|
|
Token::Int => Atom::Int(text.parse().unwrap()),
|
|
|
|
|
Token::Str => Atom::Str(unescape_string(text)),
|
|
|
|
|
_ => panic!("invalid token specified for token_to_atom, it should be an atom"),
|
|
|
|
|
};
|
|
|
|
|
SpAtom::new(span, atom)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Converts the source text of a string literal into the string it denotes.
///
/// The first and last characters of `text` (the delimiters) are dropped and
/// the escapes `\"`, `\'`, `\\`, `\n`, `\r` and `\t` are replaced by the
/// characters they stand for. Text shorter than two characters yields an
/// empty string.
///
/// # Panics
///
/// Panics if a backslash is the last character inside the delimiters or is
/// followed by an unsupported character; the lexer is expected to have
/// rejected such literals already.
fn unescape_string(text: &str) -> String {
    // Drop the delimiters by character rather than by byte count: `len()` is
    // in bytes, so using it to count characters would keep the closing
    // delimiter whenever the literal contains multi-byte characters.
    let mut chars = text.chars();
    chars.next();
    chars.next_back();

    let mut string = String::with_capacity(text.len().saturating_sub(2));
    // `while let` instead of `for`: the escape branch pulls a second
    // character from the same iterator.
    while let Some(c) = chars.next() {
        let unescaped = if c == '\\' {
            match chars
                .next()
                .expect("reached end of string literal before escape")
            {
                '"' => '"',
                '\'' => '\'',
                '\\' => '\\',
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                u => panic!(
                    "unexpected character escape that made it through the lexer: {:?}",
                    u
                ),
            }
        } else {
            c
        };
        string.push(unescaped);
    }
    string
}
|
|
|
|
|
|
2022-01-09 18:04:32 -08:00
|
|
|
// /////////////////////////////////////////////////////////////////////////////
// Tests
// /////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
2022-01-07 20:30:55 -08:00
|
|
|
/// Parses the next expression from the parser and asserts that it is the
/// given atom, reusing the parsed span for the comparison.
#[cfg(test)]
macro_rules! expect_atom {
    ($parser:expr, $expected:expr) => {{
        let parsed = $parser.next_expr();
        assert!(
            parsed.is_ok(),
            "expected {:?} but got {:?} instead",
            $expected,
            parsed
        );
        let (span, actual) = parsed.unwrap().into_split();
        let wanted = Expr::Atom(SpAtom::new(span, $expected));
        assert_eq!(actual, wanted);
    }};
}
|
|
|
|
|
|
|
|
|
|
/// Parses the next expression from the parser and asserts that it equals the
/// given expression.
#[cfg(test)]
macro_rules! expect_expr {
    ($parser:expr, $expected:expr) => {{
        let parsed = $parser.next_expr();
        assert!(
            parsed.is_ok(),
            "expected {:?} but got {:?} instead",
            $expected,
            parsed
        );
        assert_eq!(parsed.unwrap(), $expected);
    }};
}
|
|
|
|
|
|
|
|
|
|
/// Builds an `SpExpr` holding an `Expr::Quote` whose items are the given
/// `Expr` values; every span involved is the default span.
#[cfg(test)]
macro_rules! make_quote {
    ($($item:expr),+ $(,)?) => {{
        let items = make_quote_vec!($($item),+);
        SpExpr::new(Default::default(), Expr::Quote(items))
    }};
}
|
|
|
|
|
|
|
|
|
|
/// Builds the `Vec<SpExpr>` payload of an `Expr::Quote`, wrapping each given
/// `Expr` in an `SpExpr` with the default span.
#[cfg(test)]
macro_rules! make_quote_vec {
    ($($item:expr),+ $(,)?) => {{
        vec![
            $( SpExpr::new(Default::default(), $item) ),+
        ]
    }};
}
|
|
|
|
|
|
|
|
|
|
/// Wraps an `Atom` in an `SpAtom` carrying the default span.
#[cfg(test)]
macro_rules! make_atom {
    ($value:expr) => {{
        SpAtom::new(Default::default(), $value)
    }};
}
|
|
|
|
|
|
|
|
|
|
/// Parses a flat stream of words, ints, floats and strings and checks each
/// atom in order, then checks that the parser reached EOF.
#[test]
fn test_parser_atoms() {
    let mut parser = Parser::from(
        r#"
            a ab bcd dcefg foo bar baz
            1 2 3 4 5
            1.2 3.4 5.6 7.8 9.10
            "this is a string"
            "this\nis\na\nstring\nwith\nnewlines"
            "this\tis\ta\tstring\twith\ttabs"
        "#,
    );

    for word in ["a", "ab", "bcd", "dcefg", "foo", "bar", "baz"] {
        expect_atom!(parser, Atom::Word(word.to_string()));
    }

    for int in 1..=5 {
        expect_atom!(parser, Atom::Int(int));
    }

    for float in [1.2, 3.4, 5.6, 7.8, 9.1] {
        expect_atom!(parser, Atom::Float(float));
    }

    // The escapes in the source literals must come back as real characters.
    for string in [
        "this is a string",
        "this\nis\na\nstring\nwith\nnewlines",
        "this\tis\ta\tstring\twith\ttabs",
    ] {
        expect_atom!(parser, Atom::Str(string.to_string()));
    }

    assert!(parser.is_eof());
}
|
|
|
|
|
|
|
|
|
|
/// Parses a flat quote and a quote containing a nested quote, checks both
/// resulting expressions, then checks that the parser consumed all input.
#[test]
fn test_parser_quotes() {
    let mut parser = Parser::from(
        r#"
            [
                a ab bcd dcefg foo bar baz
            ]
            [1 2 3 4 5
                [1.2 3.4 5.6 7.8 9.10]
            ]
        "#,
    );

    // Shorthand constructors for the expected atoms (all with default spans).
    let word = |text: &str| Expr::Atom(make_atom!(Atom::Word(text.to_string())));
    let int = |value| Expr::Atom(make_atom!(Atom::Int(value)));
    let float = |value| Expr::Atom(make_atom!(Atom::Float(value)));

    expect_expr!(
        parser,
        make_quote![
            word("a"),
            word("ab"),
            word("bcd"),
            word("dcefg"),
            word("foo"),
            word("bar"),
            word("baz"),
        ]
    );

    expect_expr!(
        parser,
        make_quote![
            int(1),
            int(2),
            int(3),
            int(4),
            int(5),
            Expr::Quote(make_quote_vec![
                float(1.2),
                float(3.4),
                float(5.6),
                float(7.8),
                float(9.10),
            ]),
        ]
    );

    // Same final check as test_parser_atoms: nothing may be left unparsed.
    assert!(parser.is_eof());
}
|