Add parser, vm, objects

Big ol thing. You should check it out sometime

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2022-01-07 20:30:55 -08:00
parent 946a927b09
commit 9e20dcf59c
17 changed files with 712 additions and 84 deletions

View File

@@ -1,5 +1,307 @@
use crate::syn::lexer::*;
use crate::syn::{ast::*, error::*, lexer::*, token::*};
/// Parser that pulls tokens from a [`Lexer`] and assembles them into expressions.
///
/// The parser keeps exactly one token of lookahead, fetched from the lexer ahead of time.
#[derive(Debug)]
pub struct Parser<'t> {
/// Token source; also consulted for the source text when converting tokens into atoms.
lexer: Lexer<'t>,
/// The lookahead slot: the most recent result pulled from the lexer that has not been consumed
/// yet. `Ok(None)` is treated as end of input; an `Err` is the lexer's error for that position.
token: Result<Option<SpToken>>,
}
/// Allows a parser to be built straight from source text.
impl<'t> From<&'t str> for Parser<'t> {
    /// Wraps `text` in a fresh [`Lexer`] and hands that lexer to [`Parser::new`].
    fn from(text: &'t str) -> Self {
        let lexer = Lexer::new(text);
        Self::new(lexer)
    }
}
impl<'t> Parser<'t> {
    /// Creates a parser that reads its tokens from `lexer`.
    ///
    /// The first token is pulled from the lexer right away so that it is available as lookahead.
    pub fn new(mut lexer: Lexer<'t>) -> Self {
        let token = lexer.next();
        Self { lexer, token }
    }

    /// Reports whether the underlying lexer says it has reached the end of its input.
    ///
    /// NOTE(review): this asks the lexer, not the lookahead slot. Depending on how
    /// `Lexer::is_eof` is defined, this may already be `true` while the final token is still
    /// waiting, unconsumed, in the lookahead slot - confirm against the lexer.
    pub fn is_eof(&self) -> bool {
        self.lexer.is_eof()
    }

    /// Returns a copy of the lookahead token (or lexer error) without consuming it.
    fn peek(&self) -> Result<Option<SpToken>> {
        self.token.clone()
    }

    /// Consumes the lookahead token and returns it, refilling the lookahead slot from the lexer.
    ///
    /// An error produced by the lexer while refilling is not reported by this call; it is stored
    /// in the lookahead slot and surfaces on the next `peek`/`adv`.
    fn adv(&mut self) -> Result<Option<SpToken>> {
        let next = self.lexer.next();
        std::mem::replace(&mut self.token, next)
    }

    /// Checks if the next token is part of the list, returning it if so.
    ///
    /// On a match the token is consumed. On a mismatch nothing is consumed.
    ///
    /// # Errors
    ///
    /// * Returns the lexer's error if the lookahead slot holds one.
    /// * Returns [`SyntaxError::ExpectedGot`] when the lookahead token (or end of input) is not
    ///   one of `expected`. The message lists a single expected token as `"X token"` and several
    ///   as `"A, B or C token"`.
    fn expect_any_token(&mut self, expected: &[Token]) -> Result<SpToken> {
        // Either hand back the matching token, or describe what was found instead.
        let got = match self.peek()? {
            Some(token) if expected.contains(token.inner()) => {
                self.adv()?;
                return Ok(token);
            }
            Some(token) => format!("{} token", token.inner().name()),
            None => "EOF".to_string(),
        };

        let expected = match expected {
            // Callers always pass at least one token; an empty list is still reported as an
            // ordinary error rather than panicking while building the message.
            [] => "no token".to_string(),
            [only] => format!("{} token", only.name()),
            // Comma-separate everything except the last item, which is joined with "or".
            [init @ .., last] => {
                let init = init.iter().map(Token::name).collect::<Vec<_>>().join(", ");
                format!("{} or {} token", init, last.name())
            }
        };

        Err(SyntaxError::ExpectedGot { expected, got })
    }

    /// Gets all expressions until EOF is reached, or until a quote end is reached.
    ///
    /// The closing quote token itself is left unconsumed for the caller to deal with.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by the lexer or by [`Parser::next_expr`].
    pub fn next_expr_list(&mut self) -> Result<Vec<SpExpr>> {
        let mut exprs = Vec::new();
        while let Some(peek) = self.peek()? {
            if *peek.inner() == Token::RQuote {
                break;
            }
            exprs.push(self.next_expr()?);
        }
        Ok(exprs)
    }

    /// Parses one expression: a quote if the next token opens one, otherwise a single atom.
    ///
    /// # Errors
    ///
    /// Returns the lexer's error if the lookahead slot holds one, or whatever
    /// [`Parser::next_quote`] / [`Parser::next_atom`] report.
    pub fn next_expr(&mut self) -> Result<SpExpr> {
        // peek ahead and see if we need to handle a quote
        match self.peek()? {
            Some(peek) if *peek.inner() == Token::LQuote => self.next_quote(),
            _ => {
                let atom = self.next_atom()?;
                let span = atom.span();
                Ok(SpExpr::new(span, Expr::Atom(atom)))
            }
        }
    }

    /// Parses a quote: an opening quote token, any number of expressions, and a closing quote
    /// token. The resulting span is the union of the opening and closing tokens' spans.
    ///
    /// # Errors
    ///
    /// Returns [`SyntaxError::ExpectedGot`] if either delimiter is missing, or the first error
    /// from parsing the enclosed expressions.
    pub fn next_quote(&mut self) -> Result<SpExpr> {
        let start = self.expect_any_token(&[Token::LQuote])?;
        let exprs = self.next_expr_list()?;
        let end = self.expect_any_token(&[Token::RQuote])?;
        let span = start.span().union(end.span());
        Ok(SpExpr::new(span, Expr::Quote(exprs)))
    }

    /// Parses a single atom from a word, float, integer, or string token.
    ///
    /// # Errors
    ///
    /// Returns [`SyntaxError::ExpectedGot`] if the next token is not one of those four kinds, or
    /// if a numeric token's text cannot be converted into a value (for example an integer literal
    /// that is too large for the integer type).
    pub fn next_atom(&mut self) -> Result<SpAtom> {
        use Token::*;
        let token = self.expect_any_token(&[Word, Float, Int, Str])?;
        self.token_to_atom(token)
    }

    /// Converts an atom token into the atom it denotes, using the token's source text.
    ///
    /// # Errors
    ///
    /// Returns [`SyntaxError::ExpectedGot`] when the text of a float or integer token does not
    /// parse into a value. Token shape alone cannot guarantee this: an integer made only of
    /// digits can still overflow.
    ///
    /// # Panics
    ///
    /// Panics if `token` is not a word, float, integer, or string token; callers are expected to
    /// have checked the token kind already.
    fn token_to_atom(&self, token: SpToken) -> Result<SpAtom> {
        // NOTE - self is required for this because we get the text
        let (span, token) = token.into_split();
        let text = span.text_at(self.lexer.text());
        // Builds the error for a numeric literal whose text has no valid value.
        let bad_literal = |kind: &str| SyntaxError::ExpectedGot {
            expected: format!("valid {} literal", kind),
            got: text.to_string(),
        };
        let atom = match token {
            Token::Word => Atom::Word(text.to_string()),
            Token::Float => Atom::Float(text.parse().map_err(|_| bad_literal("float"))?),
            Token::Int => Atom::Int(text.parse().map_err(|_| bad_literal("integer"))?),
            Token::Str => Atom::Str(unescape_string(text)),
            _ => panic!("invalid token specified for token_to_atom, it should be an atom"),
        };
        Ok(SpAtom::new(span, atom))
    }
}
/// Converts the raw source text of a string literal into the string value it denotes.
///
/// `text` is the literal exactly as written, including its opening and closing delimiter
/// characters. The first and last characters are dropped, and each backslash escape in between
/// (`\"`, `\'`, `\\`, `\n`, `\r`, `\t`) is replaced by the character it stands for. Everything
/// else is copied through unchanged, including multi-byte UTF-8 characters.
///
/// Input shorter than two characters yields an empty string.
///
/// # Panics
///
/// Panics if the content ends in the middle of an escape sequence, or contains an escape other
/// than the ones listed above. Both are presumably ruled out earlier by the lexer, going by the
/// panic messages - verify against the lexer.
fn unescape_string(text: &str) -> String {
    // `len()` is a byte count, so it is only good as a capacity hint, never as a character count.
    let mut string = String::with_capacity(text.len().saturating_sub(2));
    let mut chars = text.chars();
    // Strip the delimiters by character rather than by byte offset, so content containing
    // multi-byte characters does not drag the closing delimiter into the result.
    chars.next();
    chars.next_back();
    // `while let` rather than `for`: an escape consumes a second character inside the loop body.
    while let Some(c) = chars.next() {
        if c != '\\' {
            string.push(c);
            continue;
        }
        let escaped = match chars
            .next()
            .expect("reached end of string literal before escape")
        {
            '"' => '"',
            '\'' => '\'',
            '\\' => '\\',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            u => panic!(
                "unexpected character escape that made it through the lexer: {:?}",
                u
            ),
        };
        string.push(escaped);
    }
    string
}
/// Test helper: parses the next expression from `$parser` and asserts that it is an atom
/// expression wrapping `$expected`.
///
/// The span of the expected atom is taken from the parsed expression itself.
#[cfg(test)]
macro_rules! expect_atom {
    ($parser:expr, $expected:expr) => {{
        let parsed = match $parser.next_expr() {
            Ok(parsed) => parsed,
            failed => panic!(
                "expected {:?} but got {:?} instead",
                $expected,
                failed
            ),
        };
        let (span, expr) = parsed.into_split();
        assert_eq!(expr, Expr::Atom(SpAtom::new(span, $expected)));
    }};
}
/// Test helper: parses the next expression from `$parser` and asserts that it equals
/// `$expected`.
#[cfg(test)]
macro_rules! expect_expr {
    ($parser:expr, $expected:expr) => {{
        let parsed = match $parser.next_expr() {
            Ok(parsed) => parsed,
            failed => panic!(
                "expected {:?} but got {:?} instead",
                $expected,
                failed
            ),
        };
        assert_eq!(parsed, $expected);
    }};
}
/// Builds an `SpExpr` holding an `Expr::Quote` of the listed expressions.
///
/// Both the quote and each of its items get a default span.
#[cfg(test)]
macro_rules! make_quote {
    ($($item:expr),+ $(,)?) => {{
        let span = Default::default();
        let items = make_quote_vec!($($item),+);
        SpExpr::new(span, Expr::Quote(items))
    }};
}
/// Builds the vector payload for an `Expr::Quote`: each listed expression is wrapped in an
/// `SpExpr` with a default span, in the order given.
#[cfg(test)]
macro_rules! make_quote_vec {
    ($($item:expr),+ $(,)?) => {{
        let mut items = Vec::new();
        $(
            items.push(SpExpr::new(Default::default(), $item));
        )+
        items
    }};
}
/// Wraps an `Atom` value in an `SpAtom` that carries a default span.
#[cfg(test)]
macro_rules! make_atom {
    ($value:expr) => {{
        let span = Default::default();
        SpAtom::new(span, $value)
    }};
}
/// Parses a run of words, integers, floats and strings, checking each resulting atom in order
/// and that the parser reports end of input afterwards.
#[test]
fn test_parser_atoms() {
    let source = r#"
a ab bcd dcefg foo bar baz
1 2 3 4 5
1.2 3.4 5.6 7.8 9.10
"this is a string"
"this\nis\na\nstring\nwith\nnewlines"
"this\tis\ta\tstring\twith\ttabs"
"#;
    let mut parser = Parser::from(source);

    for word in ["a", "ab", "bcd", "dcefg", "foo", "bar", "baz"] {
        expect_atom!(parser, Atom::Word(word.to_string()));
    }

    for int in [1, 2, 3, 4, 5] {
        expect_atom!(parser, Atom::Int(int));
    }

    // The source spells the last float "9.10", which is the same value as 9.1.
    for float in [1.2, 3.4, 5.6, 7.8, 9.1] {
        expect_atom!(parser, Atom::Float(float));
    }

    // Escapes in the source text must come out as the real control characters.
    for string in [
        "this is a string",
        "this\nis\na\nstring\nwith\nnewlines",
        "this\tis\ta\tstring\twith\ttabs",
    ] {
        expect_atom!(parser, Atom::Str(string.to_string()));
    }

    assert!(parser.is_eof());
}
/// Parses a flat quote of words followed by a quote of integers that ends with a nested quote
/// of floats, comparing each against a hand-built expression tree.
#[test]
fn test_parser_quotes() {
    let source = r#"
[
a ab bcd dcefg foo bar baz
]
[1 2 3 4 5
[1.2 3.4 5.6 7.8 9.10]
]
"#;
    let mut parser = Parser::from(source);

    // Small builders to keep the expected trees readable.
    let word = |text: &str| Expr::Atom(make_atom!(Atom::Word(text.to_string())));
    let int = |value| Expr::Atom(make_atom!(Atom::Int(value)));
    let float = |value| Expr::Atom(make_atom!(Atom::Float(value)));

    let words = make_quote![
        word("a"),
        word("ab"),
        word("bcd"),
        word("dcefg"),
        word("foo"),
        word("bar"),
        word("baz"),
    ];
    expect_expr!(parser, words);

    let nested_floats = Expr::Quote(make_quote_vec![
        float(1.2),
        float(3.4),
        float(5.6),
        float(7.8),
        float(9.10),
    ]);
    let numbers = make_quote![int(1), int(2), int(3), int(4), int(5), nested_floats];
    expect_expr!(parser, numbers);
}