WIP: Implement PEG parser using pest
PEG parser using pest is implemented. It was able to run the three examples that we currently have, so hopefully there aren't any huge issues. There are a few warnings remaining that I will squash soon. Lexer, parser, and token modules have been removed. Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -5,7 +5,6 @@ mod scope;
|
||||
mod syn;
|
||||
mod vm;
|
||||
|
||||
use pest::Parser;
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
use structopt::StructOpt;
|
||||
@@ -33,7 +32,7 @@ fn main() -> Result {
|
||||
(input, "<stdin>".to_string())
|
||||
};
|
||||
|
||||
let stmts = syn::peg::parse_file(&text)?;
|
||||
let stmts = syn::peg::parse_file(&path, &text)?;
|
||||
|
||||
/*
|
||||
let mut parser = Parser::new(path, text.as_str());
|
||||
|
||||
256
src/syn/lexer.rs
256
src/syn/lexer.rs
@@ -1,256 +0,0 @@
|
||||
use crate::syn::{error::*, span::*, token::*};
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use std::rc::Rc;
|
||||
|
||||
thread_local! {
|
||||
static LEX_PAT: Regex = RegexBuilder::new(
|
||||
r#"^(
|
||||
(?P<float>[-+]?[0-9]+\.[0-9]+([eE][+\-][0-9]+)?)
|
||||
| (?P<int>[-+]?[0-9]+)
|
||||
| (?P<assign>:[a-zA-Z_?\-*+/=.'@$%^&|~][0-9a-zA-Z_?\-*+/=.'@$%^&|~]*)
|
||||
| (?P<meta>%[a-zA-Z0-9\-_]+)
|
||||
| (?P<word>[a-zA-Z_?\-*+/=.'@$%^&|~][0-9a-zA-Z_?\-*+/=.'@$%^&|~]*)
|
||||
| (?P<lquote>\[)
|
||||
| (?P<rquote>\])
|
||||
| (?P<apply>!)
|
||||
| (?P<str>"([^"\\]|\\["'\\ntrb])*")
|
||||
)"#
|
||||
)
|
||||
.ignore_whitespace(true)
|
||||
.build()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Lexes things.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Lexer<'t> {
|
||||
source: Rc<String>,
|
||||
text: &'t str,
|
||||
start: Pos,
|
||||
end: Pos,
|
||||
}
|
||||
|
||||
impl<'t> Lexer<'t> {
|
||||
pub fn new(source: impl ToString, text: &'t str) -> Self {
|
||||
Self {
|
||||
source: Rc::new(source.to_string()),
|
||||
text,
|
||||
start: Pos::new('\0'),
|
||||
end: Pos::new('\0'),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn text(&self) -> &'t str {
|
||||
self.text
|
||||
}
|
||||
|
||||
pub fn is_eof(&self) -> bool {
|
||||
self.curr().is_none()
|
||||
}
|
||||
|
||||
pub fn curr(&self) -> Option<char> {
|
||||
if self.end.byte < self.text.as_bytes().len() {
|
||||
self.text[self.end.byte..].chars().next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn catchup(&mut self) -> Span {
|
||||
let start = std::mem::replace(&mut self.start, self.end);
|
||||
Span {
|
||||
source: Rc::clone(&self.source),
|
||||
start,
|
||||
end: self.end,
|
||||
}
|
||||
}
|
||||
|
||||
fn make_token(&mut self, token: Token) -> SpToken {
|
||||
let span = self.catchup();
|
||||
SpToken::new(span, token)
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(c) = self.curr() {
|
||||
if c.is_whitespace() {
|
||||
self.end = self.end.next(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.catchup();
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Result<Option<SpToken>> {
|
||||
self.skip_whitespace();
|
||||
if self.is_eof() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
LEX_PAT.with(|lex| {
|
||||
if let Some(cap) = lex.captures(&self.text[self.start.byte..]) {
|
||||
self.end = self.end.next_str(cap.get(0).unwrap().as_str());
|
||||
let sp_token = if let Some(_) = cap.name("assign") {
|
||||
self.make_token(Token::Assign)
|
||||
} else if let Some(_) = cap.name("meta") {
|
||||
self.make_token(Token::Meta)
|
||||
} else if let Some(_) = cap.name("word") {
|
||||
self.make_token(Token::Word)
|
||||
} else if let Some(_) = cap.name("float") {
|
||||
self.make_token(Token::Float)
|
||||
} else if let Some(_) = cap.name("int") {
|
||||
self.make_token(Token::Int)
|
||||
} else if let Some(_) = cap.name("str") {
|
||||
self.make_token(Token::Str)
|
||||
} else if let Some(_) = cap.name("lquote") {
|
||||
self.make_token(Token::LQuote)
|
||||
} else if let Some(_) = cap.name("rquote") {
|
||||
self.make_token(Token::RQuote)
|
||||
} else if let Some(_) = cap.name("apply") {
|
||||
self.make_token(Token::Apply)
|
||||
} else {
|
||||
panic!(
|
||||
"matched lex pattern, but did not catch this capture: {:?}",
|
||||
cap
|
||||
)
|
||||
};
|
||||
Ok(Some(sp_token))
|
||||
} else {
|
||||
Err(SyntaxError::ExpectedGot {
|
||||
expected: "word, literal, or quote".into(),
|
||||
got: expected_got_char(self.curr().unwrap()),
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn expected_got_char(c: char) -> String {
|
||||
format!("character {}", c.escape_debug())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
macro_rules! assert_token {
|
||||
($lexer:expr, $token:expr) => {{
|
||||
let next = $lexer.next();
|
||||
assert!(
|
||||
next.is_ok(),
|
||||
"expected {:?} token, but got this error: {:?}",
|
||||
$token,
|
||||
next.unwrap_err()
|
||||
);
|
||||
let next = next.unwrap();
|
||||
assert!(next.is_some(), "expected {:?} token, but got EOF", $token);
|
||||
let next = next.unwrap();
|
||||
assert_eq!(
|
||||
*next.inner(),
|
||||
$token,
|
||||
"expected {:?} token but got {:?} token, text {:?}",
|
||||
$token,
|
||||
next.inner(),
|
||||
next.span().text_at($lexer.text())
|
||||
);
|
||||
next
|
||||
}};
|
||||
($lexer:expr, $token:expr, $text:expr) => {{
|
||||
let next = assert_token!($lexer, $token);
|
||||
let text_got = next.text_at($lexer.text());
|
||||
assert_eq!(
|
||||
text_got, $text,
|
||||
"expected text {:?} but got {:?}",
|
||||
$text, text_got
|
||||
);
|
||||
next
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_word() {
|
||||
let mut lexer = Lexer::new("test", r"a b c d foo bar baz = == === =a ==a ===a ~= ~==");
|
||||
assert_token!(lexer, Token::Word, "a");
|
||||
assert_token!(lexer, Token::Word, "b");
|
||||
assert_token!(lexer, Token::Word, "c");
|
||||
assert_token!(lexer, Token::Word, "d");
|
||||
|
||||
assert_token!(lexer, Token::Word, "foo");
|
||||
assert_token!(lexer, Token::Word, "bar");
|
||||
assert_token!(lexer, Token::Word, "baz");
|
||||
|
||||
assert_token!(lexer, Token::Word, "=");
|
||||
assert_token!(lexer, Token::Word, "==");
|
||||
assert_token!(lexer, Token::Word, "===");
|
||||
assert_token!(lexer, Token::Word, "=a");
|
||||
assert_token!(lexer, Token::Word, "==a");
|
||||
assert_token!(lexer, Token::Word, "===a");
|
||||
assert_token!(lexer, Token::Word, "~=");
|
||||
assert_token!(lexer, Token::Word, "~==");
|
||||
|
||||
assert!(lexer.is_eof());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_assign() {
|
||||
let mut lexer = Lexer::new("test", r":a := :foo :foo-bar :foo-bar-baz :foo~bar~baz");
|
||||
assert_token!(lexer, Token::Assign, ":a");
|
||||
assert_token!(lexer, Token::Assign, ":=");
|
||||
assert_token!(lexer, Token::Assign, ":foo");
|
||||
assert_token!(lexer, Token::Assign, ":foo-bar");
|
||||
assert_token!(lexer, Token::Assign, ":foo-bar-baz");
|
||||
assert_token!(lexer, Token::Assign, ":foo~bar~baz");
|
||||
assert!(lexer.is_eof());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_meta() {
|
||||
let mut lexer = Lexer::new(
|
||||
"test",
|
||||
r"%meta %meta1 %include %include1029 %10239meta % %%",
|
||||
);
|
||||
assert_token!(lexer, Token::Meta, "%meta");
|
||||
assert_token!(lexer, Token::Meta, "%meta1");
|
||||
assert_token!(lexer, Token::Meta, "%include");
|
||||
assert_token!(lexer, Token::Meta, "%include1029");
|
||||
assert_token!(lexer, Token::Meta, "%10239meta");
|
||||
assert_token!(lexer, Token::Word, "%");
|
||||
assert_token!(lexer, Token::Word, "%%");
|
||||
assert!(lexer.is_eof());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_numbers() {
|
||||
let mut lexer = Lexer::new(
|
||||
"test",
|
||||
r"1 -12 123 -9 98 987 -987654321 1248 9764321 -1.2 2.3",
|
||||
);
|
||||
assert_token!(lexer, Token::Int, "1");
|
||||
assert_token!(lexer, Token::Int, "-12");
|
||||
assert_token!(lexer, Token::Int, "123");
|
||||
assert_token!(lexer, Token::Int, "-9");
|
||||
assert_token!(lexer, Token::Int, "98");
|
||||
assert_token!(lexer, Token::Int, "987");
|
||||
assert_token!(lexer, Token::Int, "-987654321");
|
||||
assert_token!(lexer, Token::Int, "1248");
|
||||
assert_token!(lexer, Token::Int, "9764321");
|
||||
assert_token!(lexer, Token::Float, "-1.2");
|
||||
assert_token!(lexer, Token::Float, "2.3");
|
||||
assert!(lexer.is_eof());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_quotes() {
|
||||
let mut lexer = Lexer::new("test", "[ ] ] ] ] [ [ [ [");
|
||||
assert_token!(lexer, Token::LQuote);
|
||||
assert_token!(lexer, Token::RQuote);
|
||||
assert_token!(lexer, Token::RQuote);
|
||||
assert_token!(lexer, Token::RQuote);
|
||||
assert_token!(lexer, Token::RQuote);
|
||||
assert_token!(lexer, Token::LQuote);
|
||||
assert_token!(lexer, Token::LQuote);
|
||||
assert_token!(lexer, Token::LQuote);
|
||||
assert_token!(lexer, Token::LQuote);
|
||||
assert!(lexer.is_eof());
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,3 @@
|
||||
pub mod ast;
|
||||
pub mod error;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
pub mod peg;
|
||||
pub mod span;
|
||||
pub mod token;
|
||||
|
||||
@@ -16,9 +16,9 @@ str = @{
|
||||
apply = @{ "!" }
|
||||
|
||||
assign = { ":" ~ word }
|
||||
atom = { float | int | assign | word | str }
|
||||
atom = { float | int | assign | word | str | apply }
|
||||
quote = { "[" ~ stmt* ~ "]" }
|
||||
expr = { atom | quote | apply }
|
||||
expr = { atom | quote }
|
||||
stmt = { expr }
|
||||
|
||||
file = { SOI ~ stmt* ~ EOI }
|
||||
@@ -1,347 +0,0 @@
|
||||
use crate::syn::{ast::*, error::*, lexer::*, token::*};
|
||||
|
||||
// /////////////////////////////////////////////////////////////////////////////
|
||||
// Parser
|
||||
// /////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Parser<'t> {
|
||||
lexer: Lexer<'t>,
|
||||
token: Result<Option<SpToken>>,
|
||||
}
|
||||
|
||||
impl<'t> Parser<'t> {
|
||||
pub fn new(source: impl ToString, text: &'t str) -> Self {
|
||||
let mut lexer = Lexer::new(source, text);
|
||||
let token = lexer.next();
|
||||
Self { lexer, token }
|
||||
}
|
||||
|
||||
pub fn is_eof(&self) -> bool {
|
||||
self.lexer.is_eof()
|
||||
}
|
||||
|
||||
fn peek(&self) -> Result<Option<SpToken>> {
|
||||
self.token.clone()
|
||||
}
|
||||
|
||||
fn adv(&mut self) -> Result<Option<SpToken>> {
|
||||
let next = self.lexer.next();
|
||||
std::mem::replace(&mut self.token, next)
|
||||
}
|
||||
|
||||
/// Checks if the next token is part of the list, returning it if so.
|
||||
fn expect_any_token(&mut self, expected: &[Token]) -> Result<SpToken> {
|
||||
let token = self.peek()?;
|
||||
|
||||
match (token, expected) {
|
||||
// Token matches
|
||||
(Some(token), expected) if expected.contains(token.inner()) => {
|
||||
self.adv()?;
|
||||
Ok(token)
|
||||
}
|
||||
|
||||
// Token does not match, only one token expected
|
||||
(token, &[expected]) => {
|
||||
// get the string version of whether this is a token or EOF
|
||||
let got = token
|
||||
.map(|t| format!("{} token", t.inner().name()))
|
||||
.unwrap_or_else(|| "EOF".to_string());
|
||||
Err(SyntaxError::ExpectedGot {
|
||||
expected: format!("{} token", expected.name()),
|
||||
got,
|
||||
})
|
||||
}
|
||||
|
||||
// Token does not match, any of N tokens expected
|
||||
(token, expected) => {
|
||||
// make the comma-separated list of everything except for the last item
|
||||
let expected_str = expected
|
||||
.iter()
|
||||
.take(expected.len() - 1)
|
||||
.map(Token::name)
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
// get the string version of whether this is a token or EOF
|
||||
let got = token
|
||||
.map(|t| format!("{} token", t.inner().name()))
|
||||
.unwrap_or_else(|| "EOF".to_string());
|
||||
Err(SyntaxError::ExpectedGot {
|
||||
expected: format!(
|
||||
"{} or {} token",
|
||||
expected_str,
|
||||
expected.last().unwrap().name()
|
||||
),
|
||||
got,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_stmt_list(&mut self) -> Result<Vec<SpStmt>> {
|
||||
let mut stmts = Vec::new();
|
||||
while let Some(peek) = self.peek()? {
|
||||
match peek.inner() {
|
||||
Token::RQuote => break,
|
||||
_ => stmts.push(self.next_stmt()?),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(stmts)
|
||||
}
|
||||
|
||||
pub fn next_stmt(&mut self) -> Result<SpStmt> {
|
||||
match self.peek()? {
|
||||
Some(peek) if *peek.inner() == Token::Meta => self.next_meta(),
|
||||
_ => {
|
||||
let expr = self.next_expr()?;
|
||||
let span = expr.span();
|
||||
Ok(SpStmt::new(span.clone(), Stmt::Expr(expr)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_meta(&mut self) -> Result<SpStmt> {
|
||||
let (meta_span, _token) = self.expect_any_token(&[Token::Meta])?.into_split();
|
||||
let text = meta_span.text_at(self.lexer.text());
|
||||
match text {
|
||||
"%include" => {
|
||||
// get the include location string
|
||||
let (path_span, _token) = self.expect_any_token(&[Token::Str])?.into_split();
|
||||
let path = unescape_string(path_span.text_at(self.lexer.text()));
|
||||
Ok(SpStmt::new(
|
||||
meta_span.union(&path_span),
|
||||
Stmt::Include(path),
|
||||
))
|
||||
}
|
||||
_ => {
|
||||
todo!(
|
||||
"put a warning message here for an unknown meta statement {:?}",
|
||||
text
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_expr(&mut self) -> Result<SpExpr> {
|
||||
// peek ahead and see if we need to handle a quote
|
||||
match self.peek()? {
|
||||
Some(peek) if *peek.inner() == Token::LQuote => self.next_quote(),
|
||||
_ => {
|
||||
let atom = self.next_atom()?;
|
||||
let span = atom.span();
|
||||
Ok(SpExpr::new(span.clone(), Expr::Atom(atom)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_quote(&mut self) -> Result<SpExpr> {
|
||||
let start = self.expect_any_token(&[Token::LQuote])?;
|
||||
let stmts = self.next_stmt_list()?;
|
||||
let end = self.expect_any_token(&[Token::RQuote])?;
|
||||
let span = start.span().union(end.span());
|
||||
Ok(SpExpr::new(span, Expr::Quote(stmts)))
|
||||
}
|
||||
|
||||
pub fn next_atom(&mut self) -> Result<SpAtom> {
|
||||
use Token::*;
|
||||
let token = self.expect_any_token(&[Assign, Word, Float, Int, Str, Apply])?;
|
||||
Ok(self.token_to_atom(token))
|
||||
}
|
||||
|
||||
fn token_to_atom(&self, token: SpToken) -> SpAtom {
|
||||
// NOTE - self is required for this because we get the text
|
||||
let (span, token) = token.into_split();
|
||||
let text = span.text_at(self.lexer.text());
|
||||
let atom = match token {
|
||||
Token::Assign => Atom::Assign(text[1..].to_string()),
|
||||
Token::Word => Atom::Word(text.to_string()),
|
||||
Token::Float => Atom::Float(text.parse().unwrap()),
|
||||
Token::Int => Atom::Int(text.parse().unwrap()),
|
||||
Token::Str => Atom::Str(unescape_string(text)),
|
||||
Token::Apply => Atom::Apply,
|
||||
_ => panic!("invalid token specified for token_to_atom, it should be an atom"),
|
||||
};
|
||||
SpAtom::new(span, atom)
|
||||
}
|
||||
}
|
||||
|
||||
fn unescape_string(text: &str) -> String {
|
||||
let mut string = String::with_capacity(text.len() - 2);
|
||||
let mut chars = text.chars().skip(1).take(text.len() - 2);
|
||||
while let Some(c) = chars.next() {
|
||||
if c == '\\' {
|
||||
let c = match chars
|
||||
.next()
|
||||
.expect("reached end of string literal before escape")
|
||||
{
|
||||
'"' => '"',
|
||||
'\'' => '\'',
|
||||
'\\' => '\\',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
u => panic!(
|
||||
"unexpected character escape that made it through the lexer: {:?}",
|
||||
u
|
||||
),
|
||||
};
|
||||
string.push(c);
|
||||
} else {
|
||||
string.push(c);
|
||||
}
|
||||
}
|
||||
string
|
||||
}
|
||||
|
||||
// /////////////////////////////////////////////////////////////////////////////
|
||||
// Tests
|
||||
// /////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! expect_atom {
|
||||
($parser:expr, $expected:expr) => {{
|
||||
let expr_result = $parser.next_expr();
|
||||
assert!(
|
||||
expr_result.is_ok(),
|
||||
"expected {:?} but got {:?} instead",
|
||||
$expected,
|
||||
expr_result
|
||||
);
|
||||
let expr = expr_result.unwrap();
|
||||
let (span, expr) = expr.into_split();
|
||||
assert_eq!(expr, Expr::Atom(SpAtom::new(span, $expected)));
|
||||
}};
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! expect_expr {
|
||||
($parser:expr, $expected:expr) => {{
|
||||
let expr_result = $parser.next_expr();
|
||||
assert!(
|
||||
expr_result.is_ok(),
|
||||
"expected {:?} but got {:?} instead",
|
||||
$expected,
|
||||
expr_result
|
||||
);
|
||||
let expr = expr_result.unwrap();
|
||||
assert_eq!(expr, $expected);
|
||||
}};
|
||||
}
|
||||
|
||||
/// Makes an SpExpr Quote value using the given SpExpr values
|
||||
#[cfg(test)]
|
||||
macro_rules! make_quote {
|
||||
($($expr:expr),+ $(,)?) => {{
|
||||
SpExpr::new(
|
||||
Default::default(),
|
||||
Expr::Quote(make_quote_vec!($($expr),+))
|
||||
)
|
||||
}};
|
||||
}
|
||||
|
||||
/// Makes a vec appropriate for an Expr::Quote
|
||||
#[cfg(test)]
|
||||
macro_rules! make_quote_vec {
|
||||
($($expr:expr),+ $(,)?) => {{
|
||||
vec![$(
|
||||
SpStmt::new(Default::default(), Stmt::Expr(SpExpr::new(Default::default(), $expr)))
|
||||
),+]
|
||||
}};
|
||||
}
|
||||
|
||||
/// Makes an SpAtom from an Atom type.
|
||||
#[cfg(test)]
|
||||
macro_rules! make_atom {
|
||||
($atom:expr) => {{
|
||||
SpAtom::new(Default::default(), $atom)
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parser_atoms() {
|
||||
let mut parser = Parser::new(
|
||||
"test",
|
||||
r#"
|
||||
a ab bcd dcefg foo bar baz
|
||||
1 2 3 4 5
|
||||
1.2 3.4 5.6 7.8 9.10
|
||||
"this is a string"
|
||||
"this\nis\na\nstring\nwith\nnewlines"
|
||||
"this\tis\ta\tstring\twith\ttabs"
|
||||
"#,
|
||||
);
|
||||
expect_atom!(parser, Atom::Word("a".to_string()));
|
||||
expect_atom!(parser, Atom::Word("ab".to_string()));
|
||||
expect_atom!(parser, Atom::Word("bcd".to_string()));
|
||||
expect_atom!(parser, Atom::Word("dcefg".to_string()));
|
||||
expect_atom!(parser, Atom::Word("foo".to_string()));
|
||||
expect_atom!(parser, Atom::Word("bar".to_string()));
|
||||
expect_atom!(parser, Atom::Word("baz".to_string()));
|
||||
expect_atom!(parser, Atom::Int(1));
|
||||
expect_atom!(parser, Atom::Int(2));
|
||||
expect_atom!(parser, Atom::Int(3));
|
||||
expect_atom!(parser, Atom::Int(4));
|
||||
expect_atom!(parser, Atom::Int(5));
|
||||
expect_atom!(parser, Atom::Float(1.2));
|
||||
expect_atom!(parser, Atom::Float(3.4));
|
||||
expect_atom!(parser, Atom::Float(5.6));
|
||||
expect_atom!(parser, Atom::Float(7.8));
|
||||
expect_atom!(parser, Atom::Float(9.1));
|
||||
expect_atom!(parser, Atom::Str("this is a string".to_string()));
|
||||
expect_atom!(
|
||||
parser,
|
||||
Atom::Str("this\nis\na\nstring\nwith\nnewlines".to_string())
|
||||
);
|
||||
expect_atom!(
|
||||
parser,
|
||||
Atom::Str("this\tis\ta\tstring\twith\ttabs".to_string())
|
||||
);
|
||||
assert!(parser.is_eof());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parser_quotes() {
|
||||
let mut parser = Parser::new(
|
||||
"test",
|
||||
r#"
|
||||
[
|
||||
a ab bcd dcefg foo bar baz
|
||||
]
|
||||
[1 2 3 4 5
|
||||
[1.2 3.4 5.6 7.8 9.10]
|
||||
]
|
||||
"#,
|
||||
);
|
||||
|
||||
expect_expr!(
|
||||
parser,
|
||||
make_quote![
|
||||
Expr::Atom(make_atom!(Atom::Word("a".to_string()))),
|
||||
Expr::Atom(make_atom!(Atom::Word("ab".to_string()))),
|
||||
Expr::Atom(make_atom!(Atom::Word("bcd".to_string()))),
|
||||
Expr::Atom(make_atom!(Atom::Word("dcefg".to_string()))),
|
||||
Expr::Atom(make_atom!(Atom::Word("foo".to_string()))),
|
||||
Expr::Atom(make_atom!(Atom::Word("bar".to_string()))),
|
||||
Expr::Atom(make_atom!(Atom::Word("baz".to_string()))),
|
||||
]
|
||||
);
|
||||
|
||||
expect_expr!(
|
||||
parser,
|
||||
make_quote![
|
||||
Expr::Atom(make_atom!(Atom::Int(1))),
|
||||
Expr::Atom(make_atom!(Atom::Int(2))),
|
||||
Expr::Atom(make_atom!(Atom::Int(3))),
|
||||
Expr::Atom(make_atom!(Atom::Int(4))),
|
||||
Expr::Atom(make_atom!(Atom::Int(5))),
|
||||
Expr::Quote(make_quote_vec![
|
||||
Expr::Atom(make_atom!(Atom::Float(1.2))),
|
||||
Expr::Atom(make_atom!(Atom::Float(3.4))),
|
||||
Expr::Atom(make_atom!(Atom::Float(5.6))),
|
||||
Expr::Atom(make_atom!(Atom::Float(7.8))),
|
||||
Expr::Atom(make_atom!(Atom::Float(9.10))),
|
||||
]),
|
||||
]
|
||||
);
|
||||
}
|
||||
126
src/syn/peg.rs
126
src/syn/peg.rs
@@ -1,6 +1,8 @@
|
||||
use crate::obj::prelude::{Float, Int, Str};
|
||||
use crate::syn::ast::*;
|
||||
use crate::syn::token::*;
|
||||
use crate::syn::span::*;
|
||||
use pest::{error::Error, iterators::Pair, Parser};
|
||||
use std::rc::Rc;
|
||||
|
||||
#[derive(pest_derive::Parser)]
|
||||
#[grammar = "syn/parser.pest"]
|
||||
@@ -8,46 +10,114 @@ pub struct SybilParser;
|
||||
|
||||
pub type Result<T, E = Error<Rule>> = std::result::Result<T, E>;
|
||||
|
||||
fn parse_atom(pair: Pair<Rule>) -> Result<SpAtom> {
|
||||
match pair.as_rule() {
|
||||
Rule::float => todo!(),
|
||||
Rule::int => todo!(),
|
||||
Rule::assign => todo!(),
|
||||
Rule::word => todo!(),
|
||||
Rule::str => todo!(),
|
||||
_ => unreachable!(),
|
||||
fn unescape_string(text: &str) -> Str {
|
||||
let mut string = String::with_capacity(text.len() - 2);
|
||||
let mut chars = text.chars().skip(1).take(text.len() - 2);
|
||||
while let Some(c) = chars.next() {
|
||||
if c == '\\' {
|
||||
let c = match chars
|
||||
.next()
|
||||
.expect("reached end of string literal before escape")
|
||||
{
|
||||
'"' => '"',
|
||||
'\'' => '\'',
|
||||
'\\' => '\\',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
u => panic!(
|
||||
"unexpected character escape that made it through the lexer: {:?}",
|
||||
u
|
||||
),
|
||||
};
|
||||
string.push(c);
|
||||
} else {
|
||||
string.push(c);
|
||||
}
|
||||
}
|
||||
string
|
||||
}
|
||||
|
||||
fn parse_expr(pair: Pair<Rule>) -> Result<SpExpr> {
|
||||
match pair.as_rule() {
|
||||
Rule::atom => {
|
||||
todo!()
|
||||
fn parse_atom(source: &Rc<String>, pair: Pair<Rule>) -> Result<SpAtom> {
|
||||
let pair_span = pair.as_span();
|
||||
let atom = match pair.as_rule() {
|
||||
Rule::float => {
|
||||
let float = pair.as_str().parse::<Float>().unwrap();
|
||||
Atom::Float(float)
|
||||
}
|
||||
Rule::quote => {
|
||||
todo!()
|
||||
Rule::int => {
|
||||
let int = pair.as_str().parse::<Int>().unwrap();
|
||||
Atom::Int(int)
|
||||
}
|
||||
Rule::apply => {
|
||||
todo!()
|
||||
Rule::assign => {
|
||||
let word = pair.into_inner().next().unwrap().as_str().to_string();
|
||||
Atom::Assign(word)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
Rule::word => {
|
||||
let word = pair.as_str().to_string();
|
||||
Atom::Word(word)
|
||||
}
|
||||
Rule::str => {
|
||||
let string = pair.as_str();
|
||||
Atom::Str(unescape_string(string))
|
||||
}
|
||||
Rule::apply => Atom::Apply,
|
||||
rule => unreachable!("{:?}", rule),
|
||||
};
|
||||
let span = Span {
|
||||
source: Rc::clone(source),
|
||||
start: pair_span.start(),
|
||||
end: pair_span.end(),
|
||||
};
|
||||
Ok(SpAtom::new(span, atom))
|
||||
}
|
||||
|
||||
fn parse_stmt(pair: Pair<Rule>) -> Result<SpStmt> {
|
||||
match pair.as_rule() {
|
||||
Rule::expr => {
|
||||
todo!()
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
fn parse_expr(source: &Rc<String>, pair: Pair<Rule>) -> Result<SpExpr> {
|
||||
let pair_span = pair.as_span();
|
||||
let expr = match pair.as_rule() {
|
||||
Rule::atom => Expr::Atom(parse_atom(source, pair.into_inner().next().unwrap())?),
|
||||
Rule::quote => Expr::Quote(
|
||||
pair.into_inner()
|
||||
.map(|pair| parse_stmt(source, pair.into_inner().next().unwrap()))
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
),
|
||||
rule => unreachable!("{:?}", rule),
|
||||
};
|
||||
let span = Span {
|
||||
source: Rc::clone(source),
|
||||
start: pair_span.start(),
|
||||
end: pair_span.end(),
|
||||
};
|
||||
Ok(SpExpr::new(span, expr))
|
||||
}
|
||||
|
||||
pub fn parse_file(text: &str) -> Result<Vec<SpStmt>> {
|
||||
fn parse_stmt(source: &Rc<String>, pair: Pair<Rule>) -> Result<SpStmt> {
|
||||
let pair_span = pair.as_span();
|
||||
let stmt = match pair.as_rule() {
|
||||
Rule::expr => Stmt::Expr(parse_expr(source, pair.into_inner().next().unwrap())?),
|
||||
rule => unreachable!("{:?}", rule),
|
||||
};
|
||||
let span = Span {
|
||||
source: Rc::clone(&source),
|
||||
start: pair_span.start(),
|
||||
end: pair_span.end(),
|
||||
};
|
||||
Ok(SpStmt::new(span, stmt))
|
||||
}
|
||||
|
||||
pub fn parse_file(source: impl ToString, text: &str) -> Result<Vec<SpStmt>> {
|
||||
let input = SybilParser::parse(Rule::file, text)?.next().unwrap();
|
||||
let source = Rc::new(source.to_string());
|
||||
let mut stmts = Vec::new();
|
||||
for pair in input.into_inner() {
|
||||
stmts.push(parse_stmt(pair)?);
|
||||
match pair.as_rule() {
|
||||
Rule::EOI => {}
|
||||
Rule::stmt => {
|
||||
let pair = pair.into_inner().next().unwrap();
|
||||
stmts.push(parse_stmt(&source, pair)?);
|
||||
}
|
||||
rule => unreachable!("{:?}", rule),
|
||||
}
|
||||
}
|
||||
Ok(stmts)
|
||||
}
|
||||
|
||||
123
src/syn/span.rs
123
src/syn/span.rs
@@ -1,125 +1,12 @@
|
||||
// TODO - remove this at some point.
|
||||
// I'm happy with this API design and I don't think that it should be clogging
|
||||
// up the warning lists because I'm not using a logical part of the API *at the moment*.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::cmp::{Ord, Ordering, PartialOrd};
|
||||
use std::fmt::{self, Debug, Display};
|
||||
use std::fmt::{self, Debug};
|
||||
use std::rc::Rc;
|
||||
|
||||
#[cfg_attr(not(test), derive(PartialEq))]
|
||||
#[derive(Debug, Default, Clone, Copy, Eq)]
|
||||
pub struct Pos {
|
||||
pub source: usize,
|
||||
pub line: usize,
|
||||
pub col: usize,
|
||||
pub byte: usize,
|
||||
pub c: char,
|
||||
}
|
||||
|
||||
// when testing, don't actually compare positions
|
||||
#[cfg(test)]
|
||||
impl PartialEq for Pos {
|
||||
fn eq(&self, _other: &Pos) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Pos {
|
||||
pub fn new(c: char) -> Self {
|
||||
Pos {
|
||||
source: 0,
|
||||
line: 1,
|
||||
col: 1,
|
||||
byte: 0,
|
||||
c,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next(self, c: char) -> Self {
|
||||
// catch the "pre-scan" case
|
||||
if c == '\0' {
|
||||
let mut next = self;
|
||||
next.c = c;
|
||||
return next;
|
||||
}
|
||||
|
||||
let (line, col) = if c == '\n' {
|
||||
(self.line + 1, 1)
|
||||
} else {
|
||||
(self.line, self.col + 1)
|
||||
};
|
||||
Pos {
|
||||
source: self.source + 1,
|
||||
line,
|
||||
col,
|
||||
byte: self.byte + self.c.len_utf8(),
|
||||
c,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_str(self, s: &str) -> Self {
|
||||
let mut next = self;
|
||||
for c in s.chars() {
|
||||
next = next.next(c);
|
||||
}
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Pos {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
self.source.partial_cmp(&other.source)
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Pos {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
PartialOrd::partial_cmp(self, other).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(not(test), derive(PartialEq))]
|
||||
#[derive(Debug, Default, Clone, Eq)]
|
||||
pub struct Span {
|
||||
pub source: Rc<String>,
|
||||
pub start: Pos,
|
||||
pub end: Pos,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn text_at<'t>(&self, text: &'t str) -> &'t str {
|
||||
&text[self.start.byte..self.end.byte]
|
||||
}
|
||||
|
||||
pub fn union(&self, other: &Span) -> Self {
|
||||
let start = self.start.min(other.start);
|
||||
let end = self.end.max(other.end);
|
||||
// TODO - what to do if start.source != end.source
|
||||
Span {
|
||||
source: Rc::clone(&self.source),
|
||||
start,
|
||||
end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Span {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.start.line == self.end.line {
|
||||
write!(
|
||||
fmt,
|
||||
"line {}, column {} in {}",
|
||||
self.start.line, self.start.col, self.source
|
||||
)
|
||||
} else {
|
||||
write!(
|
||||
fmt,
|
||||
"lines {}-{} in {}",
|
||||
self.start.line, self.end.line, self.source
|
||||
)
|
||||
}
|
||||
}
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -163,10 +50,6 @@ impl<T> Spanned<T> {
|
||||
pub fn into_split(self) -> (Span, T) {
|
||||
(self.span().clone(), self.into_inner())
|
||||
}
|
||||
|
||||
pub fn text_at<'t>(&self, text: &'t str) -> &'t str {
|
||||
self.span().text_at(text)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Debug> Debug for Spanned<T> {
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
use crate::syn::span::Spanned;
|
||||
|
||||
/// Token types.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Token {
|
||||
/// Assignment.
|
||||
Assign,
|
||||
|
||||
/// Word.
|
||||
Word,
|
||||
|
||||
/// Floating point number literal.
|
||||
Float,
|
||||
|
||||
/// Integer literal.
|
||||
Int,
|
||||
|
||||
/// String literal.
|
||||
Str,
|
||||
|
||||
/// Quote start.
|
||||
LQuote,
|
||||
|
||||
/// Quote end.
|
||||
RQuote,
|
||||
|
||||
/// Apply.
|
||||
Apply,
|
||||
|
||||
/// Meta
|
||||
Meta,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn name(&self) -> &'static str {
|
||||
use Token::*;
|
||||
match self {
|
||||
Assign => "assignment",
|
||||
Word => "word",
|
||||
Float => "float",
|
||||
Int => "int",
|
||||
Str => "str",
|
||||
LQuote => "quote begin",
|
||||
RQuote => "quote end",
|
||||
Apply => "apply",
|
||||
Meta => "meta",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Spanned token.
|
||||
pub type SpToken = Spanned<Token>;
|
||||
@@ -18,7 +18,7 @@ pub enum RuntimeError {
|
||||
#[error("expected {0}")]
|
||||
WrongValue(String),
|
||||
|
||||
#[error("at {0}")]
|
||||
#[error("at XXX")]
|
||||
Span(Span, Box<RuntimeError>),
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user