From 7414c7df70de226bda142c4e276c8017d21339df Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Fri, 11 Feb 2022 15:58:10 -0800 Subject: [PATCH] WIP: Implement peg parser using pest PEG parser using pest is implemented. It was able to run the three examples that we currently have so hopefully there aren't any huge issues. There are a few warnings remaining that I will squash soon. Lexer, parser, and token modules have been removed. Signed-off-by: Alek Ratzloff --- src/main.rs | 3 +- src/syn/lexer.rs | 256 -------------------------------- src/syn/mod.rs | 4 - src/syn/parser.pest | 4 +- src/syn/parser.rs | 347 -------------------------------------------- src/syn/peg.rs | 126 ++++++++++++---- src/syn/span.rs | 123 +--------------- src/syn/token.rs | 52 ------- src/vm/error.rs | 2 +- 9 files changed, 105 insertions(+), 812 deletions(-) delete mode 100644 src/syn/lexer.rs delete mode 100644 src/syn/parser.rs delete mode 100644 src/syn/token.rs diff --git a/src/main.rs b/src/main.rs index 874a65e..dc05679 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,6 @@ mod scope; mod syn; mod vm; -use pest::Parser; use std::io::Read; use std::path::PathBuf; use structopt::StructOpt; @@ -33,7 +32,7 @@ fn main() -> Result { (input, "".to_string()) }; - let stmts = syn::peg::parse_file(&text)?; + let stmts = syn::peg::parse_file(&path, &text)?; /* let mut parser = Parser::new(path, text.as_str()); diff --git a/src/syn/lexer.rs b/src/syn/lexer.rs deleted file mode 100644 index 7dc1414..0000000 --- a/src/syn/lexer.rs +++ /dev/null @@ -1,256 +0,0 @@ -use crate::syn::{error::*, span::*, token::*}; -use regex::{Regex, RegexBuilder}; -use std::rc::Rc; - -thread_local! { - static LEX_PAT: Regex = RegexBuilder::new( - r#"^( - (?P[-+]?[0-9]+\.[0-9]+([eE][+\-][0-9]+)?) - | (?P[-+]?[0-9]+) - | (?P:[a-zA-Z_?\-*+/=.'@$%^&|~][0-9a-zA-Z_?\-*+/=.'@$%^&|~]*) - | (?P%[a-zA-Z0-9\-_]+) - | (?P[a-zA-Z_?\-*+/=.'@$%^&|~][0-9a-zA-Z_?\-*+/=.'@$%^&|~]*) - | (?P\[) - | (?P\]) - | (?P!) 
- | (?P"([^"\\]|\\["'\\ntrb])*") - )"# - ) - .ignore_whitespace(true) - .build() - .unwrap(); -} - -/// Lexes things. -#[derive(Debug, Clone)] -pub struct Lexer<'t> { - source: Rc, - text: &'t str, - start: Pos, - end: Pos, -} - -impl<'t> Lexer<'t> { - pub fn new(source: impl ToString, text: &'t str) -> Self { - Self { - source: Rc::new(source.to_string()), - text, - start: Pos::new('\0'), - end: Pos::new('\0'), - } - } - - pub fn text(&self) -> &'t str { - self.text - } - - pub fn is_eof(&self) -> bool { - self.curr().is_none() - } - - pub fn curr(&self) -> Option { - if self.end.byte < self.text.as_bytes().len() { - self.text[self.end.byte..].chars().next() - } else { - None - } - } - - fn catchup(&mut self) -> Span { - let start = std::mem::replace(&mut self.start, self.end); - Span { - source: Rc::clone(&self.source), - start, - end: self.end, - } - } - - fn make_token(&mut self, token: Token) -> SpToken { - let span = self.catchup(); - SpToken::new(span, token) - } - - fn skip_whitespace(&mut self) { - while let Some(c) = self.curr() { - if c.is_whitespace() { - self.end = self.end.next(c); - } else { - break; - } - } - self.catchup(); - } - - pub fn next(&mut self) -> Result> { - self.skip_whitespace(); - if self.is_eof() { - return Ok(None); - } - - LEX_PAT.with(|lex| { - if let Some(cap) = lex.captures(&self.text[self.start.byte..]) { - self.end = self.end.next_str(cap.get(0).unwrap().as_str()); - let sp_token = if let Some(_) = cap.name("assign") { - self.make_token(Token::Assign) - } else if let Some(_) = cap.name("meta") { - self.make_token(Token::Meta) - } else if let Some(_) = cap.name("word") { - self.make_token(Token::Word) - } else if let Some(_) = cap.name("float") { - self.make_token(Token::Float) - } else if let Some(_) = cap.name("int") { - self.make_token(Token::Int) - } else if let Some(_) = cap.name("str") { - self.make_token(Token::Str) - } else if let Some(_) = cap.name("lquote") { - self.make_token(Token::LQuote) - } else if let Some(_) = 
cap.name("rquote") { - self.make_token(Token::RQuote) - } else if let Some(_) = cap.name("apply") { - self.make_token(Token::Apply) - } else { - panic!( - "matched lex pattern, but did not catch this capture: {:?}", - cap - ) - }; - Ok(Some(sp_token)) - } else { - Err(SyntaxError::ExpectedGot { - expected: "word, literal, or quote".into(), - got: expected_got_char(self.curr().unwrap()), - }) - } - }) - } -} - -fn expected_got_char(c: char) -> String { - format!("character {}", c.escape_debug()) -} - -#[cfg(test)] -mod test { - use super::*; - - macro_rules! assert_token { - ($lexer:expr, $token:expr) => {{ - let next = $lexer.next(); - assert!( - next.is_ok(), - "expected {:?} token, but got this error: {:?}", - $token, - next.unwrap_err() - ); - let next = next.unwrap(); - assert!(next.is_some(), "expected {:?} token, but got EOF", $token); - let next = next.unwrap(); - assert_eq!( - *next.inner(), - $token, - "expected {:?} token but got {:?} token, text {:?}", - $token, - next.inner(), - next.span().text_at($lexer.text()) - ); - next - }}; - ($lexer:expr, $token:expr, $text:expr) => {{ - let next = assert_token!($lexer, $token); - let text_got = next.text_at($lexer.text()); - assert_eq!( - text_got, $text, - "expected text {:?} but got {:?}", - $text, text_got - ); - next - }}; - } - - #[test] - fn test_word() { - let mut lexer = Lexer::new("test", r"a b c d foo bar baz = == === =a ==a ===a ~= ~=="); - assert_token!(lexer, Token::Word, "a"); - assert_token!(lexer, Token::Word, "b"); - assert_token!(lexer, Token::Word, "c"); - assert_token!(lexer, Token::Word, "d"); - - assert_token!(lexer, Token::Word, "foo"); - assert_token!(lexer, Token::Word, "bar"); - assert_token!(lexer, Token::Word, "baz"); - - assert_token!(lexer, Token::Word, "="); - assert_token!(lexer, Token::Word, "=="); - assert_token!(lexer, Token::Word, "==="); - assert_token!(lexer, Token::Word, "=a"); - assert_token!(lexer, Token::Word, "==a"); - assert_token!(lexer, Token::Word, "===a"); - 
assert_token!(lexer, Token::Word, "~="); - assert_token!(lexer, Token::Word, "~=="); - - assert!(lexer.is_eof()); - } - - #[test] - fn test_assign() { - let mut lexer = Lexer::new("test", r":a := :foo :foo-bar :foo-bar-baz :foo~bar~baz"); - assert_token!(lexer, Token::Assign, ":a"); - assert_token!(lexer, Token::Assign, ":="); - assert_token!(lexer, Token::Assign, ":foo"); - assert_token!(lexer, Token::Assign, ":foo-bar"); - assert_token!(lexer, Token::Assign, ":foo-bar-baz"); - assert_token!(lexer, Token::Assign, ":foo~bar~baz"); - assert!(lexer.is_eof()); - } - - #[test] - fn test_meta() { - let mut lexer = Lexer::new( - "test", - r"%meta %meta1 %include %include1029 %10239meta % %%", - ); - assert_token!(lexer, Token::Meta, "%meta"); - assert_token!(lexer, Token::Meta, "%meta1"); - assert_token!(lexer, Token::Meta, "%include"); - assert_token!(lexer, Token::Meta, "%include1029"); - assert_token!(lexer, Token::Meta, "%10239meta"); - assert_token!(lexer, Token::Word, "%"); - assert_token!(lexer, Token::Word, "%%"); - assert!(lexer.is_eof()); - } - - #[test] - fn test_numbers() { - let mut lexer = Lexer::new( - "test", - r"1 -12 123 -9 98 987 -987654321 1248 9764321 -1.2 2.3", - ); - assert_token!(lexer, Token::Int, "1"); - assert_token!(lexer, Token::Int, "-12"); - assert_token!(lexer, Token::Int, "123"); - assert_token!(lexer, Token::Int, "-9"); - assert_token!(lexer, Token::Int, "98"); - assert_token!(lexer, Token::Int, "987"); - assert_token!(lexer, Token::Int, "-987654321"); - assert_token!(lexer, Token::Int, "1248"); - assert_token!(lexer, Token::Int, "9764321"); - assert_token!(lexer, Token::Float, "-1.2"); - assert_token!(lexer, Token::Float, "2.3"); - assert!(lexer.is_eof()); - } - - #[test] - fn test_quotes() { - let mut lexer = Lexer::new("test", "[ ] ] ] ] [ [ [ ["); - assert_token!(lexer, Token::LQuote); - assert_token!(lexer, Token::RQuote); - assert_token!(lexer, Token::RQuote); - assert_token!(lexer, Token::RQuote); - assert_token!(lexer, 
Token::RQuote); - assert_token!(lexer, Token::LQuote); - assert_token!(lexer, Token::LQuote); - assert_token!(lexer, Token::LQuote); - assert_token!(lexer, Token::LQuote); - assert!(lexer.is_eof()); - } -} diff --git a/src/syn/mod.rs b/src/syn/mod.rs index ab122b9..7f9c536 100644 --- a/src/syn/mod.rs +++ b/src/syn/mod.rs @@ -1,7 +1,3 @@ pub mod ast; -pub mod error; -pub mod lexer; -pub mod parser; pub mod peg; pub mod span; -pub mod token; diff --git a/src/syn/parser.pest b/src/syn/parser.pest index 85b2434..46e6920 100644 --- a/src/syn/parser.pest +++ b/src/syn/parser.pest @@ -16,9 +16,9 @@ str = @{ apply = @{ "!" } assign = { ":" ~ word } -atom = { float | int | assign | word | str } +atom = { float | int | assign | word | str | apply } quote = { "[" ~ stmt* ~ "]" } -expr = { atom | quote | apply } +expr = { atom | quote } stmt = { expr } file = { SOI ~ stmt* ~ EOI } \ No newline at end of file diff --git a/src/syn/parser.rs b/src/syn/parser.rs deleted file mode 100644 index 86da028..0000000 --- a/src/syn/parser.rs +++ /dev/null @@ -1,347 +0,0 @@ -use crate::syn::{ast::*, error::*, lexer::*, token::*}; - -// ///////////////////////////////////////////////////////////////////////////// -// Parser -// ///////////////////////////////////////////////////////////////////////////// - -#[derive(Debug)] -pub struct Parser<'t> { - lexer: Lexer<'t>, - token: Result>, -} - -impl<'t> Parser<'t> { - pub fn new(source: impl ToString, text: &'t str) -> Self { - let mut lexer = Lexer::new(source, text); - let token = lexer.next(); - Self { lexer, token } - } - - pub fn is_eof(&self) -> bool { - self.lexer.is_eof() - } - - fn peek(&self) -> Result> { - self.token.clone() - } - - fn adv(&mut self) -> Result> { - let next = self.lexer.next(); - std::mem::replace(&mut self.token, next) - } - - /// Checks if the next token is part of the list, returning it if so. 
- fn expect_any_token(&mut self, expected: &[Token]) -> Result { - let token = self.peek()?; - - match (token, expected) { - // Token matches - (Some(token), expected) if expected.contains(token.inner()) => { - self.adv()?; - Ok(token) - } - - // Token does not match, only one token expected - (token, &[expected]) => { - // get the string version of whether this is a token or EOF - let got = token - .map(|t| format!("{} token", t.inner().name())) - .unwrap_or_else(|| "EOF".to_string()); - Err(SyntaxError::ExpectedGot { - expected: format!("{} token", expected.name()), - got, - }) - } - - // Token does not match, any of N tokens expected - (token, expected) => { - // make the comma-separated list of everything except for the last item - let expected_str = expected - .iter() - .take(expected.len() - 1) - .map(Token::name) - .collect::>() - .join(", "); - // get the string version of whether this is a token or EOF - let got = token - .map(|t| format!("{} token", t.inner().name())) - .unwrap_or_else(|| "EOF".to_string()); - Err(SyntaxError::ExpectedGot { - expected: format!( - "{} or {} token", - expected_str, - expected.last().unwrap().name() - ), - got, - }) - } - } - } - - pub fn next_stmt_list(&mut self) -> Result> { - let mut stmts = Vec::new(); - while let Some(peek) = self.peek()? { - match peek.inner() { - Token::RQuote => break, - _ => stmts.push(self.next_stmt()?), - } - } - - Ok(stmts) - } - - pub fn next_stmt(&mut self) -> Result { - match self.peek()? 
{ - Some(peek) if *peek.inner() == Token::Meta => self.next_meta(), - _ => { - let expr = self.next_expr()?; - let span = expr.span(); - Ok(SpStmt::new(span.clone(), Stmt::Expr(expr))) - } - } - } - - pub fn next_meta(&mut self) -> Result { - let (meta_span, _token) = self.expect_any_token(&[Token::Meta])?.into_split(); - let text = meta_span.text_at(self.lexer.text()); - match text { - "%include" => { - // get the include location string - let (path_span, _token) = self.expect_any_token(&[Token::Str])?.into_split(); - let path = unescape_string(path_span.text_at(self.lexer.text())); - Ok(SpStmt::new( - meta_span.union(&path_span), - Stmt::Include(path), - )) - } - _ => { - todo!( - "put a warning message here for an unknown meta statement {:?}", - text - ) - } - } - } - - pub fn next_expr(&mut self) -> Result { - // peek ahead and see if we need to handle a quote - match self.peek()? { - Some(peek) if *peek.inner() == Token::LQuote => self.next_quote(), - _ => { - let atom = self.next_atom()?; - let span = atom.span(); - Ok(SpExpr::new(span.clone(), Expr::Atom(atom))) - } - } - } - - pub fn next_quote(&mut self) -> Result { - let start = self.expect_any_token(&[Token::LQuote])?; - let stmts = self.next_stmt_list()?; - let end = self.expect_any_token(&[Token::RQuote])?; - let span = start.span().union(end.span()); - Ok(SpExpr::new(span, Expr::Quote(stmts))) - } - - pub fn next_atom(&mut self) -> Result { - use Token::*; - let token = self.expect_any_token(&[Assign, Word, Float, Int, Str, Apply])?; - Ok(self.token_to_atom(token)) - } - - fn token_to_atom(&self, token: SpToken) -> SpAtom { - // NOTE - self is required for this because we get the text - let (span, token) = token.into_split(); - let text = span.text_at(self.lexer.text()); - let atom = match token { - Token::Assign => Atom::Assign(text[1..].to_string()), - Token::Word => Atom::Word(text.to_string()), - Token::Float => Atom::Float(text.parse().unwrap()), - Token::Int => Atom::Int(text.parse().unwrap()), 
- Token::Str => Atom::Str(unescape_string(text)), - Token::Apply => Atom::Apply, - _ => panic!("invalid token specified for token_to_atom, it should be an atom"), - }; - SpAtom::new(span, atom) - } -} - -fn unescape_string(text: &str) -> String { - let mut string = String::with_capacity(text.len() - 2); - let mut chars = text.chars().skip(1).take(text.len() - 2); - while let Some(c) = chars.next() { - if c == '\\' { - let c = match chars - .next() - .expect("reached end of string literal before escape") - { - '"' => '"', - '\'' => '\'', - '\\' => '\\', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - u => panic!( - "unexpected character escape that made it through the lexer: {:?}", - u - ), - }; - string.push(c); - } else { - string.push(c); - } - } - string -} - -// ///////////////////////////////////////////////////////////////////////////// -// Tests -// ///////////////////////////////////////////////////////////////////////////// - -#[cfg(test)] -macro_rules! expect_atom { - ($parser:expr, $expected:expr) => {{ - let expr_result = $parser.next_expr(); - assert!( - expr_result.is_ok(), - "expected {:?} but got {:?} instead", - $expected, - expr_result - ); - let expr = expr_result.unwrap(); - let (span, expr) = expr.into_split(); - assert_eq!(expr, Expr::Atom(SpAtom::new(span, $expected))); - }}; -} - -#[cfg(test)] -macro_rules! expect_expr { - ($parser:expr, $expected:expr) => {{ - let expr_result = $parser.next_expr(); - assert!( - expr_result.is_ok(), - "expected {:?} but got {:?} instead", - $expected, - expr_result - ); - let expr = expr_result.unwrap(); - assert_eq!(expr, $expected); - }}; -} - -/// Makes an SpExpr Quote value using the given SpExpr values -#[cfg(test)] -macro_rules! make_quote { - ($($expr:expr),+ $(,)?) => {{ - SpExpr::new( - Default::default(), - Expr::Quote(make_quote_vec!($($expr),+)) - ) - }}; -} - -/// Makes a vec appropriate for an Expr::Quote -#[cfg(test)] -macro_rules! make_quote_vec { - ($($expr:expr),+ $(,)?) 
=> {{ - vec![$( - SpStmt::new(Default::default(), Stmt::Expr(SpExpr::new(Default::default(), $expr))) - ),+] - }}; -} - -/// Makes an SpAtom from an Atom type. -#[cfg(test)] -macro_rules! make_atom { - ($atom:expr) => {{ - SpAtom::new(Default::default(), $atom) - }}; -} - -#[test] -fn test_parser_atoms() { - let mut parser = Parser::new( - "test", - r#" - a ab bcd dcefg foo bar baz - 1 2 3 4 5 - 1.2 3.4 5.6 7.8 9.10 - "this is a string" - "this\nis\na\nstring\nwith\nnewlines" - "this\tis\ta\tstring\twith\ttabs" - "#, - ); - expect_atom!(parser, Atom::Word("a".to_string())); - expect_atom!(parser, Atom::Word("ab".to_string())); - expect_atom!(parser, Atom::Word("bcd".to_string())); - expect_atom!(parser, Atom::Word("dcefg".to_string())); - expect_atom!(parser, Atom::Word("foo".to_string())); - expect_atom!(parser, Atom::Word("bar".to_string())); - expect_atom!(parser, Atom::Word("baz".to_string())); - expect_atom!(parser, Atom::Int(1)); - expect_atom!(parser, Atom::Int(2)); - expect_atom!(parser, Atom::Int(3)); - expect_atom!(parser, Atom::Int(4)); - expect_atom!(parser, Atom::Int(5)); - expect_atom!(parser, Atom::Float(1.2)); - expect_atom!(parser, Atom::Float(3.4)); - expect_atom!(parser, Atom::Float(5.6)); - expect_atom!(parser, Atom::Float(7.8)); - expect_atom!(parser, Atom::Float(9.1)); - expect_atom!(parser, Atom::Str("this is a string".to_string())); - expect_atom!( - parser, - Atom::Str("this\nis\na\nstring\nwith\nnewlines".to_string()) - ); - expect_atom!( - parser, - Atom::Str("this\tis\ta\tstring\twith\ttabs".to_string()) - ); - assert!(parser.is_eof()); -} - -#[test] -fn test_parser_quotes() { - let mut parser = Parser::new( - "test", - r#" - [ - a ab bcd dcefg foo bar baz - ] - [1 2 3 4 5 - [1.2 3.4 5.6 7.8 9.10] - ] - "#, - ); - - expect_expr!( - parser, - make_quote![ - Expr::Atom(make_atom!(Atom::Word("a".to_string()))), - Expr::Atom(make_atom!(Atom::Word("ab".to_string()))), - Expr::Atom(make_atom!(Atom::Word("bcd".to_string()))), - 
Expr::Atom(make_atom!(Atom::Word("dcefg".to_string()))), - Expr::Atom(make_atom!(Atom::Word("foo".to_string()))), - Expr::Atom(make_atom!(Atom::Word("bar".to_string()))), - Expr::Atom(make_atom!(Atom::Word("baz".to_string()))), - ] - ); - - expect_expr!( - parser, - make_quote![ - Expr::Atom(make_atom!(Atom::Int(1))), - Expr::Atom(make_atom!(Atom::Int(2))), - Expr::Atom(make_atom!(Atom::Int(3))), - Expr::Atom(make_atom!(Atom::Int(4))), - Expr::Atom(make_atom!(Atom::Int(5))), - Expr::Quote(make_quote_vec![ - Expr::Atom(make_atom!(Atom::Float(1.2))), - Expr::Atom(make_atom!(Atom::Float(3.4))), - Expr::Atom(make_atom!(Atom::Float(5.6))), - Expr::Atom(make_atom!(Atom::Float(7.8))), - Expr::Atom(make_atom!(Atom::Float(9.10))), - ]), - ] - ); -} diff --git a/src/syn/peg.rs b/src/syn/peg.rs index 81dc97d..2a06217 100644 --- a/src/syn/peg.rs +++ b/src/syn/peg.rs @@ -1,6 +1,8 @@ +use crate::obj::prelude::{Float, Int, Str}; use crate::syn::ast::*; -use crate::syn::token::*; +use crate::syn::span::*; use pest::{error::Error, iterators::Pair, Parser}; +use std::rc::Rc; #[derive(pest_derive::Parser)] #[grammar = "syn/parser.pest"] @@ -8,46 +10,114 @@ pub struct SybilParser; pub type Result> = std::result::Result; -fn parse_atom(pair: Pair) -> Result { - match pair.as_rule() { - Rule::float => todo!(), - Rule::int => todo!(), - Rule::assign => todo!(), - Rule::word => todo!(), - Rule::str => todo!(), - _ => unreachable!(), +fn unescape_string(text: &str) -> Str { + let mut string = String::with_capacity(text.len() - 2); + let mut chars = text.chars().skip(1).take(text.len() - 2); + while let Some(c) = chars.next() { + if c == '\\' { + let c = match chars + .next() + .expect("reached end of string literal before escape") + { + '"' => '"', + '\'' => '\'', + '\\' => '\\', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + u => panic!( + "unexpected character escape that made it through the lexer: {:?}", + u + ), + }; + string.push(c); + } else { + string.push(c); + } } + string } -fn 
parse_expr(pair: Pair) -> Result { - match pair.as_rule() { - Rule::atom => { - todo!() +fn parse_atom(source: &Rc, pair: Pair) -> Result { + let pair_span = pair.as_span(); + let atom = match pair.as_rule() { + Rule::float => { + let float = pair.as_str().parse::().unwrap(); + Atom::Float(float) } - Rule::quote => { - todo!() + Rule::int => { + let int = pair.as_str().parse::().unwrap(); + Atom::Int(int) } - Rule::apply => { - todo!() + Rule::assign => { + let word = pair.into_inner().next().unwrap().as_str().to_string(); + Atom::Assign(word) } - _ => unreachable!(), - } + Rule::word => { + let word = pair.as_str().to_string(); + Atom::Word(word) + } + Rule::str => { + let string = pair.as_str(); + Atom::Str(unescape_string(string)) + } + Rule::apply => Atom::Apply, + rule => unreachable!("{:?}", rule), + }; + let span = Span { + source: Rc::clone(source), + start: pair_span.start(), + end: pair_span.end(), + }; + Ok(SpAtom::new(span, atom)) } -fn parse_stmt(pair: Pair) -> Result { - match pair.as_rule() { - Rule::expr => { - todo!() - } - _ => unreachable!(), - } +fn parse_expr(source: &Rc, pair: Pair) -> Result { + let pair_span = pair.as_span(); + let expr = match pair.as_rule() { + Rule::atom => Expr::Atom(parse_atom(source, pair.into_inner().next().unwrap())?), + Rule::quote => Expr::Quote( + pair.into_inner() + .map(|pair| parse_stmt(source, pair.into_inner().next().unwrap())) + .collect::>>()?, + ), + rule => unreachable!("{:?}", rule), + }; + let span = Span { + source: Rc::clone(source), + start: pair_span.start(), + end: pair_span.end(), + }; + Ok(SpExpr::new(span, expr)) } -pub fn parse_file(text: &str) -> Result> { +fn parse_stmt(source: &Rc, pair: Pair) -> Result { + let pair_span = pair.as_span(); + let stmt = match pair.as_rule() { + Rule::expr => Stmt::Expr(parse_expr(source, pair.into_inner().next().unwrap())?), + rule => unreachable!("{:?}", rule), + }; + let span = Span { + source: Rc::clone(&source), + start: pair_span.start(), + end: 
pair_span.end(), + }; + Ok(SpStmt::new(span, stmt)) +} + +pub fn parse_file(source: impl ToString, text: &str) -> Result> { let input = SybilParser::parse(Rule::file, text)?.next().unwrap(); + let source = Rc::new(source.to_string()); let mut stmts = Vec::new(); for pair in input.into_inner() { - stmts.push(parse_stmt(pair)?); + match pair.as_rule() { + Rule::EOI => {} + Rule::stmt => { + let pair = pair.into_inner().next().unwrap(); + stmts.push(parse_stmt(&source, pair)?); + } + rule => unreachable!("{:?}", rule), + } } Ok(stmts) } diff --git a/src/syn/span.rs b/src/syn/span.rs index 8466be4..251b1ce 100644 --- a/src/syn/span.rs +++ b/src/syn/span.rs @@ -1,125 +1,12 @@ -// TODO - remove this at some point. -// I'm happy with this API design and I don't think that it should be clogging -// up the warning lists because I'm not using a logical part of the API *at the moment*. -#![allow(dead_code)] - -use std::cmp::{Ord, Ordering, PartialOrd}; -use std::fmt::{self, Debug, Display}; +use std::fmt::{self, Debug}; use std::rc::Rc; -#[cfg_attr(not(test), derive(PartialEq))] -#[derive(Debug, Default, Clone, Copy, Eq)] -pub struct Pos { - pub source: usize, - pub line: usize, - pub col: usize, - pub byte: usize, - pub c: char, -} - -// when testing, don't actually compare positions -#[cfg(test)] -impl PartialEq for Pos { - fn eq(&self, _other: &Pos) -> bool { - true - } -} - -impl Pos { - pub fn new(c: char) -> Self { - Pos { - source: 0, - line: 1, - col: 1, - byte: 0, - c, - } - } - - pub fn next(self, c: char) -> Self { - // catch the "pre-scan" case - if c == '\0' { - let mut next = self; - next.c = c; - return next; - } - - let (line, col) = if c == '\n' { - (self.line + 1, 1) - } else { - (self.line, self.col + 1) - }; - Pos { - source: self.source + 1, - line, - col, - byte: self.byte + self.c.len_utf8(), - c, - } - } - - pub fn next_str(self, s: &str) -> Self { - let mut next = self; - for c in s.chars() { - next = next.next(c); - } - next - } -} - -impl PartialOrd 
for Pos { - fn partial_cmp(&self, other: &Self) -> Option { - self.source.partial_cmp(&other.source) - } -} - -impl Ord for Pos { - fn cmp(&self, other: &Self) -> Ordering { - PartialOrd::partial_cmp(self, other).unwrap() - } -} - #[cfg_attr(not(test), derive(PartialEq))] #[derive(Debug, Default, Clone, Eq)] pub struct Span { pub source: Rc, - pub start: Pos, - pub end: Pos, -} - -impl Span { - pub fn text_at<'t>(&self, text: &'t str) -> &'t str { - &text[self.start.byte..self.end.byte] - } - - pub fn union(&self, other: &Span) -> Self { - let start = self.start.min(other.start); - let end = self.end.max(other.end); - // TODO - what to do if start.source != end.source - Span { - source: Rc::clone(&self.source), - start, - end, - } - } -} - -impl Display for Span { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - if self.start.line == self.end.line { - write!( - fmt, - "line {}, column {} in {}", - self.start.line, self.start.col, self.source - ) - } else { - write!( - fmt, - "lines {}-{} in {}", - self.start.line, self.end.line, self.source - ) - } - } + pub start: usize, + pub end: usize, } #[cfg(test)] @@ -163,10 +50,6 @@ impl Spanned { pub fn into_split(self) -> (Span, T) { (self.span().clone(), self.into_inner()) } - - pub fn text_at<'t>(&self, text: &'t str) -> &'t str { - self.span().text_at(text) - } } impl Debug for Spanned { diff --git a/src/syn/token.rs b/src/syn/token.rs deleted file mode 100644 index b9df218..0000000 --- a/src/syn/token.rs +++ /dev/null @@ -1,52 +0,0 @@ -use crate::syn::span::Spanned; - -/// Token types. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Token { - /// Assignment. - Assign, - - /// Word. - Word, - - /// Floating point number literal. - Float, - - /// Integer literal. - Int, - - /// String literal. - Str, - - /// Quote start. - LQuote, - - /// Quote end. - RQuote, - - /// Apply. 
- Apply, - - /// Meta - Meta, -} - -impl Token { - pub fn name(&self) -> &'static str { - use Token::*; - match self { - Assign => "assignment", - Word => "word", - Float => "float", - Int => "int", - Str => "str", - LQuote => "quote begin", - RQuote => "quote end", - Apply => "apply", - Meta => "meta", - } - } -} - -/// Spanned token. -pub type SpToken = Spanned; diff --git a/src/vm/error.rs b/src/vm/error.rs index 264c8ab..b256dcc 100644 --- a/src/vm/error.rs +++ b/src/vm/error.rs @@ -18,7 +18,7 @@ pub enum RuntimeError { #[error("expected {0}")] WrongValue(String), - #[error("at {0}")] + #[error("at XXX")] Span(Span, Box), }