WIP: Implement peg parser using pest

PEG parser using pest is implemented. It was able to run the three
examples that we currently have so hopefully there aren't any huge
issues. There are a few warnings remaining that I will squash soon. Token,
parser, and token modules have been removed.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2022-02-11 15:58:10 -08:00
parent f4699e5e21
commit 7414c7df70
9 changed files with 105 additions and 812 deletions

View File

@@ -1,6 +1,8 @@
use crate::obj::prelude::{Float, Int, Str};
use crate::syn::ast::*;
use crate::syn::token::*;
use crate::syn::span::*;
use pest::{error::Error, iterators::Pair, Parser};
use std::rc::Rc;
#[derive(pest_derive::Parser)]
#[grammar = "syn/parser.pest"]
@@ -8,46 +10,114 @@ pub struct SybilParser;
pub type Result<T, E = Error<Rule>> = std::result::Result<T, E>;
fn parse_atom(pair: Pair<Rule>) -> Result<SpAtom> {
match pair.as_rule() {
Rule::float => todo!(),
Rule::int => todo!(),
Rule::assign => todo!(),
Rule::word => todo!(),
Rule::str => todo!(),
_ => unreachable!(),
/// Turns the raw source text of a string literal into the string it denotes.
///
/// `text` is the literal including its delimiters: the first and the last
/// character are dropped, and the backslash escapes `\"`, `\'`, `\\`, `\n`,
/// `\r` and `\t` in between are replaced by the characters they stand for.
/// Every other character is copied through unchanged.
///
/// Input shorter than two characters yields an empty string.
///
/// # Panics
///
/// Panics if a backslash is the last character of the body, or if it is
/// followed by a character other than the escapes listed above.
fn unescape_string(text: &str) -> Str {
    // Upper bound on the result size; saturating so a degenerate (too short)
    // input cannot underflow.
    let mut string = String::with_capacity(text.len().saturating_sub(2));

    // Drop the delimiters by *character*, not by byte count: mixing
    // `text.len()` (bytes) with `chars()` let the closing delimiter leak into
    // the result whenever the literal contained multi-byte characters.
    let mut chars = text.chars();
    chars.next();
    chars.next_back();

    // `while let` rather than `for`: an escape consumes a second character
    // from the same iterator inside the loop body.
    while let Some(c) = chars.next() {
        let unescaped = if c == '\\' {
            match chars
                .next()
                .expect("reached end of string literal before escape")
            {
                '"' => '"',
                '\'' => '\'',
                '\\' => '\\',
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                u => panic!(
                    "unexpected character escape that made it through the lexer: {:?}",
                    u
                ),
            }
        } else {
            c
        };
        string.push(unescaped);
    }
    string
}
fn parse_expr(pair: Pair<Rule>) -> Result<SpExpr> {
match pair.as_rule() {
Rule::atom => {
todo!()
/// Builds a spanned atom from a single pest pair.
///
/// The pair's rule selects the atom variant: `float` and `int` parse the
/// pair's text into `Float` / `Int`, `assign` takes the text of its first
/// inner pair, `word` takes the pair's own text, `str` is passed through
/// `unescape_string`, and `apply` maps to `Atom::Apply`.
///
/// The resulting span carries a clone of the `source` handle together with
/// the start and end positions reported by the pair.
///
/// # Panics
///
/// Panics if the float/int text does not parse (`unwrap`), if an `assign`
/// pair has no inner pair, or if the pair's rule is not one handled here.
fn parse_atom(source: &Rc<String>, pair: Pair<Rule>) -> Result<SpAtom> {
// Capture the span before `pair` is consumed by `into_inner` below.
let pair_span = pair.as_span();
let atom = match pair.as_rule() {
Rule::float => {
let float = pair.as_str().parse::<Float>().unwrap();
Atom::Float(float)
}
// NOTE(review): the next two lines look like removed lines of the previous
// version interleaved by the diff rendering — confirm against the real file.
Rule::quote => {
todo!()
Rule::int => {
let int = pair.as_str().parse::<Int>().unwrap();
Atom::Int(int)
}
// NOTE(review): the next two lines look like removed lines of the previous
// version interleaved by the diff rendering — confirm against the real file.
Rule::apply => {
todo!()
Rule::assign => {
// The assigned name is the text of the first inner pair.
let word = pair.into_inner().next().unwrap().as_str().to_string();
Atom::Assign(word)
}
// NOTE(review): the next two lines look like removed lines of the previous
// version interleaved by the diff rendering — confirm against the real file.
_ => unreachable!(),
}
Rule::word => {
let word = pair.as_str().to_string();
Atom::Word(word)
}
Rule::str => {
// The raw text still includes delimiters and escapes.
let string = pair.as_str();
Atom::Str(unescape_string(string))
}
Rule::apply => Atom::Apply,
rule => unreachable!("{:?}", rule),
};
let span = Span {
source: Rc::clone(source),
start: pair_span.start(),
end: pair_span.end(),
};
Ok(SpAtom::new(span, atom))
}
fn parse_stmt(pair: Pair<Rule>) -> Result<SpStmt> {
match pair.as_rule() {
Rule::expr => {
todo!()
}
_ => unreachable!(),
}
/// Builds a spanned expression from a single pest pair.
///
/// An `atom` pair is unwrapped to its first inner pair and handed to
/// `parse_atom`. A `quote` pair yields one statement per inner pair, each
/// produced by passing that pair's first inner pair to `parse_stmt`; the
/// first error stops the walk and is returned.
///
/// # Panics
///
/// Panics if an expected inner pair is missing, or if the pair's rule is
/// neither `atom` nor `quote`.
fn parse_expr(source: &Rc<String>, pair: Pair<Rule>) -> Result<SpExpr> {
    // Read the extent up front; `pair` is consumed by `into_inner` below.
    let (start, end) = {
        let extent = pair.as_span();
        (extent.start(), extent.end())
    };

    let expr = match pair.as_rule() {
        Rule::atom => {
            let inner = pair.into_inner().next().unwrap();
            Expr::Atom(parse_atom(source, inner)?)
        }
        Rule::quote => {
            let mut body = Vec::new();
            for child in pair.into_inner() {
                let inner = child.into_inner().next().unwrap();
                body.push(parse_stmt(source, inner)?);
            }
            Expr::Quote(body)
        }
        other => unreachable!("{:?}", other),
    };

    let span = Span {
        source: Rc::clone(source),
        start,
        end,
    };
    Ok(SpExpr::new(span, expr))
}
pub fn parse_file(text: &str) -> Result<Vec<SpStmt>> {
/// Builds a spanned statement from a single pest pair.
///
/// Only `expr` pairs are handled: the first inner pair is handed to
/// `parse_expr` and the result is wrapped in `Stmt::Expr`. The span carries
/// a clone of the `source` handle and the pair's start/end positions.
///
/// # Panics
///
/// Panics if the `expr` pair has no inner pair, or if the pair's rule is
/// anything other than `expr`.
fn parse_stmt(source: &Rc<String>, pair: Pair<Rule>) -> Result<SpStmt> {
    // Capture the span before `pair` is consumed by `into_inner` below.
    let pair_span = pair.as_span();
    let stmt = match pair.as_rule() {
        Rule::expr => Stmt::Expr(parse_expr(source, pair.into_inner().next().unwrap())?),
        rule => unreachable!("{:?}", rule),
    };
    let span = Span {
        // `source` is already a `&Rc<String>`; no extra borrow needed.
        source: Rc::clone(source),
        start: pair_span.start(),
        end: pair_span.end(),
    };
    Ok(SpStmt::new(span, stmt))
}
/// Parses `text` with the `file` rule and returns its statements in order.
///
/// `source` is converted to a `String` and shared through an `Rc` with every
/// span created while walking the parse tree. Each `stmt` pair contributes
/// one statement, built by `parse_stmt` from its first inner pair; the `EOI`
/// pair is ignored.
///
/// # Errors
///
/// Returns the pest error if `text` does not match the grammar, or the first
/// error reported by `parse_stmt`.
///
/// # Panics
///
/// Panics if the parse yields no top-level pair, if a `stmt` pair has no
/// inner pair, or if a top-level pair is neither `EOI` nor `stmt`.
pub fn parse_file(source: impl ToString, text: &str) -> Result<Vec<SpStmt>> {
let input = SybilParser::parse(Rule::file, text)?.next().unwrap();
let source = Rc::new(source.to_string());
let mut stmts = Vec::new();
for pair in input.into_inner() {
// NOTE(review): the next line looks like a removed line of the previous
// version interleaved by the diff rendering — confirm against the real file.
stmts.push(parse_stmt(pair)?);
match pair.as_rule() {
// End-of-input marker: nothing to collect.
Rule::EOI => {}
Rule::stmt => {
// Hand the statement's first inner pair to `parse_stmt`.
let pair = pair.into_inner().next().unwrap();
stmts.push(parse_stmt(&source, pair)?);
}
rule => unreachable!("{:?}", rule),
}
}
Ok(stmts)
}