From f4699e5e210b4e2fff7188ab62cda22034b5d6b4 Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Fri, 11 Feb 2022 14:50:09 -0800 Subject: [PATCH] WIP: Re-implement parser in pest Parser implementation is kind of iffy. Let's try to re-implement it using pest. Signed-off-by: Alek Ratzloff --- Cargo.lock | 138 ++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + src/main.rs | 6 +- src/syn/mod.rs | 1 + src/syn/parser.pest | 24 ++++++++ src/syn/peg.rs | 53 +++++++++++++++++ 6 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 src/syn/parser.pest create mode 100644 src/syn/peg.rs diff --git a/Cargo.lock b/Cargo.lock index 872cc92..1921d0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,39 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "block-buffer" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" +dependencies = [ + "block-padding", + "byte-tools", + "byteorder", + "generic-array", +] + +[[package]] +name = "block-padding" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" +dependencies = [ + "byte-tools", +] + +[[package]] +name = "byte-tools" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + [[package]] name = "clap" version = "2.34.0" @@ -52,6 +85,21 @@ dependencies = [ "vec_map", ] +[[package]] +name = "digest" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" +dependencies = [ + "generic-array", +] + +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" + [[package]] name = "gc" version = "0.4.1" @@ -73,6 +121,15 @@ dependencies = [ "synstructure", ] +[[package]] +name = "generic-array" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd" +dependencies = [ + "typenum", +] + [[package]] name = "heck" version = "0.3.3" @@ -103,12 +160,67 @@ version = "0.2.109" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f98a04dce437184842841303488f70d0188c5f51437d2a834dc097eafa909a01" +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + [[package]] name = "memchr" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "opaque-debug" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" + +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d" +dependencies = [ + "maplit", + "pest", + "sha-1", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -168,6 +280,18 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "sha-1" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df" +dependencies = [ + "block-buffer", + "digest", + "fake-simd", + "opaque-debug", +] + [[package]] name = "strsim" version = "0.8.0" @@ -203,6 +327,8 @@ name = "sybil" version = "0.1.0" dependencies = [ "gc", + "pest", + "pest_derive", "regex", "structopt", "thiserror", @@ -260,6 +386,18 @@ dependencies = [ "syn", ] +[[package]] +name = "typenum" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + [[package]] name = "unicode-segmentation" version = "1.8.0" diff --git a/Cargo.toml b/Cargo.toml index 60235cb..af68862 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,8 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +pest = "2.1" +pest_derive = "2.1" thiserror = "1.0" structopt = "0.3" regex = "1.5" diff --git a/src/main.rs b/src/main.rs index 349e569..874a65e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,10 +5,10 @@ mod scope; mod syn; mod vm; +use pest::Parser; use std::io::Read; use std::path::PathBuf; use structopt::StructOpt; -use syn::parser::Parser; use vm::{error::RuntimeError, machine::MachineBuilder}; #[derive(Debug, StructOpt)] @@ -33,11 +33,15 @@ fn main() -> Result { (input, "".to_string()) }; + let stmts = syn::peg::parse_file(&text)?; + + /* let mut parser = Parser::new(path, text.as_str()); let mut stmts = Vec::new(); while !parser.is_eof() { stmts.extend(parser.next_stmt_list()?); } + */ let mut machine = MachineBuilder::default() .max_stack_size(opt.max_stack_size) diff --git a/src/syn/mod.rs b/src/syn/mod.rs index 63ccd89..ab122b9 100644 --- a/src/syn/mod.rs +++ b/src/syn/mod.rs @@ -2,5 +2,6 @@ pub mod ast; pub mod error; pub mod lexer; pub mod parser; +pub mod peg; pub mod span; pub mod token; diff --git a/src/syn/parser.pest b/src/syn/parser.pest new file mode 100644 index 0000000..85b2434 --- /dev/null +++ b/src/syn/parser.pest @@ -0,0 +1,24 @@ +WHITESPACE = _{ " " | "\r" | "\t" | "\n" } + +int = @{ ASCII_DIGIT+ } +float = @{ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ ~ ("e" ~ ("+" | "-")? ~ ASCII_DIGIT+)? } +word_char = @{ ASCII_ALPHA | "_" | "?" | "-" | "+" | "*" | "/" | "=" | "@" | "$" | "%" | "^" | "&" | "|" | "~" } +word = @{ word_char+ } +// meta = ${ "%" ~ word } +str = @{ + "\"" ~ + ( + (!("\"" | "\\") ~ ANY) + | "\\" ~ ("n" | "t" | "r" | "b" | "\\" | "\"" | "\'") + )* ~ + "\"" +} +apply = @{ "!" } + +assign = { ":" ~ word } +atom = { float | int | assign | word | str } +quote = { "[" ~ stmt* ~ "]" } +expr = { atom | quote | apply } +stmt = { expr } + +file = { SOI ~ stmt* ~ EOI } \ No newline at end of file diff --git a/src/syn/peg.rs b/src/syn/peg.rs new file mode 100644 index 0000000..81dc97d --- /dev/null +++ b/src/syn/peg.rs @@ -0,0 +1,53 @@ +use crate::syn::ast::*; +use crate::syn::token::*; +use pest::{error::Error, iterators::Pair, Parser}; + +#[derive(pest_derive::Parser)] +#[grammar = "syn/parser.pest"] +pub struct SybilParser; + +pub type Result> = std::result::Result; + +fn parse_atom(pair: Pair) -> Result { + match pair.as_rule() { + Rule::float => todo!(), + Rule::int => todo!(), + Rule::assign => todo!(), + Rule::word => todo!(), + Rule::str => todo!(), + _ => unreachable!(), + } +} + +fn parse_expr(pair: Pair) -> Result { + match pair.as_rule() { + Rule::atom => { + todo!() + } + Rule::quote => { + todo!() + } + Rule::apply => { + todo!() + } + _ => unreachable!(), + } +} + +fn parse_stmt(pair: Pair) -> Result { + match pair.as_rule() { + Rule::expr => { + todo!() + } + _ => unreachable!(), + } +} + +pub fn parse_file(text: &str) -> Result> { + let input = SybilParser::parse(Rule::file, text)?.next().unwrap(); + let mut stmts = Vec::new(); + for pair in input.into_inner() { + stmts.push(parse_stmt(pair)?); + } + Ok(stmts) +}