From 946a927b095273414117706f1a603d6232ed30be Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Tue, 21 Dec 2021 11:29:59 -0800 Subject: [PATCH] Initial commit WIP Signed-off-by: Alek Ratzloff --- .gitignore | 1 + Cargo.lock | 280 ++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 12 ++ src/main.rs | 34 ++++++ src/object.rs | 245 ++++++++++++++++++++++++++++++++++++++++ src/syn/error.rs | 12 ++ src/syn/lexer.rs | 216 +++++++++++++++++++++++++++++++++++ src/syn/mod.rs | 6 + src/syn/parser.rs | 5 + src/syn/span.rs | 109 ++++++++++++++++++ src/syn/token.rs | 29 +++++ src/syn/util.rs | 11 ++ 12 files changed, 960 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/main.rs create mode 100644 src/object.rs create mode 100644 src/syn/error.rs create mode 100644 src/syn/lexer.rs create mode 100644 src/syn/mod.rs create mode 100644 src/syn/parser.rs create mode 100644 src/syn/span.rs create mode 100644 src/syn/token.rs create mode 100644 src/syn/util.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..1e6659c --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,280 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.109" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98a04dce437184842841303488f70d0188c5f51437d2a834dc097eafa909a01" + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb37d2df5df740e582f28f8560cf425f52bb267d872fe58358eadb554909f07a" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9788f4202aa75c240ecc9c15c65185e6a39ccdeb0fd5d008b98825464c87c" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sybil" +version = "0.1.0" +dependencies = [ + "lazy_static", + "regex", + "structopt", + "thiserror", +] + +[[package]] +name = "syn" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-segmentation" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" + +[[package]] +name = 
"unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1dd4d24 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "sybil" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +thiserror = "1.0" +structopt = "0.3" +regex = "1.5" +lazy_static = "1.4" \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..08c4b70 --- 
/dev/null +++ b/src/main.rs @@ -0,0 +1,34 @@ +mod object; +mod syn; + +use std::io::Read; +use std::path::PathBuf; +use structopt::StructOpt; +use syn::lexer::Lexer; + +#[derive(Debug, StructOpt)] +struct Opt { + #[structopt(name = "PATH", parse(from_os_str))] + path: Option, +} + +type Result> = std::result::Result; + +fn main() -> Result { + let opt = Opt::from_args(); + + let text = if let Some(path) = opt.path.as_ref() { + std::fs::read_to_string(path)? + } else { + let mut input = String::new(); + std::io::stdin().read_to_string(&mut input)?; + input + }; + + let mut lexer = Lexer::new(&text); + while let Some(token) = lexer.next()? { + println!("{:?}", token); + } + + Ok(()) +} diff --git a/src/object.rs b/src/object.rs new file mode 100644 index 0000000..6f16979 --- /dev/null +++ b/src/object.rs @@ -0,0 +1,245 @@ +use std::cell::RefCell; +use std::collections::{BTreeMap, HashMap, VecDeque}; +use std::rc::{Rc, Weak}; + +pub type Str = String; +pub type Int = i64; +pub type Float = f64; + +#[derive(Debug, Clone)] +pub enum Value { + Array(Vec), + Float(Float), + Int(Int), + Str(String), + Obj(ObjPtr), +} + +#[derive(Debug, Clone)] +pub struct ObjPtr { + arena: Weak, + slot: usize, +} + +#[derive(Debug)] +pub struct Obj { + vtable: HashMap, +} + +#[derive(Debug)] +pub struct Arena { + slots: Vec, + slots_dirty: bool, + objects: BTreeMap>, + max_size: Option, +} + +impl Arena { + pub fn new(max_size: Option) -> Self { + Arena { + slots: vec![SlotRange::Open(0)], + slots_dirty: false, + objects: Default::default(), + max_size, + } + } + + /// Compress the slots in the arena. + /// + /// This will: + /// 1. Sort all slots by their starting position, + /// 2. Merge all slots where necessary. 
+ pub fn compress_slots(&mut self) { + use SlotRange::*; + self.slots.sort_by(|a, b| match (a, b) { + (Range(s1, _), Range(s2, _)) => s1.cmp(s2), + (Open(o1), Open(o2)) => o1.cmp(o2), + (Range(_, _), Open(_)) => std::cmp::Ordering::Less, + (Open(_), Range(_, _)) => std::cmp::Ordering::Greater, + }); + let mut slots: Vec = Vec::with_capacity(self.slots.len()); + for slot in &self.slots { + match slot { + // Remove invalid slots + Range(start, end) if start > end => continue, + _ => {} + } + if let Some(last) = slots.last().copied() { + if let Some(merged) = last.try_merge(slot) { + slots.pop(); + slots.push(merged); + } else { + slots.push(*slot); + } + } else { + slots.push(*slot); + } + } + self.slots = slots; + } + + pub fn obj_new(self: &mut Rc, obj: Obj) -> Option { + use SlotRange::*; + + // Compress if necessary + if !self.slots_dirty { + let self_mut = Rc::get_mut(self).expect("could not get arena mutably from Rc pointer"); + self_mut.compress_slots(); + } + if self + .max_size + .map(|max_size| self.objects.len() >= max_size) + .unwrap_or(false) + { + // TODO : return err instead of option + // Could not allocate a new object because of configuration + return None; + } + + // Get the next slot + let self_mut = Rc::get_mut(self).expect("could not get arena mutably from Rc pointer"); + let slots = &mut self_mut.slots; + let slot = match slots.first().copied().unwrap() { + Range(start, end) => { + if start == end { + slots.remove(0); + } else { + slots[0] = Range(start + 1, end); + } + start + } + Open(index) => { + slots[0] = Open(index + 1); + index + } + }; + Some(ObjPtr { + arena: Rc::downgrade(self), + slot, + }) + } + + pub fn free_obj(&mut self, obj_ptr: ObjPtr) { + // Compress if necessary + if !self.slots_dirty { + self.compress_slots(); + } + self.slots + .push(SlotRange::Range(obj_ptr.slot, obj_ptr.slot)); + self.slots_dirty = true; // not my problem + } +} + +#[test] +fn test_arena_compress_slots() { + use SlotRange::*; + let tests = [ + 
/// A run of free slot indices inside an arena.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SlotRange {
    /// A list of slots that are in a range, inclusive on both ends.
    Range(usize, usize),
    /// Everything from here and onward is open.
    Open(usize),
}

impl SlotRange {
    /// Try to merge this range with another range.
    ///
    /// Two closed ranges merge when they overlap or are directly adjacent,
    /// regardless of which one is passed as `self`; otherwise `None` is
    /// returned.
    ///
    /// If a range is open while the other is closed, the open range takes
    /// precedence: the result is open, starting at the lower of the two
    /// starts, provided the closed range reaches the open one.
    ///
    /// If both ranges are open, the one with the smaller start wins.
    pub fn try_merge(&self, other: &SlotRange) -> Option<SlotRange> {
        use SlotRange::*;
        match (*self, *other) {
            (Range(s1, e1), Range(s2, e2)) => {
                // Check both directions so the answer does not depend on
                // argument order; `saturating_add` avoids overflow when a
                // range ends at `usize::MAX`.
                let touching = s2 <= e1.saturating_add(1) && s1 <= e2.saturating_add(1);
                touching.then(|| Range(s1.min(s2), e1.max(e2)))
            }
            (Range(s, e), Open(o)) | (Open(o), Range(s, e)) => {
                if s >= o {
                    // The closed range already lies inside the open one.
                    Some(Open(o))
                } else if e.saturating_add(1) >= o {
                    // The closed range reaches the open one and extends it
                    // downwards.
                    Some(Open(s))
                } else {
                    None
                }
            }
            (Open(o1), Open(o2)) => Some(Open(o1.min(o2))),
        }
    }
}

#[test]
fn test_slot_range_merge() {
    use SlotRange::*;
    let tests = [
        (Range(0, 4), Range(0, 5), Some(Range(0, 5))),
        (Range(1, 4), Range(0, 5), Some(Range(0, 5))),
        (Range(0, 4), Range(1, 5), Some(Range(0, 5))),
        //
        (Range(0, 5), Range(0, 3), Some(Range(0, 5))),
        (Range(0, 5), Range(1, 3), Some(Range(0, 5))),
        (Range(1, 5), Range(0, 4), Some(Range(0, 5))),
        //
        (Range(0, 4), Open(4), Some(Open(0))),
        (Range(1, 4), Open(4), Some(Open(1))),
        (Range(2, 4), Open(4), Some(Open(2))),
        //
        (Range(0, 3), Range(4, 4), Some(Range(0, 4))),
        (Range(0, 4), Range(6, 6), None),
        //
        (Range(0, 4), Range(2, 2), Some(Range(0, 4))),
        // Disjoint ranges must not merge in either argument order.
        (Range(6, 6), Range(0, 4), None),
        (Range(5, 6), Range(0, 1), None),
    ];

    for (a, b, expected) in tests {
        assert_eq!(
            a.try_merge(&b),
            expected,
            "expected merge of {:?} and {:?} to be {:?}",
            a,
            b,
            expected
        );
    }
}
+lazy_static! { + static ref LEX_PAT: Regex = RegexBuilder::new( + r#"^( + (?P[a-zA-Z_?\-*+/=.'@$%^&|][0-9a-zA-Z_?\-*+/=.'@$%^&|]*) + | (?P[0-9]+\.[0-9]+([eE][+\-][0-9]+)?) + | (?P[0-9]+) + | (?P\[) + | (?P\]) + | (?P:) + )"# + ) + .ignore_whitespace(true) + .build() + .unwrap(); +} + +/// Lexes things. +pub struct Lexer<'t> { + text: &'t str, + start: Pos, + end: Pos, +} + +impl<'t> Lexer<'t> { + pub fn new(text: &'t str) -> Self { + let c = text.chars().next().unwrap_or('\0'); + Self { + text, + start: Pos::new(c), + end: Pos::new(c), + } + } + + pub fn text(&self) -> &'t str { + self.text + } + + pub fn is_eof(&self) -> bool { + self.curr().is_none() + } + + pub fn curr(&self) -> Option { + if self.end.byte < self.text.as_bytes().len() { + self.text[self.end.byte..].chars().next() + } else { + None + } + } + + fn catchup(&mut self) -> Span { + let start = std::mem::replace(&mut self.start, self.end); + Span { + start, + end: self.end, + } + } + + fn make_token(&mut self, token: Token) -> SpToken { + let span = self.catchup(); + SpToken::new(span, token) + } + + fn skip_whitespace(&mut self) { + while let Some(c) = self.curr() { + if c.is_whitespace() { + self.end = self.end.next(c); + } else { + break; + } + } + self.catchup(); + } + + pub fn next(&mut self) -> Result> { + self.skip_whitespace(); + if self.is_eof() { + return Ok(None); + } + + if let Some(cap) = LEX_PAT.captures(&self.text[self.end.byte..]) { + self.end = self.end.next_str(cap.get(0).unwrap().as_str()); + let sp_token = if let Some(_) = cap.name("word") { + self.make_token(Token::Word) + } else if let Some(_) = cap.name("float") { + self.make_token(Token::Float) + } else if let Some(_) = cap.name("int") { + self.make_token(Token::Int) + } else if let Some(_) = cap.name("str") { + self.make_token(Token::Str) + } else if let Some(_) = cap.name("lquote") { + self.make_token(Token::LQuote) + } else if let Some(_) = cap.name("rquote") { + self.make_token(Token::RQuote) + } else if let Some(_) = 
cap.name("colon") { + self.make_token(Token::Colon) + } else { + panic!( + "matched lex pattern, but did not catch this capture: {:?}", + cap + ) + }; + Ok(Some(sp_token)) + } else { + Err(SyntaxError::ExpectedGot { + expected: "word, literal, or quote".into(), + got: expected_got_char(self.curr().unwrap()), + }) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + macro_rules! assert_token { + ($lexer:expr, $token:expr) => {{ + let next = $lexer.next(); + assert!( + next.is_ok(), + "expected {:?} token, but got this error: {:?}", + $token, + next.unwrap_err() + ); + let next = next.unwrap(); + assert!(next.is_some(), "expected {:?} token, but got EOF", $token); + let next = next.unwrap(); + assert_eq!( + *next.inner(), + $token, + "expected {:?} token but got {:?} token, text {:?}", + $token, + next.inner(), + next.span().text_at($lexer.text()) + ); + next + }}; + ($lexer:expr, $token:expr, $text:expr) => {{ + let next = assert_token!($lexer, $token); + let text_got = next.text_at($lexer.text()); + assert_eq!( + text_got, $text, + "expected text {:?} but got {:?}", + $text, text_got + ); + next + }}; + } + + #[test] + fn test_word() { + let mut lexer = Lexer::new(r"a b c d foo bar baz"); + assert_token!(lexer, Token::Word, "a"); + assert_token!(lexer, Token::Word, "b"); + assert_token!(lexer, Token::Word, "c"); + assert_token!(lexer, Token::Word, "d"); + + assert_token!(lexer, Token::Word, "foo"); + assert_token!(lexer, Token::Word, "bar"); + assert_token!(lexer, Token::Word, "baz"); + + assert!(lexer.is_eof()); + } + + #[test] + fn test_numbers() { + let mut lexer = Lexer::new(r"1 12 123 9 98 987 987654321 1248 9764321 1.2 2.3"); + assert_token!(lexer, Token::Int, "1"); + assert_token!(lexer, Token::Int, "12"); + assert_token!(lexer, Token::Int, "123"); + assert_token!(lexer, Token::Int, "9"); + assert_token!(lexer, Token::Int, "98"); + assert_token!(lexer, Token::Int, "987"); + assert_token!(lexer, Token::Int, "987654321"); + assert_token!(lexer, 
/// A position inside a source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Pos {
    /// Number of characters consumed to reach this position.
    pub source: usize,
    /// 1-based line number.
    pub line: usize,
    /// 1-based column number.
    pub col: usize,
    /// Byte offset into the text.
    pub byte: usize,
    /// The character most recently handed to [`Pos::new`] or [`Pos::next`].
    pub c: char,
}

impl Pos {
    /// The position at the very start of a text (line 1, column 1, byte 0).
    pub fn new(c: char) -> Self {
        Pos {
            source: 0,
            line: 1,
            col: 1,
            byte: 0,
            c,
        }
    }

    /// The position reached after consuming `c` at this position.
    ///
    /// The byte offset grows by the UTF-8 width of `c` itself, and a consumed
    /// `'\n'` moves to column 1 of the following line. Both used to be
    /// derived from the previously stored character, which put byte offsets
    /// off (possibly inside a multi-byte character) on mixed-width input and
    /// reported line breaks one character late.
    pub fn next(self, c: char) -> Self {
        let (line, col) = if c == '\n' {
            (self.line + 1, 1)
        } else {
            (self.line, self.col + 1)
        };
        Pos {
            source: self.source + 1,
            line,
            col,
            byte: self.byte + c.len_utf8(),
            c,
        }
    }

    /// The position reached after consuming every character of `s` in order.
    pub fn next_str(self, s: &str) -> Self {
        s.chars().fold(self, Pos::next)
    }
}

impl PartialOrd for Pos {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Pos {
    /// Positions are ordered by how many characters precede them.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.source.cmp(&other.source)
    }
}

/// A half-open region of a source text, from `start` up to `end`.
#[derive(Debug, Clone, Copy)]
pub struct Span {
    pub start: Pos,
    pub end: Pos,
}

impl Span {
    /// The slice of `text` covered by this span.
    ///
    /// # Panics
    ///
    /// Panics if the span's byte offsets are out of bounds for `text` or do
    /// not fall on character boundaries.
    pub fn text_at<'t>(&self, text: &'t str) -> &'t str {
        &text[self.start.byte..self.end.byte]
    }
}

/// A value paired with the span of source text it came from.
#[derive(Debug, Clone)]
pub struct Spanned<T> {
    span: Span,
    inner: T,
}

impl<T> Spanned<T> {
    /// Attach `span` to `inner`.
    pub fn new(span: Span, inner: T) -> Self {
        Self { span, inner }
    }

    /// The span this value came from.
    pub fn span(&self) -> Span {
        self.span
    }

    /// Borrow the wrapped value.
    pub fn inner(&self) -> &T {
        &self.inner
    }

    /// Mutably borrow the wrapped value.
    pub fn inner_mut(&mut self) -> &mut T {
        &mut self.inner
    }

    /// Discard the span and return the wrapped value.
    pub fn into_inner(self) -> T {
        self.inner
    }

    /// The span together with a borrow of the wrapped value.
    pub fn split(&self) -> (Span, &T) {
        (self.span, &self.inner)
    }

    /// Take the span and the wrapped value apart.
    pub fn into_split(self) -> (Span, T) {
        (self.span, self.inner)
    }

    /// The slice of `text` covered by this value's span.
    pub fn text_at<'t>(&self, text: &'t str) -> &'t str {
        self.span.text_at(text)
    }
}
/// Describe what the lexer found for use in an "expected …, but got …"
/// message: the character description, or `EOF` when there is no character.
pub fn got_char_or_eof(got: Option<char>) -> String {
    match got {
        Some(c) => expected_got_char(c),
        None => String::from("EOF"),
    }
}

/// Describe a single character, escaped the way `char::escape_debug` does so
/// control characters stay readable.
pub fn expected_got_char(c: char) -> String {
    let mut description = String::from("character ");
    description.extend(c.escape_debug());
    description
}