Initial commit WIP

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2021-12-21 11:29:59 -08:00
commit 946a927b09
12 changed files with 960 additions and 0 deletions

34
src/main.rs Normal file
View File

@@ -0,0 +1,34 @@
mod object;
mod syn;
use std::io::Read;
use std::path::PathBuf;
use structopt::StructOpt;
use syn::lexer::Lexer;
// Command-line options parsed by structopt.
// (Plain `//` comments on purpose: structopt turns `///` doc comments into
// CLI help text, which would change the program's `--help` output.)
#[derive(Debug, StructOpt)]
struct Opt {
    // Input file to lex; when omitted, the program reads from stdin instead.
    #[structopt(name = "PATH", parse(from_os_str))]
    path: Option<PathBuf>,
}
// Local Result alias: defaults to `()` success and a boxed dyn Error, so
// `main` can return `Result` bare and `?` any error type.
type Result<T = (), E = Box<dyn std::error::Error>> = std::result::Result<T, E>;
/// Entry point: read source text from PATH (or stdin when no path is given),
/// lex it to completion, and print each token's debug form on its own line.
fn main() -> Result {
    let opt = Opt::from_args();
    // Pull the whole input into memory, from a file or from stdin.
    let text = match opt.path.as_ref() {
        Some(path) => std::fs::read_to_string(path)?,
        None => {
            let mut buf = String::new();
            std::io::stdin().read_to_string(&mut buf)?;
            buf
        }
    };
    // `Lexer::next` is fallible, so drive it with while-let rather than a
    // `for` loop; the first lex error aborts via `?`.
    let mut lexer = Lexer::new(&text);
    while let Some(token) = lexer.next()? {
        println!("{:?}", token);
    }
    Ok(())
}

245
src/object.rs Normal file
View File

@@ -0,0 +1,245 @@
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::rc::{Rc, Weak};
/// String scalar type backing `Value::Str`.
pub type Str = String;
/// Integer scalar type backing `Value::Int`.
pub type Int = i64;
/// Float scalar type backing `Value::Float`.
pub type Float = f64;
#[derive(Debug, Clone)]
pub enum Value {
Array(Vec<Value>),
Float(Float),
Int(Int),
Str(String),
Obj(ObjPtr),
}
/// Handle to an object stored in an `Arena`.
#[derive(Debug, Clone)]
pub struct ObjPtr {
    /// Weak back-reference to the owning arena; does not keep the arena alive.
    arena: Weak<Arena>,
    /// Index of the object's slot within the arena.
    slot: usize,
}
/// A heap object: a table of named values.
#[derive(Debug)]
pub struct Obj {
    // Maps member name -> value. NOTE(review): nothing in this file reads or
    // writes `vtable` yet; presumably the object's attribute/method table —
    // confirm as the interpreter grows.
    vtable: HashMap<String, Value>,
}
/// Slab-style allocator for `Obj` values, addressed by slot index.
#[derive(Debug)]
pub struct Arena {
    /// Free list: which slot indices are available for allocation.
    slots: Vec<SlotRange>,
    /// Set when `slots` is unsorted/unmerged and needs `compress_slots`.
    slots_dirty: bool,
    /// Live objects keyed by their slot index.
    objects: BTreeMap<usize, RefCell<Obj>>,
    /// Optional cap on the number of live objects.
    max_size: Option<usize>,
}
impl Arena {
    /// Create a new arena with an optional cap on the number of live objects.
    pub fn new(max_size: Option<usize>) -> Self {
        Arena {
            // Every slot from 0 onward starts out free.
            slots: vec![SlotRange::Open(0)],
            slots_dirty: false,
            objects: Default::default(),
            max_size,
        }
    }

    /// Compress the slots in the arena.
    ///
    /// This will:
    /// 1. Sort all slots by their starting position,
    /// 2. Merge all slots where necessary.
    ///
    /// Clears `slots_dirty`, since the free list is canonical afterwards.
    pub fn compress_slots(&mut self) {
        use SlotRange::*;
        self.slots.sort_by(|a, b| match (a, b) {
            (Range(s1, _), Range(s2, _)) => s1.cmp(s2),
            (Open(o1), Open(o2)) => o1.cmp(o2),
            // Closed ranges sort before open tails.
            (Range(_, _), Open(_)) => std::cmp::Ordering::Less,
            (Open(_), Range(_, _)) => std::cmp::Ordering::Greater,
        });
        let mut slots: Vec<SlotRange> = Vec::with_capacity(self.slots.len());
        for slot in &self.slots {
            // Drop invalid (empty) ranges such as Range(7, 6).
            if let Range(start, end) = slot {
                if start > end {
                    continue;
                }
            }
            // Merge each slot into the previous one when they touch/overlap.
            if let Some(last) = slots.last().copied() {
                if let Some(merged) = last.try_merge(slot) {
                    slots.pop();
                    slots.push(merged);
                } else {
                    slots.push(*slot);
                }
            } else {
                slots.push(*slot);
            }
        }
        self.slots = slots;
        // Fix: the original never cleared this flag, so the "compress if
        // necessary" checks could not tell a clean list from a dirty one.
        self.slots_dirty = false;
    }

    /// Allocate a slot for `obj` and store it in the arena.
    ///
    /// Returns `None` when the configured `max_size` has been reached.
    ///
    /// NOTE(review): `Rc::get_mut` returns `None` whenever any other strong
    /// *or weak* reference to the arena exists — and every `ObjPtr` holds a
    /// `Weak` — so the `expect`s below will panic on the second allocation.
    /// The arena likely wants interior mutability instead; confirm the design.
    pub fn obj_new(self: &mut Rc<Self>, obj: Obj) -> Option<ObjPtr> {
        use SlotRange::*;
        // Re-canonicalize the free list if frees have dirtied it.
        // Fix: the original condition was inverted (`!self.slots_dirty`),
        // compressing only when the list was already clean.
        if self.slots_dirty {
            let self_mut = Rc::get_mut(self).expect("could not get arena mutably from Rc pointer");
            self_mut.compress_slots();
        }
        // Enforce the optional cap on live objects.
        if self
            .max_size
            .map(|max_size| self.objects.len() >= max_size)
            .unwrap_or(false)
        {
            // TODO : return err instead of option
            // Could not allocate a new object because of configuration
            return None;
        }
        // Take the lowest free slot off the front of the free list. The list
        // always retains its trailing `Open` entry, so it is never empty.
        let self_mut = Rc::get_mut(self).expect("could not get arena mutably from Rc pointer");
        let slot = match self_mut
            .slots
            .first()
            .copied()
            .expect("arena free list unexpectedly empty")
        {
            Range(start, end) => {
                if start == end {
                    self_mut.slots.remove(0);
                } else {
                    self_mut.slots[0] = Range(start + 1, end);
                }
                start
            }
            Open(index) => {
                self_mut.slots[0] = Open(index + 1);
                index
            }
        };
        // Fix: actually store the object; the original accepted `obj` and
        // silently dropped it, leaving the arena empty.
        self_mut.objects.insert(slot, RefCell::new(obj));
        Some(ObjPtr {
            arena: Rc::downgrade(self),
            slot,
        })
    }

    /// Free the object behind `obj_ptr`, returning its slot to the free list.
    pub fn free_obj(&mut self, obj_ptr: ObjPtr) {
        // Fix: remove the stored object so it is actually dropped; the
        // original only recycled the slot index.
        self.objects.remove(&obj_ptr.slot);
        // Record the slot as free; the list is left unsorted/unmerged and is
        // compressed lazily on the next allocation.
        self.slots
            .push(SlotRange::Range(obj_ptr.slot, obj_ptr.slot));
        self.slots_dirty = true; // not my problem
    }
}
#[test]
fn test_arena_compress_slots() {
    use SlotRange::*;
    // Pairs of (input free list, expected canonical free list).
    let cases = vec![
        (vec![Range(0, 4), Range(2, 6), Open(0)], vec![Open(0)]),
        (vec![Open(7), Range(0, 4), Range(2, 6)], vec![Open(0)]),
        (
            vec![Open(8), Range(0, 4), Range(2, 6)],
            vec![Range(0, 6), Open(8)],
        ),
        (vec![Range(0, 4), Range(2, 6)], vec![Range(0, 6)]),
        (vec![Range(0, 4), Range(2, 6)], vec![Range(0, 6)]),
        (
            vec![Range(0, 1), Range(2, 2), Range(3, 4)],
            vec![Range(0, 4)],
        ),
        (
            vec![Range(0, 4), Range(3, 4), Range(2, 2)],
            vec![Range(0, 4)],
        ),
        (
            vec![Range(3, 6), Range(0, 4), Range(2, 2)],
            vec![Range(0, 6)],
        ),
        (vec![Range(0, 6), Range(6, 6)], vec![Range(0, 6)]),
        (vec![Range(7, 6), Range(6, 6)], vec![Range(6, 6)]),
    ];
    for (input, expected) in cases {
        // Build an arena directly around the input free list and compress it.
        let mut arena = Arena {
            slots: input,
            slots_dirty: true,
            objects: Default::default(),
            max_size: None,
        };
        arena.compress_slots();
        assert_eq!(arena.slots, expected);
    }
}
/// A run of free object slots in an `Arena`'s free list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SlotRange {
    /// A list of slots that are in a range, inclusive.
    Range(usize, usize),
    /// Everything from here and onward is open
    Open(usize),
}
impl SlotRange {
    /// Try to merge this range with another range.
    ///
    /// If these ranges don't overlap one another, then None is returned.
    ///
    /// If a range is open, while the other is closed, the open range will take
    /// precedent over the closed range.
    ///
    /// If both ranges are open, the smaller range is given precedent.
    pub fn try_merge(&self, other: &SlotRange) -> Option<SlotRange> {
        use SlotRange::*;
        match (self, other) {
            // Two closed ranges merge when they overlap or are adjacent, in
            // either order. Fix: the original guard only checked one direction
            // (`e1 + 1 >= s2`), so Range(6, 6).try_merge(&Range(0, 4))
            // incorrectly produced Range(0, 6); the symmetric check returns
            // None for disjoint ranges regardless of argument order, while
            // accepting every case the sorted caller (compress_slots) feeds it.
            (Range(s1, e1), Range(s2, e2)) if (e1 + 1) >= *s2 && (e2 + 1) >= *s1 => {
                let start = s1.min(s2);
                let end = e1.max(e2);
                Some(Range(*start, *end))
            }
            (Range(s, e), Open(o)) | (Open(o), Range(s, e)) => {
                if s <= o && (e + 1) >= *o {
                    // The closed range reaches (or passes) the open tail:
                    // everything from its start onward is free.
                    Some(Open(*s))
                } else if s >= o {
                    // The closed range already lies inside the open tail.
                    Some(Open(*o))
                } else {
                    None
                }
            }
            // Two open tails: the earlier one covers both.
            (Open(o1), Open(o2)) => Some(Open(*o1.min(o2))),
            // Disjoint closed ranges fall through the guarded arm above.
            _ => None,
        }
    }
}
#[test]
fn test_slot_range_merge() {
    use SlotRange::*;
    // Triples of (lhs, rhs, expected result of lhs.try_merge(&rhs)).
    let cases = vec![
        (Range(0, 4), Range(0, 5), Some(Range(0, 5))),
        (Range(1, 4), Range(0, 5), Some(Range(0, 5))),
        (Range(0, 4), Range(1, 5), Some(Range(0, 5))),
        //
        (Range(0, 5), Range(0, 3), Some(Range(0, 5))),
        (Range(0, 5), Range(1, 3), Some(Range(0, 5))),
        (Range(1, 5), Range(0, 4), Some(Range(0, 5))),
        //
        (Range(0, 4), Open(4), Some(Open(0))),
        (Range(1, 4), Open(4), Some(Open(1))),
        (Range(2, 4), Open(4), Some(Open(2))),
        //
        (Range(0, 3), Range(4, 4), Some(Range(0, 4))),
        (Range(0, 4), Range(6, 6), None),
        //
        (Range(0, 4), Range(2, 2), Some(Range(0, 4))),
    ];
    for (lhs, rhs, expected) in cases {
        let got = lhs.try_merge(&rhs);
        assert_eq!(
            got, expected,
            "expected merge of {:?} and {:?} to be {:?}",
            lhs, rhs, expected
        );
    }
}

12
src/syn/error.rs Normal file
View File

@@ -0,0 +1,12 @@
use thiserror::Error;
/// Errors produced while lexing and parsing.
#[derive(Error, Debug)]
pub enum SyntaxError {
    /// Input appeared where the grammar allowed nothing of the kind.
    #[error("unexpected {0}")]
    Unexpected(String),
    /// Input did not match what the grammar required at this point.
    #[error("expected {expected}, but got {got}")]
    ExpectedGot { expected: String, got: String },
}
/// Result alias for the syntax layer, defaulting the error to `SyntaxError`.
pub type Result<T, E = SyntaxError> = std::result::Result<T, E>;

216
src/syn/lexer.rs Normal file
View File

@@ -0,0 +1,216 @@
use crate::syn::{error::*, span::*, token::*, util::*};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
lazy_static! {
    /// Master token pattern, anchored (`^`) at the lexer's read cursor.
    ///
    /// Exactly one named group matches per token, and `Lexer::next` maps the
    /// group name to a `Token`: `word`, `float`, `int`, `lquote` (`[`),
    /// `rquote` (`]`), `colon` (`:`).
    ///
    /// NOTE(review): `Lexer::next` also probes a `str` group that is not
    /// defined here, so `Token::Str` can never be produced yet. Also, the
    /// float exponent requires an explicit sign (`[+\-]`), so `1.0e5` lexes
    /// as float `1.0` followed by word `e5` — confirm whether that's intended.
    static ref LEX_PAT: Regex = RegexBuilder::new(
        r#"^(
        (?P<word>[a-zA-Z_?\-*+/=.'@$%^&|][0-9a-zA-Z_?\-*+/=.'@$%^&|]*)
        | (?P<float>[0-9]+\.[0-9]+([eE][+\-][0-9]+)?)
        | (?P<int>[0-9]+)
        | (?P<lquote>\[)
        | (?P<rquote>\])
        | (?P<colon>:)
        )"#
    )
    // `ignore_whitespace` (x-mode) lets the pattern be laid out multi-line.
    .ignore_whitespace(true)
    .build()
    .unwrap();
}
/// Lexes things.
///
/// Two positions bracket the token currently being scanned: `start` marks
/// where it began and `end` is the read cursor.
pub struct Lexer<'t> {
    /// The full source text being lexed.
    text: &'t str,
    /// Start of the span for the token currently being scanned.
    start: Pos,
    /// Read cursor; everything before it has been consumed.
    end: Pos,
}
impl<'t> Lexer<'t> {
    /// Create a lexer over `text` with both cursors at the start.
    pub fn new(text: &'t str) -> Self {
        // Seed the positions with the first character ('\0' for empty input).
        let c = text.chars().next().unwrap_or('\0');
        Self {
            text,
            start: Pos::new(c),
            end: Pos::new(c),
        }
    }

    /// The full source text this lexer reads from.
    pub fn text(&self) -> &'t str {
        self.text
    }

    /// True once the read cursor has consumed all input.
    pub fn is_eof(&self) -> bool {
        self.curr().is_none()
    }

    /// The character at the read cursor, or `None` at end of input.
    pub fn curr(&self) -> Option<char> {
        if self.end.byte < self.text.as_bytes().len() {
            self.text[self.end.byte..].chars().next()
        } else {
            None
        }
    }

    /// Advance `start` up to `end`, returning the span that was covered.
    fn catchup(&mut self) -> Span {
        let start = std::mem::replace(&mut self.start, self.end);
        Span {
            start,
            end: self.end,
        }
    }

    /// Build a spanned token out of the text consumed since the last catchup.
    fn make_token(&mut self, token: Token) -> SpToken {
        let span = self.catchup();
        SpToken::new(span, token)
    }

    /// Skip leading whitespace and discard its span.
    fn skip_whitespace(&mut self) {
        while let Some(c) = self.curr() {
            if c.is_whitespace() {
                self.end = self.end.next(c);
            } else {
                break;
            }
        }
        self.catchup();
    }

    /// Lex the next token, returning `Ok(None)` at end of input.
    pub fn next(&mut self) -> Result<Option<SpToken>> {
        self.skip_whitespace();
        if self.is_eof() {
            return Ok(None);
        }
        if let Some(cap) = LEX_PAT.captures(&self.text[self.end.byte..]) {
            // Consume the whole match, then map whichever named group fired
            // to its token type. (`if cap.name(..).is_some()` replaces the
            // original `if let Some(_) = ..` chain — clippy's
            // `redundant_pattern_matching`.)
            self.end = self.end.next_str(cap.get(0).unwrap().as_str());
            let sp_token = if cap.name("word").is_some() {
                self.make_token(Token::Word)
            } else if cap.name("float").is_some() {
                self.make_token(Token::Float)
            } else if cap.name("int").is_some() {
                self.make_token(Token::Int)
            } else if cap.name("str").is_some() {
                // NOTE(review): LEX_PAT defines no `str` group, so this branch
                // is unreachable until string literals are added to the regex.
                self.make_token(Token::Str)
            } else if cap.name("lquote").is_some() {
                self.make_token(Token::LQuote)
            } else if cap.name("rquote").is_some() {
                self.make_token(Token::RQuote)
            } else if cap.name("colon").is_some() {
                self.make_token(Token::Colon)
            } else {
                panic!(
                    "matched lex pattern, but did not catch this capture: {:?}",
                    cap
                )
            };
            Ok(Some(sp_token))
        } else {
            // Nothing matched at the cursor: report the offending character.
            Err(SyntaxError::ExpectedGot {
                expected: "word, literal, or quote".into(),
                got: expected_got_char(self.curr().unwrap()),
            })
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    // Pulls the next token from `$lexer` and asserts that it lexed
    // successfully as `$token`; the three-argument form additionally asserts
    // that the token's span covers exactly `$text`. Expands to a block that
    // evaluates to the `SpToken`, so callers can chain further checks.
    macro_rules! assert_token {
        ($lexer:expr, $token:expr) => {{
            let next = $lexer.next();
            // `unwrap_err` below only runs in the failing (panicking) branch.
            assert!(
                next.is_ok(),
                "expected {:?} token, but got this error: {:?}",
                $token,
                next.unwrap_err()
            );
            let next = next.unwrap();
            assert!(next.is_some(), "expected {:?} token, but got EOF", $token);
            let next = next.unwrap();
            assert_eq!(
                *next.inner(),
                $token,
                "expected {:?} token but got {:?} token, text {:?}",
                $token,
                next.inner(),
                next.span().text_at($lexer.text())
            );
            next
        }};
        ($lexer:expr, $token:expr, $text:expr) => {{
            let next = assert_token!($lexer, $token);
            let text_got = next.text_at($lexer.text());
            assert_eq!(
                text_got, $text,
                "expected text {:?} but got {:?}",
                $text, text_got
            );
            next
        }};
    }
    // Plain words separated by whitespace.
    #[test]
    fn test_word() {
        let mut lexer = Lexer::new(r"a b c d foo bar baz");
        assert_token!(lexer, Token::Word, "a");
        assert_token!(lexer, Token::Word, "b");
        assert_token!(lexer, Token::Word, "c");
        assert_token!(lexer, Token::Word, "d");
        assert_token!(lexer, Token::Word, "foo");
        assert_token!(lexer, Token::Word, "bar");
        assert_token!(lexer, Token::Word, "baz");
        assert!(lexer.is_eof());
    }
    // Integer and float literals; a decimal point makes a float.
    #[test]
    fn test_numbers() {
        let mut lexer = Lexer::new(r"1 12 123 9 98 987 987654321 1248 9764321 1.2 2.3");
        assert_token!(lexer, Token::Int, "1");
        assert_token!(lexer, Token::Int, "12");
        assert_token!(lexer, Token::Int, "123");
        assert_token!(lexer, Token::Int, "9");
        assert_token!(lexer, Token::Int, "98");
        assert_token!(lexer, Token::Int, "987");
        assert_token!(lexer, Token::Int, "987654321");
        assert_token!(lexer, Token::Int, "1248");
        assert_token!(lexer, Token::Int, "9764321");
        assert_token!(lexer, Token::Float, "1.2");
        assert_token!(lexer, Token::Float, "2.3");
        assert!(lexer.is_eof());
    }
    // `[` and `]` quote delimiters, in arbitrary (unbalanced) order.
    #[test]
    fn test_quotes() {
        let mut lexer = Lexer::new("[ ] ] ] ] [ [ [ [");
        assert_token!(lexer, Token::LQuote);
        assert_token!(lexer, Token::RQuote);
        assert_token!(lexer, Token::RQuote);
        assert_token!(lexer, Token::RQuote);
        assert_token!(lexer, Token::RQuote);
        assert_token!(lexer, Token::LQuote);
        assert_token!(lexer, Token::LQuote);
        assert_token!(lexer, Token::LQuote);
        assert_token!(lexer, Token::LQuote);
        assert!(lexer.is_eof());
    }
    // Runs of colons lex as individual `Colon` tokens (1+2+3+4 = 10 total).
    #[test]
    fn test_colon() {
        let mut lexer = Lexer::new(": :: ::: ::::");
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert_token!(lexer, Token::Colon);
        assert!(lexer.is_eof());
    }
}

6
src/syn/mod.rs Normal file
View File

@@ -0,0 +1,6 @@
//! Syntax layer: lexing, parsing, and their supporting types.
pub mod error; // SyntaxError and the syn-local Result alias
pub mod lexer; // Lexer: source text -> tokens
pub mod parser; // Parser (work in progress)
pub mod span; // Pos, Span, and Spanned
pub mod token; // Token kinds and the SpToken alias
pub mod util; // error-message formatting helpers

5
src/syn/parser.rs Normal file
View File

@@ -0,0 +1,5 @@
use crate::syn::lexer::*;
/// Parser over a token stream (work in progress: no parsing implemented yet).
pub struct Parser<'t> {
    /// Lexer supplying tokens; currently unused while the parser is a stub.
    lexer: Lexer<'t>,
}

109
src/syn/span.rs Normal file
View File

@@ -0,0 +1,109 @@
use std::cmp::{Ord, Ordering, PartialOrd};
/// A cursor position in source text.
///
/// `byte` is the byte offset of the next unconsumed character; `line` and
/// `col` are 1-indexed. `c` records the character most recently consumed
/// (seeded with the input's first character before any advance).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Pos {
    /// Number of characters consumed so far.
    pub source: usize,
    /// 1-indexed line number of the cursor.
    pub line: usize,
    /// 1-indexed column number of the cursor.
    pub col: usize,
    /// Byte offset into the source text (always on a char boundary).
    pub byte: usize,
    /// The last character consumed (or the seed char before any advance).
    pub c: char,
}
impl Pos {
    /// Starting position; `c` seeds the cursor with the input's first char.
    pub fn new(c: char) -> Self {
        Pos {
            source: 0,
            line: 1,
            col: 1,
            byte: 0,
            c,
        }
    }
    /// Advance past `c`, the character sitting at `self.byte`.
    ///
    /// Fix: the original advanced using the *previously* stored `self.c`
    /// rather than the character actually being consumed, so
    /// * `byte` drifted out of sync (and could land mid-character, panicking
    ///   on later slicing) whenever adjacent characters had different UTF-8
    ///   widths, and
    /// * line/column updates lagged one character behind each newline, with
    ///   columns restarting at 0 instead of 1 after a line break.
    /// For pure-ASCII input the byte arithmetic is unchanged.
    pub fn next(self, c: char) -> Self {
        let (line, col) = if c == '\n' {
            // Consuming a newline moves the cursor to column 1 of the next line.
            (self.line + 1, 1)
        } else {
            (self.line, self.col + 1)
        };
        Pos {
            source: self.source + 1,
            line,
            col,
            byte: self.byte + c.len_utf8(),
            c,
        }
    }
    /// Advance past every character of `s` in order.
    ///
    /// `s` must be the text starting at `self.byte`.
    pub fn next_str(self, s: &str) -> Self {
        let mut next = self;
        for c in s.chars() {
            next = next.next(c);
        }
        next
    }
}
impl PartialOrd for Pos {
    // Positions are ordered by `source` (characters consumed) alone.
    //
    // NOTE(review): `PartialEq` is derived and compares *all* fields, while
    // this ordering looks only at `source`. Two positions with equal `source`
    // but different other fields would compare `Equal` here yet unequal via
    // `==`, which violates the PartialOrd/PartialEq consistency contract.
    // Harmless for positions derived from a single text — confirm intent.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        self.source.partial_cmp(&other.source)
    }
}
impl Ord for Pos {
    /// Total order on positions by `source` (characters consumed).
    fn cmp(&self, other: &Self) -> Ordering {
        // `source` is a usize with a total order; compare it directly instead
        // of round-tripping through `partial_cmp` and unwrapping.
        self.source.cmp(&other.source)
    }
}
/// A contiguous region of source text, from `start` (inclusive) up to `end`
/// (exclusive, byte-wise).
#[derive(Debug, Clone, Copy)]
pub struct Span {
    /// Position of the first character in the region.
    pub start: Pos,
    /// Position just past the last character in the region.
    pub end: Pos,
}
impl Span {
    /// Slice `text` down to the region this span covers.
    ///
    /// Panics if the span's byte offsets are out of bounds for `text` or do
    /// not fall on character boundaries — callers must pass the same text the
    /// span was produced from.
    pub fn text_at<'t>(&self, text: &'t str) -> &'t str {
        &text[self.start.byte..self.end.byte]
    }
}
/// A value of type `T` paired with the source span it was read from.
#[derive(Debug, Clone)]
pub struct Spanned<T> {
    /// Where in the source text `inner` came from.
    span: Span,
    /// The wrapped value.
    inner: T,
}
impl<T> Spanned<T> {
pub fn new(span: Span, inner: T) -> Self {
Self { span, inner }
}
pub fn span(&self) -> Span {
self.span
}
pub fn inner(&self) -> &T {
&self.inner
}
pub fn inner_mut(&mut self) -> &mut T {
&mut self.inner
}
pub fn into_inner(self) -> T {
self.inner
}
pub fn split(&self) -> (Span, &T) {
(self.span(), self.inner())
}
pub fn into_split(self) -> (Span, T) {
(self.span(), self.into_inner())
}
pub fn text_at<'t>(&self, text: &'t str) -> &'t str {
self.span().text_at(text)
}
}

29
src/syn/token.rs Normal file
View File

@@ -0,0 +1,29 @@
use crate::syn::span::Spanned;
/// Token types.
///
/// Tokens carry no text of their own; the lexeme is recovered from the
/// surrounding `Spanned` wrapper via `text_at`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token {
    /// Word.
    Word,
    /// Floating point number literal.
    Float,
    /// Integer literal.
    Int,
    /// String literal.
    ///
    /// NOTE(review): the lexer's pattern has no string rule yet, so this
    /// variant is currently never produced — confirm when strings are added.
    Str,
    /// Quote start.
    LQuote,
    /// Quote end.
    RQuote,
    /// Colon.
    Colon,
}
/// Spanned token.
pub type SpToken = Spanned<Token>;

11
src/syn/util.rs Normal file
View File

@@ -0,0 +1,11 @@
/// Describe an optionally-present character for an error message, falling
/// back to `"EOF"` when the input ran out.
pub fn got_char_or_eof(got: Option<char>) -> String {
    match got {
        Some(c) => expected_got_char(c),
        None => "EOF".to_string(),
    }
}
/// Render a character for use in an error message, escaping non-printables
/// the same way Rust's `Debug` formatting would.
pub fn expected_got_char(c: char) -> String {
    let mut out = String::from("character ");
    // `escape_debug` yields the escaped form char-by-char; append it directly.
    out.extend(c.escape_debug());
    out
}