From 16f3dc960c6d4ade17c97b117e5ce14487c14074 Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Fri, 20 Sep 2024 16:04:30 -0700 Subject: [PATCH] Base initial commit Still WIP, working on object system still, which in Rust, makes me want to kill myself Signed-off-by: Alek Ratzloff --- Cargo.lock | 272 ++++++++++++ Cargo.toml | 10 + src/ast.rs | 289 ++++++++++++ src/builtins.rs | 13 + src/compiler.rs | 988 +++++++++++++++++++++++++++++++++++++++++ src/disassemble.rs | 160 +++++++ src/main.rs | 70 +++ src/obj.rs | 1059 ++++++++++++++++++++++++++++++++++++++++++++ src/parser.rs | 801 +++++++++++++++++++++++++++++++++ src/token.rs | 67 +++ src/vm.rs | 350 +++++++++++++++ 11 files changed, 4079 insertions(+) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/ast.rs create mode 100644 src/builtins.rs create mode 100644 src/compiler.rs create mode 100644 src/disassemble.rs create mode 100644 src/main.rs create mode 100644 src/obj.rs create mode 100644 src/parser.rs create mode 100644 src/token.rs create mode 100644 src/vm.rs diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..39e863c --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,272 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + +[[package]] +name = "clap" +version = "4.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73" +dependencies = [ + "anstream", + "anstyle", + 
"clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + +[[package]] +name = "common_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6d59c71e7dc3af60f0af9db32364d96a16e9310f3f5db2b55ed642162dd35" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "not-python-rs" +version = "0.1.0" +dependencies = [ + "assert_matches", + "clap", + "common_macros", + "thiserror", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name 
= "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..36c1228 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "not-python-rs" +version = "0.1.0" +edition = "2021" + +[dependencies] +assert_matches = "1.5.0" +clap = { version = "4.5.8", features = ["derive"] } +common_macros = "0.1.1" +thiserror = 
"1.0.63" diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..e0ee845 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,289 @@ +// This is an auto-generated file. Any changes made to this file may be overwritten. +// This file was created at: 2024-09-18 09:28:21 +#![allow(dead_code)] +use std::fmt::Debug; +use std::any::Any; + +use crate::token::Token; + +pub trait ExprVisitor { + fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<(), Box>; + fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<(), Box>; + fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<(), Box>; + fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<(), Box>; + fn visit_primary_expr(&mut self, expr: &PrimaryExpr) -> Result<(), Box>; + fn visit_function_expr(&mut self, expr: &FunctionExpr) -> Result<(), Box>; +} + +pub trait StmtVisitor { + fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<(), Box>; + fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<(), Box>; + fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<(), Box>; + fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<(), Box>; + fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<(), Box>; + fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<(), Box>; +} + +pub trait Expr: Debug + Any { + fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box>; + fn as_any(self: Box) -> Box; + fn as_any_ref(&self) -> &dyn Any; +} + +pub type ExprP = Box; + +#[derive(Debug)] +pub struct BinaryExpr { + pub lhs: ExprP, + pub op: Token, + pub rhs: ExprP, +} + +impl Expr for BinaryExpr { + fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box>{ + visitor.visit_binary_expr(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct UnaryExpr { + pub op: Token, + pub expr: ExprP, +} + +impl Expr for UnaryExpr { + fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box>{ + 
visitor.visit_unary_expr(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct CallExpr { + pub expr: ExprP, + pub args: Vec, + pub rparen: Token, +} + +impl Expr for CallExpr { + fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box>{ + visitor.visit_call_expr(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct GetExpr { + pub expr: ExprP, + pub name: Token, +} + +impl Expr for GetExpr { + fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box>{ + visitor.visit_get_expr(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct PrimaryExpr { + pub token: Token, +} + +impl Expr for PrimaryExpr { + fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box>{ + visitor.visit_primary_expr(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct FunctionExpr { + pub lparen: Token, + pub params: Vec<(Token , Option)>, + pub return_type: Option, + pub body: Vec, + pub rbrace: Token, +} + +impl Expr for FunctionExpr { + fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box>{ + visitor.visit_function_expr(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +pub trait Stmt: Debug + Any { + fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box>; + fn as_any(self: Box) -> Box; + fn as_any_ref(&self) -> &dyn Any; +} + +pub type StmtP = Box; + +#[derive(Debug)] +pub struct ExprStmt { + pub expr: ExprP, +} + +impl Stmt for ExprStmt { + fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box>{ + visitor.visit_expr_stmt(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + 
+#[derive(Debug)] +pub struct AssignStmt { + pub lhs: Token, + pub rhs: ExprP, +} + +impl Stmt for AssignStmt { + fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box>{ + visitor.visit_assign_stmt(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct SetStmt { + pub expr: ExprP, + pub name: Token, + pub rhs: ExprP, +} + +impl Stmt for SetStmt { + fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box>{ + visitor.visit_set_stmt(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct BlockStmt { + pub lbrace: Token, + pub stmts: Vec, + pub rbrace: Token, +} + +impl Stmt for BlockStmt { + fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box>{ + visitor.visit_block_stmt(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct ReturnStmt { + pub return_kw: Token, + pub expr: Option, +} + +impl Stmt for ReturnStmt { + fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box>{ + visitor.visit_return_stmt(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + +#[derive(Debug)] +pub struct IfStmt { + pub if_kw: Token, + pub condition: ExprP, + pub then_branch: BlockStmt, + pub else_branch: Vec, +} + +impl Stmt for IfStmt { + fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box>{ + visitor.visit_if_stmt(self) + } + + fn as_any(self: Box) -> Box { + self + } + + fn as_any_ref(&self) -> &dyn Any { + self + } +} + diff --git a/src/builtins.rs b/src/builtins.rs new file mode 100644 index 0000000..76bdf40 --- /dev/null +++ b/src/builtins.rs @@ -0,0 +1,13 @@ +//! Builtin functions. 
+use crate::obj::{NilInst, ObjP}; +use crate::vm::Vm; + +pub(crate) fn println(_vm: &mut Vm, args: Vec) -> ObjP { + println!("{}", args[0].try_read().unwrap()); + NilInst::create() +} + +pub(crate) fn print(_vm: &mut Vm, args: Vec) -> ObjP { + print!("{}", args[0].try_read().unwrap()); + NilInst::create() +} diff --git a/src/compiler.rs b/src/compiler.rs new file mode 100644 index 0000000..a893804 --- /dev/null +++ b/src/compiler.rs @@ -0,0 +1,988 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::{self, Display}; +use std::sync::{Arc, LazyLock}; + +use assert_matches::assert_matches; +use common_macros::hash_map; +use thiserror::Error; + +use crate::ast::*; +use crate::obj::*; +use crate::token::TokenKind; +use crate::vm::*; + +pub type Result = std::result::Result>; + +//////////////////////////////////////////////////////////////////////////////// +// LineNumber visitor +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Default)] +struct LineNumber { + lock_start: bool, + start: usize, + end: usize, +} + +impl LineNumber { + fn update_start(&mut self, start: usize) { + if !self.lock_start { + self.start = start; + self.lock_start = true; + } + } + + fn update_end(&mut self, end: usize) { + self.end = end; + } +} + +impl StmtVisitor for LineNumber { + fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> { + stmt.expr.accept(self).unwrap(); + Ok(()) + } + + fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> { + self.update_start(stmt.lhs.line); + stmt.rhs.accept(self).unwrap(); + Ok(()) + } + + fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> { + stmt.expr.accept(self).unwrap(); + stmt.rhs.accept(self).unwrap(); + Ok(()) + } + + fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<()> { + self.update_start(stmt.lbrace.line); + self.update_end(stmt.rbrace.line); + Ok(()) + } + fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<()> { + 
self.update_start(stmt.return_kw.line); + self.update_end(stmt.return_kw.line); + if let Some(expr) = stmt.expr.as_ref() { + expr.accept(self).unwrap(); + } + Ok(()) + } + fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<()> { + self.update_start(stmt.if_kw.line); + stmt.condition.accept(self).unwrap(); + stmt.then_branch.accept(self).unwrap(); + for stmt in &stmt.else_branch { + stmt.accept(self).unwrap(); + } + Ok(()) + } +} + +impl ExprVisitor for LineNumber { + fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> { + expr.lhs.accept(self).unwrap(); + expr.rhs.accept(self).unwrap(); + Ok(()) + } + + fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<()> { + self.update_start(expr.op.line); + expr.expr.accept(self).unwrap(); + Ok(()) + } + + fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<()> { + expr.expr.accept(self).unwrap(); + self.update_end(expr.rparen.line); + Ok(()) + } + + fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> { + expr.expr.accept(self).unwrap(); + self.update_end(expr.name.line); + Ok(()) + } + + fn visit_primary_expr(&mut self, expr: &PrimaryExpr) -> Result<()> { + self.update_start(expr.token.line); + self.update_end(expr.token.line); + Ok(()) + } + + fn visit_function_expr(&mut self, expr: &FunctionExpr) -> Result<()> { + self.update_start(expr.lparen.line); + self.update_end(expr.rbrace.line); + Ok(()) + } +} + +fn expr_line_number(expr: &dyn Expr) -> LineRange { + let mut line_number = LineNumber::default(); + expr.accept(&mut line_number).unwrap(); + (line_number.start, line_number.end) +} + +fn stmt_line_number(stmt: &dyn Stmt) -> LineRange { + let mut line_number = LineNumber::default(); + stmt.accept(&mut line_number).unwrap(); + (line_number.start, line_number.end) +} + +//////////////////////////////////////////////////////////////////////////////// +// LocalAssignCollector and LocalNameCollector +//////////////////////////////////////////////////////////////////////////////// + +// TODO - 
reduce copy/paste stuff here? + +#[derive(Default)] +struct LocalAssignCollector { + names: HashSet, +} + +impl LocalAssignCollector { + fn collect(body: &Vec) -> HashSet { + let mut collector = Self::default(); + for stmt in body { + stmt.accept(&mut collector).unwrap(); + } + collector.names + } +} + +impl StmtVisitor for LocalAssignCollector { + fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> { + stmt.expr.accept(self)?; + Ok(()) + } + + fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> { + self.names.insert(stmt.lhs.text.to_string()); + Ok(()) + } + + fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> { + stmt.expr.accept(self)?; + stmt.rhs.accept(self)?; + Ok(()) + } + + fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<()> { + // we visit the block statement because even though it goes below the current "local" + // scope, we're ultimately trying to get a list of ALL local names that are assigned to in + // this scope. + // FIXME BUG this does create some weirdness, for example take this: + // outer_function = () { + // some_value = 1234 + // inner_function = () { + // { + // # this is a local value because we're assigning to it + // some_value = 5678 + // } + // # our local named "some_value" has gone out of scope, so hypothetically we + // # should be using the "some_value" that was defined in the scope above us. + // # however, since we're collecting local assignments in all blocks, this should + // # error out as "unknown local 'some_value'" + // println(some_value) + // } + // return inner_function + // } + // + // Ideally, we would be checking nonlocals with every new scope layer, and every new block. + // This is a pretty tough bug to solve with how things are set up right now. not sure how + // we'll go about solving this one. 
+ for stmt in &stmt.stmts { + stmt.accept(self)?; + } + Ok(()) + } + + fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<()> { + if let Some(expr) = stmt.expr.as_ref() { + expr.accept(self)?; + } + Ok(()) + } + + fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<()> { + stmt.condition.accept(self)?; + stmt.then_branch.accept(self)?; + for stmt in &stmt.else_branch { + stmt.accept(self)?; + } + Ok(()) + } +} + +impl ExprVisitor for LocalAssignCollector { + fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> { + expr.lhs.accept(self)?; + expr.rhs.accept(self)?; + Ok(()) + } + + fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<()> { + expr.expr.accept(self)?; + Ok(()) + } + + fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<()> { + expr.expr.accept(self)?; + Ok(()) + } + + fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> { + expr.expr.accept(self)?; + Ok(()) + } + + fn visit_primary_expr(&mut self, _expr: &PrimaryExpr) -> Result<()> { + Ok(()) + } + + fn visit_function_expr(&mut self, _expr: &FunctionExpr) -> Result<()> { + // don't visit function expr, we're only collecting local assigns + Ok(()) + } +} + +#[derive(Default)] +struct LocalNameCollector { + names: HashSet, +} + +impl LocalNameCollector { + fn collect(body: &Vec) -> HashSet { + let mut collector = Self::default(); + for stmt in body { + stmt.accept(&mut collector).unwrap(); + } + collector.names + } +} + +impl StmtVisitor for LocalNameCollector { + fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> { + stmt.expr.accept(self)?; + Ok(()) + } + + fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> { + stmt.rhs.accept(self)?; + Ok(()) + } + + fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> { + stmt.expr.accept(self)?; + stmt.rhs.accept(self)?; + Ok(()) + } + + fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<()> { + for stmt in &stmt.stmts { + stmt.accept(self)?; + } + Ok(()) + } + + fn visit_return_stmt(&mut 
self, stmt: &ReturnStmt) -> Result<()> { + if let Some(expr) = stmt.expr.as_ref() { + expr.accept(self)?; + } + Ok(()) + } + + fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<()> { + stmt.condition.accept(self)?; + stmt.then_branch.accept(self)?; + for stmt in &stmt.else_branch { + stmt.accept(self)?; + } + Ok(()) + } +} + +impl ExprVisitor for LocalNameCollector { + fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> { + expr.lhs.accept(self)?; + expr.rhs.accept(self)?; + Ok(()) + } + + fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<()> { + expr.expr.accept(self)?; + Ok(()) + } + + fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<()> { + expr.expr.accept(self)?; + Ok(()) + } + + fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> { + expr.expr.accept(self)?; + Ok(()) + } + + fn visit_primary_expr(&mut self, expr: &PrimaryExpr) -> Result<()> { + if expr.token.kind == TokenKind::Name { + self.names.insert(expr.token.text.to_string()); + } + Ok(()) + } + + fn visit_function_expr(&mut self, _expr: &FunctionExpr) -> Result<()> { + // don't descend into nested functions; only names used directly in this body are collected + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Misc +//////////////////////////////////////////////////////////////////////////////// + +fn unescape(s: &str) -> String { + s.chars() + .skip(1) + .take(s.chars().count().saturating_sub(2)) // count chars (not bytes) so the closing quote is dropped + .collect::() + .replace("\\n", "\n") + .replace("\\r", "\r") + .replace("\\t", "\t") + .replace("\\\"", "\"") + .replace("\\\'", "\'") + .replace("\\\\", "\\") +} + +//////////////////////////////////////////////////////////////////////////////// +// Scope +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, PartialEq)] +enum ScopeKind { + Local, + Function, + //Class, +} + +#[derive(Debug)] +struct Scope { + kind: ScopeKind, + scope: Vec, +} + +impl Scope { +
pub fn new(kind: ScopeKind) -> Self { + Self { + kind, + scope: Default::default(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// CompileError +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +pub struct CompileError { + pub line: Option, + pub message: String, +} + +impl Display for CompileError { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + if let Some(line) = &self.line { + write!(fmt, "line {:?}: {}", line, self.message) + } else { + write!(fmt, "{}", self.message) + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Compiler +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct Compiler { + chunks: Vec, + scopes: Vec, + constants: Vec, + globals: Vec, +} + +impl Default for Compiler { + fn default() -> Self { + Compiler { + chunks: Default::default(), + scopes: Default::default(), + constants: Default::default(), + globals: vec!["print".to_string(), "println".to_string()], + } + } +} + +impl Compiler { + fn chunk(&self) -> &Chunk { + self.chunks.last().expect("no chunk") + } + + fn chunk_mut(&mut self) -> &mut Chunk { + self.chunks.last_mut().expect("no chunk") + } + + fn scope(&self) -> &Scope { + self.scopes.last().expect("no scope") + } + + fn scope_mut(&mut self) -> &mut Scope { + self.scopes.last_mut().expect("no scope") + } + + fn is_global_scope(&self) -> bool { + self.scopes.is_empty() + } + + /// Compiles a body of code. + /// + /// This returns a tuple of `Chunk`, the constants table, and the list of globals. 
+ pub fn compile(mut self, body: &Vec) -> Result<(Chunk, Vec, Vec)> { + self.chunks.push(Chunk::default()); + + for stmt in body { + self.compile_stmt(stmt)?; + } + + // add halt instruction with last line, if any + let mut last_line = (0, 0); + if let Some(last) = body.last() { + last_line = stmt_line_number(last.as_ref()); + } + self.emit(last_line, Op::Halt); + + let chunk = self.chunks.pop().expect("no chunk"); + + Ok((chunk, self.constants, self.globals)) + } + + fn compile_stmt(&mut self, stmt: &StmtP) -> Result<()> { + stmt.accept(self) + } + + fn compile_expr(&mut self, expr: &ExprP) -> Result<()> { + expr.accept(self) + } + + fn insert_constant(&mut self, constant: ObjP) -> Result { + // simple interning - try to find a constant that is exactly equal to this one and just + // return its value instead + for (index, interned) in self.constants.iter().enumerate() { + if constant + .try_read() + .unwrap() + .equals(&*interned.try_read().unwrap()) + { + return Ok(index as ConstantId); + } + } + + let index = self.constants.len(); + if index > (ConstantId::MAX as usize) { + return Err(CompileError { + line: None, + message: format!("too many constants (maximum {})", ConstantId::MAX), + } + .into()); + } + self.constants.push(constant); + Ok(index as ConstantId) + } + + fn get_global(&self, name: &str) -> Option { + self.globals + .iter() + .position(|global| global == &name) + .map(|id| id as GlobalId) + } + + fn insert_global(&mut self, name: &str) -> Result { + if let Some(id) = self.get_global(name) { + return Ok(id); + } + + let index = self.globals.len(); + if index > (GlobalId::MAX as usize) { + return Err(CompileError { + line: None, + message: format!("too many globals (maximum {})", GlobalId::MAX), + } + .into()); + } + + self.globals.push(name.to_string()); + Ok(index as GlobalId) + } + + /// Get a nonlocal binding to a variable. 
+ /// + /// This will return how many stack frames up we should look for this nonlocal, and the `Local` + /// that defines this binding. + fn get_nonlocal(&self, name: &str) -> Option<(FrameDepth, &Local)> { + let mut is_local = true; + let mut depth = 0; + for scope in self.scopes.iter().rev() { + if scope.kind == ScopeKind::Function { + // no longer inside the local scope + if is_local { + is_local = false; + continue; + } + // increase stack frame search + depth += 1; + } + // skip local variables + if is_local { + continue; + } + // outside of the local scope, check if we have defined the sought-after name + for local in &scope.scope { + if local.name == name { + return Some((depth, local)); + } + } + } + None + } + + fn get_local(&self, name: &str) -> Option<&Local> { + for scope in self.scopes.iter().rev() { + for local in &scope.scope { + if local.name == name { + return Some(local); + } + } + if scope.kind == ScopeKind::Function { + break; + } + } + None + } + + fn insert_local(&mut self, name: String) -> Result<&Local> { + let index = self.chunk().locals.len(); + if index > (LocalIndex::MAX as usize) { + return Err(CompileError { + line: None, + message: format!("too many locals (maximum: {})", LocalIndex::MAX), + } + .into()); + } + let mut local = Local { + slot: 0, + index: index as LocalIndex, + name, + }; + + // get the last allocated slot + for scope in self.scopes.iter().rev() { + if scope.scope.len() == 0 { + if scope.kind == ScopeKind::Function { + // don't go above the current function's scope (which was just determined to be + // empty) + break; + } + continue; + } + // get the last allocated slot and increment by one + let last = &scope.scope.last().unwrap(); + if last.slot == LocalSlot::MAX { + return Err(CompileError { + line: None, + message: format!( + "too many stack slots used by locals(maximum: {})", + LocalSlot::MAX + ), + } + .into()); + } + local.slot = last.slot + 1; + break; + } + + self.scope_mut().scope.push(local.clone()); +
self.chunk_mut().locals.push(local); + Ok(self.scope().scope.last().unwrap()) + } + + fn begin_scope(&mut self, kind: ScopeKind) { + self.scopes.push(Scope::new(kind)); + } + + fn end_scope(&mut self, line: LineRange) { + let scope = self.scopes.pop().expect("no scope"); + for _local in scope.scope { + self.emit(line, Op::Pop); + } + } + + fn emit(&mut self, line: LineRange, op: Op) { + let chunk = self.chunk_mut(); + chunk.code.push(op); + chunk.lines.push(line); + } +} + +impl StmtVisitor for Compiler { + fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> { + self.compile_expr(&stmt.expr)?; + self.emit(stmt_line_number(stmt), Op::Pop); + Ok(()) + } + + fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> { + let name = &stmt.lhs.text; + if self.is_global_scope() { + let global = self.insert_global(name)?; + self.compile_expr(&stmt.rhs)?; + self.emit(stmt_line_number(stmt), Op::SetGlobal(global)); + } else { + let mut declare = false; + let local = if let Some(local) = self.get_local(name) { + local + } else { + declare = true; + self.insert_local(name.to_string())? + } + .clone(); // gotta clone so we can borrow self as mutable for compile_expr + self.compile_expr(&stmt.rhs)?; + if !declare { + self.emit(stmt_line_number(stmt), Op::SetLocal(local.index)); + } + } + + // If the last value that was assigned to is a function, set its name here + // TODO - maybe this would be smarter to set up in the AST. I'm 99% sure that the last + // object created, if it were a function object, will be what we're assigning it to, but I + // want to be 100% sure instead of 99%. 
+ let mut obj = self.constants.last_mut().unwrap().try_write().unwrap(); + if let Some(fun) = obj.as_any_mut().downcast_mut::() { + fun.set_name(Arc::new(name.to_string())); + } + + Ok(()) + } + + fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> { + self.compile_expr(&stmt.expr)?; + let name = self.insert_constant(StrInst::create(&stmt.name.text))?; + self.compile_expr(&stmt.rhs)?; + self.emit(stmt_line_number(stmt), Op::SetAttr(name)); + Ok(()) + } + + fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<()> { + self.begin_scope(ScopeKind::Local); + for s in &stmt.stmts { + self.compile_stmt(s)?; + } + self.end_scope((stmt.rbrace.line, stmt.rbrace.line)); + Ok(()) + } + + fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<()> { + if let Some(expr) = &stmt.expr { + self.compile_expr(expr)?; + } else { + let nil = self.insert_constant(NilInst::create())?; + self.emit(stmt_line_number(stmt), Op::PushConstant(nil)); + } + Ok(()) + } + fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<()> { + // condition + self.compile_expr(&stmt.condition)?; + // call obj.__bool__() + let bool_attr = self.insert_constant(StrInst::create("__bool__"))?; + self.emit(expr_line_number(&*stmt.condition), Op::GetAttr(bool_attr)); + self.emit(expr_line_number(&*stmt.condition), Op::Call(0)); + let condition_patch_index = self.chunk().code.len(); + self.emit(expr_line_number(&*stmt.condition), Op::JumpFalse(0)); + + // then branch + // pop the condition on top of the stack (no jump taken) + self.emit(expr_line_number(&*stmt.condition), Op::Pop); + // not using compile_stmt because then_branch isn't a pointer, it's an honest-to-goodness + // value + stmt.then_branch.accept(self)?; + let exit_patch_index = self.chunk().code.len(); + self.emit(stmt_line_number(&stmt.then_branch), Op::Jump(0)); + + // else branch + // patch the condition index - this is where the JUMP_FALSE will jump to + assert_matches!(self.chunk().code[condition_patch_index], Op::JumpFalse(_)); + 
let offset = self.chunk().code.len() - condition_patch_index; + assert!( + offset <= (JumpOpArg::MAX as usize), + "jump offset too large between lines {:?} - this is a compiler limitation, sorry", + stmt_line_number(&stmt.then_branch) + ); + self.chunk_mut().code[condition_patch_index] = Op::JumpFalse(offset as JumpOpArg); + + // pop the condition on top of the stack (jump taken) + self.emit(expr_line_number(&*stmt.condition), Op::Pop); + for s in &stmt.else_branch { + self.compile_stmt(s)?; + } + + // patch the "then" branch exit jump address - this is where Op::Jump will jump to. + // TODO : see if we can eliminate duplicates by checking the last two instructions + assert_matches!(self.chunk().code[exit_patch_index], Op::Jump(_)); + let offset = self.chunk().code.len() - exit_patch_index; // relative to the Op::Jump being patched + assert!( + offset <= (JumpOpArg::MAX as usize), + "jump offset too large between lines {:?} - this is a compiler limitation, sorry", + stmt_line_number(&stmt.then_branch) + ); + self.chunk_mut().code[exit_patch_index] = Op::Jump(offset as JumpOpArg); + + Ok(()) + } +} + +impl ExprVisitor for Compiler { + fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> { + static OP_NAMES: LazyLock> = LazyLock::new(|| { + hash_map!
{ + TokenKind::Plus => "__add__", + TokenKind::Minus => "__sub__", + TokenKind::Star => "__mul__", + TokenKind::Slash => "__div__", + TokenKind::And => "__and__", + TokenKind::Or => "__or__", + TokenKind::BangEq => "__ne__", + TokenKind::EqEq => "__eq__", + TokenKind::Greater => "__gt__", + TokenKind::GreaterEq => "__ge__", + TokenKind::Less => "__lt__", + TokenKind::LessEq => "__le__", + } + }); + + self.compile_expr(&expr.lhs)?; + + // short-circuit setup + let mut exit_patch_index = 0; + + if let TokenKind::And | TokenKind::Or = expr.op.kind { + let constant_id = self.insert_constant(StrInst::create("__bool__"))?; + self.emit(expr_line_number(&*expr.lhs), Op::GetAttr(constant_id)); + self.emit(expr_line_number(&*expr.lhs), Op::Call(0)); + exit_patch_index = self.chunk().code.len(); + if expr.op.kind == TokenKind::And { + self.emit((expr.op.line, expr.op.line), Op::JumpFalse(0)); + } else { + self.emit((expr.op.line, expr.op.line), Op::JumpTrue(0)); + } + } + + let name = OP_NAMES + .get(&expr.op.kind) + .expect("invalid binary operator"); + let constant_id = self.insert_constant(StrInst::create(name))?; + self.emit(expr_line_number(expr), Op::GetAttr(constant_id)); + + // convert RHS to a bool if we're doing AND or OR + if let TokenKind::And | TokenKind::Or = expr.op.kind { + let constant_id = self.insert_constant(StrInst::create("__bool__"))?; + self.emit(expr_line_number(&*expr.rhs), Op::GetAttr(constant_id)); + self.emit(expr_line_number(&*expr.rhs), Op::Call(0)); + } + + // call operator function + self.emit(expr_line_number(expr), Op::Call(1)); + + // patch exit if we're doing a short circuit + if exit_patch_index != 0 { + assert_matches!( + self.chunk().code[exit_patch_index], + Op::JumpTrue(_) | Op::JumpFalse(_) + ); + let offset = self.chunk().code.len() - exit_patch_index; + // don't worry about doing a check on if offset is small enough for JumpOpArg, if you + // have 4 billion instructions between jumps that is probably your own fault + let new_op = 
match self.chunk().code[exit_patch_index] { + Op::JumpTrue(_) => Op::JumpTrue(offset as JumpOpArg), + Op::JumpFalse(_) => Op::JumpFalse(offset as JumpOpArg), + _ => unreachable!(), + }; + self.chunk_mut().code[exit_patch_index] = new_op; + } + + Ok(()) + } + + fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<()> { + static OP_NAMES: LazyLock> = LazyLock::new(|| { + hash_map! { + TokenKind::Plus => "__pos__", + TokenKind::Minus => "__neg__", + TokenKind::Bang => "__not__", + } + }); + self.compile_expr(&expr.expr)?; + let name = OP_NAMES.get(&expr.op.kind).expect("invalid unary operator"); + let constant_id = self.insert_constant(StrInst::create(name))?; + self.emit(expr_line_number(expr), Op::GetAttr(constant_id)); + self.emit(expr_line_number(expr), Op::Call(0)); + Ok(()) + } + + fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<()> { + self.compile_expr(&expr.expr)?; + for arg in &expr.args { + self.compile_expr(arg)?; + } + if expr.args.len() > (Argc::MAX as usize) { + return Err(CompileError { + line: Some(expr_line_number(expr)), + message: format!("too many function arguments (maximum: {})", Argc::MAX), + } + .into()); + } + self.emit(expr_line_number(expr), Op::Call(expr.args.len() as Argc)); + Ok(()) + } + + fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> { + self.compile_expr(&expr.expr)?; + let constant_id = self.insert_constant(StrInst::create(&expr.name.text))?; + self.emit(expr_line_number(expr), Op::GetAttr(constant_id)); + Ok(()) + } + + fn visit_primary_expr(&mut self, expr: &PrimaryExpr) -> Result<()> { + match expr.token.kind { + TokenKind::Name => { + let name = &expr.token.text; + // check if there's a local with this name, otherwise check globals + if let Some(local) = self.get_local(name) { + self.emit(expr_line_number(expr), Op::GetLocal(local.index)); + } else { + let global = self.get_global(name).ok_or_else(|| CompileError { + line: Some(expr_line_number(expr)), + message: if self.is_global_scope() { + 
format!("unknown global {}", name) + } else { + format!("unknown local {}", name) + }, + })?; + self.emit(expr_line_number(expr), Op::GetGlobal(global)); + } + } + TokenKind::Number => { + let obj = if expr.token.text.contains('.') { + FloatInst::create(expr.token.text.parse().unwrap()) as ObjP + } else { + IntInst::create(expr.token.text.parse().unwrap()) as ObjP + }; + let constant_id = self.insert_constant(obj)?; + self.emit(expr_line_number(expr), Op::PushConstant(constant_id)); + } + TokenKind::String => { + let constant_id = + self.insert_constant(StrInst::create(unescape(&expr.token.text)))?; + self.emit(expr_line_number(expr), Op::PushConstant(constant_id)); + } + TokenKind::True | TokenKind::False => { + let constant_id = + self.insert_constant(BoolInst::create(expr.token.kind == TokenKind::True))?; + self.emit(expr_line_number(expr), Op::PushConstant(constant_id)); + } + TokenKind::Nil => { + let constant_id = self.insert_constant(NilInst::create())?; + self.emit(expr_line_number(expr), Op::PushConstant(constant_id)); + } + _ => unreachable!(), + } + Ok(()) + } + + fn visit_function_expr(&mut self, expr: &FunctionExpr) -> Result<()> { + let end_line = (expr.rbrace.line, expr.rbrace.line); + self.begin_scope(ScopeKind::Function); + + self.chunks.push(Chunk::default()); + let mut locals: HashSet = Default::default(); + for (param, _ty) in &expr.params { + // register all params as locals + locals.insert(param.text.to_string()); + // also insert them as locals in the scope + self.insert_local(param.text.to_string())?; + } + + // closures: figure out all other locals that are assigned to in the function + for local in LocalAssignCollector::collect(&expr.body) { + locals.insert(local); + } + + // figure out all nonlocals being used, and then re-register them as locals + // when a user function is called, all values of the nonlocal are pushed to the top of the + // stack on top of the function parameters. 
+ let all_names = LocalNameCollector::collect(&expr.body); + // these are the nonlocals that we're copying/re-registering as locals + let mut captures: HashMap = Default::default(); + let mut nonlocals: HashMap = Default::default(); + for name in &all_names { + // already registered as a local + if locals.contains(name) { + continue; + } + // already captured + if captures.contains_key(name) { + continue; + } + if let Some((depth, nonlocal)) = self.get_nonlocal(name) { + let nonlocal = nonlocal.clone(); + nonlocals.insert(name.to_string(), (depth, nonlocal)); + captures.insert( + name.to_string(), + self.insert_local(name.to_string())?.clone(), + ); + } + } + + // compile body + for stmt in &expr.body { + self.compile_stmt(stmt)?; + } + + // always end with a "return nil" + let nil = self.insert_constant(NilInst::create())?; + self.emit(end_line, Op::PushConstant(nil)); + self.emit(end_line, Op::Return); + + self.end_scope(end_line); + + // create the function + let chunk = self.chunks.pop().unwrap(); + let fun = UserFunctionInst::create(chunk, expr.params.len() as Argc); + + // register the function as a constant + let fun_constant = self.insert_constant(fun)?; + self.emit(expr_line_number(expr), Op::PushConstant(fun_constant)); + + // close over the captured values + for (depth, local) in nonlocals.values() { + self.emit( + expr_line_number(expr), + Op::CloseOver { + depth: *depth, + slot: local.slot, + }, + ); + } + + Ok(()) + } +} diff --git a/src/disassemble.rs b/src/disassemble.rs new file mode 100644 index 0000000..db409cb --- /dev/null +++ b/src/disassemble.rs @@ -0,0 +1,160 @@ +use crate::obj::{ObjP, UserFunctionInst}; +use crate::vm::{Chunk, JumpOpArg, Op}; + +type Row = (String, String, &'static str, String, String); + +fn disassemble_chunk(chunk: &Chunk, constants: &Vec, globals: &Vec) { + let mut rows: Vec = vec![( + "ADDR".into(), + "LINE".into(), + "OP".into(), + "ARG".into(), + "INFO".into(), + )]; + + for (index, op) in 
chunk.code.iter().enumerate() { + let (start_line, end_line) = chunk.lines[index]; + + let addr: String = index.to_string(); + let line = if start_line == end_line { + start_line.to_string() + } else { + format!("{start_line}-{end_line}") + }; + let op_str: &'static str; + let arg: String; + let info: String; + + match op { + Op::Pop => { + op_str = "POP"; + arg = String::new(); + info = String::new(); + } + Op::PushConstant(constant_id) => { + op_str = "PUSH_CONSTANT"; + arg = format!("{}", &constants[*constant_id as usize].try_read().unwrap()); + info = format!("(constant ID {constant_id})"); + } + Op::GetLocal(local_id) => { + op_str = "GET_LOCAL"; + let local = &chunk.locals[*local_id as usize]; + arg = local.name.to_string(); + info = format!("(slot {}, local ID {})", local.slot, local.index); + } + Op::SetLocal(local_id) => { + op_str = "SET_LOCAL"; + let local = &chunk.locals[*local_id as usize]; + arg = local.name.to_string(); + info = format!("(slot {}, local ID {})", local.slot, local.index); + } + Op::GetGlobal(global_id) => { + op_str = "GET_GLOBAL"; + arg = globals[*global_id as usize].clone(); + info = format!("(global ID {global_id})"); + } + Op::SetGlobal(global_id) => { + op_str = "SET_GLOBAL"; + arg = globals[*global_id as usize].clone(); + info = format!("(global ID {global_id})"); + } + Op::GetAttr(constant_id) => { + op_str = "GET_ATTR"; + arg = format!("{}", &constants[*constant_id as usize].try_read().unwrap()); + info = format!("(constant ID {constant_id})"); + } + Op::SetAttr(constant_id) => { + op_str = "SET_ATTR"; + arg = format!("{}", &constants[*constant_id as usize].try_read().unwrap()); + info = format!("(constant ID {constant_id})"); + } + Op::Jump(jump_offset) => { + op_str = "JUMP"; + arg = format!("{}", jump_offset); + info = format!("(address {})", (index as JumpOpArg) + *jump_offset); + } + Op::JumpFalse(jump_offset) => { + op_str = "JUMP_FALSE"; + arg = format!("{}", jump_offset); + info = format!("(address {})", (index as 
JumpOpArg) + *jump_offset); + } + Op::JumpTrue(jump_offset) => { + op_str = "JUMP_TRUE"; + arg = format!("{}", jump_offset); + info = format!("(address {})", (index as JumpOpArg) + *jump_offset); + } + Op::Call(argc) => { + op_str = "CALL"; + arg = format!("argc {argc}"); + info = String::new(); + } + Op::Return => { + op_str = "RETURN"; + arg = String::new(); + info = String::new(); + } + Op::CloseOver { depth, slot } => { + op_str = "CLOSE_OVER"; + arg = format!("{depth}"); + info = format!("slot {slot} (name unknown)"); + } + Op::Halt => { + op_str = "HALT"; + arg = String::new(); + info = String::new(); + } + } + + rows.push((addr, line, op_str, arg, info)); + } + + display_rows(&rows); +} + +fn display_rows(rows: &Vec) { + // get the longest width of each row + let mut addr_width = 0; + let mut line_width = 0; + let mut op_width = 0; + let mut arg_width = 0; + let mut info_width = 0; + for (addr, line, op, arg, info) in rows { + addr_width = addr_width.max(addr.len()); + line_width = line_width.max(line.len()); + op_width = op_width.max(op.len()); + arg_width = arg_width.max(arg.len()); + info_width = info_width.max(info.len()); + } + + addr_width += 2; + line_width += 2; + op_width += 2; + arg_width += 2; + info_width += 2; + + for (addr, line, op, arg, info) in rows { + println!( + "{addr:>addr_width$} {line:>line_width$} {op:>op_width$} {arg:arg_width$} {info:info_width$}" + ); + } +} + +pub fn disassemble(chunk: &Chunk, constants: &Vec, globals: &Vec) { + println!("== main chunk"); + println!(); + disassemble_chunk(chunk, constants, globals); + + for constant in constants { + let borrowed = constant.try_read().unwrap(); + if let Some(fun) = borrowed.as_any().downcast_ref::() { + println!(); + println!( + "== {} starting on line {}", + fun.name(), + fun.chunk().lines[0].0 + ); + println!(); + disassemble_chunk(fun.chunk(), constants, globals); + } + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..819c9c8 --- /dev/null +++
b/src/main.rs @@ -0,0 +1,70 @@ +// trait_upcasting - https://github.com/rust-lang/rust/issues/65991 +// stabilization in progress +#![feature(trait_upcasting)] + +mod ast; +mod builtins; +mod compiler; +mod disassemble; +mod obj; +mod parser; +mod token; +mod vm; + +use std::fmt; +use std::fs::File; +use std::io::prelude::*; +use std::path::PathBuf; + +use clap::Parser as ClapParser; +use thiserror::Error; + +#[derive(ClapParser, Debug)] +#[command(version, about, long_about = None)] +struct Args { + #[arg(short, long, help = "Dump program disassembly and exit")] + disassemble: bool, + #[arg(help = "The path of the file to run")] + path: PathBuf, +} + +#[derive(Debug, Error)] +struct ProgramError(String); + +impl fmt::Display for ProgramError { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.0) + } +} + +fn main() -> Result<(), Box> { + let args = Args::parse(); + + let mut file = File::open(&args.path)?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + + let mut parser = parser::Parser::new(contents, &args.path)?; + let ast = parser.parse_all()?; + + if parser.was_error() { + return Err(ProgramError("error occurred, exiting".to_string()).into()); + } + + // initialize type system + obj::init_types(); + + // compile + let (chunk, constants, globals) = compiler::Compiler::default().compile(&ast)?; + + if args.disassemble { + disassemble::disassemble(&chunk, &constants, &globals); + return Ok(()); + } + + // run + let mut vm = vm::Vm::new(chunk.into(), constants, globals); + vm.run(); + + Ok(()) +} diff --git a/src/obj.rs b/src/obj.rs new file mode 100644 index 0000000..2a9d4f8 --- /dev/null +++ b/src/obj.rs @@ -0,0 +1,1059 @@ +// TODO obj.rs - remove the warning suppression +#![allow(unused_variables, dead_code)] + +use std::any::Any; +use std::collections::HashMap; +use std::fmt::{self, Debug, Display}; +use std::ptr; +use std::sync::{Arc, LazyLock, Mutex, RwLock}; + +use common_macros::hash_map; + 
+use crate::vm::{Argc, Chunk, Frame, Vm}; + +pub type Ptr = Arc>; +pub type ObjP = Ptr; +pub type Attrs = HashMap; + +/// Downcast an object pointer to a concrete type, and do something with that object. +pub fn with_obj_downcast(ptr: ObjP, closure: impl FnOnce(&T) -> Out) -> Out +where + T: Obj + 'static, +{ + let borrowed = ptr.try_read().expect("could not lock object for reading"); + if let Some(obj) = borrowed.as_any().downcast_ref::() { + closure(obj) + } else { + panic!( + "could not downcast '{:?}' to {}", + borrowed, + std::any::type_name::() + ) + } +} + +pub fn obj_is_inst(ptr: &ObjP) -> bool +where + T: Obj + 'static, +{ + let borrowed = ptr.try_read().expect("could not lock object for reading"); + borrowed.as_any().downcast_ref::().is_some() +} + +/// Builtin types macro +macro_rules! builtin_types { + ( + $( + $type_name:ident => { $( $vtable_name:ident => $vtable_value:expr ),* $(,)? } + ),+ $(,)? + ) => { + pub static TYPES: LazyLock>>> = LazyLock::new(|| { + RwLock::new(hash_map! { + $( + stringify!($type_name).to_string() => make_ptr(TypeInst::new(stringify!($type_name))) + ),+ + }) + }); + + static TYPE_SYSTEM_INIT: LazyLock> = LazyLock::new(|| Mutex::new(false)); + + /// Initialize types. + /// + /// This should only be called once. 
+ fn init_base_types() { + // instantiate + $( + if stringify!($type_name) != "Type" { + let ty = Ptr::clone(&TYPES.try_read().unwrap()[stringify!($type_name)]); + ty.try_write().unwrap().instantiate(); + } + )+ + + // __name__ + $( + { + let name = StrInst::create(stringify!($type_name)); + let ty = Ptr::clone(&TYPES.try_read().unwrap()[stringify!($type_name)]); + ty.try_write() + .unwrap() + .set_attr("__name__", name); + } + )+ + + // vtable + $( + { + let ptr = Ptr::clone(&TYPES.try_read().unwrap()[stringify!($type_name)]); + let ty = ptr.try_write().unwrap(); + $( + ty.vtable.insert($vtable_name.into(), $vtable_value); + )* + } + )+ + } + }; +} + +pub(crate) fn init_types() { + // Taking the lock here will lock the entire function from being run twice + // simultaneously and prevent race conditions. + // + // Race conditions really can only happen during testing so this is just a precaution. + let mut lock_guard = TYPE_SYSTEM_INIT.lock().unwrap(); + if *lock_guard { + if cfg!(test) { + return; + } else { + panic!("do not initialize type system twice"); + } + } + + // Init type_type here + { + let type_type_ptr = Ptr::clone(&TYPES.try_read().unwrap()["Type"]); + let mut type_type = type_type_ptr.try_write().unwrap(); + type_type.set_attr( + "__type__", + Ptr::clone(&TYPES.try_read().unwrap()["Type"]) as ObjP, + ); + type_type.base.is_instantiated = true; + } + + // Init the rest of the types + init_base_types(); + + *lock_guard = true; +} + +fn placeholder(_: &mut Vm, _: Vec) -> ObjP { + NilInst::create() +} + +fn to_string(_: &mut Vm, args: Vec) -> ObjP { + let str_value = format!("{}", args[0].try_read().unwrap()); + StrInst::create(str_value) +} + +builtin_types! { + Type => {}, + Obj => {}, + Str => {}, + Int => {}, + Float => {}, + Bool => {}, + Nil => {}, + BuiltinFunction => {}, + UserFunction => {}, + Method => {}, +} + +/// Convenience function for creating pointers, in case the `Arc>` pointer type has to +/// change.
+/// +/// I would implement this as a `From` but it doesn't seem to work for a foreign type, and I'm +/// not sure why. +pub fn make_ptr(obj: T) -> Ptr { + Arc::new(RwLock::new(obj)) +} + +//////////////////////////////////////////////////////////////////////////////// +// Obj +//////////////////////////////////////////////////////////////////////////////// + +pub trait Obj: Debug + Display + Any + Send + Sync { + fn instantiate(&mut self); + + fn is_instantiated(&self) -> bool; + fn attrs(&self) -> &Attrs; + fn attrs_mut(&mut self) -> &mut Attrs; + + fn set_attr(&mut self, name: &str, value: ObjP) { + self.attrs_mut().insert(name.to_string(), value); + } + + fn get_attr(&self, name: &str) -> Option { + self.attrs().get(name).map(Arc::clone) + } + + fn type_inst(&self) -> ObjP { + self.get_attr("__type__").unwrap() + } + + fn type_name(&self) -> Arc { + with_obj_downcast(self.type_inst(), |type_inst: &TypeInst| { + Arc::clone(&type_inst.name) + }) + } + + fn arity(&self) -> Option { + None + } + + fn call(&self, vm: &mut Vm, argc: Argc) { + // TODO Obj::call - need to handle "this object cannot be called" errors + // BLOCKED-ON: exceptions + todo!("Raise some kind of not implemented/not callable error for non-callable objects") + } + + fn is_truthy(&self) -> bool { + true + } + + fn equals(&self, other: &dyn Obj) -> bool; + + fn as_any(&self) -> &dyn Any; + + fn as_any_mut(&mut self) -> &mut dyn Any; +} + +//////////////////////////////////////////////////////////////////////////////// +// BaseObjInst +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Default, Clone)] +struct BaseObjInst { + attrs: HashMap, + is_instantiated: bool, +} + +impl Display for BaseObjInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "", (self as *const _ as usize)) + } +} + +impl Obj for BaseObjInst { + fn instantiate(&mut self) { + if self.get_attr("__type__").is_none() { + self.set_attr( + "__type__", + 
Ptr::clone(&TYPES.try_read().unwrap()["Obj"]) as ObjP, + ); + } + + // TODO BaseObjInst::instantiate - instantiate VTable + // Okay, we are running into a little snag here: + // * TypeInst::vtable holds a collection of named objects that will get copied into the + // attributes during instantiation. + // * If an object that gets copied is a function (UserFunctionInst, BuiltinFunctionInst), + // it will be wrapped by a MethodInst + // * MethodInst requires a pointer to the function being wrapped, as well as a pointer to + // the "self" object. + // * This is the root of the problem - ***we need the pointer to the object that we are + // currently instantiating.*** + /* + let type_inst_ptr = Ptr::clone(&self.type_inst()); + with_obj_downcast(type_inst_ptr, |type_inst: &TypeInst| { + for (key, value_ptr) in type_inst.vtable.iter() { + // copy functions over as MethodInst + if obj_is_inst::(&value_ptr) + || obj_is_inst::(&value_ptr) + { + self.set_attr(key, MethodInst::create(value_ptr)); + } + } + }); + */ + + self.is_instantiated = true; + } + + fn is_instantiated(&self) -> bool { + self.is_instantiated + } + + fn attrs(&self) -> &Attrs { + &self.attrs + } + + fn attrs_mut(&mut self) -> &mut Attrs { + &mut self.attrs + } + + fn equals(&self, other: &dyn Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + // compare all attrs + self.attrs.iter().all(|(k1, v1)| { + other + .attrs + .get(k1) + .map(|v2| v2.try_read().unwrap().equals(&*v1.try_read().unwrap())) + .unwrap_or(false) + }) && self.is_instantiated == other.is_instantiated + } else { + false + } + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +macro_rules!
impl_base_obj { + ($base_name:ident) => { + fn is_instantiated(&self) -> bool { + self.$base_name.is_instantiated() + } + + fn attrs(&self) -> &Attrs { + self.$base_name.attrs() + } + + fn attrs_mut(&mut self) -> &mut Attrs { + self.$base_name.attrs_mut() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + }; + () => { + impl_base_obj! { base } + }; +} + +//////////////////////////////////////////////////////////////////////////////// +// ObjInst +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct ObjInst { + base: BaseObjInst, +} + +impl ObjInst { + pub fn new() -> Self { + Self { + base: Default::default(), + } + } + + pub fn create() -> Ptr { + let mut new = Self::new(); + new.instantiate(); + make_ptr(new) + } +} + +impl Display for ObjInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "", (self as *const _ as usize)) + } +} + +impl Obj for ObjInst { + fn instantiate(&mut self) { + self.base.instantiate(); + } + + fn equals(&self, other: &dyn Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.base.equals(&other.base) + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// TypeInst +//////////////////////////////////////////////////////////////////////////////// + +pub struct TypeInst { + name: Arc, + base: BaseObjInst, + vtable: HashMap, +} + +impl TypeInst { + pub fn new(name: impl ToString) -> Self { + Self { + name: Arc::new(name.to_string()), + base: Default::default(), + vtable: Default::default(), + } + } + + pub fn create(name: impl ToString) -> Ptr { + let mut new = Self::new(name); + new.instantiate(); + make_ptr(new) + } + + pub fn name(&self) -> &Arc { + &self.name + } +} + +impl Debug for TypeInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "", + self.name, 
+ (self as *const _ as usize) + ) + } +} + +impl Display for TypeInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "", + self.name, + (self as *const _ as usize) + ) + } +} + +impl Obj for TypeInst { + fn instantiate(&mut self) { + self.set_attr( + "__type__", + Ptr::clone(&TYPES.try_read().unwrap()["Type"]) as ObjP, + ); + self.base.instantiate(); + } + + fn equals(&self, other: &dyn Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + // TODO TypeInst::equals : something more robust than this + // Types should hold equality if they have the same name + // the problem is that Type.get_attr("__type__") is going to return itself, so we have + // to go through attributes to specially exclude to the __type__ attribute if it points + // to ourself. + // How do we detect that it's pointing to ourself? I suppose pointers are the way + self.name == other.name + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// StrInst +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct StrInst { + str_value: Arc, + base: BaseObjInst, +} + +impl Display for StrInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.str_value) + } +} + +impl StrInst { + pub fn new(str_value: impl ToString) -> Self { + Self { + str_value: Arc::new(str_value.to_string()), + base: Default::default(), + } + } + + pub fn create(str_value: impl ToString) -> Ptr { + let mut new = Self::new(str_value); + new.instantiate(); + make_ptr(new) + } + + pub fn str_value(&self) -> &Arc { + &self.str_value + } +} + +impl Obj for StrInst { + fn instantiate(&mut self) { + self.set_attr( + "__type__", + Ptr::clone(&TYPES.try_read().unwrap()["Str"]) as ObjP, + ); + self.base.instantiate(); + } + + fn is_truthy(&self) -> bool { + !self.str_value.is_empty() + } + + fn equals(&self, other: &dyn 
Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.str_value == other.str_value + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// IntInst +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct IntInst { + int_value: i64, + base: BaseObjInst, +} + +impl IntInst { + pub fn new(int_value: i64) -> Self { + Self { + int_value, + base: Default::default(), + } + } + + pub fn create(int_value: i64) -> Ptr { + let mut new = Self::new(int_value); + new.instantiate(); + make_ptr(new) + } + + pub fn int_value(&self) -> i64 { + self.int_value + } +} + +impl Display for IntInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.int_value) + } +} + +impl Obj for IntInst { + fn instantiate(&mut self) { + self.set_attr( + "__type__", + Ptr::clone(&TYPES.try_read().unwrap()["Int"]) as ObjP, + ); + self.base.instantiate(); + } + + fn is_truthy(&self) -> bool { + self.int_value != 0 + } + + fn equals(&self, other: &dyn Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.int_value == other.int_value + } else if let Some(other) = other.as_any().downcast_ref::() { + self.int_value as f64 == other.float_value + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// FloatInst +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct FloatInst { + float_value: f64, + base: BaseObjInst, +} + +impl FloatInst { + pub fn new(float_value: f64) -> Self { + Self { + float_value, + base: Default::default(), + } + } + + pub fn create(float_value: f64) -> Ptr { + let mut new = Self::new(float_value); + new.instantiate(); + make_ptr(new) + } + + pub fn float_value(&self) -> f64 { + self.float_value + } +} + +impl Display for 
FloatInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.float_value) + } +} + +impl Obj for FloatInst { + fn instantiate(&mut self) { + self.set_attr( + "__type__", + Ptr::clone(&TYPES.try_read().unwrap()["Float"]) as ObjP, + ); + self.base.instantiate(); + } + + fn is_truthy(&self) -> bool { + self.float_value != 0.0 + } + + fn equals(&self, other: &dyn Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.float_value == other.float_value + } else if let Some(other) = other.as_any().downcast_ref::() { + self.float_value == other.int_value as f64 + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// BoolInst +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct BoolInst { + bool_value: bool, + base: BaseObjInst, +} + +impl BoolInst { + pub fn new(bool_value: bool) -> Self { + Self { + bool_value, + base: Default::default(), + } + } + + pub fn create(bool_value: bool) -> Ptr { + // TODO BoolInst::create : interning + let mut new = Self::new(bool_value); + new.instantiate(); + make_ptr(new) + } + + pub fn bool_value(&self) -> bool { + self.bool_value + } +} + +impl Display for BoolInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.bool_value) + } +} + +impl Obj for BoolInst { + fn instantiate(&mut self) { + self.set_attr( + "__type__", + Ptr::clone(&TYPES.try_read().unwrap()["Bool"]) as ObjP, + ); + self.base.instantiate(); + } + + fn is_truthy(&self) -> bool { + self.bool_value + } + + fn equals(&self, other: &dyn Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + self.bool_value == other.bool_value + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// NilInst 
+//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Default)] +pub struct NilInst { + base: BaseObjInst, +} + +impl NilInst { + pub fn new() -> Self { + Default::default() + } + + pub fn create() -> Ptr { + // TODO NilInst::create : interning + let mut new = Self::new(); + new.instantiate(); + make_ptr(new) + } +} + +impl Display for NilInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "nil") + } +} + +impl Obj for NilInst { + fn instantiate(&mut self) { + self.set_attr( + "__type__", + Ptr::clone(&TYPES.try_read().unwrap()["Nil"]) as ObjP, + ); + self.base.instantiate(); + } + + fn is_truthy(&self) -> bool { + false + } + + fn equals(&self, other: &dyn Obj) -> bool { + other.as_any().downcast_ref::().is_some() + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// BuiltinFunctionInst +//////////////////////////////////////////////////////////////////////////////// + +pub type BuiltinFunctionPtr = fn(vm: &mut Vm, args: Vec) -> ObjP; + +#[derive(Debug)] +pub struct BuiltinFunctionInst { + base: BaseObjInst, + name: String, + function: BuiltinFunctionPtr, + arity: Argc, +} + +impl BuiltinFunctionInst { + pub fn new(name: String, function: BuiltinFunctionPtr, arity: Argc) -> Self { + Self { + base: Default::default(), + name, + function, + arity, + } + } + + pub fn create(name: String, function: BuiltinFunctionPtr, arity: Argc) -> Ptr { + let mut new = Self::new(name, function, arity); + new.instantiate(); + make_ptr(new) + } + + pub fn name(&self) -> &String { + &self.name + } +} + +impl Display for BuiltinFunctionInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "", + self.name(), + self.arity().unwrap(), + self.function as *const BuiltinFunctionPtr as usize + ) + } +} + +impl Obj for BuiltinFunctionInst { + fn instantiate(&mut self) { + self.set_attr( + "__type__", + 
Ptr::clone(&TYPES.try_read().unwrap()["UserFunction"]) as ObjP, + ); + self.base.instantiate(); + } + + fn arity(&self) -> Option { + Some(self.arity) + } + + fn call(&self, vm: &mut Vm, argc: Argc) { + // args + let mut args = Vec::with_capacity(argc as usize); + for _ in 0..argc { + args.push(vm.pop()); + } + args.reverse(); + // callee (self) + vm.pop(); + let result = (self.function)(vm, args); + vm.push(result); + } + + fn equals(&self, other: &dyn Obj) -> bool { + // TODO BuiltinFunctionInst::equals : need something more robust than checking addr_eq, + // maybe check the self_binding pointer too? + if let Some(other) = other.as_any().downcast_ref::() { + ptr::addr_eq(self, other) + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// UserFunctionInst +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Clone)] +pub struct UserFunctionInst { + base: BaseObjInst, + name: Arc, + chunk: Arc, + arity: Argc, + captures: Vec, +} + +impl UserFunctionInst { + pub fn new(chunk: Chunk, arity: Argc) -> Self { + Self { + base: Default::default(), + name: Arc::new("(anonymous)".to_string()), + chunk: Arc::new(chunk), + arity, + captures: Default::default(), + } + } + + pub fn create(chunk: Chunk, arity: Argc) -> Ptr { + let mut new = Self::new(chunk, arity); + new.instantiate(); + make_ptr(new) + } + + pub fn name(&self) -> &String { + &self.name + } + + pub fn set_name(&mut self, name: Arc) { + self.name = name; + } + + pub fn chunk(&self) -> &Chunk { + &self.chunk + } + + pub fn push_capture(&mut self, value: ObjP) { + self.captures.push(value); + } +} + +impl Display for UserFunctionInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "", + self.name(), + self.arity().unwrap(), + self as *const _ as usize + ) + } +} + +impl Obj for UserFunctionInst { + fn instantiate(&mut self) { + self.set_attr( + 
"__type__", + Ptr::clone(&TYPES.try_read().unwrap()["UserFunction"]) as ObjP, + ); + self.base.instantiate(); + } + + fn arity(&self) -> Option { + Some(self.arity) + } + + fn call(&self, vm: &mut Vm, argc: Argc) { + assert_eq!(argc, self.arity, "argc must match arity"); + let new_frame = Frame { + name: Arc::clone(&self.name), + chunk: Arc::clone(&self.chunk), + ip: 0, + stack_base: vm.stack().len() - (argc as usize), + }; + vm.push_frame(new_frame); + for capture in &self.captures { + vm.push(Ptr::clone(&capture)); + } + } + + fn equals(&self, other: &dyn Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + // TODO UserFunctionInst::equals : need something more robust than checking addr_eq. + ptr::addr_eq(self, other) + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// MethodInst +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct MethodInst { + base: BaseObjInst, + self_binding: ObjP, + function: ObjP, +} + +impl MethodInst { + pub fn new(self_binding: ObjP, function: ObjP) -> Self { + Self { + base: Default::default(), + self_binding, + function, + } + } + + pub fn create(self_binding: ObjP, function: ObjP) -> Ptr { + let mut new = Self::new(self_binding, function); + new.instantiate(); + make_ptr(new) + } + + pub fn self_binding(&self) -> &ObjP { + &self.self_binding + } +} + +impl Display for MethodInst { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.function.try_read().unwrap()) + } +} + +impl Obj for MethodInst { + fn instantiate(&mut self) { + self.set_attr( + "__type__", + Ptr::clone(&TYPES.try_read().unwrap()["Method"]) as ObjP, + ); + self.base.instantiate(); + } + + fn arity(&self) -> Option { + self.function.try_read().unwrap().arity() + } + + fn call(&self, vm: &mut Vm, argc: Argc) { + self.function.try_read().unwrap().call(vm, argc) + } + + fn 
equals(&self, other: &dyn Obj) -> bool { + if let Some(other) = other.as_any().downcast_ref::() { + ptr::addr_eq(&*self.self_binding, &*other.self_binding) + && ptr::addr_eq(&*self.function, &*other.function) + } else { + false + } + } + + impl_base_obj!(); +} + +//////////////////////////////////////////////////////////////////////////////// +// Tests +//////////////////////////////////////////////////////////////////////////////// + +#[test] +fn test_new_objects() { + init_types(); + + let type_value = TypeInst::create("Type"); + assert_eq!(&*type_value.try_read().unwrap().type_name(), "Type"); + + let str_value = StrInst::create("asdfasdfasdfasdfasdf"); + assert_eq!(&*str_value.try_read().unwrap().type_name(), "Str"); + + let int_value = IntInst::create(1234); + assert_eq!(&*int_value.try_read().unwrap().type_name(), "Int"); + + let float_value = FloatInst::create(1234.5678); + assert_eq!(&*float_value.try_read().unwrap().type_name(), "Float"); + + let nil_value = NilInst::create(); + assert_eq!(&*nil_value.try_read().unwrap().type_name(), "Nil"); +} + +#[test] +fn test_obj_equals() { + init_types(); + + let int1 = IntInst::create(1234); + let int2 = IntInst::create(1234); + + assert!(int1.try_read().unwrap().equals(&*int2.try_read().unwrap())); + assert!(int2.try_read().unwrap().equals(&*int1.try_read().unwrap())); + + let float1 = FloatInst::create(1234.0); + assert!(int1 + .try_read() + .unwrap() + .equals(&*float1.try_read().unwrap())); + assert!(float1 + .try_read() + .unwrap() + .equals(&*int2.try_read().unwrap())); + + // self-equality + let str1 = StrInst::create("1234"); + assert!(str1.try_read().unwrap().equals(&*str1.try_read().unwrap())); + + let str2 = StrInst::create("1234"); + assert!(str1.try_read().unwrap().equals(&*str2.try_read().unwrap())); + assert!(str2.try_read().unwrap().equals(&*str1.try_read().unwrap())); + + assert!(!str1 + .try_read() + .unwrap() + .equals(&*float1.try_read().unwrap())); + 
assert!(!str1.try_read().unwrap().equals(&*int1.try_read().unwrap())); + + let obj1 = ObjInst::create(); + let obj2 = ObjInst::create(); + assert!(obj1.try_read().unwrap().equals(&*obj2.try_read().unwrap())); + + // these objects aren't equal anymore + obj1.try_write() + .unwrap() + .set_attr("my_attr", Ptr::clone(&str2) as ObjP); + assert!(!obj1.try_read().unwrap().equals(&*obj2.try_read().unwrap())); + + // but now they are! + obj2.try_write() + .unwrap() + .set_attr("my_attr", Ptr::clone(&str2) as ObjP); + assert!(obj2.try_read().unwrap().equals(&*obj1.try_read().unwrap())); +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..24fa75c --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,801 @@ +use common_macros::hash_map; +use thiserror::Error; + +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::path::{Path, PathBuf}; +use std::sync::OnceLock; + +use crate::ast::*; +use crate::token::{Token, TokenKind}; + +//////////////////////////////////////////////////////////////////////////////// +// ParseError +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +pub struct ParseError { + pub message: String, + pub line: usize, + pub path: PathBuf, +} + +pub type Result = std::result::Result; + +impl Display for ParseError { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "in {} at line {}: {}", + self.path.as_os_str().to_str().unwrap(), + self.line, + self.message + ) + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Constants +//////////////////////////////////////////////////////////////////////////////// + +const WHITESPACE: &str = " \t\r"; +const NAME_START_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"; +const NAME_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789-"; +const NUMBER_START_CHARS: &str = "0123456789"; +const NUMBER_CHARS: &str = 
"0123456789."; +const STRING_START_CHARS: &str = "'\""; +const STRING_ESCAPES: &str = "nrt\\\"'"; + +//////////////////////////////////////////////////////////////////////////////// +// Lexer +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct Lexer { + line: usize, + index: usize, + start: usize, + text: String, + path: PathBuf, + paren_stack: Vec, + was_error: bool, +} + +impl Lexer { + pub fn new(text: String, path: &dyn AsRef) -> Self { + Self { + line: 1, + index: 1, + start: 0, + text, + path: path.as_ref().into(), + paren_stack: Vec::new(), + was_error: false, + } + } + + pub fn is_eof(&self) -> bool { + self.index >= self.text.len() + } + + pub fn lexeme(&self) -> &str { + &self.text[self.start..self.index - 1] + } + + pub fn was_error(&self) -> bool { + self.was_error + } + + fn current(&self) -> char { + if self.is_eof() { + return '\0'; + } + self.text[self.index - 1..].chars().nth(0).unwrap() + } + + fn ignore_newlines(&self) -> bool { + self.paren_stack.len() > 0 && self.paren_stack.last() != Some(&'}') + } + + fn error(&mut self, message: impl ToString) -> ParseError { + self.was_error = true; + ParseError { + message: message.to_string(), + line: self.line, + path: self.path.clone(), + } + } + + fn advance(&mut self) { + if self.is_eof() { + return; + } + if self.current() == '\n' { + self.line += 1; + } + self.index += self.text[self.index - 1..] 
+ .chars() + .nth(0) + .unwrap() + .len_utf8(); + } + + fn mat(&mut self, c: char) -> bool { + if self.current() == c { + self.advance(); + return true; + } else { + return false; + } + } + + fn skip_whitespace(&mut self) { + while WHITESPACE.contains(self.current()) + || (self.current() == '\n' && self.ignore_newlines()) + || self.current() == '#' + { + if self.current() == '#' { + self.advance(); + while self.current() != '\n' && !self.is_eof() { + self.advance(); + } + self.mat('\n'); + } else { + self.advance(); + } + } + self.start = self.index - 1; + } + + fn make_token(&mut self, kind: TokenKind) -> Token { + let token = Token { + line: self.line, + //index: self.start, + text: self.lexeme().to_string(), + kind, + }; + self.start = self.index - 1; + token + } + + pub fn next(&mut self) -> Result { + self.skip_whitespace(); + if self.is_eof() { + return Ok(self.make_token(TokenKind::Eof)); + } else if NAME_START_CHARS.contains(self.current()) { + return Ok(self.name()); + } else if NUMBER_START_CHARS.contains(self.current()) { + return Ok(self.number()); + } else if STRING_START_CHARS.contains(self.current()) { + return self.string(); + } else if self.mat('+') { + return Ok(self.make_token(TokenKind::Plus)); + } else if self.mat('-') { + if self.mat('>') { + return Ok(self.make_token(TokenKind::Arrow)); + } else { + return Ok(self.make_token(TokenKind::Minus)); + } + } else if self.mat('*') { + return Ok(self.make_token(TokenKind::Star)); + } else if self.mat('/') { + return Ok(self.make_token(TokenKind::Slash)); + } else if self.mat('&') { + if self.mat('&') { + return Ok(self.make_token(TokenKind::And)); + } + } else if self.mat('|') { + if self.mat('|') { + return Ok(self.make_token(TokenKind::Or)); + } + } else if self.mat('!') { + if self.mat('=') { + return Ok(self.make_token(TokenKind::BangEq)); + } else { + return Ok(self.make_token(TokenKind::Bang)); + } + } else if self.mat('=') { + if self.mat('=') { + return Ok(self.make_token(TokenKind::EqEq)); + 
} else {
+                return Ok(self.make_token(TokenKind::Eq));
+            }
+        } else if self.mat('<') {
+            if self.mat('=') {
+                return Ok(self.make_token(TokenKind::LessEq));
+            } else {
+                return Ok(self.make_token(TokenKind::Less));
+            }
+        } else if self.mat('>') {
+            if self.mat('=') {
+                return Ok(self.make_token(TokenKind::GreaterEq)); // ">=" (was swapped with the bare ">" case)
+            } else {
+                return Ok(self.make_token(TokenKind::Greater)); // bare ">"
+            }
+        } else if self.mat('(') {
+            self.paren_stack.push(')');
+            return Ok(self.make_token(TokenKind::LParen));
+        } else if self.mat(')') {
+            return match self.paren_stack.last() {
+                None => Err(self.error("')' has unmatched '('")),
+                Some(')') => {
+                    self.paren_stack.pop();
+                    Ok(self.make_token(TokenKind::RParen))
+                }
+                Some(c) => Err(self.error(format!("mismatched ')' (expected {:?})", c))),
+            };
+        } else if self.mat('{') {
+            self.paren_stack.push('}');
+            return Ok(self.make_token(TokenKind::LBrace));
+        } else if self.mat('}') {
+            return match self.paren_stack.last() {
+                None => Err(self.error("'}' has unmatched '{'")),
+                Some('}') => {
+                    self.paren_stack.pop();
+                    Ok(self.make_token(TokenKind::RBrace))
+                }
+                Some(c) => Err(self.error(format!("mismatched '}}' (expected {:?})", c))),
+            };
+        } else if self.mat('[') {
+            self.paren_stack.push(']');
+            return Ok(self.make_token(TokenKind::LBracket));
+        } else if self.mat(']') {
+            return match self.paren_stack.last() {
+                None => Err(self.error("']' has unmatched '['")),
+                Some(']') => {
+                    self.paren_stack.pop();
+                    Ok(self.make_token(TokenKind::RBracket))
+                }
+                Some(c) => Err(self.error(format!("mismatched ']' (expected {:?})", c))),
+            };
+        } else if self.mat('.') {
+            return Ok(self.make_token(TokenKind::Dot));
+        } else if self.mat(',') {
+            return Ok(self.make_token(TokenKind::Comma));
+        } else if self.mat(':') {
+            return Ok(self.make_token(TokenKind::Colon));
+        } else if self.mat('\n') {
+            assert!(!self.ignore_newlines());
+            // fix the line number since it will have already advanced when we make the token
+            self.line -= 1;
+            let token =
self.make_token(TokenKind::Eol); + self.line += 1; + return Ok(token); + } else if self.mat(';') { + return Ok(self.make_token(TokenKind::Eol)); + } + + Err(self.error(format!("unexpected character: {:?}", self.current()))) + } + + fn name(&mut self) -> Token { + static KEYWORDS: OnceLock> = OnceLock::new(); + let keywords = KEYWORDS.get_or_init(|| { + hash_map! { + "return" => TokenKind::Return, + "if" => TokenKind::If, + "else" => TokenKind::Else, + "true" => TokenKind::True, + "false" => TokenKind::False, + "nil" => TokenKind::Nil, + } + }); + + while NAME_CHARS.contains(self.current()) { + self.advance(); + } + if let Some(kind) = keywords.get(self.lexeme()) { + self.make_token(*kind) + } else { + self.make_token(TokenKind::Name) + } + } + + fn number(&mut self) -> Token { + while NUMBER_CHARS.contains(self.current()) { + self.advance(); + } + self.make_token(TokenKind::Number) + } + + fn string(&mut self) -> Result { + let terminator = self.current(); + self.advance(); + + while self.current() != terminator && !self.is_eof() { + if self.current() == '\\' { + self.advance(); + if STRING_ESCAPES.contains(self.current()) { + self.advance(); + } else { + return Err(self.error(format!("unknown string escape {:?}", self.current()))); + } + } else { + self.advance(); + } + } + + if self.current() == terminator { + self.advance(); + Ok(self.make_token(TokenKind::String)) + } else { + Err(self.error("unterminated string")) + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Parser +//////////////////////////////////////////////////////////////////////////////// + +macro_rules! mat { + ($self:expr, $($op:expr),+ $(,)?) => { + $($self.mat($op)?)||+ + }; +} + +macro_rules! expect { + ($self:expr, $message:expr, $($kind:expr),+ $(,)?) => {{ + if mat!($self, $($kind),+) { + Ok($self.prev.clone().unwrap()) + } else { + Err($self.error($message)) + } + }}; +} + +macro_rules! 
bin_expr {
+    ($name:ident, $next:ident, $($op:expr),+ $(,)?) => {
+        fn $name(&mut self) -> Result {
+            let mut expr = self.$next()?;
+            while $(self.mat($op)?)||+ {
+                let op = self.prev.clone().unwrap();
+                let rhs = self.$next()?;
+                expr = Box::new(BinaryExpr {lhs: expr, op, rhs});
+            }
+            Ok(expr)
+        }
+    };
+}
+
+pub struct Parser {
+    lexer: Lexer,
+    prev: Option,
+    current: Token,
+    next: Token,
+    was_error: bool,
+}
+
+impl Parser {
+    pub fn new(text: String, path: &dyn AsRef) -> Result {
+        let mut lexer = Lexer::new(text, path);
+        let prev = None;
+        let current = lexer.next()?;
+        let next = lexer.next()?;
+        Ok(Self {
+            lexer,
+            prev,
+            current,
+            next,
+            was_error: false,
+        })
+    }
+
+    pub fn parse_all(&mut self) -> Result> {
+        let mut stmts = Vec::new();
+        while !self.is_eof() {
+            if let Some(s) = self.stmt()? {
+                stmts.push(s);
+            }
+        }
+        Ok(stmts)
+    }
+
+    //
+    // Properties
+    //
+
+    fn line(&self) -> usize {
+        self.lexer.line
+    }
+
+    fn path(&self) -> &Path {
+        &self.lexer.path
+    }
+
+    fn is_eof(&self) -> bool {
+        self.current.kind == TokenKind::Eof // the lexer runs two tokens ahead of `current`, so its own EOF comes too early
+    }
+
+    pub fn was_error(&self) -> bool {
+        self.was_error || self.lexer.was_error()
+    }
+
+    //
+    // Parser primitives
+    //
+
+    fn advance(&mut self) -> Result<()> {
+        self.prev = Some(self.current.clone());
+        self.current = self.next.clone();
+        self.next = self.lexer.next()?;
+        Ok(())
+    }
+
+    fn check(&self, what: TokenKind) -> bool {
+        self.current.kind == what
+    }
+
+    fn mat(&mut self, what: TokenKind) -> Result {
+        if self.check(what) {
+            self.advance()?;
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    fn expect(&mut self, message: impl Display, what: TokenKind) -> Result<&Token> {
+        if self.mat(what)?
{ + Ok(self.prev.as_ref().unwrap()) + } else { + Err(self.error(format!( + "{message} (NOTE: got {:?} {:?})", + self.current.kind, self.current.text + ))) + } + } + + fn error(&mut self, message: impl ToString) -> ParseError { + self.was_error = true; + ParseError { + message: message.to_string(), + line: self.line(), + path: self.path().into(), + } + } + + fn synchronize(&mut self) -> Result<()> { + while !self.is_eof() { + match self.current.kind { + TokenKind::Return | TokenKind::If | TokenKind::LBrace => { + break; + } + _ => self.advance()?, + } + } + Ok(()) + } + + // + // Statements + // + + fn stmt(&mut self) -> Result> { + // skip past end-lines to get to the good stuff + while self.mat(TokenKind::Eol)? { + continue; + } + + // nothing left after EOLs + if self.is_eof() { + return Ok(None); + } + + match self.stmt_wrapped() { + Ok(result) => Ok(Some(result)), + Err(e) => { + eprintln!("{}", e); + self.synchronize()?; + Ok(None) + } + } + } + + fn stmt_wrapped(&mut self) -> Result { + if self.mat(TokenKind::Return)? { + self.return_stmt() + } else if self.mat(TokenKind::If)? { + self.if_stmt() + } else if self.mat(TokenKind::LBrace)? { + let lbrace = self.prev.clone().unwrap(); + let stmts = self.block()?; + let rbrace = self.prev.clone().unwrap(); + Ok(Box::new(BlockStmt { + lbrace, + stmts, + rbrace, + }) as Box) + } else if self.current.kind == TokenKind::Name && self.next.kind == TokenKind::Eq { + self.assign_stmt() + } else { + let expr = self.expr()?; + let stmt: StmtP; + + if expr.as_any_ref().downcast_ref::().is_some() && self.mat(TokenKind::Eq)? 
{ + let expr = expr.as_any().downcast::().unwrap(); + let rhs = self.expr()?; + // unpack the GetExpr and turn it into a SetExpr instead + stmt = Box::new(SetStmt { + expr: expr.expr, + name: expr.name, + rhs, + }); + } else { + stmt = Box::new(ExprStmt { expr }); + } + expect!( + self, + "expect end of line after expression", + TokenKind::Eol, + TokenKind::Eof, + )?; + Ok(stmt) + } + } + + fn return_stmt(&mut self) -> Result { + let return_kw = self.prev.clone().unwrap(); + let mut expr = None; + + if !self.check(TokenKind::Eol) && !self.check(TokenKind::RBrace) { + expr = Some(self.expr()?); + } + if !self.check(TokenKind::RBrace) { + expect!( + self, + "expected end of line after return statement", + TokenKind::Eol, + TokenKind::Eof, + )?; + } + Ok(Box::new(ReturnStmt { return_kw, expr })) + } + + fn if_stmt(&mut self) -> Result { + let if_kw = self.prev.clone().unwrap(); + let condition = self.expr()?; + self.expect("expect '{' after 'if' condition", TokenKind::LBrace)?; + let then_branch = self.block_stmt()?; + let mut else_branch = Vec::new(); + if self.mat(TokenKind::Else)? { + if self.mat(TokenKind::If)? { + else_branch.push(self.if_stmt()?); + } else { + self.expect("expect '{' after else statement", TokenKind::LBrace)?; + else_branch = self.block()?; + } + } + Ok(Box::new(IfStmt { + if_kw, + condition, + then_branch, + else_branch, + })) + } + + fn block_stmt(&mut self) -> Result { + let lbrace = self.prev.clone().unwrap(); + assert_eq!(lbrace.kind, TokenKind::LBrace); + let stmts = self.block()?; + let rbrace = self.prev.clone().unwrap(); + assert_eq!(rbrace.kind, TokenKind::RBrace); + Ok(BlockStmt { + lbrace, + stmts, + rbrace, + }) + } + + fn block(&mut self) -> Result> { + let mut stmts = Vec::new(); + // the stmt rule is skipping past EOLs too. however if there's nothing *except* for EOLs + // remaining for the rest of the block, we want to know about that head of time rather than + // let the statement rule handle it. 
+ // so we handle a bunch of EOLs right here and now. + while self.mat(TokenKind::Eol)? { + continue; + } + while !self.check(TokenKind::RBrace) && !self.is_eof() { + let s = self.stmt()?; + if let Some(s) = s { + stmts.push(s); + } else { + break; + } + while self.mat(TokenKind::Eol)? { + continue; + } + } + self.expect("expect '}' after statement block", TokenKind::RBrace)?; + Ok(stmts) + } + + fn assign_stmt(&mut self) -> Result { + let name = self + .expect("expect name for assign statement", TokenKind::Name)? + .clone(); + self.expect("expect '=' after name", TokenKind::Eq)?; + let expr = self.expr()?; + if !self.check(TokenKind::RBrace) { + expect!( + self, + "expected end of line after assign statement", + TokenKind::Eol, + TokenKind::Eof + )?; + } + Ok(Box::new(AssignStmt { + lhs: name, + rhs: expr, + })) + } + + // + // Expressions + // + fn expr(&mut self) -> Result { + self.logical_or_expr() + } + + bin_expr!(logical_or_expr, logical_and_expr, TokenKind::Or); + + bin_expr!(logical_and_expr, equality_expr, TokenKind::And); + + bin_expr!( + equality_expr, + compare_expr, + TokenKind::BangEq, + TokenKind::EqEq + ); + + bin_expr!( + compare_expr, + binary_term, + TokenKind::Less, + TokenKind::LessEq, + TokenKind::Greater, + TokenKind::GreaterEq + ); + + bin_expr!( + binary_term, + binary_factor, + TokenKind::Plus, + TokenKind::Minus + ); + + bin_expr!(binary_factor, unary_expr, TokenKind::Star, TokenKind::Slash); + + fn unary_expr(&mut self) -> Result { + if mat!(self, TokenKind::Bang, TokenKind::Minus, TokenKind::Plus) { + let op = self.prev.clone().unwrap(); + let expr = self.unary_expr()?; + Ok(Box::new(UnaryExpr { op, expr })) + } else { + self.call_expr() + } + } + + fn call_expr(&mut self) -> Result { + let mut expr = self.primary_expr()?; + loop { + if self.mat(TokenKind::LParen)? { + expr = self.finish_call_expr(expr)?; + } else if self.mat(TokenKind::Dot)? { + let name = self + .expect("expect name after '.'", TokenKind::Name)? 
+ .clone(); + expr = Box::new(GetExpr { expr, name }); + } else { + break; + } + } + Ok(expr) + } + + fn finish_call_expr(&mut self, callee: ExprP) -> Result { + let mut args = Vec::new(); + if !self.check(TokenKind::RParen) { + args.push(self.expr()?); + while self.mat(TokenKind::Comma)? { + // this allows a trailing comma + if self.check(TokenKind::RParen) { + break; + } + args.push(self.expr()?); + } + } + let rparen = self + .expect("expect ')' after function arguments", TokenKind::RParen)? + .clone(); + Ok(Box::new(CallExpr { + expr: callee, + args, + rparen, + })) + } + + fn primary_expr(&mut self) -> Result { + if mat!( + self, + TokenKind::Name, + TokenKind::Number, + TokenKind::String, + TokenKind::True, + TokenKind::False, + TokenKind::Nil + ) { + Ok(Box::new(PrimaryExpr { + token: self.prev.clone().unwrap(), + })) + } else if self.mat(TokenKind::LParen)? { + let expr: ExprP; + // check if we're defining a function + if self.check(TokenKind::RParen) { + expr = self.finish_function_expr()?; + } else if self.current.kind == TokenKind::Name + && (self.next.kind == TokenKind::RParen + || self.next.kind == TokenKind::Colon + || self.next.kind == TokenKind::Comma) + { + expr = self.finish_function_expr()?; + } else { + expr = self.expr()?; + self.expect("expect ')' after expression", TokenKind::RParen)?; + } + Ok(expr) + } else { + Err(self.error(format!("unexpected token {:?}", self.current.kind))) + } + } + + fn finish_function_expr(&mut self) -> Result { + let lparen = self.prev.clone().unwrap(); + + let mut params = Vec::new(); + if !self.check(TokenKind::RParen) { + self.parse_param(&mut params)?; + while self.mat(TokenKind::Comma)? { + if self.check(TokenKind::RParen) { + break; + } + self.parse_param(&mut params)?; + } + } + + self.expect( + "expect ')' after function definition parameters", + TokenKind::RParen, + )?; + + let mut return_type = None; + if self.mat(TokenKind::Arrow)? 
{ + return_type = Some(self.expr()?); + } + + self.expect("expect '{' after function signature", TokenKind::LBrace)?; + let body = self.block()?; + let rbrace = self.prev.clone().unwrap(); + + Ok(Box::new(FunctionExpr { + lparen, + params, + return_type, + body, + rbrace, + })) + } + + fn parse_param(&mut self, params: &mut Vec<(Token, Option)>) -> Result<()> { + let name = self + .expect("expect name after function declaration", TokenKind::Name)? + .clone(); + let mut ty = None; + if self.mat(TokenKind::Colon)? { + ty = Some(self.expr()?); + } + params.push((name, ty)); + Ok(()) + } +} diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 0000000..ead0bdc --- /dev/null +++ b/src/token.rs @@ -0,0 +1,67 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum TokenKind { + // Keywords + Return, + If, + Else, + True, + False, + Nil, + + // Expressions + Name, + Number, + String, + + // Binary operators + Plus, + Minus, + Star, + Slash, + + // Unary operators (not already covered) + Bang, + + // Boolean operators + And, + Or, + + // Comparison + BangEq, + EqEq, + Greater, + GreaterEq, + Less, + LessEq, + + // Braces, parens, etc + LParen, + RParen, + LBrace, + RBrace, + LBracket, + RBracket, + + // Assignment + Eq, + + // Dot, comma + Dot, + Comma, + Arrow, + Colon, + + // Line end + Eol, + + // File end + Eof, +} + +#[derive(Debug, Clone)] +pub struct Token { + pub line: usize, + //pub index: usize, + pub text: String, + pub kind: TokenKind, +} diff --git a/src/vm.rs b/src/vm.rs new file mode 100644 index 0000000..9056fd8 --- /dev/null +++ b/src/vm.rs @@ -0,0 +1,350 @@ +use std::sync::Arc; + +use crate::builtins; +use crate::obj::*; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Op { + // Stack functions + Pop, + PushConstant(LongOpArg), + + // Variables + GetLocal(LocalIndex), + SetLocal(LocalIndex), + GetGlobal(GlobalId), + SetGlobal(GlobalId), + + // Attributes + GetAttr(ConstantId), + SetAttr(ConstantId), + + // Jumps + 
Jump(JumpOpArg), + JumpFalse(JumpOpArg), + JumpTrue(JumpOpArg), + + // Functions + Call(Argc), + Return, + CloseOver { depth: ShortOpArg, slot: ShortOpArg }, + + // VM control + Halt, +} + +pub type LineRange = (usize, usize); + +type ShortOpArg = u16; +type LongOpArg = u32; + +pub type JumpOpArg = i32; +pub type LocalIndex = LongOpArg; +pub type LocalSlot = ShortOpArg; +pub type ConstantId = LongOpArg; +pub type GlobalId = LongOpArg; +pub type Argc = LongOpArg; +pub type FrameDepth = ShortOpArg; + +#[derive(Debug, Clone)] +pub struct Local { + pub(crate) slot: LocalSlot, + pub(crate) index: LocalIndex, + pub(crate) name: String, +} + +#[derive(Debug, Default, Clone)] +pub struct Chunk { + pub(crate) code: Vec, + pub(crate) lines: Vec, + pub(crate) locals: Vec, +} + +#[derive(Debug)] +pub struct Frame { + pub(crate) name: Arc, + pub(crate) chunk: Arc, + pub(crate) ip: usize, + pub(crate) stack_base: usize, +} + +impl Frame { + pub fn new(name: Arc, chunk: Arc, stack_base: usize) -> Self { + Self { + name, + chunk, + ip: 0, + stack_base, + } + } +} + +pub struct Vm { + constants: Vec, + //global_names: Vec, + globals: Vec, + stack: Vec, + frames: Vec, +} + +impl Vm { + /// Create a new virtual machine with the given chunk, constants, and global names. 
+ pub fn new(chunk: Arc, constants: Vec, global_names: Vec) -> Self { + // set up globals + let nil = NilInst::create(); + let mut globals: Vec<_> = global_names + .iter() + .map(|_| Ptr::clone(&nil) as ObjP) + .collect(); + + let mut register_global = |name: &str, value: ObjP| { + let index = global_names + .iter() + .position(|global| global == name) + .expect("could not find global"); + globals[index] = value; + }; + + register_global( + "print", + BuiltinFunctionInst::create("print".to_string(), builtins::print, 1), + ); + register_global( + "println", + BuiltinFunctionInst::create("println".to_string(), builtins::println, 1), + ); + + // stack and frames + let stack = Vec::new(); + let frames = vec![Frame::new("__main__".to_string().into(), chunk, 0)]; + + Vm { + constants, + //global_names, + globals, + stack, + frames, + } + } + + /// Get the stack. + pub fn stack(&self) -> &Vec { + &self.stack + } + + /// Current stack frame. + pub fn frame(&self) -> &Frame { + self.frames.last().unwrap() + } + + /// Current stack frame, mutably. + pub fn frame_mut(&mut self) -> &mut Frame { + self.frames.last_mut().unwrap() + } + + /// Push a new stack frame. + pub fn push_frame(&mut self, frame: Frame) { + self.frames.push(frame); + } + + /// Pop the current stack frame. + pub fn pop_frame(&mut self) -> Frame { + self.frames.pop().expect("no frame") + } + + /// Gets the chunk of the currently executing frame. + pub fn chunk(&self) -> &Chunk { + &self.frame().chunk + } + + /// Instruction pointer of the current frame. + pub fn ip(&self) -> usize { + self.frame().ip + } + + /// Update the current instruction pointer. + pub fn set_ip(&mut self, ip: usize) { + self.frame_mut().ip = ip; + } + + /* + /// Gets the line of the current instruction. + fn line(&self, offset: isize) -> LineRange { + let index = (((self.ip() as isize) + offset).max(0) as usize).min(self.chunk().lines.len()); + self.chunk().lines[index] + } + */ + + /// Get the current instruction and advance the IP. 
+ fn next(&mut self) -> Op { + let ip = self.ip(); + self.set_ip(ip + 1); + self.chunk().code[ip] + } + + /// Pop a value from the stack. + pub fn pop(&mut self) -> ObjP { + self.stack.pop().expect("stack empty") + } + + /// Peek the top value of the stack. + pub fn peek(&self) -> ObjP { + self.stack.last().map(Ptr::clone).expect("stack empty") + } + + /// Push a value to the stack. + pub fn push(&mut self, value: ObjP) { + self.stack.push(value); + } + + pub fn run(&mut self) { + loop { + match self.next() { + Op::Pop => { + self.pop(); + } + Op::PushConstant(constant_id) => { + let constant = Ptr::clone(&self.constants[constant_id as usize]); + self.push(constant); + } + Op::GetLocal(local_index) => { + let local = &self.chunk().locals[local_index as usize]; + let value = + Ptr::clone(&self.stack[self.frame().stack_base + local.slot as usize]); + self.push(value); + } + Op::SetLocal(local_index) => { + let value = self.pop(); + let local = &self.chunk().locals[local_index as usize]; + let index = self.frame().stack_base + local.slot as usize; + self.stack[index] = value; + } + Op::GetGlobal(global_index) => { + let value = Ptr::clone(&self.globals[global_index as usize]); + self.push(value); + } + Op::SetGlobal(global_index) => { + let value = self.pop(); + self.globals[global_index as usize] = value; + } + Op::GetAttr(constant_id) => { + // need both declarations to borrow cell value + let name_obj = Ptr::clone(&self.constants[constant_id as usize]); + let name = + with_obj_downcast(name_obj, |name: &StrInst| Arc::clone(&name.str_value())); + let owner = self.pop(); + let value = owner.try_read().unwrap().get_attr(&name); + if let Some(value) = value { + self.push(value); + } else { + // TODO Vm::run, Op::GetAttr - throw an exception when the attribute + // doesn't exist + // BLOCKED-ON: exceptions + todo!( + "throw an error because we couldn't read attr '{}' on '{}'", + name, + owner.try_read().unwrap(), + ); + } + } + Op::SetAttr(constant_id) => { + let 
name_obj = Ptr::clone(&self.constants[constant_id as usize]);
+                    let name =
+                        with_obj_downcast(name_obj, |name: &StrInst| Arc::clone(&name.str_value()));
+                    let value = self.pop();
+                    let target = self.pop();
+
+                    let mut target_ptr = target.try_write().unwrap();
+                    target_ptr.set_attr(&name, value);
+                }
+                Op::Jump(offset) => {
+                    let base = (self.ip() - 1) as JumpOpArg;
+                    assert!(base + offset >= 0, "tried to jump to negative IP"); // IP 0 is a valid target
+                    self.set_ip((base + offset) as usize);
+                }
+                Op::JumpFalse(offset) => {
+                    let base = (self.ip() - 1) as JumpOpArg;
+                    let value = self.peek();
+                    if !value.try_read().unwrap().is_truthy() {
+                        self.set_ip((base + offset) as usize);
+                    }
+                }
+                Op::JumpTrue(offset) => {
+                    let base = (self.ip() - 1) as JumpOpArg;
+                    let value = self.peek();
+                    if value.try_read().unwrap().is_truthy() {
+                        self.set_ip((base + offset) as usize);
+                    }
+                }
+                Op::Call(argc) => {
+                    let argc = argc as usize;
+                    let index = self.stack.len() - argc - 1;
+                    let fun_ptr = Ptr::clone(&self.stack[index]);
+                    let fun_ptr = fun_ptr.try_read().unwrap();
+
+                    let arity = if let Some(arity) = fun_ptr.arity() {
+                        arity as usize
+                    } else {
+                        // TODO Vm::run, Op::Call - throw an exception when the value isn't
+                        // callable
+                        // BLOCKED-ON: exceptions
+                        todo!("throw an error because we couldn't call {}", fun_ptr);
+                    };
+
+                    // Methods with bound "self" parameter
+                    // argc may be mutated
+                    let mut argc = argc;
+                    if let Some(method) = fun_ptr.as_any().downcast_ref::() {
+                        // shift all of the arguments over by one
+                        // (duplicate the last item on the stack and then shift everyone else over)
+                        self.stack
+                            .insert(self.stack.len() - argc, Ptr::clone(method.self_binding()));
+                        // also increment argc since we're specifying another arg
+                        argc += 1;
+                    }
+                    // remove mutability
+                    let argc = argc;
+
+                    if arity != argc {
+                        // TODO Vm::run, Op::Call - throw an exception when the number of arguments
+                        // does not match the function's arity
+                        // BLOCKED-ON: exceptions
+                        todo!(
+                            "throw an error because we
passed the wrong number of arguments to {}", + fun_ptr + ); + } + fun_ptr.call(self, argc as Argc); + } + Op::Return => { + let return_value = self.pop(); + let old_frame = self.frames.pop().unwrap(); + // stack_base is always going to be <= current stack size + self.stack + .resize_with(old_frame.stack_base, || unreachable!()); + // also pop the function object off of the stack + self.stack.pop(); + self.push(return_value); + } + Op::CloseOver { depth, slot } => { + // since we're closing over a value, and functions ultimately come from + // constants, we want to deep-clone this object so we don't alter any live + // objects. + // there is some room for optimization here so we aren't cloning the entire + // UserFunctionInst for every individual capture in a function. + let fun_ptr = self.pop(); + let mut fun: UserFunctionInst = + with_obj_downcast(fun_ptr, UserFunctionInst::clone); + let frame_index = self.frames.len() - (depth as usize) - 1; + let stack_base = self.frames[frame_index].stack_base; + let value = Ptr::clone(&self.stack[stack_base + (slot as usize)]); + fun.push_capture(value); + self.push(make_ptr(fun)); + } + Op::Halt => { + break; + } + } + } + } +}