Base initial commit

Still WIP, working on object system still, which in Rust, makes me want
to kill myself

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2024-09-20 16:04:30 -07:00
parent ccf6c9e939
commit 16f3dc960c
11 changed files with 4079 additions and 0 deletions

272
Cargo.lock generated Normal file
View File

@@ -0,0 +1,272 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anstream"
version = "0.6.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
[[package]]
name = "anstyle-parse"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "assert_matches"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9"
[[package]]
name = "clap"
version = "4.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "colorchoice"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
[[package]]
name = "common_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3f6d59c71e7dc3af60f0af9db32364d96a16e9310f3f5db2b55ed642162dd35"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "not-python-rs"
version = "0.1.0"
dependencies = [
"assert_matches",
"clap",
"common_macros",
"thiserror",
]
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
dependencies = [
"proc-macro2",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

10
Cargo.toml Normal file
View File

@@ -0,0 +1,10 @@
[package]
name = "not-python-rs"
version = "0.1.0"
edition = "2021"
[dependencies]
assert_matches = "1.5.0"
clap = { version = "4.5.8", features = ["derive"] }
common_macros = "0.1.1"
thiserror = "1.0.63"

289
src/ast.rs Normal file
View File

@@ -0,0 +1,289 @@
// This is an auto-generated file. Any changes made to this file may be overwritten.
// This file was created at: 2024-09-18 09:28:21
#![allow(dead_code)]
use std::fmt::Debug;
use std::any::Any;
use crate::token::Token;
/// Visitor over expression nodes.
///
/// There is one method per concrete `Expr` node type; each node's `Expr::accept` implementation
/// calls the method that matches its own type. Every method returns a boxed error on failure.
pub trait ExprVisitor {
fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<(), Box<dyn std::error::Error>>;
fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<(), Box<dyn std::error::Error>>;
fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<(), Box<dyn std::error::Error>>;
fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<(), Box<dyn std::error::Error>>;
fn visit_primary_expr(&mut self, expr: &PrimaryExpr) -> Result<(), Box<dyn std::error::Error>>;
fn visit_function_expr(&mut self, expr: &FunctionExpr) -> Result<(), Box<dyn std::error::Error>>;
}
/// Visitor over statement nodes.
///
/// There is one method per concrete `Stmt` node type; each node's `Stmt::accept` implementation
/// calls the method that matches its own type. Every method returns a boxed error on failure.
pub trait StmtVisitor {
fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<(), Box<dyn std::error::Error>>;
fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<(), Box<dyn std::error::Error>>;
fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<(), Box<dyn std::error::Error>>;
fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<(), Box<dyn std::error::Error>>;
fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<(), Box<dyn std::error::Error>>;
fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<(), Box<dyn std::error::Error>>;
}
/// Common interface of every expression node in the AST.
pub trait Expr: Debug + Any {
/// Dispatches to the `ExprVisitor` method that corresponds to this node's concrete type.
fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box<dyn std::error::Error>>;
/// Converts an owned, boxed node into `Box<dyn Any>` so it can be downcast to its concrete type.
fn as_any(self: Box<Self>) -> Box<dyn Any>;
/// Borrows the node as `&dyn Any` so it can be downcast to its concrete type by reference.
fn as_any_ref(&self) -> &dyn Any;
}
/// Owned pointer to a type-erased expression node.
pub type ExprP = Box<dyn Expr + 'static>;
/// Binary expression node: a left operand, an operator token, and a right operand.
#[derive(Debug)]
pub struct BinaryExpr {
pub lhs: ExprP,
pub op: Token,
pub rhs: ExprP,
}
impl Expr for BinaryExpr {
// Dispatches to `ExprVisitor::visit_binary_expr`.
fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_binary_expr(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Unary expression node: an operator token followed by the operand expression.
#[derive(Debug)]
pub struct UnaryExpr {
pub op: Token,
pub expr: ExprP,
}
impl Expr for UnaryExpr {
// Dispatches to `ExprVisitor::visit_unary_expr`.
fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_unary_expr(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Call expression node: the expression being called, its arguments, and the closing `)` token
/// (kept so the last source line of the call can be recovered).
#[derive(Debug)]
pub struct CallExpr {
pub expr: ExprP,
pub args: Vec<ExprP>,
pub rparen: Token,
}
impl Expr for CallExpr {
// Dispatches to `ExprVisitor::visit_call_expr`.
fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_call_expr(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Get expression node: a target expression plus a name token.
// NOTE(review): presumably attribute access (`expr.name`), mirroring `SetStmt` — confirm against
// the parser.
#[derive(Debug)]
pub struct GetExpr {
pub expr: ExprP,
pub name: Token,
}
impl Expr for GetExpr {
// Dispatches to `ExprVisitor::visit_get_expr`.
fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_get_expr(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Primary expression node: a single token (for example a name; other token kinds are decided by
/// the parser).
#[derive(Debug)]
pub struct PrimaryExpr {
pub token: Token,
}
impl Expr for PrimaryExpr {
// Dispatches to `ExprVisitor::visit_primary_expr`.
fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_primary_expr(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Function expression node.
///
/// Holds the opening `(` token, the parameter list, an optional return-type expression, the body
/// statements and the closing `}` token. The two delimiter tokens allow the source line range of
/// the function to be recovered.
// NOTE(review): the `Option<ExprP>` paired with each parameter token is presumably a type
// annotation — confirm against the parser.
#[derive(Debug)]
pub struct FunctionExpr {
pub lparen: Token,
pub params: Vec<(Token , Option<ExprP>)>,
pub return_type: Option<ExprP>,
pub body: Vec<StmtP>,
pub rbrace: Token,
}
impl Expr for FunctionExpr {
// Dispatches to `ExprVisitor::visit_function_expr`.
fn accept(&self, visitor: &mut dyn ExprVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_function_expr(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Common interface of every statement node in the AST.
pub trait Stmt: Debug + Any {
/// Dispatches to the `StmtVisitor` method that corresponds to this node's concrete type.
fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box<dyn std::error::Error>>;
/// Converts an owned, boxed node into `Box<dyn Any>` so it can be downcast to its concrete type.
fn as_any(self: Box<Self>) -> Box<dyn Any>;
/// Borrows the node as `&dyn Any` so it can be downcast to its concrete type by reference.
fn as_any_ref(&self) -> &dyn Any;
}
/// Owned pointer to a type-erased statement node.
pub type StmtP = Box<dyn Stmt + 'static>;
/// Expression statement node: wraps a single expression used as a statement.
#[derive(Debug)]
pub struct ExprStmt {
pub expr: ExprP,
}
impl Stmt for ExprStmt {
// Dispatches to `StmtVisitor::visit_expr_stmt`.
fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_expr_stmt(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Assignment statement node: the target name token (`lhs`) and the value expression (`rhs`).
#[derive(Debug)]
pub struct AssignStmt {
pub lhs: Token,
pub rhs: ExprP,
}
impl Stmt for AssignStmt {
// Dispatches to `StmtVisitor::visit_assign_stmt`.
fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_assign_stmt(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Attribute-set statement node: the target expression, the attribute name token, and the value
/// expression that is stored into that attribute.
#[derive(Debug)]
pub struct SetStmt {
pub expr: ExprP,
pub name: Token,
pub rhs: ExprP,
}
impl Stmt for SetStmt {
// Dispatches to `StmtVisitor::visit_set_stmt`.
fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_set_stmt(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Block statement node: the opening `{` token, the contained statements, and the closing `}`
/// token. The brace tokens allow the source line range of the block to be recovered.
#[derive(Debug)]
pub struct BlockStmt {
pub lbrace: Token,
pub stmts: Vec<StmtP>,
pub rbrace: Token,
}
impl Stmt for BlockStmt {
// Dispatches to `StmtVisitor::visit_block_stmt`.
fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_block_stmt(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// Return statement node: the `return` keyword token and an optional value expression.
#[derive(Debug)]
pub struct ReturnStmt {
pub return_kw: Token,
pub expr: Option<ExprP>,
}
impl Stmt for ReturnStmt {
// Dispatches to `StmtVisitor::visit_return_stmt`.
fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_return_stmt(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}
/// If statement node: the `if` keyword token, the condition expression, the "then" block, and the
/// statements of the else branch (an empty vector when there is nothing to run in the else case).
#[derive(Debug)]
pub struct IfStmt {
pub if_kw: Token,
pub condition: ExprP,
pub then_branch: BlockStmt,
pub else_branch: Vec<StmtP>,
}
impl Stmt for IfStmt {
// Dispatches to `StmtVisitor::visit_if_stmt`.
fn accept(&self, visitor: &mut dyn StmtVisitor) -> Result<(), Box<dyn std::error::Error>>{
visitor.visit_if_stmt(self)
}
// Type-erases the boxed node for owned downcasting.
fn as_any(self: Box<Self>) -> Box<dyn Any> {
self
}
// Type-erases the node for downcasting by reference.
fn as_any_ref(&self) -> &dyn Any {
self
}
}

13
src/builtins.rs Normal file
View File

@@ -0,0 +1,13 @@
//! Builtin functions.
use crate::obj::{NilInst, ObjP};
use crate::vm::Vm;
/// Builtin `println`: writes the first argument followed by a newline to stdout and returns nil.
///
/// When called without arguments only the newline is written, instead of panicking on an
/// out-of-bounds index. Arguments after the first are ignored.
///
/// # Panics
///
/// Panics if the argument cannot be read-locked at the time of the call.
pub(crate) fn println(_vm: &mut Vm, args: Vec<ObjP>) -> ObjP {
    match args.first() {
        Some(obj) => println!(
            "{}",
            obj.try_read().expect("println: argument is locked for writing")
        ),
        None => println!(),
    }
    NilInst::create()
}
/// Builtin `print`: writes the first argument to stdout without a trailing newline and returns
/// nil.
///
/// When called without arguments nothing is written, instead of panicking on an out-of-bounds
/// index. Arguments after the first are ignored.
///
/// # Panics
///
/// Panics if the argument cannot be read-locked at the time of the call.
pub(crate) fn print(_vm: &mut Vm, args: Vec<ObjP>) -> ObjP {
    if let Some(obj) = args.first() {
        print!(
            "{}",
            obj.try_read().expect("print: argument is locked for writing")
        );
    }
    NilInst::create()
}

988
src/compiler.rs Normal file
View File

@@ -0,0 +1,988 @@
use std::collections::{HashMap, HashSet};
use std::fmt::{self, Display};
use std::sync::{Arc, LazyLock};
use assert_matches::assert_matches;
use common_macros::hash_map;
use thiserror::Error;
use crate::ast::*;
use crate::obj::*;
use crate::token::TokenKind;
use crate::vm::*;
/// Result type used throughout the compiler; the error is any boxed `std::error::Error`, which
/// matches the error type returned by the AST visitor traits.
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
////////////////////////////////////////////////////////////////////////////////
// LineNumber visitor
////////////////////////////////////////////////////////////////////////////////
/// Visitor state that records the first and last source line covered by an AST node.
#[derive(Default)]
struct LineNumber {
    /// Set once `start` has been recorded; later start updates are ignored.
    lock_start: bool,
    /// First line seen.
    start: usize,
    /// Most recent end line seen.
    end: usize,
}

impl LineNumber {
    /// Records `start` as the first line unless a first line has already been recorded.
    fn update_start(&mut self, start: usize) {
        if self.lock_start {
            return;
        }
        self.lock_start = true;
        self.start = start;
    }

    /// Overwrites the last line with `end`; the most recent call wins.
    fn update_end(&mut self, end: usize) {
        self.end = end;
    }
}
/// Statement half of the line-range visitor.
///
/// Errors from nested nodes are propagated with `?` rather than unwrapped, matching the other
/// visitors in this file.
impl StmtVisitor for LineNumber {
    /// The range of an expression statement is the range of its expression.
    fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
        stmt.expr.accept(self)
    }

    /// Starts at the assigned name; the end comes from the right-hand side.
    fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> {
        self.update_start(stmt.lhs.line);
        stmt.rhs.accept(self)
    }

    /// Covers the target expression first, then the assigned value.
    fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> {
        stmt.expr.accept(self)?;
        stmt.rhs.accept(self)
    }

    /// A block spans from its opening brace to its closing brace; its contents are not visited.
    fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<()> {
        self.update_start(stmt.lbrace.line);
        self.update_end(stmt.rbrace.line);
        Ok(())
    }

    /// Starts (and by default ends) at the `return` keyword; a value expression may extend the end.
    fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<()> {
        self.update_start(stmt.return_kw.line);
        self.update_end(stmt.return_kw.line);
        if let Some(expr) = stmt.expr.as_ref() {
            expr.accept(self)?;
        }
        Ok(())
    }

    /// Starts at the `if` keyword and visits the condition, the then-block and the else branch in
    /// source order so the last one visited determines the end.
    fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<()> {
        self.update_start(stmt.if_kw.line);
        stmt.condition.accept(self)?;
        stmt.then_branch.accept(self)?;
        for stmt in &stmt.else_branch {
            stmt.accept(self)?;
        }
        Ok(())
    }
}
/// Expression half of the line-range visitor.
///
/// Errors from nested nodes are propagated with `?` rather than unwrapped, matching the other
/// visitors in this file.
impl ExprVisitor for LineNumber {
    /// Left operand supplies the start, right operand supplies the end.
    fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> {
        expr.lhs.accept(self)?;
        expr.rhs.accept(self)
    }

    /// Starts at the operator token; the operand supplies the end.
    fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<()> {
        self.update_start(expr.op.line);
        expr.expr.accept(self)
    }

    /// Starts wherever the callee starts and ends at the closing parenthesis. Arguments are not
    /// visited because they always lie before the closing parenthesis.
    fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<()> {
        expr.expr.accept(self)?;
        self.update_end(expr.rparen.line);
        Ok(())
    }

    /// Starts wherever the target starts and ends at the accessed name.
    fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> {
        expr.expr.accept(self)?;
        self.update_end(expr.name.line);
        Ok(())
    }

    /// A primary expression is a single token, so it starts and ends on that token's line.
    fn visit_primary_expr(&mut self, expr: &PrimaryExpr) -> Result<()> {
        self.update_start(expr.token.line);
        self.update_end(expr.token.line);
        Ok(())
    }

    /// A function expression spans from its opening parenthesis to its closing brace.
    fn visit_function_expr(&mut self, expr: &FunctionExpr) -> Result<()> {
        self.update_start(expr.lparen.line);
        self.update_end(expr.rbrace.line);
        Ok(())
    }
}
fn expr_line_number(expr: &dyn Expr) -> LineRange {
let mut line_number = LineNumber::default();
expr.accept(&mut line_number).unwrap();
(line_number.start, line_number.end)
}
fn stmt_line_number(stmt: &dyn Stmt) -> LineRange {
let mut line_number = LineNumber::default();
stmt.accept(&mut line_number).unwrap();
(line_number.start, line_number.end)
}
////////////////////////////////////////////////////////////////////////////////
// LocalAssignCollector and LocalNameCollector
////////////////////////////////////////////////////////////////////////////////
// TODO - reduce copy/paste stuff here?
/// Collects the names that are assigned to within a body of statements, without descending into
/// nested function expressions.
#[derive(Default)]
struct LocalAssignCollector {
    names: HashSet<String>,
}

impl LocalAssignCollector {
    /// Returns the set of names assigned anywhere in `body` (including nested blocks and `if`
    /// branches).
    ///
    /// Takes a slice so callers may pass a `&Vec<StmtP>` or any other contiguous run of
    /// statements.
    ///
    /// # Panics
    ///
    /// Panics if a visitor method reports an error; the visitor methods of this collector only
    /// ever propagate errors from child nodes and produce none themselves.
    fn collect(body: &[StmtP]) -> HashSet<String> {
        let mut collector = Self::default();
        for stmt in body {
            stmt.accept(&mut collector)
                .expect("collecting assigned names does not fail");
        }
        collector.names
    }
}
/// Statement traversal for the assignment collector: records assignment targets and recurses into
/// everything that can contain further statements or expressions.
impl StmtVisitor for LocalAssignCollector {
    fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
        stmt.expr.accept(self)
    }

    /// Records the assigned name. The right-hand side is not visited.
    fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> {
        let assigned = stmt.lhs.text.to_string();
        self.names.insert(assigned);
        Ok(())
    }

    fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> {
        stmt.expr.accept(self)?;
        stmt.rhs.accept(self)
    }

    fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<()> {
        // We descend into nested blocks even though they open a scope below the current "local"
        // scope, because the goal is the list of ALL local names assigned to in this scope.
        //
        // FIXME BUG: this creates some weirdness. For example:
        //
        // outer_function = () {
        //     some_value = 1234
        //     inner_function = () {
        //         {
        //             # this is a local value because we're assigning to it
        //             some_value = 5678
        //         }
        //         # our local named "some_value" has gone out of scope, so hypothetically we
        //         # should be using the "some_value" that was defined in the scope above us.
        //         # however, since we're collecting local assignments in all blocks, this
        //         # should error out as "unknown local 'some_value'"
        //         println(some_value)
        //     }
        //     return inner_function
        // }
        //
        // Ideally nonlocals would be checked with every new scope layer and every new block.
        // That is hard to do with the current setup; no solution has been decided yet.
        stmt.stmts
            .iter()
            .try_for_each(|inner| inner.accept(&mut *self))
    }

    fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<()> {
        match stmt.expr.as_ref() {
            Some(value) => value.accept(self),
            None => Ok(()),
        }
    }

    fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<()> {
        stmt.condition.accept(self)?;
        stmt.then_branch.accept(self)?;
        stmt.else_branch
            .iter()
            .try_for_each(|inner| inner.accept(&mut *self))
    }
}
/// Expression traversal for the assignment collector. Expressions never add names themselves;
/// they are only walked through.
impl ExprVisitor for LocalAssignCollector {
    fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> {
        expr.lhs.accept(self)?;
        expr.rhs.accept(self)
    }

    fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<()> {
        expr.expr.accept(self)
    }

    /// Only the callee is visited; the arguments are not.
    fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<()> {
        expr.expr.accept(self)
    }

    fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> {
        expr.expr.accept(self)
    }

    fn visit_primary_expr(&mut self, _expr: &PrimaryExpr) -> Result<()> {
        Ok(())
    }

    fn visit_function_expr(&mut self, _expr: &FunctionExpr) -> Result<()> {
        // Nested functions are deliberately skipped: only assignments local to the current body
        // are collected.
        Ok(())
    }
}
/// Collects the names that are referenced (read) within a body of statements, without descending
/// into nested function expressions.
#[derive(Default)]
struct LocalNameCollector {
    names: HashSet<String>,
}

impl LocalNameCollector {
    /// Returns the set of names referenced anywhere in `body` (including nested blocks and `if`
    /// branches).
    ///
    /// Takes a slice so callers may pass a `&Vec<StmtP>` or any other contiguous run of
    /// statements.
    ///
    /// # Panics
    ///
    /// Panics if a visitor method reports an error; the visitor methods of this collector only
    /// ever propagate errors from child nodes and produce none themselves.
    fn collect(body: &[StmtP]) -> HashSet<String> {
        let mut collector = Self::default();
        for stmt in body {
            stmt.accept(&mut collector)
                .expect("collecting referenced names does not fail");
        }
        collector.names
    }
}
/// Statement traversal for the name collector: walks every expression reachable from a statement
/// so that referenced names can be recorded by the expression visitor.
impl StmtVisitor for LocalNameCollector {
    fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
        stmt.expr.accept(self)
    }

    /// Only the right-hand side is visited; the assignment target is not a reference.
    fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> {
        stmt.rhs.accept(self)
    }

    fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> {
        stmt.expr.accept(self)?;
        stmt.rhs.accept(self)
    }

    fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<()> {
        stmt.stmts
            .iter()
            .try_for_each(|inner| inner.accept(&mut *self))
    }

    fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<()> {
        match stmt.expr.as_ref() {
            Some(value) => value.accept(self),
            None => Ok(()),
        }
    }

    fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<()> {
        stmt.condition.accept(self)?;
        stmt.then_branch.accept(self)?;
        stmt.else_branch
            .iter()
            .try_for_each(|inner| inner.accept(&mut *self))
    }
}
/// Expression traversal for the name collector: records every `Name` token that is reached.
impl ExprVisitor for LocalNameCollector {
    fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> {
        expr.lhs.accept(self)?;
        expr.rhs.accept(self)?;
        Ok(())
    }

    fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<()> {
        expr.expr.accept(self)?;
        Ok(())
    }

    /// Visits the callee and every argument, so that names used only as call arguments (for
    /// example `println(some_value)`) are collected as well.
    fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<()> {
        expr.expr.accept(self)?;
        for arg in &expr.args {
            arg.accept(self)?;
        }
        Ok(())
    }

    /// Only the target expression is visited; the attribute name is not a variable reference.
    fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> {
        expr.expr.accept(self)?;
        Ok(())
    }

    /// Records the token text when the primary expression is a name.
    fn visit_primary_expr(&mut self, expr: &PrimaryExpr) -> Result<()> {
        if expr.token.kind == TokenKind::Name {
            self.names.insert(expr.token.text.to_string());
        }
        Ok(())
    }

    fn visit_function_expr(&mut self, _expr: &FunctionExpr) -> Result<()> {
        // Nested functions are deliberately skipped: only names referenced directly in the
        // current body are collected.
        Ok(())
    }
}
////////////////////////////////////////////////////////////////////////////////
// Misc
////////////////////////////////////////////////////////////////////////////////
/// Converts the source text of a quoted string literal into the string value it denotes.
///
/// The first and last characters of `s` (the quotes) are dropped and the escape sequences
/// `\n`, `\r`, `\t`, `\"`, `\'` and `\\` are decoded in a single left-to-right pass. Unknown
/// escape sequences, and a trailing lone backslash, are kept verbatim.
///
/// Decoding in one pass matters: an escaped backslash followed by `n` (`\\n` in the source)
/// yields a backslash and the letter `n`, not a backslash and a newline. The quotes are removed
/// by character rather than by byte count, so literals containing multi-byte characters keep no
/// stray closing quote. Inputs shorter than two characters yield an empty string.
fn unescape(s: &str) -> String {
    // Drop the opening and closing quote characters.
    let mut quoted = s.chars();
    quoted.next();
    quoted.next_back();
    let inner = quoted.as_str();

    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        // `c` is a backslash: the next character selects the escape.
        match chars.next() {
            Some('n') => out.push('\n'),
            Some('r') => out.push('\r'),
            Some('t') => out.push('\t'),
            Some(escaped @ ('"' | '\'' | '\\')) => out.push(escaped),
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}
////////////////////////////////////////////////////////////////////////////////
// Scope
////////////////////////////////////////////////////////////////////////////////
/// Kind of a compile-time scope.
///
/// `Function` marks the boundary of a function: local lookups stop there and nonlocal lookups
/// start counting frames from there. `Local` is any other nested scope, such as a block.
#[derive(Debug, PartialEq)]
enum ScopeKind {
Local,
Function,
//Class,
}
/// A compile-time scope: its kind plus the locals declared directly inside it, in declaration
/// order.
#[derive(Debug)]
struct Scope {
    kind: ScopeKind,
    scope: Vec<Local>,
}

impl Scope {
    /// Creates an empty scope of the given kind.
    pub fn new(kind: ScopeKind) -> Self {
        let scope = Vec::new();
        Self { kind, scope }
    }
}
////////////////////////////////////////////////////////////////////////////////
// CompileError
////////////////////////////////////////////////////////////////////////////////
/// Error produced while compiling, optionally tagged with the source line range it refers to.
#[derive(Error, Debug)]
pub struct CompileError {
    pub line: Option<LineRange>,
    pub message: String,
}

impl Display for CompileError {
    /// Writes `line <range>: <message>` when a line range is known, otherwise just the message.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        match &self.line {
            Some(line) => write!(fmt, "line {:?}: {}", line, self.message),
            None => write!(fmt, "{}", self.message),
        }
    }
}
////////////////////////////////////////////////////////////////////////////////
// Compiler
////////////////////////////////////////////////////////////////////////////////
/// Compiler state: a stack of chunks being built, a stack of lexical scopes, the constants table
/// and the list of global names (a global's id is its position in that list).
#[derive(Debug)]
pub struct Compiler {
    chunks: Vec<Chunk>,
    scopes: Vec<Scope>,
    constants: Vec<ObjP>,
    globals: Vec<String>,
}

impl Default for Compiler {
    /// Creates an empty compiler whose global table is pre-seeded with the builtin names `print`
    /// and `println`, in that order.
    fn default() -> Self {
        let builtin_names = ["print", "println"];
        Self {
            chunks: Vec::new(),
            scopes: Vec::new(),
            constants: Vec::new(),
            globals: builtin_names.into_iter().map(String::from).collect(),
        }
    }
}
impl Compiler {
/// Returns the chunk currently being compiled (the top of the chunk stack).
///
/// Panics with "no chunk" if the chunk stack is empty.
fn chunk(&self) -> &Chunk {
self.chunks.last().expect("no chunk")
}
/// Mutable access to the chunk currently being compiled (the top of the chunk stack).
///
/// Panics with "no chunk" if the chunk stack is empty.
fn chunk_mut(&mut self) -> &mut Chunk {
self.chunks.last_mut().expect("no chunk")
}
/// Returns the innermost scope (the top of the scope stack).
///
/// Panics with "no scope" if the scope stack is empty, i.e. at global scope.
fn scope(&self) -> &Scope {
self.scopes.last().expect("no scope")
}
/// Mutable access to the innermost scope (the top of the scope stack).
///
/// Panics with "no scope" if the scope stack is empty, i.e. at global scope.
fn scope_mut(&mut self) -> &mut Scope {
self.scopes.last_mut().expect("no scope")
}
/// Returns `true` when no scope has been opened, which is how the global scope is represented.
fn is_global_scope(&self) -> bool {
self.scopes.is_empty()
}
/// Compiles a body of code, consuming the compiler.
///
/// On success returns the compiled `Chunk`, the constants table, and the list of global names.
/// The chunk always ends with a `Halt` instruction, attributed to the line range of the last
/// statement (or `(0, 0)` when `body` is empty).
pub fn compile(mut self, body: &Vec<StmtP>) -> Result<(Chunk, Vec<ObjP>, Vec<String>)> {
    self.chunks.push(Chunk::default());
    body.iter().try_for_each(|stmt| self.compile_stmt(stmt))?;

    // The halt instruction carries the line of the last statement, if there is one.
    let halt_line = body
        .last()
        .map_or((0, 0), |last| stmt_line_number(&**last));
    self.emit(halt_line, Op::Halt);

    let chunk = self.chunks.pop().expect("no chunk");
    Ok((chunk, self.constants, self.globals))
}
/// Compiles a single boxed statement by dispatching to this compiler's `StmtVisitor` methods.
fn compile_stmt(&mut self, stmt: &StmtP) -> Result<()> {
stmt.accept(self)
}
/// Compiles a single boxed expression by dispatching to this compiler's `ExprVisitor` methods.
fn compile_expr(&mut self, expr: &ExprP) -> Result<()> {
expr.accept(self)
}
fn insert_constant(&mut self, constant: ObjP) -> Result<ConstantId> {
// simple interning - try to find a constant that is exactly equal to this one and just
// return its value instead
for (index, interned) in self.constants.iter().enumerate() {
if constant
.try_read()
.unwrap()
.equals(&*interned.try_read().unwrap())
{
return Ok(index as ConstantId);
}
}
let index = self.constants.len();
if index > (ConstantId::MAX as usize) {
return Err(CompileError {
line: None,
message: format!("too many constants (maximum {})", ConstantId::MAX),
}
.into());
}
self.constants.push(constant);
Ok(index as ConstantId)
}
/// Looks up the id of the global called `name`, if it has been registered.
fn get_global(&self, name: &str) -> Option<GlobalId> {
    let index = self
        .globals
        .iter()
        .position(|global| global.as_str() == name)?;
    Some(index as GlobalId)
}
fn insert_global(&mut self, name: &str) -> Result<GlobalId> {
if let Some(id) = self.get_global(name) {
return Ok(id);
}
let index = self.globals.len();
if index > (GlobalId::MAX as usize) {
return Err(CompileError {
line: None,
message: format!("too many globals (maximum {})", GlobalId::MAX),
}
.into());
}
self.globals.push(name.to_string());
Ok(index as GlobalId)
}
/// Get a nonlocal binding to a variable.
///
/// This will return how many stack frames up we should look for this nonlocal, and the `Local`
/// that defines this binding. Returns `None` when no enclosing scope outside the current function
/// defines `name`.
///
/// Scopes are walked from innermost to outermost. Everything up to and including the first
/// `Function` scope belongs to the current function and is skipped; only scopes beyond it are
/// searched.
// NOTE(review): `depth` is only incremented when a further `Function` scope is reached, and it is
// incremented before that scope's own locals are checked. As a result, a local found in a block
// scope of the directly enclosing function is reported with depth 0, while a local found in that
// same function's `Function` scope is reported with depth 1. Confirm against the VM's frame
// lookup that this difference is intended.
fn get_nonlocal(&self, name: &str) -> Option<(FrameDepth, &Local)> {
let mut is_local = true;
let mut depth = 0;
for scope in self.scopes.iter().rev() {
if scope.kind == ScopeKind::Function {
// no longer inside the local scope
if is_local {
is_local = false;
continue;
}
// increase stack frame search
depth += 1;
}
// skip local variables
if is_local {
continue;
}
// outside of the local scope, check if we have defined the sought-after name
for local in &scope.scope {
if local.name == name {
return Some((depth, local));
}
}
}
None
}
/// Looks up a local called `name` in the current function.
///
/// Scopes are searched from innermost to outermost; the search stops after the first `Function`
/// scope has been examined, so locals of enclosing functions are never returned.
fn get_local(&self, name: &str) -> Option<&Local> {
    for scope in self.scopes.iter().rev() {
        if let Some(found) = scope.scope.iter().find(|local| local.name == name) {
            return Some(found);
        }
        if scope.kind == ScopeKind::Function {
            break;
        }
    }
    None
}
fn insert_local(&mut self, name: String) -> Result<&Local> {
let index = self.chunk().locals.len();
if index > (LocalIndex::MAX as usize) {
return Err(CompileError {
line: None,
message: format!("too many locals (maximum: {})", LocalIndex::MAX),
}
.into());
}
let mut local = Local {
slot: 0,
index: index as LocalIndex,
name,
};
// get the last allocated slot
for scope in self.scopes.iter().rev() {
if scope.scope.len() == 0 {
if scope.kind == ScopeKind::Function {
// don't go above the current function's scope (which was just determined to be
// empty)
break;
}
continue;
}
// get the last allocated slot and increment by one
let last = &scope.scope.last().unwrap();
if last.slot == LocalSlot::MAX {
return Err(CompileError {
line: None,
message: format!(
"too many stack slots used by locals(maximum: {})",
LocalSlot::MAX
),
}
.into());
}
local.slot = last.slot + 1;
break;
}
self.scope_mut().scope.push(local.clone());
self.chunk_mut().locals.push(local);
Ok(self.scope().scope.last().unwrap())
}
/// Opens a new, empty scope of the given kind on top of the scope stack.
fn begin_scope(&mut self, kind: ScopeKind) {
self.scopes.push(Scope::new(kind));
}
/// Closes the innermost scope and emits one `Pop` (attributed to `line`) for every local that was
/// declared in it.
///
/// Panics with "no scope" if there is no open scope.
fn end_scope(&mut self, line: LineRange) {
    let finished = self.scopes.pop().expect("no scope");
    for _ in 0..finished.scope.len() {
        self.emit(line, Op::Pop);
    }
}
/// Appends `op` to the current chunk and records `line` as its source line range, keeping the
/// chunk's `code` and `lines` vectors in step.
fn emit(&mut self, line: LineRange, op: Op) {
let chunk = self.chunk_mut();
chunk.code.push(op);
chunk.lines.push(line);
}
}
impl StmtVisitor for Compiler {
/// Compiles the expression, then emits a `Pop` to discard its unused result.
fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
self.compile_expr(&stmt.expr)?;
self.emit(stmt_line_number(stmt), Op::Pop);
Ok(())
}
fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> {
let name = &stmt.lhs.text;
if self.is_global_scope() {
let global = self.insert_global(name)?;
self.compile_expr(&stmt.rhs)?;
self.emit(stmt_line_number(stmt), Op::SetGlobal(global));
} else {
let mut declare = false;
let local = if let Some(local) = self.get_local(name) {
local
} else {
declare = true;
self.insert_local(name.to_string())?
}
.clone(); // gotta clone so we can borrow self as mutable for compile_expr
self.compile_expr(&stmt.rhs)?;
if !declare {
self.emit(stmt_line_number(stmt), Op::SetLocal(local.index));
}
}
// If the last value that was assigned to is a function, set its name here
// TODO - maybe this would be smarter to set up in the AST. I'm 99% sure that the last
// object created, if it were a function object, will be what we're assigning it to, but I
// want to be 100% sure instead of 99%.
let mut obj = self.constants.last_mut().unwrap().try_write().unwrap();
if let Some(fun) = obj.as_any_mut().downcast_mut::<UserFunctionInst>() {
fun.set_name(Arc::new(name.to_string()));
}
Ok(())
}
/// Compiles an attribute assignment: the target expression is compiled first, then the value, and
/// finally `SetAttr` is emitted with the attribute name stored as a string constant.
fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> {
self.compile_expr(&stmt.expr)?;
let name = self.insert_constant(StrInst::create(&stmt.name.text))?;
self.compile_expr(&stmt.rhs)?;
self.emit(stmt_line_number(stmt), Op::SetAttr(name));
Ok(())
}
/// Compiles a block inside its own `Local` scope. Closing the scope emits the `Pop`s for the
/// block's locals, attributed to the line of the closing brace.
fn visit_block_stmt(&mut self, stmt: &BlockStmt) -> Result<()> {
self.begin_scope(ScopeKind::Local);
for s in &stmt.stmts {
self.compile_stmt(s)?;
}
self.end_scope((stmt.rbrace.line, stmt.rbrace.line));
Ok(())
}
/// Compiles the value of a `return` statement: the given expression, or a nil constant when the
/// statement has no expression.
// NOTE(review): only the value is produced here; no return/jump instruction is emitted in this
// function. Confirm whether that is handled elsewhere or is still unfinished.
fn visit_return_stmt(&mut self, stmt: &ReturnStmt) -> Result<()> {
if let Some(expr) = &stmt.expr {
self.compile_expr(expr)?;
} else {
let nil = self.insert_constant(NilInst::create())?;
self.emit(stmt_line_number(stmt), Op::PushConstant(nil));
}
Ok(())
}
/// Compiles an `if` statement.
///
/// Emitted layout:
///
/// ```text
///     <condition>
///     GetAttr "__bool__"; Call 0
///     JumpFalse -> else        (patched once the then-branch has been emitted)
///     Pop                      (condition, no jump taken)
///     <then branch>
///     Jump -> end              (patched once the else-branch has been emitted)
/// else:
///     Pop                      (condition, jump taken)
///     <else branch>
/// end:
/// ```
///
/// Jump arguments are offsets measured from the jump instruction itself, so each jump is patched
/// relative to its own position.
///
/// # Panics
///
/// Panics when a jump offset does not fit into `JumpOpArg`.
fn visit_if_stmt(&mut self, stmt: &IfStmt) -> Result<()> {
    let condition_line = expr_line_number(&*stmt.condition);
    let then_line = stmt_line_number(&stmt.then_branch);

    // condition
    self.compile_expr(&stmt.condition)?;
    // call obj.__bool__()
    let bool_attr = self.insert_constant(StrInst::create("__bool__"))?;
    self.emit(condition_line, Op::GetAttr(bool_attr));
    self.emit(condition_line, Op::Call(0));
    let condition_patch_index = self.chunk().code.len();
    self.emit(condition_line, Op::JumpFalse(0));

    // then branch
    // pop the condition on top of the stack (no jump taken)
    self.emit(condition_line, Op::Pop);
    // not using compile_stmt because then_branch isn't a pointer, it's an honest-to-goodness
    // value
    stmt.then_branch.accept(self)?;
    let exit_patch_index = self.chunk().code.len();
    self.emit(then_line, Op::Jump(0));

    // else branch
    // patch the condition index - this is where the JUMP_FALSE will jump to
    assert_matches!(self.chunk().code[condition_patch_index], Op::JumpFalse(_));
    let offset = self.chunk().code.len() - condition_patch_index;
    assert!(
        offset <= (JumpOpArg::MAX as usize),
        "jump offset too large between lines {:?} - this is a compiler limitation, sorry",
        then_line
    );
    self.chunk_mut().code[condition_patch_index] = Op::JumpFalse(offset as JumpOpArg);
    // pop the condition on top of the stack (jump taken)
    self.emit(condition_line, Op::Pop);
    for s in &stmt.else_branch {
        self.compile_stmt(s)?;
    }

    // patch the "then" branch exit jump address - this is where Op::Jump will jump to.
    // The offset is measured from the exit jump itself, not from the condition jump; measuring
    // from the condition jump would overshoot by the length of the then-branch.
    // TODO : see if we can eliminate duplicates by checking the last two instructions
    assert_matches!(self.chunk().code[exit_patch_index], Op::Jump(_));
    let offset = self.chunk().code.len() - exit_patch_index;
    assert!(
        offset <= (JumpOpArg::MAX as usize),
        "jump offset too large between lines {:?} - this is a compiler limitation, sorry",
        then_line
    );
    self.chunk_mut().code[exit_patch_index] = Op::Jump(offset as JumpOpArg);
    Ok(())
}
}
impl ExprVisitor for Compiler {
/// Compiles a binary expression as a method call on the left operand:
/// `lhs <op> rhs` becomes `lhs.__op__(rhs)`.
///
/// For `&&` / `||` the left operand is first converted with `__bool__()` and
/// a conditional jump is emitted that skips the rest of the expression
/// (`JumpFalse` for `&&`, `JumpTrue` for `||`); the right operand is also
/// converted with `__bool__()` before the operator method is called.
///
/// # Panics
///
/// Panics if the operator token is not one of the known binary operators.
fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> {
    static OP_NAMES: LazyLock<HashMap<TokenKind, &'static str>> = LazyLock::new(|| {
        hash_map! {
            TokenKind::Plus => "__add__",
            TokenKind::Minus => "__sub__",
            TokenKind::Star => "__mul__",
            TokenKind::Slash => "__div__",
            TokenKind::And => "__and__",
            TokenKind::Or => "__or__",
            TokenKind::BangEq => "__ne__",
            TokenKind::EqEq => "__eq__",
            TokenKind::Greater => "__gt__",
            TokenKind::GreaterEq => "__ge__",
            TokenKind::Less => "__lt__",
            TokenKind::LessEq => "__le__",
        }
    });
    let is_logical = matches!(expr.op.kind, TokenKind::And | TokenKind::Or);

    self.compile_expr(&expr.lhs)?;

    // short-circuit setup: remember where the jump lives so it can be patched
    // once the end of the expression is known
    let exit_patch_index = if is_logical {
        let constant_id = self.insert_constant(StrInst::create("__bool__"))?;
        self.emit(expr_line_number(&*expr.lhs), Op::GetAttr(constant_id));
        self.emit(expr_line_number(&*expr.lhs), Op::Call(0));
        let index = self.chunk().code.len();
        let jump = if expr.op.kind == TokenKind::And {
            Op::JumpFalse(0)
        } else {
            Op::JumpTrue(0)
        };
        self.emit((expr.op.line, expr.op.line), jump);
        Some(index)
    } else {
        None
    };

    // look up the operator method on the LHS
    let name = OP_NAMES
        .get(&expr.op.kind)
        .expect("invalid binary operator");
    let constant_id = self.insert_constant(StrInst::create(name))?;
    self.emit(expr_line_number(expr), Op::GetAttr(constant_id));

    // the RHS is the single argument of the operator method; it was previously
    // never compiled, leaving Call(1) without its argument
    self.compile_expr(&expr.rhs)?;
    // convert RHS to a bool if we're doing AND or OR
    if is_logical {
        let constant_id = self.insert_constant(StrInst::create("__bool__"))?;
        self.emit(expr_line_number(&*expr.rhs), Op::GetAttr(constant_id));
        self.emit(expr_line_number(&*expr.rhs), Op::Call(0));
    }

    // call operator function
    self.emit(expr_line_number(expr), Op::Call(1));

    // patch exit if we're doing a short circuit
    if let Some(index) = exit_patch_index {
        assert_matches!(
            self.chunk().code[index],
            Op::JumpTrue(_) | Op::JumpFalse(_)
        );
        let offset = self.chunk().code.len() - index;
        // don't worry about doing a check on if offset is small enough for JumpOpArg, if you
        // have 4 billion instructions between jumps that is probably your own fault
        let new_op = match self.chunk().code[index] {
            Op::JumpTrue(_) => Op::JumpTrue(offset as JumpOpArg),
            Op::JumpFalse(_) => Op::JumpFalse(offset as JumpOpArg),
            _ => unreachable!(),
        };
        self.chunk_mut().code[index] = new_op;
    }
    Ok(())
}
/// Compiles a unary expression as a zero-argument method call on its operand
/// (`+x` -> `x.__pos__()`, `-x` -> `x.__neg__()`, `!x` -> `x.__not__()`).
///
/// # Panics
///
/// Panics if the operator token is not a known unary operator.
fn visit_unary_expr(&mut self, expr: &UnaryExpr) -> Result<()> {
    static OP_NAMES: LazyLock<HashMap<TokenKind, &'static str>> = LazyLock::new(|| {
        hash_map! {
            TokenKind::Plus => "__pos__",
            TokenKind::Minus => "__neg__",
            TokenKind::Bang => "__not__",
        }
    });
    // operand first, then look the method up on it and call it
    self.compile_expr(&expr.expr)?;
    let method = OP_NAMES.get(&expr.op.kind).expect("invalid unary operator");
    let method_id = self.insert_constant(StrInst::create(method))?;
    let line = expr_line_number(expr);
    for op in [Op::GetAttr(method_id), Op::Call(0)] {
        self.emit(line, op);
    }
    Ok(())
}
/// Compiles a call: the callee, then each argument in order, then
/// `Call(argc)`.
///
/// # Errors
///
/// Returns a `CompileError` when there are more arguments than `Argc` can
/// represent.
fn visit_call_expr(&mut self, expr: &CallExpr) -> Result<()> {
    self.compile_expr(&expr.expr)?;
    expr.args
        .iter()
        .try_for_each(|arg| self.compile_expr(arg))?;

    let argc = expr.args.len();
    if argc <= (Argc::MAX as usize) {
        self.emit(expr_line_number(expr), Op::Call(argc as Argc));
        return Ok(());
    }
    Err(CompileError {
        line: Some(expr_line_number(expr)),
        message: format!("too many function arguments (maximum: {})", Argc::MAX),
    }
    .into())
}
fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> {
self.compile_expr(&expr.expr)?;
let constant_id = self.insert_constant(StrInst::create(&expr.name.text))?;
self.emit(expr_line_number(expr), Op::GetAttr(constant_id));
Ok(())
}
/// Compiles a primary expression: a name lookup or a literal.
///
/// * Names resolve to a local first, then to a global.
/// * Number literals containing a `.` become floats, all others integers.
/// * String, boolean and nil literals are pushed as constants.
///
/// # Errors
///
/// Returns a `CompileError` for an unknown name, or for a number literal that
/// cannot be parsed. The lexer accepts any run of digits and dots, so text
/// such as `1.2.3` or an out-of-range integer reaches this point; this used
/// to panic through `unwrap()`.
fn visit_primary_expr(&mut self, expr: &PrimaryExpr) -> Result<()> {
    let line = expr_line_number(expr);
    match expr.token.kind {
        TokenKind::Name => {
            let name = &expr.token.text;
            // check if there's a local with this name, otherwise check globals
            if let Some(local) = self.get_local(name) {
                self.emit(line, Op::GetLocal(local.index));
            } else {
                let global = self.get_global(name).ok_or_else(|| CompileError {
                    line: Some(expr_line_number(expr)),
                    message: if self.is_global_scope() {
                        format!("unknown global {}", name)
                    } else {
                        format!("unknown local {}", name)
                    },
                })?;
                self.emit(line, Op::GetGlobal(global));
            }
        }
        TokenKind::Number => {
            let text = &expr.token.text;
            // built lazily, only when parsing actually fails
            let invalid_number = || CompileError {
                line: Some(expr_line_number(expr)),
                message: format!("invalid number literal {:?}", text),
            };
            let obj = if text.contains('.') {
                FloatInst::create(text.parse().map_err(|_| invalid_number())?) as ObjP
            } else {
                IntInst::create(text.parse().map_err(|_| invalid_number())?) as ObjP
            };
            let constant_id = self.insert_constant(obj)?;
            self.emit(line, Op::PushConstant(constant_id));
        }
        TokenKind::String => {
            let constant_id =
                self.insert_constant(StrInst::create(unescape(&expr.token.text)))?;
            self.emit(line, Op::PushConstant(constant_id));
        }
        TokenKind::True | TokenKind::False => {
            let constant_id =
                self.insert_constant(BoolInst::create(expr.token.kind == TokenKind::True))?;
            self.emit(line, Op::PushConstant(constant_id));
        }
        TokenKind::Nil => {
            let constant_id = self.insert_constant(NilInst::create())?;
            self.emit(line, Op::PushConstant(constant_id));
        }
        // the parser only builds PrimaryExpr from the token kinds above
        _ => unreachable!(),
    }
    Ok(())
}
fn visit_function_expr(&mut self, expr: &FunctionExpr) -> Result<()> {
let end_line = (expr.rbrace.line, expr.rbrace.line);
self.begin_scope(ScopeKind::Function);
self.chunks.push(Chunk::default());
let mut locals: HashSet<String> = Default::default();
for (param, _ty) in &expr.params {
// register all params as locals
locals.insert(param.text.to_string());
// also insert them as locals in the scope
self.insert_local(param.text.to_string())?;
}
// closures: figure out all other locals that are assigned to in the function
for local in LocalAssignCollector::collect(&expr.body) {
locals.insert(local);
}
// figure out all nonlocals being used, and then re-register them as locals
// when a user function is called, all values of the nonlocal are pushed to the top of the
// stack on top of the function parameters.
let all_names = LocalNameCollector::collect(&expr.body);
// these are the nonlocals that we're copying/re-registering as locals
let mut captures: HashMap<String, Local> = Default::default();
let mut nonlocals: HashMap<String, (FrameDepth, Local)> = Default::default();
for name in &all_names {
// already registered as a local
if locals.contains(name) {
continue;
}
// already captured
if captures.contains_key(name) {
continue;
}
if let Some((depth, nonlocal)) = self.get_nonlocal(name) {
let nonlocal = nonlocal.clone();
nonlocals.insert(name.to_string(), (depth, nonlocal));
captures.insert(
name.to_string(),
self.insert_local(name.to_string())?.clone(),
);
}
}
// compile body
for stmt in &expr.body {
self.compile_stmt(stmt)?;
}
// always end with a "return nil"
let nil = self.insert_constant(NilInst::create())?;
self.emit(end_line, Op::PushConstant(nil));
self.emit(end_line, Op::Return);
self.end_scope(end_line);
// create the function
let chunk = self.chunks.pop().unwrap();
let fun = UserFunctionInst::create(chunk, expr.params.len() as Argc);
// register the function as a constant
let fun_constant = self.insert_constant(fun)?;
self.emit(expr_line_number(expr), Op::PushConstant(fun_constant));
// close over the captured values
for (depth, local) in nonlocals.values() {
self.emit(
expr_line_number(expr),
Op::CloseOver {
depth: *depth,
slot: local.slot,
},
);
}
Ok(())
}
}

160
src/disassemble.rs Normal file
View File

@@ -0,0 +1,160 @@
use crate::obj::{ObjP, UserFunctionInst};
use crate::vm::{Chunk, JumpOpArg, Op};
/// One row of the disassembly table, in column order:
/// address, source line(s), op name, argument, extra info.
type Row = (String, String, &'static str, String, String);
/// Prints a table with one row per instruction of `chunk`.
///
/// `constants` and `globals` are the program-wide tables that constant IDs and
/// global IDs in the instructions index into. Jump targets are shown as
/// absolute addresses computed as the instruction's own address plus its
/// offset.
///
/// # Panics
///
/// Panics if an operand indexes outside `constants`, `globals` or
/// `chunk.locals`, if `chunk.lines` is shorter than `chunk.code`, or if a
/// constant cannot be read-locked.
fn disassemble_chunk(chunk: &Chunk, constants: &Vec<ObjP>, globals: &Vec<String>) {
    // textual form of a constant
    let constant_text = |id: usize| format!("{}", &constants[id].try_read().unwrap());
    // (arg, info) columns shared by GET_LOCAL and SET_LOCAL
    let local_columns = |id: usize| {
        let local = &chunk.locals[id];
        (
            local.name.to_string(),
            format!("(slot {}, local ID {})", local.slot, local.index),
        )
    };
    // (arg, info) columns shared by all jump instructions
    let jump_columns = |index: usize, offset: JumpOpArg| {
        (
            format!("{}", offset),
            format!("(address {})", (index as JumpOpArg) + offset),
        )
    };

    let mut rows: Vec<Row> = vec![(
        "ADDR".into(),
        "LINE".into(),
        "OP".into(),
        "ARG".into(),
        "INFO".into(),
    )];
    for (index, op) in chunk.code.iter().enumerate() {
        let (start_line, end_line) = chunk.lines[index];
        let addr: String = index.to_string();
        let line = if start_line == end_line {
            start_line.to_string()
        } else {
            format!("{start_line}-{end_line}")
        };
        let (op_str, arg, info): (&'static str, String, String) = match op {
            Op::Pop => ("POP", String::new(), String::new()),
            Op::PushConstant(constant_id) => (
                "PUSH_CONSTANT",
                constant_text(*constant_id as usize),
                format!("(constant ID {constant_id})"),
            ),
            Op::GetLocal(local_id) => {
                let (arg, info) = local_columns(*local_id as usize);
                ("GET_LOCAL", arg, info)
            }
            Op::SetLocal(local_id) => {
                let (arg, info) = local_columns(*local_id as usize);
                ("SET_LOCAL", arg, info)
            }
            Op::GetGlobal(global_id) => (
                "GET_GLOBAL",
                globals[*global_id as usize].clone(),
                format!("(global ID {global_id})"),
            ),
            Op::SetGlobal(global_id) => (
                "SET_GLOBAL",
                globals[*global_id as usize].clone(),
                format!("(global ID {global_id})"),
            ),
            Op::GetAttr(constant_id) => (
                "GET_ATTR",
                constant_text(*constant_id as usize),
                format!("(constant ID {constant_id})"),
            ),
            Op::SetAttr(constant_id) => (
                "SET_ATTR",
                constant_text(*constant_id as usize),
                format!("(constant ID {constant_id})"),
            ),
            Op::Jump(jump_offset) => {
                let (arg, info) = jump_columns(index, *jump_offset);
                ("JUMP", arg, info)
            }
            Op::JumpFalse(jump_offset) => {
                let (arg, info) = jump_columns(index, *jump_offset);
                ("JUMP_FALSE", arg, info)
            }
            Op::JumpTrue(jump_offset) => {
                let (arg, info) = jump_columns(index, *jump_offset);
                // was mislabelled "JUMP_FALSE"
                ("JUMP_TRUE", arg, info)
            }
            Op::Call(argc) => ("CALL", format!("argc {argc}"), String::new()),
            Op::Return => ("RETURN", String::new(), String::new()),
            Op::CloseOver { depth, slot } => (
                "CLOSE_OVER",
                format!("{depth}"),
                format!("slot {slot} (name unknown)"),
            ),
            Op::Halt => ("HALT", String::new(), String::new()),
        };
        rows.push((addr, line, op_str, arg, info));
    }
    display_rows(&rows);
}
/// Prints `rows` as an aligned table on stdout.
///
/// Each column is as wide as its longest cell plus two. The address, line and
/// op columns are right-aligned; the argument and info columns use the
/// default alignment.
fn display_rows(rows: &Vec<Row>) {
    // longest cell per column, in column order
    let mut widths = [0usize; 5];
    for (addr, line, op, arg, info) in rows {
        let cell_lens = [addr.len(), line.len(), op.len(), arg.len(), info.len()];
        for (width, len) in widths.iter_mut().zip(cell_lens) {
            *width = (*width).max(len);
        }
    }
    let [addr_width, line_width, op_width, arg_width, info_width] = widths.map(|w| w + 2);

    for (addr, line, op, arg, info) in rows {
        println!(
            "{addr:>addr_width$} {line:>line_width$} {op:>op_width$} {arg:arg_width$} {info:info_width$}"
        );
    }
}
/// Prints the disassembly of the main chunk, followed by the disassembly of
/// every user-defined function found in the constant table.
///
/// # Panics
///
/// Panics if a constant cannot be read-locked, or if a user function's chunk
/// has no line information.
pub fn disassemble(chunk: &Chunk, constants: &Vec<ObjP>, globals: &Vec<String>) {
    println!("== main chunk");
    println!();
    disassemble_chunk(chunk, constants, globals);

    for constant in constants {
        let guard = constant.try_read().unwrap();
        // only user functions carry a chunk of their own
        let Some(fun) = guard.as_any().downcast_ref::<UserFunctionInst>() else {
            continue;
        };
        let first_line = fun.chunk().lines[0].0;
        println!();
        println!("== {} starting on line {}", fun.name(), first_line);
        println!();
        disassemble_chunk(fun.chunk(), constants, globals);
    }
}

70
src/main.rs Normal file
View File

@@ -0,0 +1,70 @@
// trait_upcasting - https://github.com/rust-lang/rust/issues/65991
// stabilization in progress
#![feature(trait_upcasting)]
mod ast;
mod builtins;
mod compiler;
mod disassemble;
mod obj;
mod parser;
mod token;
mod vm;
use std::fmt;
use std::fs::File;
use std::io::prelude::*;
use std::path::PathBuf;
use clap::Parser as ClapParser;
use thiserror::Error;
// Command-line arguments, parsed with clap's derive API.
// NOTE(review): plain `//` comments are used here on purpose; clap's derive
// can pick up `///` doc comments as help/about text - confirm before
// converting these to doc comments.
#[derive(ClapParser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
// When set, `main` prints the disassembly and returns without running the VM.
#[arg(short, long, help = "Dump program disassembly and exit")]
disassemble: bool,
// Path of the source file that `main` reads, compiles and runs.
#[arg(help = "The path of the file to run")]
path: PathBuf,
}
/// Top-level error carrying a plain message; used by `main` when parsing
/// reported errors.
#[derive(Debug, Error)]
struct ProgramError(String);

impl fmt::Display for ProgramError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let Self(message) = self;
        fmt.write_str(message)
    }
}
/// Entry point: read the source file, parse it, compile it, then either dump
/// the disassembly (`--disassemble`) or run the program in the VM.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cli = Args::parse();

    // read the whole source file
    let mut source = String::new();
    File::open(&cli.path)?.read_to_string(&mut source)?;

    // parse
    let mut parser = parser::Parser::new(source, &cli.path)?;
    let ast = parser.parse_all()?;
    if parser.was_error() {
        return Err(ProgramError("error occurred, exiting".to_string()).into());
    }

    // initialize type system
    obj::init_types();

    // compile
    let (chunk, constants, globals) = compiler::Compiler::default().compile(&ast)?;

    if cli.disassemble {
        disassemble::disassemble(&chunk, &constants, &globals);
    } else {
        // run
        let mut vm = vm::Vm::new(chunk.into(), constants, globals);
        vm.run();
    }
    Ok(())
}

1059
src/obj.rs Normal file

File diff suppressed because it is too large Load Diff

801
src/parser.rs Normal file
View File

@@ -0,0 +1,801 @@
use common_macros::hash_map;
use thiserror::Error;
use std::collections::HashMap;
use std::fmt::{self, Display};
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use crate::ast::*;
use crate::token::{Token, TokenKind};
////////////////////////////////////////////////////////////////////////////////
// ParseError
////////////////////////////////////////////////////////////////////////////////
/// Error produced by the lexer or the parser.
#[derive(Error, Debug)]
pub struct ParseError {
/// Human-readable description of what went wrong.
pub message: String,
/// Line the lexer was on when the error was created.
pub line: usize,
/// Path of the source file being parsed.
pub path: PathBuf,
}
/// Result type used throughout the lexer and parser.
pub type Result<T> = std::result::Result<T, ParseError>;
impl Display for ParseError {
    /// Formats the error as `in <path> at line <line>: <message>`.
    ///
    /// The path is rendered with `Path::display`, so a path that is not valid
    /// UTF-8 is printed lossily instead of panicking while an error is being
    /// reported.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "in {} at line {}: {}",
            self.path.display(),
            self.line,
            self.message
        )
    }
}
////////////////////////////////////////////////////////////////////////////////
// Constants
////////////////////////////////////////////////////////////////////////////////
// Characters always skipped between tokens. '\n' is not listed: it is only
// skipped while the lexer is ignoring newlines.
const WHITESPACE: &str = " \t\r";
// Characters that may start a name.
const NAME_START_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
// Characters that may continue a name. Note that '-' is included, so text
// such as `a-b` lexes as a single name.
const NAME_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789-";
// Characters that may start a number.
const NUMBER_START_CHARS: &str = "0123456789";
// Characters that may continue a number. Any number of '.' is accepted here.
const NUMBER_CHARS: &str = "0123456789.";
// Quote characters that open a string literal; the same character closes it.
const STRING_START_CHARS: &str = "'\"";
// Characters allowed directly after a backslash inside a string literal.
const STRING_ESCAPES: &str = "nrt\\\"'";
////////////////////////////////////////////////////////////////////////////////
// Lexer
////////////////////////////////////////////////////////////////////////////////
/// Turns source text into tokens, one `next()` call at a time.
#[derive(Debug)]
pub struct Lexer {
/// Current line number; starts at 1 and is incremented when a '\n' is consumed.
line: usize,
/// One-based byte position of the current character: the character under
/// the cursor is read from `text[index - 1..]`.
index: usize,
/// Zero-based byte offset where the lexeme being scanned begins.
start: usize,
/// The complete source text.
text: String,
/// Path of the source file, copied into errors.
path: PathBuf,
/// Closing delimiters still expected, innermost last.
paren_stack: Vec<char>,
/// Set once any error has been created through `error()`.
was_error: bool,
}
impl Lexer {
/// Creates a lexer positioned at the first character of `text`, on line 1.
/// `path` is only stored for error reporting.
pub fn new(text: String, path: &dyn AsRef<Path>) -> Self {
    let path = path.as_ref().to_path_buf();
    Self {
        text,
        path,
        // the cursor is one-based, the lexeme start is zero-based
        index: 1,
        start: 0,
        line: 1,
        paren_stack: Vec::new(),
        was_error: false,
    }
}
/// Returns `true` once every character of the text has been consumed.
///
/// `index` is one-based (the current character lives at byte `index - 1`), so
/// the end is reached when `index - 1 >= text.len()`, i.e. `index > len`.
/// The previous `index >= len` check reported EOF while the cursor was still
/// on the final single-byte character, so that character was never lexed.
pub fn is_eof(&self) -> bool {
    self.index > self.text.len()
}
/// Text scanned so far for the current token: from `start` up to, but not
/// including, the character under the cursor.
pub fn lexeme(&self) -> &str {
    let end = self.index - 1;
    &self.text[self.start..end]
}
/// Whether any error has been created through `error()` so far.
pub fn was_error(&self) -> bool {
self.was_error
}
/// Character under the cursor, or `'\0'` at end of input.
fn current(&self) -> char {
    if self.is_eof() {
        '\0'
    } else {
        let rest = &self.text[self.index - 1..];
        rest.chars().next().unwrap()
    }
}
/// Newlines are insignificant while the innermost open delimiter is anything
/// other than a brace (i.e. inside `(...)` or `[...]`).
fn ignore_newlines(&self) -> bool {
    matches!(self.paren_stack.last(), Some(&closer) if closer != '}')
}
/// Builds a `ParseError` for the current line and records that an error
/// happened.
fn error(&mut self, message: impl ToString) -> ParseError {
    self.was_error = true;
    let message = message.to_string();
    let path = self.path.clone();
    ParseError {
        message,
        line: self.line,
        path,
    }
}
/// Moves the cursor past the current character (by its UTF-8 length),
/// counting the line when that character is a newline. Does nothing at end
/// of input.
fn advance(&mut self) {
    if self.is_eof() {
        return;
    }
    let ch = self.text[self.index - 1..].chars().next().unwrap();
    if ch == '\n' {
        self.line += 1;
    }
    self.index += ch.len_utf8();
}
/// Consumes the current character if it equals `c`; returns whether it did.
fn mat(&mut self, c: char) -> bool {
    let matched = self.current() == c;
    if matched {
        self.advance();
    }
    matched
}
/// Skips whitespace, `#` comments and (while newlines are being ignored)
/// newlines, then marks the start of the next lexeme.
///
/// A comment runs up to, but not including, the end of its line. The newline
/// itself is left in place so that it is handled like any other newline:
/// skipped inside parentheses or brackets, and turned into an `Eol` token
/// otherwise. Previously the comment consumed its newline, so a statement
/// followed by a trailing comment lost its end-of-line token.
fn skip_whitespace(&mut self) {
    loop {
        let c = self.current();
        if c == '#' {
            while self.current() != '\n' && !self.is_eof() {
                self.advance();
            }
        } else if WHITESPACE.contains(c) || (c == '\n' && self.ignore_newlines()) {
            self.advance();
        } else {
            break;
        }
    }
    self.start = self.index - 1;
}
/// Produces a token of `kind` from the current lexeme and line, then starts
/// a new lexeme at the cursor.
fn make_token(&mut self, kind: TokenKind) -> Token {
    let text = self.lexeme().to_string();
    let line = self.line;
    self.start = self.index - 1;
    Token { line, text, kind }
}
pub fn next(&mut self) -> Result<Token> {
self.skip_whitespace();
if self.is_eof() {
return Ok(self.make_token(TokenKind::Eof));
} else if NAME_START_CHARS.contains(self.current()) {
return Ok(self.name());
} else if NUMBER_START_CHARS.contains(self.current()) {
return Ok(self.number());
} else if STRING_START_CHARS.contains(self.current()) {
return self.string();
} else if self.mat('+') {
return Ok(self.make_token(TokenKind::Plus));
} else if self.mat('-') {
if self.mat('>') {
return Ok(self.make_token(TokenKind::Arrow));
} else {
return Ok(self.make_token(TokenKind::Minus));
}
} else if self.mat('*') {
return Ok(self.make_token(TokenKind::Star));
} else if self.mat('/') {
return Ok(self.make_token(TokenKind::Slash));
} else if self.mat('&') {
if self.mat('&') {
return Ok(self.make_token(TokenKind::And));
}
} else if self.mat('|') {
if self.mat('|') {
return Ok(self.make_token(TokenKind::Or));
}
} else if self.mat('!') {
if self.mat('=') {
return Ok(self.make_token(TokenKind::BangEq));
} else {
return Ok(self.make_token(TokenKind::Bang));
}
} else if self.mat('=') {
if self.mat('=') {
return Ok(self.make_token(TokenKind::EqEq));
} else {
return Ok(self.make_token(TokenKind::Eq));
}
} else if self.mat('<') {
if self.mat('=') {
return Ok(self.make_token(TokenKind::LessEq));
} else {
return Ok(self.make_token(TokenKind::Less));
}
} else if self.mat('>') {
if self.mat('=') {
return Ok(self.make_token(TokenKind::Greater));
} else {
return Ok(self.make_token(TokenKind::GreaterEq));
}
} else if self.mat('(') {
self.paren_stack.push(')');
return Ok(self.make_token(TokenKind::LParen));
} else if self.mat(')') {
return match self.paren_stack.last() {
None => Err(self.error("')' has unmatched '('")),
Some(')') => {
self.paren_stack.pop();
Ok(self.make_token(TokenKind::RParen))
}
Some(c) => Err(self.error(format!("mismatched ')' (expected {:?})", c))),
};
} else if self.mat('{') {
self.paren_stack.push('}');
return Ok(self.make_token(TokenKind::LBrace));
} else if self.mat('}') {
return match self.paren_stack.last() {
None => Err(self.error("'}' has unmatched '{'")),
Some('}') => {
self.paren_stack.pop();
Ok(self.make_token(TokenKind::RBrace))
}
Some(c) => Err(self.error(format!("mismatched '}}' (expected {:?})", c))),
};
} else if self.mat('[') {
self.paren_stack.push(']');
return Ok(self.make_token(TokenKind::LBracket));
} else if self.mat(']') {
return match self.paren_stack.last() {
None => Err(self.error("']' has unmatched '['")),
Some(']') => {
self.paren_stack.pop();
Ok(self.make_token(TokenKind::RBracket))
}
Some(c) => Err(self.error(format!("mismatched ']' (expected {:?})", c))),
};
} else if self.mat('.') {
return Ok(self.make_token(TokenKind::Dot));
} else if self.mat(',') {
return Ok(self.make_token(TokenKind::Comma));
} else if self.mat(':') {
return Ok(self.make_token(TokenKind::Colon));
} else if self.mat('\n') {
assert!(!self.ignore_newlines());
// fix the line number since it will have already advanced when we make the token
self.line -= 1;
let token = self.make_token(TokenKind::Eol);
self.line += 1;
return Ok(token);
} else if self.mat(';') {
return Ok(self.make_token(TokenKind::Eol));
}
Err(self.error(format!("unexpected character: {:?}", self.current())))
}
/// Scans a name and returns either the matching keyword token or a plain
/// `Name` token.
fn name(&mut self) -> Token {
    static KEYWORDS: OnceLock<HashMap<&'static str, TokenKind>> = OnceLock::new();
    while NAME_CHARS.contains(self.current()) {
        self.advance();
    }
    let keywords = KEYWORDS.get_or_init(|| {
        hash_map! {
            "return" => TokenKind::Return,
            "if" => TokenKind::If,
            "else" => TokenKind::Else,
            "true" => TokenKind::True,
            "false" => TokenKind::False,
            "nil" => TokenKind::Nil,
        }
    });
    let kind = keywords
        .get(self.lexeme())
        .copied()
        .unwrap_or(TokenKind::Name);
    self.make_token(kind)
}
/// Scans a run of digits and dots and returns it as a `Number` token. The
/// text is not validated here.
fn number(&mut self) -> Token {
    loop {
        if !NUMBER_CHARS.contains(self.current()) {
            break;
        }
        self.advance();
    }
    self.make_token(TokenKind::Number)
}
/// Scans a string literal. The opening quote decides the terminator; the
/// token text includes both quotes and any escape sequences verbatim.
///
/// # Errors
///
/// Fails on a backslash followed by a character outside `STRING_ESCAPES`, or
/// when the input ends before the closing quote.
fn string(&mut self) -> Result<Token> {
    let terminator = self.current();
    self.advance();
    loop {
        let c = self.current();
        if c == terminator || self.is_eof() {
            break;
        }
        self.advance();
        if c == '\\' {
            // the character after a backslash must be a known escape
            let escaped = self.current();
            if !STRING_ESCAPES.contains(escaped) {
                return Err(self.error(format!("unknown string escape {:?}", escaped)));
            }
            self.advance();
        }
    }
    if self.current() != terminator {
        return Err(self.error("unterminated string"));
    }
    self.advance();
    Ok(self.make_token(TokenKind::String))
}
}
////////////////////////////////////////////////////////////////////////////////
// Parser
////////////////////////////////////////////////////////////////////////////////
// mat!(parser, kind, ...): tries each token kind in turn via `parser.mat(kind)?`
// and evaluates to `true` as soon as one matches (`||` short-circuits, so at
// most one token is consumed). Because of the `?`, it can only be used inside
// a function that returns a compatible `Result`.
macro_rules! mat {
($self:expr, $($op:expr),+ $(,)?) => {
$($self.mat($op)?)||+
};
}
// expect!(parser, message, kind, ...): like `mat!`, but evaluates to a
// `Result`: `Ok` with a clone of the token that was just consumed when one of
// the kinds matched, otherwise `Err(parser.error(message))`.
macro_rules! expect {
($self:expr, $message:expr, $($kind:expr),+ $(,)?) => {{
if mat!($self, $($kind),+) {
Ok($self.prev.clone().unwrap())
} else {
Err($self.error($message))
}
}};
}
// bin_expr!(name, next, op, ...): defines a parser method `name` for one
// binary-operator precedence level. Operands are parsed with the method
// `next` (the next-higher precedence level); while one of the listed operator
// tokens follows, the expression built so far becomes the `lhs` of a new
// `BinaryExpr`, which makes the operators left-associative.
macro_rules! bin_expr {
($name:ident, $next:ident, $($op:expr),+ $(,)?) => {
fn $name(&mut self) -> Result<ExprP> {
let mut expr = self.$next()?;
while $(self.mat($op)?)||+ {
let op = self.prev.clone().unwrap();
let rhs = self.$next()?;
expr = Box::new(BinaryExpr {lhs: expr, op, rhs});
}
Ok(expr)
}
};
}
/// Recursive-descent parser with two tokens of lookahead.
pub struct Parser {
/// Source of tokens.
lexer: Lexer,
/// The most recently consumed token; `None` until the first `advance()`.
prev: Option<Token>,
/// The token currently being examined.
current: Token,
/// The token after `current`.
next: Token,
/// Set once any error has been created through `error()`.
was_error: bool,
}
impl Parser {
/// Creates a parser over `text` and primes the two lookahead tokens.
///
/// # Errors
///
/// Fails if lexing either of the first two tokens fails.
pub fn new(text: String, path: &dyn AsRef<Path>) -> Result<Self> {
    let mut lexer = Lexer::new(text, path);
    // fill the lookahead window: `current` first, then `next`
    let current = lexer.next()?;
    let next = lexer.next()?;
    let parser = Self {
        lexer,
        prev: None,
        current,
        next,
        was_error: false,
    };
    Ok(parser)
}
/// Parses statements until the parser reports end of input and returns the
/// ones that parsed successfully. Statements that failed to parse are
/// reported by `stmt()` and skipped.
pub fn parse_all(&mut self) -> Result<Vec<StmtP>> {
    let mut program = Vec::new();
    while !self.is_eof() {
        // `stmt()` yields None for blank input or a recovered error
        program.extend(self.stmt()?);
    }
    Ok(program)
}
//
// Properties
//
/// Line the lexer is currently on (used for error reporting).
fn line(&self) -> usize {
self.lexer.line
}
/// Path of the source file being parsed.
fn path(&self) -> &Path {
&self.lexer.path
}
fn is_eof(&self) -> bool {
self.lexer.is_eof()
}
/// Whether the parser or its lexer has created any error so far.
pub fn was_error(&self) -> bool {
self.was_error || self.lexer.was_error()
}
//
// Parser primitives
//
/// Shifts the lookahead window by one token: `current` becomes `prev`,
/// `next` becomes `current`, and a fresh token is pulled from the lexer.
///
/// # Errors
///
/// Propagates lexer errors; in that case `prev` and `current` have already
/// been shifted.
fn advance(&mut self) -> Result<()> {
    let upcoming = self.next.clone();
    self.prev = Some(std::mem::replace(&mut self.current, upcoming));
    self.next = self.lexer.next()?;
    Ok(())
}
/// Whether the current token is of kind `what`; consumes nothing.
fn check(&self, what: TokenKind) -> bool {
self.current.kind == what
}
/// Consumes the current token if it is of kind `what`; returns whether it
/// did.
fn mat(&mut self, what: TokenKind) -> Result<bool> {
    let matched = self.check(what);
    if matched {
        self.advance()?;
    }
    Ok(matched)
}
/// Consumes a token of kind `what` and returns a reference to it.
///
/// # Errors
///
/// When the current token is of a different kind, fails with `message`
/// followed by a note describing the token that was found instead.
fn expect(&mut self, message: impl Display, what: TokenKind) -> Result<&Token> {
    if !self.mat(what)? {
        let detail = format!(
            "{message} (NOTE: got {:?} {:?})",
            self.current.kind, self.current.text
        );
        return Err(self.error(detail));
    }
    Ok(self.prev.as_ref().unwrap())
}
/// Builds a `ParseError` at the lexer's current line and records that an
/// error happened.
fn error(&mut self, message: impl ToString) -> ParseError {
    self.was_error = true;
    let line = self.line();
    let path = self.path().to_path_buf();
    ParseError {
        message: message.to_string(),
        line,
        path,
    }
}
/// Error recovery: discards tokens until one that can start a statement
/// (`return`, `if` or `{`) is current, or until end of input.
fn synchronize(&mut self) -> Result<()> {
    while !self.is_eof() {
        let at_statement_start = matches!(
            self.current.kind,
            TokenKind::Return | TokenKind::If | TokenKind::LBrace
        );
        if at_statement_start {
            break;
        }
        self.advance()?;
    }
    Ok(())
}
//
// Statements
//
/// Parses one statement, skipping any end-of-line tokens before it.
///
/// Returns `Ok(None)` when only end-of-line tokens were left, or when the
/// statement failed to parse; in the latter case the error is printed to
/// stderr and the parser resynchronizes.
fn stmt(&mut self) -> Result<Option<StmtP>> {
    // skip past end-lines to get to the good stuff
    while self.mat(TokenKind::Eol)? {}
    // nothing left after EOLs
    if self.is_eof() {
        return Ok(None);
    }
    let parsed = self.stmt_wrapped();
    if let Err(e) = &parsed {
        eprintln!("{}", e);
        self.synchronize()?;
    }
    Ok(parsed.ok())
}
/// Parses one statement without any error recovery.
///
/// Handles `return`, `if`, blocks, `name = expr` assignments, attribute
/// assignments (`expr.name = expr`) and plain expression statements.
///
/// Expression and attribute-assignment statements may be followed directly by
/// a closing `}` instead of an end of line, matching what `return` and
/// assignment statements already allow (e.g. `if x { print(x) }`).
fn stmt_wrapped(&mut self) -> Result<StmtP> {
    if self.mat(TokenKind::Return)? {
        self.return_stmt()
    } else if self.mat(TokenKind::If)? {
        self.if_stmt()
    } else if self.mat(TokenKind::LBrace)? {
        let lbrace = self.prev.clone().unwrap();
        let stmts = self.block()?;
        let rbrace = self.prev.clone().unwrap();
        Ok(Box::new(BlockStmt {
            lbrace,
            stmts,
            rbrace,
        }) as Box<dyn Stmt + 'static>)
    } else if self.current.kind == TokenKind::Name && self.next.kind == TokenKind::Eq {
        self.assign_stmt()
    } else {
        let expr = self.expr()?;
        let stmt: StmtP;
        if expr.as_any_ref().downcast_ref::<GetExpr>().is_some() && self.mat(TokenKind::Eq)? {
            let expr = expr.as_any().downcast::<GetExpr>().unwrap();
            let rhs = self.expr()?;
            // unpack the GetExpr and turn it into a SetExpr instead
            stmt = Box::new(SetStmt {
                expr: expr.expr,
                name: expr.name,
                rhs,
            });
        } else {
            stmt = Box::new(ExprStmt { expr });
        }
        // the closing brace of the enclosing block also terminates the statement
        if !self.check(TokenKind::RBrace) {
            expect!(
                self,
                "expect end of line after expression",
                TokenKind::Eol,
                TokenKind::Eof,
            )?;
        }
        Ok(stmt)
    }
}
/// Parses the rest of a `return` statement; the `return` keyword has already
/// been consumed.
///
/// The return value is optional: it is omitted when `return` is followed
/// directly by an end of line, a closing `}` or the end of the file. (The
/// end-of-file case used to be parsed as an expression and fail.)
fn return_stmt(&mut self) -> Result<StmtP> {
    let return_kw = self.prev.clone().unwrap();
    let at_terminator = self.check(TokenKind::Eol)
        || self.check(TokenKind::RBrace)
        || self.check(TokenKind::Eof);
    let expr = if at_terminator {
        None
    } else {
        Some(self.expr()?)
    };
    // the closing brace of the enclosing block also terminates the statement
    if !self.check(TokenKind::RBrace) {
        expect!(
            self,
            "expected end of line after return statement",
            TokenKind::Eol,
            TokenKind::Eof,
        )?;
    }
    Ok(Box::new(ReturnStmt { return_kw, expr }))
}
/// Parses the rest of an `if` statement; the `if` keyword has already been
/// consumed.
///
/// The else branch is a list of statements: empty without `else`, a single
/// nested `if` statement for `else if`, or the statements of the else block.
fn if_stmt(&mut self) -> Result<StmtP> {
    let if_kw = self.prev.clone().unwrap();
    let condition = self.expr()?;
    self.expect("expect '{' after 'if' condition", TokenKind::LBrace)?;
    let then_branch = self.block_stmt()?;

    let else_branch = if !self.mat(TokenKind::Else)? {
        Vec::new()
    } else if self.mat(TokenKind::If)? {
        vec![self.if_stmt()?]
    } else {
        self.expect("expect '{' after else statement", TokenKind::LBrace)?;
        self.block()?
    };

    Ok(Box::new(IfStmt {
        if_kw,
        condition,
        then_branch,
        else_branch,
    }))
}
/// Parses a block whose opening `{` has just been consumed and returns it as
/// a `BlockStmt` value, including both brace tokens.
///
/// # Panics
///
/// Panics if the previously consumed token is not `{`.
fn block_stmt(&mut self) -> Result<BlockStmt> {
    let opening = self.prev.clone().unwrap();
    assert_eq!(opening.kind, TokenKind::LBrace);
    let body = self.block()?;
    // `block()` stops right after consuming the closing brace
    let closing = self.prev.clone().unwrap();
    assert_eq!(closing.kind, TokenKind::RBrace);
    Ok(BlockStmt {
        lbrace: opening,
        stmts: body,
        rbrace: closing,
    })
}
/// Parses statements up to and including the closing `}` of a block whose
/// opening `{` has already been consumed.
fn block(&mut self) -> Result<Vec<StmtP>> {
    let mut body = Vec::new();
    // the stmt rule is skipping past EOLs too. however if there's nothing *except* for EOLs
    // remaining for the rest of the block, we want to know about that ahead of time rather
    // than let the statement rule handle it.
    // so we handle a bunch of EOLs right here and now.
    while self.mat(TokenKind::Eol)? {}
    while !(self.check(TokenKind::RBrace) || self.is_eof()) {
        match self.stmt()? {
            Some(s) => body.push(s),
            None => break,
        }
        while self.mat(TokenKind::Eol)? {}
    }
    self.expect("expect '}' after statement block", TokenKind::RBrace)?;
    Ok(body)
}
/// Parses `name = expr`. The statement must be followed by an end of line or
/// end of file, unless a closing `}` comes next.
fn assign_stmt(&mut self) -> Result<StmtP> {
    let lhs = self
        .expect("expect name for assign statement", TokenKind::Name)?
        .clone();
    self.expect("expect '=' after name", TokenKind::Eq)?;
    let rhs = self.expr()?;

    let closes_block = self.check(TokenKind::RBrace);
    if !closes_block {
        expect!(
            self,
            "expected end of line after assign statement",
            TokenKind::Eol,
            TokenKind::Eof
        )?;
    }
    Ok(Box::new(AssignStmt { lhs, rhs }))
}
//
// Expressions
//
/// Parses an expression, starting at the lowest-precedence level (`||`).
fn expr(&mut self) -> Result<ExprP> {
self.logical_or_expr()
}
// Binary operator precedence levels, from lowest to highest. Each level
// parses its operands with the level named as the macro's second argument.
// logical or
bin_expr!(logical_or_expr, logical_and_expr, TokenKind::Or);
// logical and
bin_expr!(logical_and_expr, equality_expr, TokenKind::And);
// equality
bin_expr!(
equality_expr,
compare_expr,
TokenKind::BangEq,
TokenKind::EqEq
);
// ordering comparisons
bin_expr!(
compare_expr,
binary_term,
TokenKind::Less,
TokenKind::LessEq,
TokenKind::Greater,
TokenKind::GreaterEq
);
// addition and subtraction
bin_expr!(
binary_term,
binary_factor,
TokenKind::Plus,
TokenKind::Minus
);
// multiplication and division; operands are unary expressions
bin_expr!(binary_factor, unary_expr, TokenKind::Star, TokenKind::Slash);
/// Parses a prefix operator (`!`, `-`, `+`) applied to a unary expression,
/// or falls through to a call expression.
fn unary_expr(&mut self) -> Result<ExprP> {
    let has_prefix_op = mat!(self, TokenKind::Bang, TokenKind::Minus, TokenKind::Plus);
    if !has_prefix_op {
        return self.call_expr();
    }
    let op = self.prev.clone().unwrap();
    // right-recursive so that operators can be stacked, e.g. `!-x`
    let expr = self.unary_expr()?;
    Ok(Box::new(UnaryExpr { op, expr }))
}
/// Parses a primary expression followed by any number of call suffixes
/// (`(...)`) and attribute accesses (`.name`), applied left to right.
fn call_expr(&mut self) -> Result<ExprP> {
    let mut target = self.primary_expr()?;
    loop {
        if self.mat(TokenKind::LParen)? {
            target = self.finish_call_expr(target)?;
            continue;
        }
        if !self.mat(TokenKind::Dot)? {
            return Ok(target);
        }
        let name = self
            .expect("expect name after '.'", TokenKind::Name)?
            .clone();
        target = Box::new(GetExpr { expr: target, name });
    }
}
/// Parses the argument list of a call whose `(` has already been consumed,
/// up to and including the closing `)`. A trailing comma is allowed.
fn finish_call_expr(&mut self, callee: ExprP) -> Result<ExprP> {
    let mut args = Vec::new();
    if !self.check(TokenKind::RParen) {
        loop {
            args.push(self.expr()?);
            // stop when there is no comma, or when the comma was a trailing one
            if !self.mat(TokenKind::Comma)? || self.check(TokenKind::RParen) {
                break;
            }
        }
    }
    let rparen = self
        .expect("expect ')' after function arguments", TokenKind::RParen)?
        .clone();
    Ok(Box::new(CallExpr {
        expr: callee,
        args,
        rparen,
    }))
}
/// Parses a literal, a name, a parenthesized expression or a function
/// literal.
///
/// After `(`, the input is treated as a function definition when it
/// continues with `)`, or with a name followed by `)`, `:` or `,`; otherwise
/// it is a parenthesized expression.
fn primary_expr(&mut self) -> Result<ExprP> {
    let is_atom = mat!(
        self,
        TokenKind::Name,
        TokenKind::Number,
        TokenKind::String,
        TokenKind::True,
        TokenKind::False,
        TokenKind::Nil
    );
    if is_atom {
        return Ok(Box::new(PrimaryExpr {
            token: self.prev.clone().unwrap(),
        }));
    }
    if !self.mat(TokenKind::LParen)? {
        return Err(self.error(format!("unexpected token {:?}", self.current.kind)));
    }

    // check if we're defining a function
    let starts_function = self.check(TokenKind::RParen)
        || (self.current.kind == TokenKind::Name
            && matches!(
                self.next.kind,
                TokenKind::RParen | TokenKind::Colon | TokenKind::Comma
            ));
    if starts_function {
        return self.finish_function_expr();
    }
    let inner = self.expr()?;
    self.expect("expect ')' after expression", TokenKind::RParen)?;
    Ok(inner)
}
/// Parses the rest of a function literal whose `(` has already been
/// consumed: parameters (trailing comma allowed), `)`, an optional
/// `-> type` and the body block.
fn finish_function_expr(&mut self) -> Result<ExprP> {
    let lparen = self.prev.clone().unwrap();

    let mut params = Vec::new();
    if !self.check(TokenKind::RParen) {
        loop {
            self.parse_param(&mut params)?;
            // stop when there is no comma, or when the comma was a trailing one
            if !self.mat(TokenKind::Comma)? || self.check(TokenKind::RParen) {
                break;
            }
        }
    }
    self.expect(
        "expect ')' after function definition parameters",
        TokenKind::RParen,
    )?;

    let return_type = if self.mat(TokenKind::Arrow)? {
        Some(self.expr()?)
    } else {
        None
    };

    self.expect("expect '{' after function signature", TokenKind::LBrace)?;
    let body = self.block()?;
    // `block()` stops right after consuming the closing brace
    let rbrace = self.prev.clone().unwrap();
    Ok(Box::new(FunctionExpr {
        lparen,
        params,
        return_type,
        body,
        rbrace,
    }))
}
/// Parses one parameter (`name` or `name: type`) and appends it to `params`.
fn parse_param(&mut self, params: &mut Vec<(Token, Option<ExprP>)>) -> Result<()> {
    let name = self
        .expect("expect name after function declaration", TokenKind::Name)?
        .clone();
    // the type annotation is optional
    let ty = if self.mat(TokenKind::Colon)? {
        Some(self.expr()?)
    } else {
        None
    };
    params.push((name, ty));
    Ok(())
}
}

67
src/token.rs Normal file
View File

@@ -0,0 +1,67 @@
/// The category of a token produced by the lexer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
// Keywords
Return,
If,
Else,
True,
False,
Nil,
// Expressions
Name,
Number,
String,
// Binary operators
Plus,
Minus,
Star,
Slash,
// Unary operators (not already covered)
Bang,
// Boolean operators
And,
Or,
// Comparison
BangEq,
EqEq,
Greater,
GreaterEq,
Less,
LessEq,
// Braces, parens, etc
LParen,
RParen,
LBrace,
RBrace,
LBracket,
RBracket,
// Assignment
Eq,
// Dot, comma
Dot,
Comma,
Arrow,
Colon,
// Line end (a significant newline, or `;`)
Eol,
// File end
Eof,
}
/// A single token: its kind, its source text and the line it was found on.
#[derive(Debug, Clone)]
pub struct Token {
/// Line number of the token, as recorded by the lexer when it was made.
pub line: usize,
//pub index: usize,
/// The exact source text of the token.
pub text: String,
/// What kind of token this is.
pub kind: TokenKind,
}

350
src/vm.rs Normal file
View File

@@ -0,0 +1,350 @@
use std::sync::Arc;
use crate::builtins;
use crate::obj::*;
/// A single VM instruction.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Op {
// Stack functions
Pop,
// operand: index into the constant table
PushConstant(LongOpArg),
// Variables
// operand: index into the chunk's `locals`
GetLocal(LocalIndex),
SetLocal(LocalIndex),
// operand: index into the globals table
GetGlobal(GlobalId),
SetGlobal(GlobalId),
// Attributes
// operand: constant ID of the attribute name
GetAttr(ConstantId),
SetAttr(ConstantId),
// Jumps
// operand: offset relative to the address of the jump instruction itself
// (the compiler and the disassembler both compute targets that way)
Jump(JumpOpArg),
JumpFalse(JumpOpArg),
JumpTrue(JumpOpArg),
// Functions
// operand: number of arguments
Call(Argc),
Return,
// emitted by the compiler once per captured nonlocal, right after the
// function constant is pushed
CloseOver { depth: ShortOpArg, slot: ShortOpArg },
// VM control
Halt,
}
/// Source lines an instruction came from, as `(start_line, end_line)`.
pub type LineRange = (usize, usize);
// Operand widths used by `Op`.
type ShortOpArg = u16;
type LongOpArg = u32;
/// Operand of jump instructions (signed).
pub type JumpOpArg = i32;
/// Index into a chunk's `locals`.
pub type LocalIndex = LongOpArg;
/// Slot number of a local.
pub type LocalSlot = ShortOpArg;
/// Index into the constant table.
pub type ConstantId = LongOpArg;
/// Index into the globals table.
pub type GlobalId = LongOpArg;
/// Argument count of a call.
pub type Argc = LongOpArg;
/// Frame depth of a captured nonlocal (see `Op::CloseOver`).
pub type FrameDepth = ShortOpArg;
/// Description of a local variable, stored in `Chunk::locals`.
#[derive(Debug, Clone)]
pub struct Local {
/// Offset from the owning frame's `stack_base` at which the VM reads and
/// writes this local's value.
pub(crate) slot: LocalSlot,
// NOTE(review): presumably this local's own position in `Chunk::locals`
// (the operand used by `Op::GetLocal` / `Op::SetLocal`) - confirm.
pub(crate) index: LocalIndex,
/// Name of the local.
pub(crate) name: String,
}
/// A unit of executable code: instructions plus the tables they refer to.
#[derive(Debug, Default, Clone)]
pub struct Chunk {
/// Instructions, indexed by a frame's instruction pointer.
pub(crate) code: Vec<Op>,
// NOTE(review): presumably one `LineRange` per instruction in `code`
// (parallel arrays) - confirm against the compiler.
pub(crate) lines: Vec<LineRange>,
/// Local variable table, indexed by the operand of `Op::GetLocal` and
/// `Op::SetLocal`.
pub(crate) locals: Vec<Local>,
}
/// One entry of the VM's frame stack: a chunk being executed plus its
/// execution state.
#[derive(Debug)]
pub struct Frame {
/// Name of the frame (the VM names its initial frame `__main__`).
pub(crate) name: Arc<String>,
/// Code being executed by this frame.
pub(crate) chunk: Arc<Chunk>,
/// Index into `chunk.code` of the next instruction to execute.
pub(crate) ip: usize,
/// Index into the VM stack that this frame's local slots are relative to.
pub(crate) stack_base: usize,
}
impl Frame {
pub fn new(name: Arc<String>, chunk: Arc<Chunk>, stack_base: usize) -> Self {
Self {
name,
chunk,
ip: 0,
stack_base,
}
}
}
/// The virtual machine: constant and global tables, the value stack, and the
/// stack of call frames.
pub struct Vm {
/// Constant table, indexed by `ConstantId`.
constants: Vec<ObjP>,
//global_names: Vec<String>,
/// Global values, indexed by `GlobalId`.
globals: Vec<ObjP>,
/// Value stack shared by all frames.
stack: Vec<ObjP>,
/// Call frames; the last element is the currently executing frame.
frames: Vec<Frame>,
}
impl Vm {
    /// Create a new virtual machine with the given chunk, constants, and global names.
    ///
    /// Every global starts out as nil; the `print` and `println` builtins are then
    /// stored in the global slots whose names match. The given chunk becomes the
    /// initial `__main__` frame with a stack base of 0.
    ///
    /// # Panics
    ///
    /// Panics if `global_names` does not contain both `"print"` and `"println"`.
    pub fn new(chunk: Arc<Chunk>, constants: Vec<ObjP>, global_names: Vec<String>) -> Self {
        // set up globals
        let nil = NilInst::create();
        let mut globals: Vec<_> = global_names
            .iter()
            .map(|_| Ptr::clone(&nil) as ObjP)
            .collect();
        let mut register_global = |name: &str, value: ObjP| {
            let index = global_names
                .iter()
                .position(|global| global == name)
                .expect("could not find global");
            globals[index] = value;
        };
        register_global(
            "print",
            BuiltinFunctionInst::create("print".to_string(), builtins::print, 1),
        );
        register_global(
            "println",
            BuiltinFunctionInst::create("println".to_string(), builtins::println, 1),
        );
        // stack and frames
        let stack = Vec::new();
        let frames = vec![Frame::new("__main__".to_string().into(), chunk, 0)];
        Vm {
            constants,
            //global_names,
            globals,
            stack,
            frames,
        }
    }

    /// Get the stack.
    pub fn stack(&self) -> &Vec<ObjP> {
        &self.stack
    }

    /// Current stack frame.
    ///
    /// # Panics
    ///
    /// Panics if there are no frames.
    pub fn frame(&self) -> &Frame {
        self.frames.last().unwrap()
    }

    /// Current stack frame, mutably.
    ///
    /// # Panics
    ///
    /// Panics if there are no frames.
    pub fn frame_mut(&mut self) -> &mut Frame {
        self.frames.last_mut().unwrap()
    }

    /// Push a new stack frame.
    pub fn push_frame(&mut self, frame: Frame) {
        self.frames.push(frame);
    }

    /// Pop the current stack frame.
    ///
    /// # Panics
    ///
    /// Panics if there are no frames.
    pub fn pop_frame(&mut self) -> Frame {
        self.frames.pop().expect("no frame")
    }

    /// Gets the chunk of the currently executing frame.
    pub fn chunk(&self) -> &Chunk {
        &self.frame().chunk
    }

    /// Instruction pointer of the current frame.
    pub fn ip(&self) -> usize {
        self.frame().ip
    }

    /// Update the current instruction pointer.
    pub fn set_ip(&mut self, ip: usize) {
        self.frame_mut().ip = ip;
    }

    /*
    /// Gets the line of the current instruction.
    fn line(&self, offset: isize) -> LineRange {
    let index = (((self.ip() as isize) + offset).max(0) as usize).min(self.chunk().lines.len());
    self.chunk().lines[index]
    }
    */

    /// Get the current instruction and advance the IP.
    ///
    /// # Panics
    ///
    /// Panics if the IP is past the end of the current chunk's code.
    fn next(&mut self) -> Op {
        let ip = self.ip();
        self.set_ip(ip + 1);
        self.chunk().code[ip]
    }

    /// Pop a value from the stack.
    ///
    /// # Panics
    ///
    /// Panics if the stack is empty.
    pub fn pop(&mut self) -> ObjP {
        self.stack.pop().expect("stack empty")
    }

    /// Peek the top value of the stack.
    ///
    /// # Panics
    ///
    /// Panics if the stack is empty.
    pub fn peek(&self) -> ObjP {
        self.stack.last().map(Ptr::clone).expect("stack empty")
    }

    /// Push a value to the stack.
    pub fn push(&mut self, value: ObjP) {
        self.stack.push(value);
    }

    /// Move the current frame's IP by `offset`, measured from the jump
    /// instruction that is currently executing.
    ///
    /// Must only be called while executing an instruction fetched with
    /// [`Vm::next`], which has already advanced the IP by one.
    ///
    /// # Panics
    ///
    /// Panics if the target would be negative (or would overflow `usize`).
    /// A target of exactly 0 is valid.
    fn jump_relative(&mut self, offset: JumpOpArg) {
        // `next()` already stepped past the jump instruction; offsets are
        // relative to the jump instruction itself, so step back by one.
        let base = self.ip() - 1;
        // i32 -> isize is a widening cast on all 32- and 64-bit targets.
        let target = base
            .checked_add_signed(offset as isize)
            .expect("tried to jump to negative IP");
        self.set_ip(target);
    }

    /// Execute instructions from the current frame until `Op::Halt` is reached.
    ///
    /// # Panics
    ///
    /// Error reporting is not implemented yet, so malformed programs panic:
    /// popping or peeking an empty stack, out-of-range constant/global/local
    /// indices, jumping to a negative IP, running past the end of a chunk,
    /// failing to acquire an object lock, and the `todo!()` paths for missing
    /// attributes, non-callable values, and arity mismatches.
    pub fn run(&mut self) {
        loop {
            match self.next() {
                Op::Pop => {
                    self.pop();
                }
                Op::PushConstant(constant_id) => {
                    let constant = Ptr::clone(&self.constants[constant_id as usize]);
                    self.push(constant);
                }
                Op::GetLocal(local_index) => {
                    let local = &self.chunk().locals[local_index as usize];
                    let value =
                        Ptr::clone(&self.stack[self.frame().stack_base + local.slot as usize]);
                    self.push(value);
                }
                Op::SetLocal(local_index) => {
                    let value = self.pop();
                    let local = &self.chunk().locals[local_index as usize];
                    let index = self.frame().stack_base + local.slot as usize;
                    self.stack[index] = value;
                }
                Op::GetGlobal(global_index) => {
                    let value = Ptr::clone(&self.globals[global_index as usize]);
                    self.push(value);
                }
                Op::SetGlobal(global_index) => {
                    let value = self.pop();
                    self.globals[global_index as usize] = value;
                }
                Op::GetAttr(constant_id) => {
                    // need both declarations to borrow cell value
                    let name_obj = Ptr::clone(&self.constants[constant_id as usize]);
                    let name =
                        with_obj_downcast(name_obj, |name: &StrInst| Arc::clone(&name.str_value()));
                    let owner = self.pop();
                    let value = owner.try_read().unwrap().get_attr(&name);
                    if let Some(value) = value {
                        self.push(value);
                    } else {
                        // TODO Vm::run, Op::GetAttr - throw an exception when the attribute
                        // doesn't exist
                        // BLOCKED-ON: exceptions
                        todo!(
                            "throw an error because we couldn't read attr '{}' on '{}'",
                            name,
                            owner.try_read().unwrap(),
                        );
                    }
                }
                Op::SetAttr(constant_id) => {
                    let name_obj = Ptr::clone(&self.constants[constant_id as usize]);
                    let name =
                        with_obj_downcast(name_obj, |name: &StrInst| Arc::clone(&name.str_value()));
                    // the value sits on top of the target
                    let value = self.pop();
                    let target = self.pop();
                    let mut target_ptr = target.try_write().unwrap();
                    target_ptr.set_attr(&name, value);
                }
                Op::Jump(offset) => {
                    self.jump_relative(offset);
                }
                Op::JumpFalse(offset) => {
                    // the condition is left on the stack for the compiler to pop
                    let value = self.peek();
                    if !value.try_read().unwrap().is_truthy() {
                        self.jump_relative(offset);
                    }
                }
                Op::JumpTrue(offset) => {
                    // the condition is left on the stack for the compiler to pop
                    let value = self.peek();
                    if value.try_read().unwrap().is_truthy() {
                        self.jump_relative(offset);
                    }
                }
                Op::Call(argc) => {
                    let argc = argc as usize;
                    // the callee sits directly below its arguments
                    let index = self.stack.len() - argc - 1;
                    let fun_ptr = Ptr::clone(&self.stack[index]);
                    let fun_ptr = fun_ptr.try_read().unwrap();
                    let arity = if let Some(arity) = fun_ptr.arity() {
                        arity as usize
                    } else {
                        // TODO Vm::run, Op::Call - throw an exception when the value isn't
                        // callable
                        // BLOCKED-ON: exceptions
                        todo!("throw an error because we couldn't call {}", fun_ptr);
                    };
                    // Methods with bound "self" parameter
                    // argc may be mutated
                    let mut argc = argc;
                    if let Some(method) = fun_ptr.as_any().downcast_ref::<MethodInst>() {
                        // insert the bound `self` value immediately before the first
                        // argument, shifting the explicit arguments up by one
                        let insert_at = self.stack.len() - argc;
                        self.stack
                            .insert(insert_at, Ptr::clone(method.self_binding()));
                        // also increment argc since we're specifying another arg
                        argc += 1;
                    }
                    // remove mutability
                    let argc = argc;
                    if arity != argc {
                        // TODO Vm::run, Op::Call - throw an exception when the number of arguments
                        // does not match the function's arity
                        // BLOCKED-ON: exceptions
                        todo!(
                            "throw an error because we passed the wrong number of arguments to {}",
                            fun_ptr
                        );
                    }
                    fun_ptr.call(self, argc as Argc);
                }
                Op::Return => {
                    let return_value = self.pop();
                    let old_frame = self.pop_frame();
                    // stack_base is always going to be <= current stack size
                    assert!(
                        old_frame.stack_base <= self.stack.len(),
                        "frame stack base is above the top of the stack"
                    );
                    // drop the frame's locals and temporaries
                    self.stack.truncate(old_frame.stack_base);
                    // also pop the function object off of the stack
                    self.stack.pop();
                    self.push(return_value);
                }
                Op::CloseOver { depth, slot } => {
                    // since we're closing over a value, and functions ultimately come from
                    // constants, we want to deep-clone this object so we don't alter any live
                    // objects.
                    // there is some room for optimization here so we aren't cloning the entire
                    // UserFunctionInst for every individual capture in a function.
                    let fun_ptr = self.pop();
                    let mut fun: UserFunctionInst =
                        with_obj_downcast(fun_ptr, UserFunctionInst::clone);
                    // depth 0 is the current frame, 1 is its caller, and so on
                    let frame_index = self.frames.len() - (depth as usize) - 1;
                    let stack_base = self.frames[frame_index].stack_base;
                    let value = Ptr::clone(&self.stack[stack_base + (slot as usize)]);
                    fun.push_capture(value);
                    self.push(make_ptr(fun));
                }
                Op::Halt => {
                    break;
                }
            }
        }
    }
}