Revamp object system, start using gc crate

Wow, what a ride. I think everything should be working now. In short:

* Objects use the `gc` crate, which has a `Gc` garbage-collected pointer
  type. I may choose to implement my own in contiguous memory in the
  future. We will see.
* The type system is no longer global. This is a bit of a burden,
  because now, whenever you want to create a new object, you need to
  pass its type object into the `Obj::instantiate` method, as well as
  its `::create` static method.
* This burden is somewhat alleviated by the `ObjFactory` trait, which
  helps create new objects as long as you have access to a `builtins`
  hashmap. So something that would normally look like this:

    fn init_builtins(builtins: &mut HashMap<String, ObjP>) {
        let print_builtin = upcast_obj(BuiltinFunctionInst::create(
            ObjP::clone(&builtins.get("BuiltinFunction").unwrap()),
            "print",
            print,
            1
        ));
        builtins.insert("print".to_string(), print_builtin);
        // other builtins inserted here...
    }

  now looks like this:

    fn init_builtins(builtins: &mut HashMap<String, ObjP>) {
        let print_builtin = builtins.create_builtin_function("print", print, 1);
        builtins.insert("print".to_string(), print_builtin);
    }

(turns out, if all you need is a HashMap<String, ObjP>, you can
implement ObjFactory for HashMap<String, ObjP> itself(!))

Overall, I'm happier with this design, and I think this is what is going
to get merged. It's a little weird to be querying type names that are
used in the language itself to get those type objects, but whatever
works, I guess.

Next up is vtables.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2024-09-23 18:12:32 -07:00
parent 24b06851c7
commit 8b931e9d12
8 changed files with 502 additions and 356 deletions

55
Cargo.lock generated
View File

@@ -88,7 +88,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
"syn 2.0.77",
]
[[package]]
@@ -109,6 +109,27 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3f6d59c71e7dc3af60f0af9db32364d96a16e9310f3f5db2b55ed642162dd35"
[[package]]
name = "gc"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f73a03797d58caede765f4b19522d1a63d737088bbef81de06d2dd117d7bc8c3"
dependencies = [
"gc_derive",
]
[[package]]
name = "gc_derive"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb6f1e0d69658b2806f60864bad1a80a9634fc0bc17f7ed55828e3b9c45bf61e"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"synstructure",
]
[[package]]
name = "heck"
version = "0.5.0"
@@ -128,6 +149,7 @@ dependencies = [
"assert_matches",
"clap",
"common_macros",
"gc",
"thiserror",
]
@@ -155,6 +177,17 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.77"
@@ -166,6 +199,18 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "synstructure"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"unicode-xid",
]
[[package]]
name = "thiserror"
version = "1.0.63"
@@ -183,7 +228,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.77",
]
[[package]]
@@ -192,6 +237,12 @@ version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
name = "unicode-xid"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "utf8parse"
version = "0.2.2"

View File

@@ -8,3 +8,4 @@ assert_matches = "1.5.0"
clap = { version = "4.5.8", features = ["derive"] }
common_macros = "0.1.1"
thiserror = "1.0.63"
gc = { version = "0.5", features = ["derive"] }

View File

@@ -1,13 +1,34 @@
//! Builtin functions.
use crate::obj::{NilInst, ObjP};
use std::collections::HashMap;
use crate::obj::*;
use crate::vm::Vm;
pub(crate) fn println(_vm: &mut Vm, args: Vec<ObjP>) -> ObjP {
println!("{}", args[0]);
NilInst::create()
pub(crate) fn println(vm: &mut Vm, args: Vec<ObjP>) -> ObjP {
println!("{}", args[0].borrow());
vm.create_nil()
}
pub(crate) fn print(_vm: &mut Vm, args: Vec<ObjP>) -> ObjP {
print!("{}", args[0]);
NilInst::create()
pub(crate) fn print(vm: &mut Vm, args: Vec<ObjP>) -> ObjP {
print!("{}", args[0].borrow());
vm.create_nil()
}
pub fn init_builtins(builtins: &mut HashMap<String, ObjP>) {
macro_rules! builtins {
($($builtin:ident / $argc:expr),* $(,)?) => {
$({
let builtin_function = builtins.create_builtin_function(stringify!($builtin), $builtin, $argc);
builtins.insert(
stringify!($builtin).to_string(),
builtin_function
);
})*
}
}
builtins! {
print/1,
println/1,
}
}

View File

@@ -1,6 +1,7 @@
use std::collections::{HashMap, HashSet};
use std::fmt::{self, Display};
use std::sync::{Arc, LazyLock};
use std::rc::Rc;
use std::sync::LazyLock;
use assert_matches::assert_matches;
use common_macros::hash_map;
@@ -405,25 +406,25 @@ impl Display for CompileError {
////////////////////////////////////////////////////////////////////////////////
#[derive(Debug)]
pub struct Compiler {
pub struct Compiler<'b> {
chunks: Vec<Chunk>,
scopes: Vec<Scope>,
constants: Vec<ObjP>,
globals: Vec<String>,
builtins: &'b HashMap<String, ObjP>,
}
impl Default for Compiler {
fn default() -> Self {
impl<'b> Compiler<'b> {
pub fn new(builtins: &'b HashMap<String, ObjP>) -> Self {
Compiler {
chunks: Default::default(),
scopes: Default::default(),
constants: Default::default(),
globals: vec!["print".to_string(), "println".to_string()],
globals: builtins.keys().map(ToString::to_string).collect(),
builtins,
}
}
}
impl Compiler {
fn chunk(&self) -> &Chunk {
self.chunks.last().expect("no chunk")
}
@@ -478,7 +479,7 @@ impl Compiler {
// simple interning - try to find a constant that is exactly equal to this one and just
// return its value instead
for (index, interned) in self.constants.iter().enumerate() {
if constant.equals(interned.as_ref()) {
if constant.borrow().equals(&*interned.borrow()) {
return Ok(index as ConstantId);
}
}
@@ -491,6 +492,8 @@ impl Compiler {
}
.into());
}
// convert this to a pointer, upcast, and then re-GC
self.constants.push(constant);
Ok(index as ConstantId)
}
@@ -629,7 +632,7 @@ impl Compiler {
}
}
impl StmtVisitor for Compiler {
impl StmtVisitor for Compiler<'_> {
fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
self.compile_expr(&stmt.expr)?;
self.emit(stmt_line_number(stmt), Op::Pop);
@@ -661,9 +664,13 @@ impl StmtVisitor for Compiler {
// TODO - maybe this would be smarter to set up in the AST. I'm 99% sure that the last
// object created, if it were a function object, will be what we're assigning it to, but I
// want to be 100% sure instead of 99%.
let obj = Arc::get_mut(self.constants.last_mut().unwrap()).unwrap();
if let Some(fun) = obj.as_any_mut().downcast_mut::<UserFunctionInst>() {
fun.set_name(Arc::new(name.to_string()));
let obj = self.constants.last().unwrap().as_ref();
if let Some(fun) = obj
.borrow_mut()
.as_any_mut()
.downcast_mut::<UserFunctionInst>()
{
fun.set_name(Rc::new(name.to_string()));
}
Ok(())
@@ -671,7 +678,7 @@ impl StmtVisitor for Compiler {
fn visit_set_stmt(&mut self, stmt: &SetStmt) -> Result<()> {
self.compile_expr(&stmt.expr)?;
let name = self.insert_constant(StrInst::create(&stmt.name.text))?;
let name = self.insert_constant(self.create_str(&stmt.name.text))?;
self.compile_expr(&stmt.rhs)?;
self.emit(stmt_line_number(stmt), Op::SetAttr(name));
Ok(())
@@ -690,7 +697,7 @@ impl StmtVisitor for Compiler {
if let Some(expr) = &stmt.expr {
self.compile_expr(expr)?;
} else {
let nil = self.insert_constant(NilInst::create())?;
let nil = self.insert_constant(self.create_nil())?;
self.emit(stmt_line_number(stmt), Op::PushConstant(nil));
}
Ok(())
@@ -699,7 +706,7 @@ impl StmtVisitor for Compiler {
// condition
self.compile_expr(&stmt.condition)?;
// call obj.__bool__()
let bool_attr = self.insert_constant(StrInst::create("__bool__"))?;
let bool_attr = self.insert_constant(self.create_str("__bool__"))?;
self.emit(expr_line_number(&*stmt.condition), Op::GetAttr(bool_attr));
self.emit(expr_line_number(&*stmt.condition), Op::Call(0));
let condition_patch_index = self.chunk().code.len();
@@ -746,7 +753,7 @@ impl StmtVisitor for Compiler {
}
}
impl ExprVisitor for Compiler {
impl ExprVisitor for Compiler<'_> {
fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> {
static OP_NAMES: LazyLock<HashMap<TokenKind, &'static str>> = LazyLock::new(|| {
hash_map! {
@@ -771,7 +778,7 @@ impl ExprVisitor for Compiler {
let mut exit_patch_index = 0;
if let TokenKind::And | TokenKind::Or = expr.op.kind {
let constant_id = self.insert_constant(StrInst::create("__bool__"))?;
let constant_id = self.insert_constant(self.create_str("__bool__"))?;
self.emit(expr_line_number(&*expr.lhs), Op::GetAttr(constant_id));
self.emit(expr_line_number(&*expr.lhs), Op::Call(0));
exit_patch_index = self.chunk().code.len();
@@ -785,12 +792,12 @@ impl ExprVisitor for Compiler {
let name = OP_NAMES
.get(&expr.op.kind)
.expect("invalid binary operator");
let constant_id = self.insert_constant(StrInst::create(name))?;
let constant_id = self.insert_constant(self.create_str(name))?;
self.emit(expr_line_number(expr), Op::GetAttr(constant_id));
// convert RHS to a bool if we're doing AND or OR
if let TokenKind::And | TokenKind::Or = expr.op.kind {
let constant_id = self.insert_constant(StrInst::create("__bool__"))?;
let constant_id = self.insert_constant(self.create_str("__bool__"))?;
self.emit(expr_line_number(&*expr.rhs), Op::GetAttr(constant_id));
self.emit(expr_line_number(&*expr.rhs), Op::Call(0));
}
@@ -828,7 +835,7 @@ impl ExprVisitor for Compiler {
});
self.compile_expr(&expr.expr)?;
let name = OP_NAMES.get(&expr.op.kind).expect("invalid unary operator");
let constant_id = self.insert_constant(StrInst::create(name))?;
let constant_id = self.insert_constant(self.create_str(name))?;
self.emit(expr_line_number(expr), Op::GetAttr(constant_id));
self.emit(expr_line_number(expr), Op::Call(0));
Ok(())
@@ -852,7 +859,7 @@ impl ExprVisitor for Compiler {
fn visit_get_expr(&mut self, expr: &GetExpr) -> Result<()> {
self.compile_expr(&expr.expr)?;
let constant_id = self.insert_constant(StrInst::create(&expr.name.text))?;
let constant_id = self.insert_constant(self.create_str(&expr.name.text))?;
self.emit(expr_line_number(expr), Op::GetAttr(constant_id));
Ok(())
}
@@ -878,25 +885,25 @@ impl ExprVisitor for Compiler {
}
TokenKind::Number => {
let obj = if expr.token.text.contains('.') {
FloatInst::create(expr.token.text.parse().unwrap()) as ObjP
self.create_float(expr.token.text.parse().unwrap())
} else {
IntInst::create(expr.token.text.parse().unwrap()) as ObjP
self.create_int(expr.token.text.parse().unwrap())
};
let constant_id = self.insert_constant(obj)?;
self.emit(expr_line_number(expr), Op::PushConstant(constant_id));
}
TokenKind::String => {
let constant_id =
self.insert_constant(StrInst::create(unescape(&expr.token.text)))?;
self.insert_constant(self.create_str(unescape(&expr.token.text)))?;
self.emit(expr_line_number(expr), Op::PushConstant(constant_id));
}
TokenKind::True | TokenKind::False => {
let constant_id =
self.insert_constant(BoolInst::create(expr.token.kind == TokenKind::True))?;
self.insert_constant(self.create_bool(expr.token.kind == TokenKind::True))?;
self.emit(expr_line_number(expr), Op::PushConstant(constant_id));
}
TokenKind::Nil => {
let constant_id = self.insert_constant(NilInst::create())?;
let constant_id = self.insert_constant(self.create_nil())?;
self.emit(expr_line_number(expr), Op::PushConstant(constant_id));
}
_ => unreachable!(),
@@ -954,7 +961,7 @@ impl ExprVisitor for Compiler {
}
// always end with a "return nil"
let nil = self.insert_constant(NilInst::create())?;
let nil = self.insert_constant(self.create_nil())?;
self.emit(end_line, Op::PushConstant(nil));
self.emit(end_line, Op::Return);
@@ -962,7 +969,7 @@ impl ExprVisitor for Compiler {
// create the function
let chunk = self.chunks.pop().unwrap();
let fun = UserFunctionInst::create(chunk, expr.params.len() as Argc);
let fun = self.create_user_function(chunk, expr.params.len() as Argc);
// register the function as a constant
let fun_constant = self.insert_constant(fun)?;
@@ -982,3 +989,9 @@ impl ExprVisitor for Compiler {
Ok(())
}
}
impl ObjFactory for Compiler<'_> {
fn builtins(&self) -> &HashMap<String, ObjP> {
&self.builtins
}
}

View File

@@ -33,7 +33,7 @@ fn disassemble_chunk(chunk: &Chunk, constants: &Vec<ObjP>, globals: &Vec<String>
}
Op::PushConstant(constant_id) => {
op_str = "PUSH_CONSTANT";
arg = format!("{}", &constants[*constant_id as usize]);
arg = format!("{}", &constants[*constant_id as usize].borrow());
info = format!("(constant ID {constant_id})");
}
Op::GetLocal(local_id) => {
@@ -60,12 +60,12 @@ fn disassemble_chunk(chunk: &Chunk, constants: &Vec<ObjP>, globals: &Vec<String>
}
Op::GetAttr(constant_id) => {
op_str = "GET_ATTR";
arg = format!("{}", &constants[*constant_id as usize]);
arg = format!("{}", &constants[*constant_id as usize].borrow());
info = format!("(constant ID {constant_id})");
}
Op::SetAttr(constant_id) => {
op_str = "SET_ATTR";
arg = format!("{}", &constants[*constant_id as usize]);
arg = format!("{}", &constants[*constant_id as usize].borrow());
info = format!("(constant ID {constant_id})");
}
Op::Jump(jump_offset) => {
@@ -145,7 +145,11 @@ pub fn disassemble(chunk: &Chunk, constants: &Vec<ObjP>, globals: &Vec<String>)
disassemble_chunk(chunk, constants, globals);
for constant in constants {
if let Some(fun) = constant.as_any().downcast_ref::<UserFunctionInst>() {
if let Some(fun) = constant
.borrow()
.as_any()
.downcast_ref::<UserFunctionInst>()
{
println!();
println!(
"== {} starting on line {}",

View File

@@ -1,6 +1,7 @@
// trait_upcasting - https://github.com/rust-lang/rust/issues/65991
// stabilization in progress
#![feature(trait_upcasting)]
#![feature(coerce_unsized)]
mod ast;
mod builtins;
@@ -11,6 +12,7 @@ mod parser;
mod token;
mod vm;
use std::collections::HashMap;
use std::fmt;
use std::fs::File;
use std::io::prelude::*;
@@ -52,10 +54,12 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
// initialize type system
obj::init_types();
let mut builtins = HashMap::new();
obj::init_types(&mut builtins);
crate::builtins::init_builtins(&mut builtins);
// compile
let (chunk, constants, globals) = compiler::Compiler::default().compile(&ast)?;
let (chunk, constants, globals) = compiler::Compiler::new(&builtins).compile(&ast)?;
if args.disassemble {
disassemble::disassemble(&chunk, &constants, &globals);
@@ -63,7 +67,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
// run
let mut vm = vm::Vm::new(chunk.into(), constants, globals);
let mut vm = vm::Vm::new(chunk.into(), constants, globals, builtins);
vm.run();
Ok(())

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
use std::sync::Arc;
use std::collections::HashMap;
use std::rc::Rc;
use crate::builtins;
use crate::obj::*;
#[derive(Debug, Clone, Copy, PartialEq)]
@@ -62,14 +62,14 @@ pub struct Chunk {
#[derive(Debug)]
pub struct Frame {
pub(crate) name: Arc<String>,
pub(crate) chunk: Arc<Chunk>,
pub(crate) name: Rc<String>,
pub(crate) chunk: Rc<Chunk>,
pub(crate) ip: usize,
pub(crate) stack_base: usize,
}
impl Frame {
pub fn new(name: Arc<String>, chunk: Arc<Chunk>, stack_base: usize) -> Self {
pub fn new(name: Rc<String>, chunk: Rc<Chunk>, stack_base: usize) -> Self {
Self {
name,
chunk,
@@ -85,17 +85,20 @@ pub struct Vm {
globals: Vec<ObjP>,
stack: Vec<ObjP>,
frames: Vec<Frame>,
builtins: HashMap<String, ObjP>,
}
impl Vm {
/// Create a new virtual machine with the given chunk, constants, and global names.
pub fn new(chunk: Arc<Chunk>, constants: Vec<ObjP>, global_names: Vec<String>) -> Self {
pub fn new(
chunk: Rc<Chunk>,
constants: Vec<ObjP>,
global_names: Vec<String>,
builtins: HashMap<String, ObjP>,
) -> Self {
// set up globals
let nil = NilInst::create();
let mut globals: Vec<_> = global_names
.iter()
.map(|_| Ptr::clone(&nil) as ObjP)
.collect();
let nil = builtins.create_nil();
let mut globals: Vec<_> = global_names.iter().map(|_| ObjP::clone(&nil)).collect();
let mut register_global = |name: &str, value: ObjP| {
let index = global_names
@@ -105,14 +108,9 @@ impl Vm {
globals[index] = value;
};
register_global(
"print",
BuiltinFunctionInst::create("print".to_string(), builtins::print, 1),
);
register_global(
"println",
BuiltinFunctionInst::create("println".to_string(), builtins::println, 1),
);
for (name, builtin) in builtins.iter() {
register_global(&name, ObjP::clone(&builtin));
}
// stack and frames
let stack = Vec::new();
@@ -124,6 +122,7 @@ impl Vm {
globals,
stack,
frames,
builtins,
}
}
@@ -231,9 +230,9 @@ impl Vm {
// need both declarations to borrow cell value
let name_obj = Ptr::clone(&self.constants[constant_id as usize]);
let name =
with_obj_downcast(name_obj, |name: &StrInst| Arc::clone(&name.str_value()));
with_obj_downcast(name_obj, |name: &StrInst| Rc::clone(&name.str_value()));
let owner = self.pop();
let value = owner.get_attr(&name);
let value = owner.borrow().get_attr(&name);
if let Some(value) = value {
self.push(value);
} else {
@@ -243,18 +242,18 @@ impl Vm {
todo!(
"throw an error because we couldn't read attr '{}' on '{}'",
name,
owner,
owner.borrow(),
);
}
}
Op::SetAttr(constant_id) => {
let name_obj = Ptr::clone(&self.constants[constant_id as usize]);
let name =
with_obj_downcast(name_obj, |name: &StrInst| Arc::clone(&name.str_value()));
with_obj_downcast(name_obj, |name: &StrInst| Rc::clone(&name.str_value()));
let value = self.pop();
let target = self.pop();
target.set_attr(&name, value);
target.borrow_mut().set_attr(&name, value);
}
Op::Jump(offset) => {
let base = (self.ip() - 1) as JumpOpArg;
@@ -264,14 +263,14 @@ impl Vm {
Op::JumpFalse(offset) => {
let base = (self.ip() - 1) as JumpOpArg;
let value = self.peek();
if !value.is_truthy() {
if !value.borrow().is_truthy() {
self.set_ip((base + offset) as usize);
}
}
Op::JumpTrue(offset) => {
let base = (self.ip() - 1) as JumpOpArg;
let value = self.peek();
if value.is_truthy() {
if value.borrow().is_truthy() {
self.set_ip((base + offset) as usize);
}
}
@@ -280,19 +279,22 @@ impl Vm {
let index = self.stack.len() - argc - 1;
let fun_ptr = Ptr::clone(&self.stack[index]);
let arity = if let Some(arity) = fun_ptr.arity() {
let arity = if let Some(arity) = fun_ptr.borrow().arity() {
arity as usize
} else {
// TODO Vm::run, Op::Call - throw an exception when the value isn't
// callable
// BLOCKED-ON: exceptions
todo!("throw an error because we couldn't call {}", fun_ptr);
todo!(
"throw an error because we couldn't call {}",
fun_ptr.borrow()
);
};
// Methods with bound "self" parameter
// argc may be mutated
let mut argc = argc;
if let Some(method) = fun_ptr.as_any().downcast_ref::<MethodInst>() {
if let Some(method) = fun_ptr.borrow().as_any().downcast_ref::<MethodInst>() {
// shift all of the arguments over by one
// (duplicate the last item on the stack and then shift everyone else over)
self.stack
@@ -309,10 +311,10 @@ impl Vm {
// BLOCKED-ON: exceptions
todo!(
"throw an error because we passed the wrong number of arguments to {}",
fun_ptr
fun_ptr.borrow()
);
}
fun_ptr.call(self, argc as Argc);
fun_ptr.borrow().call(self, argc as Argc);
}
Op::Return => {
let return_value = self.pop();
@@ -337,7 +339,7 @@ impl Vm {
let stack_base = self.frames[frame_index].stack_base;
let value = Ptr::clone(&self.stack[stack_base + (slot as usize)]);
fun.push_capture(value);
self.push(make_ptr(fun));
self.push(upcast_obj(make_ptr(fun)));
}
Op::Halt => {
break;
@@ -346,3 +348,9 @@ impl Vm {
}
}
}
impl ObjFactory for Vm {
fn builtins(&self) -> &HashMap<String, ObjP> {
&self.builtins
}
}