From 958a6caabb1780465537e60d01ecde179f33e31e Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Sun, 27 Sep 2020 19:33:18 -0700 Subject: [PATCH] Update how scope rules work, and update implementation * Currently, scopes are only allowed to look at their locals and the globals. Inner functions cannot refer to values in their parent functions. This will change eventually. * Scope lookup is split between globals and locals. Locals are defined in a scope if they are explicitly assigned to. * i.e. `a = foo` will treat `a` as a local in the current scope if it appears anywhere in that scope. This does not extend to setattrs; `a.b = foo` will not trigger `a` into being a local var. * `Package` objects are no longer returned from the compiler - instead, a user function is returned. * Other various changes and renames Signed-off-by: Alek Ratzloff --- src/bin/not.rs | 5 ++- src/compile/mod.rs | 79 +++++++++++++++----------------------- src/compile/scope.rs | 86 +++++++++++++++++++++++++++++++++++++++++ src/compile/thunk.rs | 47 ++++++++++++++++++----- src/obj/fun.rs | 91 +++++++++++++++++++++++++++++++++++++++++--- src/obj/mod.rs | 4 +- src/obj/name.rs | 1 + src/obj/names.rs | 7 ---- src/vm/frame.rs | 11 ++++-- src/vm/inst.rs | 16 ++++++-- src/vm/mod.rs | 57 ++++++++++++++++++--------- src/vm/package.rs | 71 ---------------------------------- 12 files changed, 304 insertions(+), 171 deletions(-) create mode 100644 src/compile/scope.rs create mode 100644 src/obj/name.rs delete mode 100644 src/obj/names.rs diff --git a/src/bin/not.rs b/src/bin/not.rs index 5cb7853..9b2a9df 100644 --- a/src/bin/not.rs +++ b/src/bin/not.rs @@ -46,11 +46,12 @@ fn main() -> Result<()> { return Ok(()); } - let package = Compile::new().compile(&ast)?; + let (const_pool, main) = Compile::new().compile(&ast)?; if action == "dump" { let mut stdout = std::io::stdout(); - package.dump(&mut stdout)?; + not_python::read_obj!(let main_obj = main); + main_obj.dump(&mut stdout, &const_pool, main_obj.locals())?; return Ok(()); } diff --git a/src/compile/mod.rs b/src/compile/mod.rs index ebd4c53..be1e85f 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -1,18 +1,17 @@ pub mod basic_block; pub mod error; mod locals; +mod scope; pub mod thunk; -use crate::{syn::ast::Body, obj::prelude::*, vm::{consts::*, package::Package}}; +use crate::{syn::ast::Body, obj::prelude::*, vm::consts::*}; +use scope::*; use std::collections::HashMap; #[derive(Default)] pub struct Compile { const_data: ConstData, - globals: Locals, - scope: Vec, - names: Vec, - next_local: Name, + scope: Scope, } impl Compile { @@ -22,12 +21,24 @@ impl Compile { } /// Compiles the given AST body. - pub fn compile(mut self, body: &Body) -> error::Result { + pub fn compile(mut self, body: &Body) -> error::Result<(ConstPool, UserFunRef)> { + self.push_scope_layer(); let main = thunk::CompileBody::new(&mut self) .compile(body)? .flatten() .to_vec(); - Ok(Package::new(self.names, self.const_data.const_pool, main)) + let globals_syms: std::collections::BTreeMap<_, _> = self.pop_scope_layer().unwrap() + .into_iter() + .map(|(sym, name)| (name, sym)) + .collect(); + let globals = globals_syms.into_iter() + .enumerate() + .map(|(index, (name, sym))| { + assert_eq!(index, name.index()); + sym + }) + .collect(); + Ok((self.const_data.const_pool, UserFun::new_obj(main, globals))) } /// Gets the constant data that is interned in this compile session. @@ -55,64 +66,34 @@ impl Compile { self.const_data_mut().push(obj) } - /// Looks up a variable name. - pub fn lookup_scope(&mut self, sym: Sym) -> Option { - self.scope - .iter() - .rev() - .filter_map(|locals| locals.get(&sym)) - .next() - .or_else(|| self.globals.get(&sym)) - .copied() + /// Looks up a local variable name. + pub fn lookup_local(&mut self, sym: Sym) -> Option { + self.scope.lookup_local(sym) } - /// Looks up a variable name, or creates a global name if it doesn't exist. - pub fn lookup_scope_or_create_global(&mut self, sym: Sym) -> Name { - if let Some(local) = self.lookup_scope(sym) { - local - } else { - self.create_global(sym) - } + /// Looks up a global variable name. + pub fn lookup_global(&mut self, sym: Sym) -> Option { + self.scope.lookup_global(sym) } /// Creates a new local variable if it does not exist in the current local scope. pub(crate) fn create_local(&mut self, sym: Sym) -> Name { - let locals = self.scope.last_mut().expect("scope"); - if let Some(local) = locals.get(&sym) { - *local - } else { - // wish I could use mem::replace here, oh well - let local = self.next_local; - self.next_local = self.next_local.next(); - locals.insert(sym, local); - self.names.push(sym); - assert_eq!(self.names.len(), self.next_local.index()); - local - } + self.scope.create_local(sym) } /// Creates a new global variable if it does not exist in the current global scope. pub fn create_global(&mut self, sym: Sym) -> Name { - if let Some(global) = self.globals.get(&sym) { - *global - } else { - let global = self.next_local; - self.next_local = self.next_local.next(); - self.globals.insert(sym, global); - self.names.push(sym); - assert_eq!(self.names.len(), self.next_local.index()); - global - } + self.scope.create_global(sym) } /// Pushes an empty scope layer of local variables. - pub(crate) fn push_scope_layer(&mut self) { - self.scope.push(Default::default()); + pub fn push_scope_layer(&mut self) { + self.scope.push_layer() } /// Pops a scope layer of local variables, if any are available. - pub(crate) fn pop_scope_layer(&mut self) -> Option { - self.scope.pop() + pub fn pop_scope_layer(&mut self) -> Option { + self.scope.pop_layer() } /// Collects local variables for the given AST body non-recursively. diff --git a/src/compile/scope.rs b/src/compile/scope.rs new file mode 100644 index 0000000..0d433e9 --- /dev/null +++ b/src/compile/scope.rs @@ -0,0 +1,86 @@ +use crate::obj::prelude::*; +use std::collections::BTreeMap; + +// current TODO/problem: How should locals work? +// +// * I think we should have a global/local dichotomy in the VM. You can only look up the local +// scope in the VM; inner functions that have captures should be created on the fly and detected +// by the compiler. I.E. any lookups that are not local or global should be captured dynamically. +// * Need to differentiate between locals collected on the compilation side vs. locals on the +// object/vm side + +pub type ScopeLocalSyms = BTreeMap; +pub type ScopeLocals = Vec; + +pub struct Scope { + scope: Vec, +} + +impl Scope { + pub fn lookup_local(&self, sym: Sym) -> Option { + self.locals() + .and_then(|locals| locals.get(&sym)) + .copied() + } + + pub fn lookup_global(&self, sym: Sym) -> Option { + self.globals() + .and_then(|globals| globals.get(&sym)) + .copied() + } + + pub fn create_local(&mut self, sym: Sym) -> Name { + let locals = self.locals_mut() + .expect("locals"); + if let Some(local) = locals.get(&sym).copied() { + local + } else { + let name = Name::new(locals.len()); + locals.insert(sym, name); + name + } + } + + pub fn create_global(&mut self, sym: Sym) -> Name { + let globals = self.globals_mut() + .expect("globals"); + if let Some(global) = globals.get(&sym).copied() { + global + } else { + let name = Name::new(globals.len()); + globals.insert(sym, name); + name + } + } + + pub fn push_layer(&mut self) { + self.scope.push(Default::default()); + } + + pub fn pop_layer(&mut self) -> Option { + self.scope.pop() + } + + pub fn locals(&self) -> Option<&ScopeLocalSyms> { + self.scope.last() + } + + pub fn locals_mut(&mut self) -> Option<&mut ScopeLocalSyms> { + self.scope.last_mut() + } + + pub fn globals(&self) -> Option<&ScopeLocalSyms> { + self.scope.first() + } + + pub fn globals_mut(&mut self) -> Option<&mut ScopeLocalSyms> { + self.scope.first_mut() + } +} + +impl Default for Scope { + fn default() -> Self { + // empty global scope + Self { scope: vec![Default::default()] } + } +} diff --git a/src/compile/thunk.rs b/src/compile/thunk.rs index 175374a..cceae99 100644 --- a/src/compile/thunk.rs +++ b/src/compile/thunk.rs @@ -4,7 +4,7 @@ use crate::{ syn::{ast::*, visit::*}, vm::inst::*, }; -use std::mem; +use std::{collections::BTreeMap, mem}; /// A basic block of VM code. /// @@ -213,9 +213,7 @@ impl<'c> CompileBody<'c> { } pub fn compile(&mut self, body: &'c Body) -> Result { - self.compile.push_scope_layer(); let thunk = self.visit_body(body)?; - self.compile.pop_scope_layer(); Ok(thunk) } } @@ -266,8 +264,12 @@ impl Visit for CompileBody<'_> { } LhsExpr::Name(local_name) => { let sym = global_sym(local_name.to_string()); - let local = self.compile.lookup_scope_or_create_global(sym); - thunk = Inst::Pop(Some(local)).into(); + if let Some(local) = self.compile.lookup_local(sym) { + thunk = Inst::PopLocal(Some(local)).into(); + } else { + let global = self.compile.lookup_global(sym).expect("name expected to exist someplace(?)"); + thunk = Inst::PopGlobal(Some(global)).into(); + } } } Ok(thunk) @@ -346,6 +348,12 @@ impl Visit for CompileBody<'_> { } fn visit_fun_expr(&mut self, expr: &FunExpr) -> Self::Out { + // TODO(fun) : need captures for functions, built dynamically (or statically?) + // - static is not possible, since captures are *created* at runtime, and there's no + // instruction that will look up just one scope level - it's either locals or globals. + // - an entire "create function" instruction is probably the best way to solve it, don't + // try to be clever, just implement it like that (since I mean, python does too...) + // - push const // (functions are unique const values so a new function will be created for every literal // function defined in code) @@ -357,7 +365,23 @@ impl Visit for CompileBody<'_> { let sym = global_sym(param.to_string()); self.compile.create_local(sym); } - let locals = self.compile.pop_scope_layer().unwrap(); + // remap (Sym -> Name) to be (Name -> Sym) and make sure it's all in order. + let scope_locals: BTreeMap<_, _> = self.compile.pop_scope_layer() + .unwrap() + .into_iter() + .map(|(sym, name)| (name, sym)) + .collect(); + // this should be in numeric order since: + // 1. locals are created exactly once or looked up + // 2. scope_locals is a btreemap, keyed by names, which are in order from 0..N + let locals: FunLocals = scope_locals.into_iter() + .enumerate() + .map(|(index, (name, sym))| { + assert_eq!(index, name.index()); + sym + }) + .collect(); + let code = self.visit_body(&expr.body)? .flatten() .to_vec(); @@ -373,9 +397,14 @@ impl Visit for CompileBody<'_> { let thunk = match atom { Atom::Ident(ident) => { let sym = global_sym(ident.to_string()); - let name = self.compile.lookup_scope_or_create_global(sym); - // get local - Inst::LoadName(name).into() + if let Some(local) = self.compile.lookup_local(sym) { + // get local + Inst::LoadLocal(local).into() + } else { + // get or create global + let global = self.compile.create_global(sym); + Inst::LoadGlobal(global).into() + } } Atom::Sym(sym) => { // push symbol diff --git a/src/obj/fun.rs b/src/obj/fun.rs index 1a9d869..39f2898 100644 --- a/src/obj/fun.rs +++ b/src/obj/fun.rs @@ -1,7 +1,9 @@ -use crate::{obj::{reserved::*, prelude::*}, vm::{inst::Inst, signal::*, Vm}}; +use crate::{obj::{reserved::*, prelude::*}, vm::{consts::ConstPool, inst::Inst, signal::*, Vm}}; use once_cell::sync::Lazy; use shredder::{GcSafeWrapper, Scan}; -use std::fmt::{Debug, Formatter, self}; +use std::{fmt::{Debug, Formatter, self}, io::{self, Write}}; + +pub type FunLocals = Vec; // // struct UserFun @@ -18,11 +20,11 @@ pub struct UserFun { code: Vec, // Safe because this is just an interner that points to symbols, which aren't GC'd #[shredder(unsafe_skip)] - locals: Locals, + locals: FunLocals, } impl UserFun { - pub fn new_obj(code: Vec, locals: Locals) -> UserFunRef { + pub fn new_obj(code: Vec, locals: FunLocals) -> UserFunRef { let obj_ref = ObjRef::new(UserFun { vtable: Default::default(), // this is a placeholder for the real vtable attrs: Default::default(), @@ -47,9 +49,88 @@ impl UserFun { &self.code } - pub fn locals(&self) -> &Locals { + pub fn locals(&self) -> &FunLocals { &self.locals } + + pub fn dump(&self, writer: &mut dyn Write, const_pool: &ConstPool, globals: &FunLocals) -> io::Result<()> { + fn num_digits(n: usize, radix: usize) -> usize { + ((n as f64) + 1.0).log(radix as f64).ceil() as usize + } + + // column widths + let addr_w = num_digits(self.code().len(), 16).max(4); + let inst_col = 16 - addr_w; + let inst_w = 16; + + for (addr, inst) in self.code().iter().enumerate() { + let (param_val, mut param_name) = match inst { + Inst::PushSym(sym) | Inst::GetAttr(sym) | Inst::SetAttr(sym) => ( + sym.index().to_string(), + global_sym_lookup(*sym).unwrap().to_string(), + ), + Inst::PushConst(hdl) => ( + hdl.index().to_string(), + { + let obj_ref = const_pool.get(*hdl); + read_obj!(let obj = obj_ref); + format!("{:?}", obj) + }, + ), + Inst::LoadLocal(local) => ( + local.index().to_string(), + global_sym_lookup(self.locals()[local.index()]).unwrap().to_string(), + ), + Inst::PopLocal(local) => { + if let Some(local) = local { + let index = local.index(); + let sym = self.locals()[index]; + let name = global_sym_lookup(sym).unwrap().to_string(); + (index.to_string(), name) + } else { + ("(discarded)".to_string(), String::new()) + } + } + Inst::PopGlobal(global) => { + if let Some(global) = global{ + let index = global.index(); + let sym = globals[index]; + let name = global_sym_lookup(sym).unwrap().to_string(); + (index.to_string(), name) + } else { + ("(discarded)".to_string(), String::new()) + } + } + Inst::Jump(addr) | Inst::JumpTrue(addr) => ( + addr.to_string(), + String::new(), + ), + Inst::Call(argc) => ( + argc.to_string(), + String::new(), + ), + _ => (String::new(), String::new()), + }; + + if !param_name.is_empty() { + param_name = format!("({})", param_name); + } + + writeln!( + writer, + "{:0addr_w$x}{space: >inst_col$}{: ; -pub type Locals = BTreeMap; diff --git a/src/vm/frame.rs b/src/vm/frame.rs index cee9781..418955d 100644 --- a/src/vm/frame.rs +++ b/src/vm/frame.rs @@ -1,5 +1,6 @@ use crate::obj::prelude::*; use shredder::{GcSafe, Scan, Scanner}; +use std::collections::BTreeMap; /// A stack call frame. #[derive(Debug, Clone)] @@ -25,20 +26,20 @@ unsafe impl GcSafe for FrameKind {} #[derive(Scan, Debug, Clone)] pub struct Frame { - locals: Names, + locals: FrameLocals, kind: FrameKind, } impl Frame { - pub fn new(locals: Names, kind: FrameKind) -> Self { + pub fn new(locals: FrameLocals, kind: FrameKind) -> Self { Self { locals, kind, } } - pub fn locals(&self) -> &Names { + pub fn locals(&self) -> &FrameLocals { &self.locals } - pub fn locals_mut(&mut self) -> &mut Names { + pub fn locals_mut(&mut self) -> &mut FrameLocals { &mut self.locals } @@ -46,3 +47,5 @@ impl Frame { &self.kind } } + +pub type FrameLocals = BTreeMap; diff --git a/src/vm/inst.rs b/src/vm/inst.rs index d908ffc..a8d534d 100644 --- a/src/vm/inst.rs +++ b/src/vm/inst.rs @@ -10,10 +10,16 @@ pub enum Inst { PushConst(ConstHandle), /// Looks up and pushes a local value. - LoadName(Name), + LoadLocal(Name), + + /// Looks up and pushes a global value. + LoadGlobal(Name), /// Pop a value from the stack, possibly into a local symbol. - Pop(Option), + PopLocal(Option), + + /// Pop a value from the stack, possibly into a global symbol. + PopGlobal(Option), /// Pops a symbol value and an object reference. /// @@ -119,8 +125,10 @@ impl Inst { match self { Inst::PushSym(_) => "PUSH_SYM", Inst::PushConst(_) => "PUSH_CONST", - Inst::LoadName(_) => "LOAD_NAME", - Inst::Pop(_) => "POP", + Inst::LoadLocal(_) => "LOAD_LOCAL", + Inst::LoadGlobal(_) => "LOAD_GLOBAL", + Inst::PopLocal(_) => "POP_LOCAL", + Inst::PopGlobal(_) => "POP_GLOBAL", Inst::GetAttr(_) => "GET_ATTR", Inst::SetAttr(_) => "SET_ATTR", Inst::Jump(_) => "JUMP", diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 84af19f..c6c3130 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -143,14 +143,16 @@ impl<'p> Vm<'p> { fn begin_call(&mut self, caller: ObjRef, args: Vec) { // create stack frame let stack_frame = if let Some(user_fun) = std::any::Any::downcast_ref::(&caller) { - let names: Names = { + let names: FrameLocals = { read_obj!(let fun_ref = user_fun); fun_ref.locals() .iter() + .enumerate() .zip(args.into_iter()) - .map(|((_, v), arg)| (*v, arg.clone())) + .map(|((index, _), arg)| (index, arg.clone())) .collect() }; + // TODO : check function arity vs argument count Frame::new(names, FrameKind::User { last_pc: self.pc(), @@ -202,20 +204,31 @@ impl<'p> Vm<'p> { .clone(); self.push(obj_ref); } - Inst::LoadName(name) => { + Inst::LoadLocal(local) => { let obj_ref = self.frames() .iter() .rev() - .filter_map(|frame| frame.locals().get(&name)) + .filter_map(|frame| frame.locals().get(&local.index())) .cloned() .next(); if let Some(obj_ref) = obj_ref { self.push(obj_ref); } else { - todo!("TODO: implement \"name value not found\" lookup") + todo!("TODO: implement \"local value not found\" lookup") } } - Inst::Pop(name) => { + Inst::LoadGlobal(global) => { + let obj_ref = self.frames() + .first() + .and_then(|frame| frame.locals().get(&global.index())) + .cloned(); + if let Some(obj_ref) = obj_ref { + self.push(obj_ref); + } else { + todo!("TODO: implement \"local value not found\" lookup") + } + } + Inst::PopLocal(name) => { let tos = self.pop() .expect("stack underflow"); // pop into name @@ -224,6 +237,15 @@ impl<'p> Vm<'p> { } // else discard } + Inst::PopGlobal(name) => { + let tos = self.pop() + .expect("stack underflow"); + // pop into name + if let Some(name) = name { + self.set_global(name, tos); + } + // else discard + } Inst::GetAttr(sym) => { let obj_ref = self.pop().expect("getattr object"); read_obj!(let obj = obj_ref); @@ -281,21 +303,20 @@ impl<'p> Vm<'p> { } fn set_local(&mut self, name: Name, value: ObjRef) { - for frame in self.frames.iter_mut().rev() { - let locals = frame.locals_mut(); - if locals.contains_key(&name) { - locals.insert(name, value); - return; - } - } - unreachable!("unregistered name {:?}", name); + let frame = self.frame_mut().unwrap(); + let locals = frame.locals_mut(); + locals.insert(name.index(), value); + } + + fn set_global(&mut self, name: Name, value: ObjRef) { + let frame = self.frames.first_mut().unwrap(); + let locals = frame.locals_mut(); + locals.insert(name.index(), value); } fn get_local(&mut self, name: Name) -> Option { - self.frames.iter() - .rev() - .filter_map(|frame| frame.locals().get(&name)) - .next() + self.frame() + .and_then(|frame| frame.locals().get(&name.index())) .cloned() } } diff --git a/src/vm/package.rs b/src/vm/package.rs index 48f1a0f..584a936 100644 --- a/src/vm/package.rs +++ b/src/vm/package.rs @@ -1,6 +1,5 @@ use crate::{obj::prelude::*, vm::{consts::ConstPool, inst::Inst}}; use shredder::Scan; -use std::io::{self, Write}; /// A compiled package that can be executed by a VM. #[derive(Scan, Debug)] @@ -31,74 +30,4 @@ impl Package { pub fn code(&self) -> &Vec { &self.code } - - /// Dumps a debug output of this package to the given writer. - pub fn dump(&self, writer: &mut dyn Write) -> io::Result<()> { - // column widths - let addr_w = num_digits(self.code().len(), 16).max(4); - let inst_col = 16 - addr_w; - let inst_w = 16; - - for (addr, inst) in self.code().iter().enumerate() { - let (param_val, mut param_name) = match inst { - Inst::PushSym(sym) | Inst::GetAttr(sym) | Inst::SetAttr(sym) => ( - sym.index().to_string(), - global_sym_lookup(*sym).unwrap().to_string(), - ), - Inst::PushConst(hdl) => ( - hdl.index().to_string(), - { - let obj_ref = self.const_pool().get(*hdl); - read_obj!(let obj = obj_ref); - format!("{:?}", obj) - }, - ), - Inst::LoadName(local) => ( - local.index().to_string(), - global_sym_lookup(self.names()[local.index()]).unwrap().to_string(), - ), - Inst::Pop(local) => { - if let Some(local) = local { - let index = local.index(); - let sym = self.names()[index]; - let name = global_sym_lookup(sym).unwrap().to_string(); - (index.to_string(), name) - } else { - ("(discarded)".to_string(), String::new()) - } - } - Inst::Jump(addr) | Inst::JumpTrue(addr) => ( - addr.to_string(), - String::new(), - ), - Inst::Call(argc) => ( - argc.to_string(), - String::new(), - ), - _ => (String::new(), String::new()), - }; - - if !param_name.is_empty() { - param_name = format!("({})", param_name); - } - - writeln!( - writer, - "{:0addr_w$x}{space: >inst_col$}{: usize { - ((n as f64) + 1.0).log(radix as f64).ceil() as usize }