Update how scope rules work, and update implementation

* Currently, scopes are only allowed to look at their locals and the
  globals. Inner functions cannot refer to values in their parent
  functions. This will change eventually.
* Scope lookup is split between globals and locals. Locals are defined
  in a scope if they are explicitly assigned to.
    * i.e. `a = foo` will treat `a` as a local in the current scope if
      it appears anywhere in that scope. This does not extend to
      setattrs; `a.b = foo` will not trigger `a` into being a local var.
* `Package` objects are no longer returned from the compiler - instead,
  a user function is returned.
* Other various changes and renames

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-09-27 19:33:18 -07:00
parent 4848a342f0
commit 958a6caabb
12 changed files with 304 additions and 171 deletions

View File

@@ -46,11 +46,12 @@ fn main() -> Result<()> {
return Ok(());
}
let package = Compile::new().compile(&ast)?;
let (const_pool, main) = Compile::new().compile(&ast)?;
if action == "dump" {
let mut stdout = std::io::stdout();
package.dump(&mut stdout)?;
not_python::read_obj!(let main_obj = main);
main_obj.dump(&mut stdout, &const_pool, main_obj.locals())?;
return Ok(());
}

View File

@@ -1,18 +1,17 @@
pub mod basic_block;
pub mod error;
mod locals;
mod scope;
pub mod thunk;
use crate::{syn::ast::Body, obj::prelude::*, vm::{consts::*, package::Package}};
use crate::{syn::ast::Body, obj::prelude::*, vm::consts::*};
use scope::*;
use std::collections::HashMap;
#[derive(Default)]
pub struct Compile {
const_data: ConstData,
globals: Locals,
scope: Vec<Locals>,
names: Vec<Sym>,
next_local: Name,
scope: Scope,
}
impl Compile {
@@ -22,12 +21,24 @@ impl Compile {
}
/// Compiles the given AST body.
pub fn compile(mut self, body: &Body) -> error::Result<Package> {
pub fn compile(mut self, body: &Body) -> error::Result<(ConstPool, UserFunRef)> {
self.push_scope_layer();
let main = thunk::CompileBody::new(&mut self)
.compile(body)?
.flatten()
.to_vec();
Ok(Package::new(self.names, self.const_data.const_pool, main))
let globals_syms: std::collections::BTreeMap<_, _> = self.pop_scope_layer().unwrap()
.into_iter()
.map(|(sym, name)| (name, sym))
.collect();
let globals = globals_syms.into_iter()
.enumerate()
.map(|(index, (name, sym))| {
assert_eq!(index, name.index());
sym
})
.collect();
Ok((self.const_data.const_pool, UserFun::new_obj(main, globals)))
}
/// Gets the constant data that is interned in this compile session.
@@ -55,64 +66,34 @@ impl Compile {
self.const_data_mut().push(obj)
}
/// Looks up a variable name.
pub fn lookup_scope(&mut self, sym: Sym) -> Option<Name> {
self.scope
.iter()
.rev()
.filter_map(|locals| locals.get(&sym))
.next()
.or_else(|| self.globals.get(&sym))
.copied()
/// Looks up a local variable name.
pub fn lookup_local(&mut self, sym: Sym) -> Option<Name> {
self.scope.lookup_local(sym)
}
/// Looks up a variable name, or creates a global name if it doesn't exist.
pub fn lookup_scope_or_create_global(&mut self, sym: Sym) -> Name {
if let Some(local) = self.lookup_scope(sym) {
local
} else {
self.create_global(sym)
}
/// Looks up a global variable name.
pub fn lookup_global(&mut self, sym: Sym) -> Option<Name> {
self.scope.lookup_global(sym)
}
/// Creates a new local variable if it does not exist in the current local scope.
pub(crate) fn create_local(&mut self, sym: Sym) -> Name {
let locals = self.scope.last_mut().expect("scope");
if let Some(local) = locals.get(&sym) {
*local
} else {
// wish I could use mem::replace here, oh well
let local = self.next_local;
self.next_local = self.next_local.next();
locals.insert(sym, local);
self.names.push(sym);
assert_eq!(self.names.len(), self.next_local.index());
local
}
self.scope.create_local(sym)
}
/// Creates a new global variable if it does not exist in the current global scope.
pub fn create_global(&mut self, sym: Sym) -> Name {
if let Some(global) = self.globals.get(&sym) {
*global
} else {
let global = self.next_local;
self.next_local = self.next_local.next();
self.globals.insert(sym, global);
self.names.push(sym);
assert_eq!(self.names.len(), self.next_local.index());
global
}
self.scope.create_global(sym)
}
/// Pushes an empty scope layer of local variables.
pub(crate) fn push_scope_layer(&mut self) {
self.scope.push(Default::default());
pub fn push_scope_layer(&mut self) {
self.scope.push_layer()
}
/// Pops a scope layer of local variables, if any are available.
pub(crate) fn pop_scope_layer(&mut self) -> Option<Locals> {
self.scope.pop()
pub fn pop_scope_layer(&mut self) -> Option<ScopeLocalSyms> {
self.scope.pop_layer()
}
/// Collects local variables for the given AST body non-recursively.

86
src/compile/scope.rs Normal file
View File

@@ -0,0 +1,86 @@
use crate::obj::prelude::*;
use std::collections::BTreeMap;
// current TODO/problem: How should locals work?
//
// * I think we should have a global/local dichotomy in the VM. You can only look up the local
// scope in the VM; inner functions that have captures should be created on the fly and detected
// by the compiler. I.E. any lookups that are not local or global should be captured dynamically.
// * Need to differentiate between locals collected on the compilation side vs. locals on the
// object/vm side
pub type ScopeLocalSyms = BTreeMap<Sym, Name>;
pub type ScopeLocals = Vec<Sym>;
pub struct Scope {
scope: Vec<ScopeLocalSyms>,
}
impl Scope {
pub fn lookup_local(&self, sym: Sym) -> Option<Name> {
self.locals()
.and_then(|locals| locals.get(&sym))
.copied()
}
pub fn lookup_global(&self, sym: Sym) -> Option<Name> {
self.globals()
.and_then(|globals| globals.get(&sym))
.copied()
}
pub fn create_local(&mut self, sym: Sym) -> Name {
let locals = self.locals_mut()
.expect("locals");
if let Some(local) = locals.get(&sym).copied() {
local
} else {
let name = Name::new(locals.len());
locals.insert(sym, name);
name
}
}
pub fn create_global(&mut self, sym: Sym) -> Name {
let globals = self.globals_mut()
.expect("globals");
if let Some(global) = globals.get(&sym).copied() {
global
} else {
let name = Name::new(globals.len());
globals.insert(sym, name);
name
}
}
pub fn push_layer(&mut self) {
self.scope.push(Default::default());
}
pub fn pop_layer(&mut self) -> Option<ScopeLocalSyms> {
self.scope.pop()
}
pub fn locals(&self) -> Option<&ScopeLocalSyms> {
self.scope.last()
}
pub fn locals_mut(&mut self) -> Option<&mut ScopeLocalSyms> {
self.scope.last_mut()
}
pub fn globals(&self) -> Option<&ScopeLocalSyms> {
self.scope.first()
}
pub fn globals_mut(&mut self) -> Option<&mut ScopeLocalSyms> {
self.scope.first_mut()
}
}
impl Default for Scope {
fn default() -> Self {
// empty global scope
Self { scope: vec![Default::default()] }
}
}

View File

@@ -4,7 +4,7 @@ use crate::{
syn::{ast::*, visit::*},
vm::inst::*,
};
use std::mem;
use std::{collections::BTreeMap, mem};
/// A basic block of VM code.
///
@@ -213,9 +213,7 @@ impl<'c> CompileBody<'c> {
}
pub fn compile(&mut self, body: &'c Body) -> Result<Thunk> {
self.compile.push_scope_layer();
let thunk = self.visit_body(body)?;
self.compile.pop_scope_layer();
Ok(thunk)
}
}
@@ -266,8 +264,12 @@ impl Visit for CompileBody<'_> {
}
LhsExpr::Name(local_name) => {
let sym = global_sym(local_name.to_string());
let local = self.compile.lookup_scope_or_create_global(sym);
thunk = Inst::Pop(Some(local)).into();
if let Some(local) = self.compile.lookup_local(sym) {
thunk = Inst::PopLocal(Some(local)).into();
} else {
let global = self.compile.lookup_global(sym).expect("name expected to exist someplace(?)");
thunk = Inst::PopGlobal(Some(global)).into();
}
}
}
Ok(thunk)
@@ -346,6 +348,12 @@ impl Visit for CompileBody<'_> {
}
fn visit_fun_expr(&mut self, expr: &FunExpr) -> Self::Out {
// TODO(fun) : need captures for functions, built dynamically (or statically?)
// - static is not possible, since captures are *created* at runtime, and there's no
// instruction that will look up just one scope level - it's either locals or globals.
// - an entire "create function" instruction is probably the best way to solve it, don't
// try to be clever, just implement it like that (since I mean, python does too...)
// - push const
// (functions are unique const values so a new function will be created for every literal
// function defined in code)
@@ -357,7 +365,23 @@ impl Visit for CompileBody<'_> {
let sym = global_sym(param.to_string());
self.compile.create_local(sym);
}
let locals = self.compile.pop_scope_layer().unwrap();
// remap (Sym -> Name) to be (Name -> Sym) and make sure it's all in order.
let scope_locals: BTreeMap<_, _> = self.compile.pop_scope_layer()
.unwrap()
.into_iter()
.map(|(sym, name)| (name, sym))
.collect();
// this should be in numeric order since:
// 1. locals are created exactly once or looked up
// 2. scope_locals is a btreemap, keyed by names, which are in order from 0..N
let locals: FunLocals = scope_locals.into_iter()
.enumerate()
.map(|(index, (name, sym))| {
assert_eq!(index, name.index());
sym
})
.collect();
let code = self.visit_body(&expr.body)?
.flatten()
.to_vec();
@@ -373,9 +397,14 @@ impl Visit for CompileBody<'_> {
let thunk = match atom {
Atom::Ident(ident) => {
let sym = global_sym(ident.to_string());
let name = self.compile.lookup_scope_or_create_global(sym);
// get local
Inst::LoadName(name).into()
if let Some(local) = self.compile.lookup_local(sym) {
// get local
Inst::LoadLocal(local).into()
} else {
// get or create global
let global = self.compile.create_global(sym);
Inst::LoadGlobal(global).into()
}
}
Atom::Sym(sym) => {
// push symbol

View File

@@ -1,7 +1,9 @@
use crate::{obj::{reserved::*, prelude::*}, vm::{inst::Inst, signal::*, Vm}};
use crate::{obj::{reserved::*, prelude::*}, vm::{consts::ConstPool, inst::Inst, signal::*, Vm}};
use once_cell::sync::Lazy;
use shredder::{GcSafeWrapper, Scan};
use std::fmt::{Debug, Formatter, self};
use std::{fmt::{Debug, Formatter, self}, io::{self, Write}};
pub type FunLocals = Vec<Sym>;
//
// struct UserFun
@@ -18,11 +20,11 @@ pub struct UserFun {
code: Vec<Inst>,
// Safe because this is just an interner that points to symbols, which aren't GC'd
#[shredder(unsafe_skip)]
locals: Locals,
locals: FunLocals,
}
impl UserFun {
pub fn new_obj(code: Vec<Inst>, locals: Locals) -> UserFunRef {
pub fn new_obj(code: Vec<Inst>, locals: FunLocals) -> UserFunRef {
let obj_ref = ObjRef::new(UserFun {
vtable: Default::default(), // this is a placeholder for the real vtable
attrs: Default::default(),
@@ -47,9 +49,88 @@ impl UserFun {
&self.code
}
pub fn locals(&self) -> &Locals {
pub fn locals(&self) -> &FunLocals {
&self.locals
}
pub fn dump(&self, writer: &mut dyn Write, const_pool: &ConstPool, globals: &FunLocals) -> io::Result<()> {
fn num_digits(n: usize, radix: usize) -> usize {
((n as f64) + 1.0).log(radix as f64).ceil() as usize
}
// column widths
let addr_w = num_digits(self.code().len(), 16).max(4);
let inst_col = 16 - addr_w;
let inst_w = 16;
for (addr, inst) in self.code().iter().enumerate() {
let (param_val, mut param_name) = match inst {
Inst::PushSym(sym) | Inst::GetAttr(sym) | Inst::SetAttr(sym) => (
sym.index().to_string(),
global_sym_lookup(*sym).unwrap().to_string(),
),
Inst::PushConst(hdl) => (
hdl.index().to_string(),
{
let obj_ref = const_pool.get(*hdl);
read_obj!(let obj = obj_ref);
format!("{:?}", obj)
},
),
Inst::LoadLocal(local) => (
local.index().to_string(),
global_sym_lookup(self.locals()[local.index()]).unwrap().to_string(),
),
Inst::PopLocal(local) => {
if let Some(local) = local {
let index = local.index();
let sym = self.locals()[index];
let name = global_sym_lookup(sym).unwrap().to_string();
(index.to_string(), name)
} else {
("(discarded)".to_string(), String::new())
}
}
Inst::PopGlobal(global) => {
if let Some(global) = global{
let index = global.index();
let sym = globals[index];
let name = global_sym_lookup(sym).unwrap().to_string();
(index.to_string(), name)
} else {
("(discarded)".to_string(), String::new())
}
}
Inst::Jump(addr) | Inst::JumpTrue(addr) => (
addr.to_string(),
String::new(),
),
Inst::Call(argc) => (
argc.to_string(),
String::new(),
),
_ => (String::new(), String::new()),
};
if !param_name.is_empty() {
param_name = format!("({})", param_name);
}
writeln!(
writer,
"{:0addr_w$x}{space: >inst_col$}{: <inst_w$}{: <4} {}",
addr,
inst.name(),
param_val,
param_name,
space = "",
addr_w = addr_w,
inst_w = inst_w,
inst_col = inst_col,
)?;
}
Ok(())
}
}
impl_obj!(UserFun);

View File

@@ -5,7 +5,7 @@ pub mod attrs;
pub mod fun;
pub mod int;
pub mod intern;
pub mod names;
pub mod name;
pub mod reserved;
pub mod str;
pub mod sym;
@@ -13,7 +13,7 @@ pub mod ty;
pub mod prelude {
pub use crate::obj::{
attrs::*, fun::*, int::*, intern::*, names::*, str::*, sym::*, ty::*, Obj, ObjRef,
attrs::*, fun::*, int::*, intern::*, name::*, str::*, sym::*, ty::*, Obj, ObjRef,
};
}

1
src/obj/name.rs Normal file
View File

@@ -0,0 +1 @@
handle_type!(Name);

View File

@@ -1,7 +0,0 @@
use crate::obj::prelude::*;
use std::collections::BTreeMap;
handle_type!(Name);
pub type Names = BTreeMap<Name, ObjRef>;
pub type Locals = BTreeMap<Sym, Name>;

View File

@@ -1,5 +1,6 @@
use crate::obj::prelude::*;
use shredder::{GcSafe, Scan, Scanner};
use std::collections::BTreeMap;
/// A stack call frame.
#[derive(Debug, Clone)]
@@ -25,20 +26,20 @@ unsafe impl GcSafe for FrameKind {}
#[derive(Scan, Debug, Clone)]
pub struct Frame {
locals: Names,
locals: FrameLocals,
kind: FrameKind,
}
impl Frame {
pub fn new(locals: Names, kind: FrameKind) -> Self {
pub fn new(locals: FrameLocals, kind: FrameKind) -> Self {
Self { locals, kind, }
}
pub fn locals(&self) -> &Names {
pub fn locals(&self) -> &FrameLocals {
&self.locals
}
pub fn locals_mut(&mut self) -> &mut Names {
pub fn locals_mut(&mut self) -> &mut FrameLocals {
&mut self.locals
}
@@ -46,3 +47,5 @@ impl Frame {
&self.kind
}
}
pub type FrameLocals = BTreeMap<usize, ObjRef>;

View File

@@ -10,10 +10,16 @@ pub enum Inst {
PushConst(ConstHandle),
/// Looks up and pushes a local value.
LoadName(Name),
LoadLocal(Name),
/// Looks up and pushes a global value.
LoadGlobal(Name),
/// Pop a value from the stack, possibly into a local symbol.
Pop(Option<Name>),
PopLocal(Option<Name>),
/// Pop a value from the stack, possibly into a global symbol.
PopGlobal(Option<Name>),
/// Pops a symbol value and an object reference.
///
@@ -119,8 +125,10 @@ impl Inst {
match self {
Inst::PushSym(_) => "PUSH_SYM",
Inst::PushConst(_) => "PUSH_CONST",
Inst::LoadName(_) => "LOAD_NAME",
Inst::Pop(_) => "POP",
Inst::LoadLocal(_) => "LOAD_LOCAL",
Inst::LoadGlobal(_) => "LOAD_GLOBAL",
Inst::PopLocal(_) => "POP_LOCAL",
Inst::PopGlobal(_) => "POP_GLOBAL",
Inst::GetAttr(_) => "GET_ATTR",
Inst::SetAttr(_) => "SET_ATTR",
Inst::Jump(_) => "JUMP",

View File

@@ -143,14 +143,16 @@ impl<'p> Vm<'p> {
fn begin_call(&mut self, caller: ObjRef, args: Vec<ObjRef>) {
// create stack frame
let stack_frame = if let Some(user_fun) = std::any::Any::downcast_ref::<UserFunRef>(&caller) {
let names: Names = {
let names: FrameLocals = {
read_obj!(let fun_ref = user_fun);
fun_ref.locals()
.iter()
.enumerate()
.zip(args.into_iter())
.map(|((_, v), arg)| (*v, arg.clone()))
.map(|((index, _), arg)| (index, arg.clone()))
.collect()
};
// TODO : check function arity vs argument count
Frame::new(names, FrameKind::User {
last_pc: self.pc(),
@@ -202,20 +204,31 @@ impl<'p> Vm<'p> {
.clone();
self.push(obj_ref);
}
Inst::LoadName(name) => {
Inst::LoadLocal(local) => {
let obj_ref = self.frames()
.iter()
.rev()
.filter_map(|frame| frame.locals().get(&name))
.filter_map(|frame| frame.locals().get(&local.index()))
.cloned()
.next();
if let Some(obj_ref) = obj_ref {
self.push(obj_ref);
} else {
todo!("TODO: implement \"name value not found\" lookup")
todo!("TODO: implement \"local value not found\" lookup")
}
}
Inst::Pop(name) => {
Inst::LoadGlobal(global) => {
let obj_ref = self.frames()
.first()
.and_then(|frame| frame.locals().get(&global.index()))
.cloned();
if let Some(obj_ref) = obj_ref {
self.push(obj_ref);
} else {
todo!("TODO: implement \"local value not found\" lookup")
}
}
Inst::PopLocal(name) => {
let tos = self.pop()
.expect("stack underflow");
// pop into name
@@ -224,6 +237,15 @@ impl<'p> Vm<'p> {
}
// else discard
}
Inst::PopGlobal(name) => {
let tos = self.pop()
.expect("stack underflow");
// pop into name
if let Some(name) = name {
self.set_global(name, tos);
}
// else discard
}
Inst::GetAttr(sym) => {
let obj_ref = self.pop().expect("getattr object");
read_obj!(let obj = obj_ref);
@@ -281,21 +303,20 @@ impl<'p> Vm<'p> {
}
fn set_local(&mut self, name: Name, value: ObjRef) {
for frame in self.frames.iter_mut().rev() {
let locals = frame.locals_mut();
if locals.contains_key(&name) {
locals.insert(name, value);
return;
}
}
unreachable!("unregistered name {:?}", name);
let frame = self.frame_mut().unwrap();
let locals = frame.locals_mut();
locals.insert(name.index(), value);
}
fn set_global(&mut self, name: Name, value: ObjRef) {
let frame = self.frames.first_mut().unwrap();
let locals = frame.locals_mut();
locals.insert(name.index(), value);
}
fn get_local(&mut self, name: Name) -> Option<ObjRef> {
self.frames.iter()
.rev()
.filter_map(|frame| frame.locals().get(&name))
.next()
self.frame()
.and_then(|frame| frame.locals().get(&name.index()))
.cloned()
}
}

View File

@@ -1,6 +1,5 @@
use crate::{obj::prelude::*, vm::{consts::ConstPool, inst::Inst}};
use shredder::Scan;
use std::io::{self, Write};
/// A compiled package that can be executed by a VM.
#[derive(Scan, Debug)]
@@ -31,74 +30,4 @@ impl Package {
pub fn code(&self) -> &Vec<Inst> {
&self.code
}
/// Dumps a debug output of this package to the given writer.
pub fn dump(&self, writer: &mut dyn Write) -> io::Result<()> {
// column widths
let addr_w = num_digits(self.code().len(), 16).max(4);
let inst_col = 16 - addr_w;
let inst_w = 16;
for (addr, inst) in self.code().iter().enumerate() {
let (param_val, mut param_name) = match inst {
Inst::PushSym(sym) | Inst::GetAttr(sym) | Inst::SetAttr(sym) => (
sym.index().to_string(),
global_sym_lookup(*sym).unwrap().to_string(),
),
Inst::PushConst(hdl) => (
hdl.index().to_string(),
{
let obj_ref = self.const_pool().get(*hdl);
read_obj!(let obj = obj_ref);
format!("{:?}", obj)
},
),
Inst::LoadName(local) => (
local.index().to_string(),
global_sym_lookup(self.names()[local.index()]).unwrap().to_string(),
),
Inst::Pop(local) => {
if let Some(local) = local {
let index = local.index();
let sym = self.names()[index];
let name = global_sym_lookup(sym).unwrap().to_string();
(index.to_string(), name)
} else {
("(discarded)".to_string(), String::new())
}
}
Inst::Jump(addr) | Inst::JumpTrue(addr) => (
addr.to_string(),
String::new(),
),
Inst::Call(argc) => (
argc.to_string(),
String::new(),
),
_ => (String::new(), String::new()),
};
if !param_name.is_empty() {
param_name = format!("({})", param_name);
}
writeln!(
writer,
"{:0addr_w$x}{space: >inst_col$}{: <inst_w$}{: <4} {}",
addr,
inst.name(),
param_val,
param_name,
space = "",
addr_w = addr_w,
inst_w = inst_w,
inst_col = inst_col,
)?;
}
Ok(())
}
}
fn num_digits(n: usize, radix: usize) -> usize {
((n as f64) + 1.0).log(radix as f64).ceil() as usize
}