diff --git a/src/bin/not.rs b/src/bin/not.rs
index 7a7d530..9f42b74 100644
--- a/src/bin/not.rs
+++ b/src/bin/not.rs
@@ -1,4 +1,4 @@
-use not_python::{compile::Compile, syn::ast, vm::Vm};
+use not_python::{compile::prelude::*, syn::ast, vm::Vm};
 use shredder::run_with_gc_cleanup;
 use std::{fs, path::PathBuf};
 
diff --git a/src/compile/list.rs b/src/compile/list.rs
new file mode 100644
index 0000000..b74a9ff
--- /dev/null
+++ b/src/compile/list.rs
@@ -0,0 +1,435 @@
+//! A small intermediate form that sits between the AST and [`Thunk`]s.
+//!
+//! [`CompileList`] lowers AST nodes into [`List`] values; [`List::thunkify`] then turns a
+//! [`List`] into a [`Thunk`] of VM instructions.
+
+use crate::{
+    compile::{Compile, thunk::{Thunk, ThunkBranch}},
+    obj::{prelude::*, reserved::*},
+    syn::{ast::*, visit::*},
+    vm::inst::Inst,
+};
+use std::collections::BTreeMap;
+
+/// Intermediate representation produced by [`CompileList`] and consumed by [`List::thunkify`].
+#[derive(Debug, Clone, PartialEq)]
+pub enum List {
+    /// A symbol literal.
+    Sym(String),
+    /// A name lookup, resolved to a local or a global load.
+    Ident(String),
+    /// An integer constant.
+    Int(IntValue),
+    /// A string constant.
+    String(String),
+    /// A conditional; `el` is always present.
+    If {
+        cond: Box<List>,
+        body: Box<List>,
+        el: Box<List>,
+    },
+    /// A function literal with named parameters and a body.
+    Lambda {
+        params: Vec<String>,
+        expr: Box<List>,
+    },
+    /// Assignment of `rhs` to the variable `name`.
+    Assign {
+        name: String,
+        rhs: Box<List>,
+    },
+    /// Attribute lookup of `access` on the value of `expr`.
+    Access {
+        expr: Box<List>,
+        access: String,
+    },
+    /// Attribute update; see the note in [`List::thunkify`].
+    Update {
+        expr: Box<List>,
+        name: String,
+        value: Box<List>,
+    },
+    /// Evaluates the inner list and returns its value.
+    Return(Box<List>),
+    /// Calls the first element with the given arguments.
+    Call(Box<List>, Vec<List>),
+    /// A sequence of lists compiled one after another.
+    Do(Vec<List>),
+}
+
+impl List {
+    /// Consumes this list and compiles it into a [`Thunk`].
+    ///
+    /// Constants, globals and scope layers are registered on `compile` along the way.
+    ///
+    /// # Panics
+    ///
+    /// Panics when a [`List::Assign`] names a variable that is neither a known local nor a known
+    /// global, or when the locals of a [`List::Lambda`] scope are not numbered `0..N`.
+    pub fn thunkify(self, compile: &mut Compile) -> Thunk {
+        match self {
+            List::Sym(sym) => Inst::PushSym(global_sym(sym)).into(),
+            List::Ident(ident) => {
+                // Small gotcha:
+                // Looking up a name will either result in a local or a global lookup. If it's
+                // a local variable first, then it's determined as a local and that's the end
+                // of the story... except when we're at the top scope level, we're both "local"
+                // *and* global.
+                //
+                // This checks to make sure that it's both a local variable and that there's more
+                // than one scope layer.
+                let sym = global_sym(ident);
+                if let (true, Some(local)) = (
+                    compile.scope().layers_len() > 1,
+                    compile.lookup_local(sym),
+                ) {
+                    // get local
+                    Inst::LoadLocal(local).into()
+                } else {
+                    // get or create global
+                    // create_global only makes a new global with this symbol name if one has not
+                    // been created yet
+                    let global = compile.create_global(sym);
+                    Inst::LoadGlobal(global).into()
+                }
+            }
+            List::Int(int) => {
+                // push const
+                let (hdl, _) = compile.const_int(int);
+                Inst::PushConst(hdl).into()
+            }
+            List::String(s) => {
+                // push const
+                let (hdl, _) = compile.const_str(s);
+                Inst::PushConst(hdl).into()
+            }
+            List::If { cond, body, el } => {
+                let mut preamble = cond.thunkify(compile);
+                // push CheckTruth here since there's not much of a better place to do so
+                preamble.push_thunk(vec![
+                    Inst::GetAttr(BOOL_MEMBER_NAME.sym),
+                    Inst::Call(0),
+                    Inst::CheckTruth,
+                ]);
+                let thunk_true = body.thunkify(compile).into();
+                let thunk_false = el.thunkify(compile).into();
+                Thunk::Branch(ThunkBranch {
+                    preamble: preamble.into(),
+                    thunk_true,
+                    thunk_false,
+                })
+            }
+            List::Lambda { params, expr } => {
+                // TODO(fun) : need captures for functions, built dynamically (or statically?)
+                // - static is not possible, since captures are *created* at runtime, and there's
+                //   no instruction that will look up just one scope level - it's either locals
+                //   or globals.
+                // - an entire "create function" instruction is probably the best way to solve
+                //   it, don't try to be clever, just implement it like that (since I mean,
+                //   python does too...)
+
+                // - push const
+                // (functions are unique const values so a new function will be created for
+                // every literal function defined in code)
+
+                // This is pretty much the only place where a new scope layer gets pushed beyond
+                // the start of the program
+                compile.push_scope_layer();
+                let params_len = params.len();
+                for param in params {
+                    compile.create_local(global_sym(param));
+                }
+
+                // Compile function body
+                let mut code = expr.thunkify(compile)
+                    .flatten()
+                    .to_vec();
+
+                // If the last instruction is not a return, or if there are no instructions,
+                // then return :nil value.
+                if !matches!(code.last(), Some(Inst::Return)) {
+                    code.push(Inst::PushSym(NIL_NAME.sym));
+                    code.push(Inst::Return);
+                }
+
+                // remap (Sym -> Name) to be (Name -> Sym) and make sure it's all in order.
+                let scope_locals: BTreeMap<_, _> = compile
+                    .pop_scope_layer()
+                    .expect("the scope layer pushed for this function")
+                    .into_iter()
+                    .map(|(sym, name)| (name, sym))
+                    .collect();
+
+                // this should be in numeric order since:
+                // 1. locals are created exactly once or looked up
+                // 2. scope_locals is a btreemap, keyed by names, which are in order from 0..N
+                let locals: FunLocals = scope_locals
+                    .into_iter()
+                    .enumerate()
+                    .map(|(index, (name, sym))| {
+                        assert_eq!(index, name.index());
+                        sym
+                    })
+                    .collect();
+
+                let (hdl, _fun) =
+                    compile.push_const(UserFun::new_obj(code, locals, params_len));
+
+                // TODO(compile) : determine return value at the end of the body (preferably at
+                // parse-time)
+
+                // oh yeah, we were compiling a function body weren't we
+                Inst::PushConst(hdl).into()
+            }
+            List::Assign { name, rhs } => {
+                let mut thunk = rhs.thunkify(compile);
+                let sym = global_sym(name);
+                if let Some(local) = compile.lookup_local(sym) {
+                    thunk.push(Inst::PopLocal(Some(local)));
+                } else {
+                    let global = compile.lookup_global(sym)
+                        .expect("name expected to exist someplace(?)");
+                    thunk.push(Inst::PopGlobal(Some(global)));
+                }
+                thunk
+            }
+            List::Access { expr, access } => {
+                let mut thunk = expr.thunkify(compile);
+                thunk.push(Inst::GetAttr(global_sym(access)));
+                thunk
+            }
+            List::Update { expr, name, value } => {
+                // NOTE(review): this pushes the target, the attribute name and the new value,
+                // but emits no instruction that performs the update. `CompileList` lowers
+                // attribute assignment to a `Call` instead, so this arm looks unfinished.
+                let mut thunk = expr.thunkify(compile);
+                let (hdl, _) = compile.const_str(name);
+                thunk.push(Inst::PushConst(hdl));
+                thunk.push_thunk(value.thunkify(compile));
+                thunk
+            }
+            List::Return(expr) => {
+                let mut thunk = expr.thunkify(compile);
+                thunk.push(Inst::Return);
+                thunk
+            }
+            List::Call(fun, args) => {
+                let argc = args.len();
+                let mut thunk = fun.thunkify(compile);
+                for arg in args {
+                    thunk.push_thunk(arg.thunkify(compile));
+                }
+                thunk.push(Inst::Call(argc));
+                thunk
+            }
+            List::Do(stmts) => {
+                Thunk::List(stmts.into_iter()
+                    .map(|stmt| stmt.thunkify(compile))
+                    .collect())
+            }
+        }
+    }
+}
+
+/// AST visitor that lowers syntax nodes into [`List`] values.
+pub struct CompileList<'c> {
+    compile: &'c mut Compile,
+}
+
+impl<'c> CompileList<'c> {
+    /// Creates a visitor that records locals on the given compiler.
+    pub fn new(compile: &'c mut Compile) -> Self {
+        Self { compile }
+    }
+
+    /// Lowers the `elif` chain and the optional `else` body into nested [`List::If`] values.
+    ///
+    /// A missing `else` body lowers to the nil symbol.
+    fn visit_elif_el(&mut self, elif: &[CondBody], el: &Option<Body>) -> List {
+        match (elif, el) {
+            ([cond_body, tail @ ..], _) => {
+                let cond = self.visit_expr(&cond_body.cond);
+                let body = self.visit_body(&cond_body.body);
+                let el = self.visit_elif_el(tail, el);
+                List::If {
+                    cond: cond.into(),
+                    body: body.into(),
+                    el: el.into(),
+                }
+            }
+            ([], Some(body)) => self.visit_body(body),
+            ([], None) => List::Sym(NIL_NAME.name.to_string()),
+        }
+    }
+}
+
+impl<'c> Visit for CompileList<'c> {
+    type Out = List;
+
+    /// Collects the locals of `body` on the compiler, then lowers every statement into a
+    /// [`List::Do`].
+    ///
+    /// NOTE(review): this runs while visiting, i.e. before [`List::thunkify`] pushes the scope
+    /// layer of an enclosing [`List::Lambda`] - confirm that function-body locals end up in the
+    /// intended layer.
+    fn visit_body(&mut self, body: &Body) -> Self::Out {
+        self.compile.collect_locals(body);
+        List::Do(body.iter()
+            .map(|stmt| self.visit_stmt(stmt))
+            .collect())
+    }
+
+    fn visit_stmt(&mut self, stmt: &Stmt) -> Self::Out {
+        DefaultAccept::default_accept(stmt, self)
+    }
+
+    /// Lowers attribute assignment to a call of the set-attr member and plain assignment to
+    /// [`List::Assign`].
+    fn visit_assign_stmt(&mut self, assign: &AssignStmt) -> Self::Out {
+        match &assign.lhs {
+            LhsExpr::SetAttr(access) => List::Call(
+                List::Access {
+                    expr: self.visit_expr(&access.expr).into(),
+                    access: SET_ATTR_MEMBER_NAME.name.to_string(),
+                }.into(),
+                vec![
+                    List::Sym(access.access.to_string()),
+                    self.visit_expr(&assign.rhs),
+                ]
+            ),
+            LhsExpr::Name(name) => List::Assign {
+                name: name.to_string(),
+                rhs: self.visit_expr(&assign.rhs).into(),
+            }
+        }
+    }
+
+    fn visit_lhs_expr(&mut self, _lhs_expr: &LhsExpr) -> Self::Out {
+        // visit_assign_stmt matches on the left-hand side itself
+        unreachable!()
+    }
+
+    /// Lowers `return`; a bare `return` returns the nil symbol.
+    fn visit_return_stmt(&mut self, ret: &ReturnStmt) -> Self::Out {
+        if let Some(expr) = &ret.expr {
+            List::Return(self.visit_expr(expr).into())
+        } else {
+            List::Return(List::Sym(NIL_NAME.name.to_string()).into())
+        }
+    }
+
+    fn visit_expr(&mut self, expr: &Expr) -> Self::Out {
+        DefaultAccept::default_accept(expr, self)
+    }
+
+    /// Lowers `lhs <op> rhs` to a call of the operator's member on `lhs` with `rhs` as argument.
+    fn visit_bin_expr(&mut self, expr: &BinExpr) -> Self::Out {
+        use BinOp::*;
+        let op_name = match expr.op {
+            Plus => PLUS_OP_NAME.name,
+            Minus => MINUS_OP_NAME.name,
+            Times => TIMES_OP_NAME.name,
+            Div => DIV_OP_NAME.name,
+            Eq => EQ_OP_NAME.name,
+            Neq => NE_OP_NAME.name,
+            Lt => LT_OP_NAME.name,
+            Le => LE_OP_NAME.name,
+            Gt => GT_OP_NAME.name,
+            Ge => GE_OP_NAME.name,
+            And => AND_OP_NAME.name,
+            Or => OR_OP_NAME.name,
+        }.to_string();
+        List::Call(
+            List::Access {
+                expr: self.visit_expr(&expr.lhs).into(),
+                access: op_name,
+            }.into(),
+            vec![self.visit_expr(&expr.rhs)],
+        )
+    }
+
+    /// Lowers a unary operator to a zero-argument call of the operator's member on the operand.
+    fn visit_un_expr(&mut self, expr: &UnExpr) -> Self::Out {
+        use UnOp::*;
+        let op_name = match expr.op {
+            Plus => POS_OP_NAME.name,
+            Minus => NEG_OP_NAME.name,
+        }.to_string();
+        // The operand is the receiver of the member lookup, so it must not be lowered a second
+        // time as an argument (compare the zero-argument `__bool__` call emitted for `if`).
+        List::Call(
+            List::Access {
+                expr: self.visit_expr(&expr.expr).into(),
+                access: op_name,
+            }.into(),
+            Vec::new(),
+        )
+    }
+
+    /// Lowers a call expression to a call of the callee's call member.
+    fn visit_call_expr(&mut self, expr: &CallExpr) -> Self::Out {
+        let fun = List::Access {
+            expr: self.visit_expr(&expr.expr).into(),
+            access: CALL_MEMBER_NAME.name.to_string(),
+        };
+        let args: Vec<_> = expr.args
+            .iter()
+            .map(|arg| self.visit_expr(arg))
+            .collect();
+        List::Call(fun.into(), args)
+    }
+
+    /// Lowers `expr[index]` to a call of the index member with `index` as argument.
+    fn visit_index_expr(&mut self, expr: &IndexExpr) -> Self::Out {
+        List::Call(
+            List::Access {
+                expr: self.visit_expr(&expr.expr).into(),
+                access: INDEX_MEMBER_NAME.name.to_string(),
+            }.into(),
+            vec![self.visit_expr(&expr.index)],
+        )
+    }
+
+    fn visit_access_expr(&mut self, expr: &AccessExpr) -> Self::Out {
+        List::Access {
+            expr: self.visit_expr(&expr.expr).into(),
+            access: expr.access.clone(),
+        }
+    }
+
+    /// Lowers a function literal; the body is lowered via `visit_body`.
+    fn visit_fun_expr(&mut self, expr: &FunExpr) -> Self::Out {
+        List::Lambda {
+            params: expr.params.clone(),
+            expr: self.visit_body(&expr.body).into(),
+        }
+    }
+
+    /// Lowers `if`/`elif`/`else` to nested [`List::If`] values.
+    fn visit_if_expr(&mut self, expr: &IfExpr) -> Self::Out {
+        let cond = self.visit_expr(&expr.cond_body.cond);
+        let body = self.visit_body(&expr.cond_body.body);
+        let el = self.visit_elif_el(&expr.elif, &expr.el);
+        List::If {
+            cond: cond.into(),
+            body: body.into(),
+            el: el.into(),
+        }
+    }
+
+    fn visit_cond_body(&mut self, _cond_body: &CondBody) -> Self::Out {
+        // condition/body pairs are consumed by visit_if_expr and visit_elif_el
+        unreachable!()
+    }
+
+    /// Lowers an atom to the matching literal or identifier variant.
+    fn visit_atom(&mut self, atom: &Atom) -> Self::Out {
+        use Atom::*;
+        match atom {
+            Ident(s) => List::Ident(s.clone()),
+            Sym(s) => List::Sym(s.clone()),
+            Num(n) => List::Int(*n),
+            String(s) => List::String(s.clone()),
+        }
+    }
+}
diff --git a/src/compile/mod.rs b/src/compile/mod.rs
index ad89f39..249a102 100644
--- a/src/compile/mod.rs
+++ b/src/compile/mod.rs
@@ -1,10 +1,23 @@
 pub mod basic_block;
 pub mod error;
+pub mod list;
 mod locals;
 mod scope;
 pub mod thunk;
 
-use crate::{obj::prelude::*, syn::ast::Body, vm::consts::*};
+/// Re-exports of the visitor traits and the main compile types, meant for glob import.
+pub mod prelude {
+    pub use crate::{
+        syn::visit::*,
+        compile::{
+            list::{List, CompileList},
+            thunk::Thunk,
+            Compile,
+        },
+    };
+}
+
+use crate::{obj::prelude::*, syn::{ast::Body, visit::*}, vm::consts::*};
 use scope::*;
 use std::collections::HashMap;
 
@@ -23,8 +36,9 @@ impl Compile {
     /// Compiles the given AST body.
     pub fn compile(mut self, body: &Body) -> error::Result<(ConstPool, UserFunRef)> {
         self.push_scope_layer();
-        let mut main = thunk::CompileBody::new(&mut self)
-            .compile(body)?
+        let mut main = list::CompileList::new(&mut self)
+            .visit_body(body)
+            .thunkify(&mut self)
             .flatten()
             .to_vec();
         // XXX TODO(compile)
diff --git a/src/compile/thunk.rs b/src/compile/thunk.rs
index 4c42ba3..e04801b 100644
--- a/src/compile/thunk.rs
+++ b/src/compile/thunk.rs
@@ -130,9 +130,9 @@ impl From> for Thunk {
 //
 #[derive(Debug, Clone, PartialEq)]
 pub struct ThunkBranch {
-    preamble: Box<Thunk>,
-    thunk_true: Box<Thunk>,
-    thunk_false: Box<Thunk>,
+    pub(crate) preamble: Box<Thunk>,
+    pub(crate) thunk_true: Box<Thunk>,
+    pub(crate) thunk_false: Box<Thunk>,
 }
 
 //
@@ -176,13 +176,19 @@ impl Flatten {
                 );
                 assert!(prev.is_none());
             }
-            Thunk::List(thunks) => {
-                for thunk in thunks.into_iter() {
-                    let next_block = self.this_block() + thunk.basic_block_count();
-                    self.flatten_next(next_block, thunk);
-                    assert_eq!(next_block, self.this_block());
+            Thunk::List(mut thunks) => {
+                // Every thunk but the last continues into the block right after it; the last
+                // one is flattened against the enclosing `next_block`. An empty list emits nothing.
+                if let Some(tail) = thunks.pop() {
+                    for thunk in thunks {
+                        let next_block = self.this_block() + thunk.basic_block_count();
+                        self.flatten_next(next_block, thunk);
+                        assert_eq!(next_block, self.this_block());
+                    }
+                    self.flatten_next(next_block, tail);
                 }
-                assert_eq!(next_block, self.this_block());
+                // don't assert_eq here because the "next_block" really should be interpreted as an
+                // "exit_block"
             }
             Thunk::Branch(ThunkBranch {
                 preamble,
diff --git a/src/obj/reserved.rs b/src/obj/reserved.rs
index 80c1bc3..4cf6c09 100644
--- a/src/obj/reserved.rs
+++ b/src/obj/reserved.rs
@@ -47,10 +47,12 @@ name!(REPR_MEMBER_NAME, "__repr__");
 name!(STR_MEMBER_NAME, "__str__");
 name!(INT_MEMBER_NAME, "__int__");
 name!(BOOL_MEMBER_NAME, "__bool__");
+name!(INDEX_MEMBER_NAME, "__index__");
 
 //
 // Builtin functions
 //
+name!(APPLY_BUILTIN_NAME, "apply");
 name!(PRINTLN_BUILTIN_NAME, "println");
 name!(PRINT_BUILTIN_NAME, "print");
 
@@ -66,7 +68,12 @@ name!(LT_OP_NAME, "__lt__");
 name!(LE_OP_NAME, "__le__");
 name!(GT_OP_NAME, "__gt__");
 name!(GE_OP_NAME, "__ge__");
+name!(AND_OP_NAME, "__and__");
+name!(OR_OP_NAME, "__or__");
+
 name!(PLUS_OP_NAME, "__add__");
 name!(MINUS_OP_NAME, "__sub__");
 name!(TIMES_OP_NAME, "__mul__");
 name!(DIV_OP_NAME, "__div__");
+name!(POS_OP_NAME, "__pos__");
+name!(NEG_OP_NAME, "__neg__");