use crate::{ compile::{basic_block::*, error::*, Compile}, obj::prelude::*, syn::{ast::*, visit::*}, vm::inst::*, }; use std::mem; /// A basic block of VM code. /// /// Thunks are precomputed chunks of code that may allow for branching and/or looping. #[derive(Debug, Clone, PartialEq)] pub enum Thunk { /// A list of instructions. /// /// This is the core of all `Thunk` values. Body(Vec), /// A list of thunks. List(Vec), /// Based on the conditional flag in the VM, code for one of these thunks will be executed. /// /// The conditional flag is expected to be set upon entry to this thunk. /// /// Only one of these thunks will be executed. At the end of either thunk, the program will /// continue at the address following this branch. Branch { thunk_true: Box, thunk_false: Box, }, /// Based on the conditional flag in the VM, code for this loop will continue to execute. /// /// The conditional flag is expected to be set upon entry to this thunk. /// /// At the start of the body, the condition flag is initially checked. If it is not true, /// the program jumps to the end of the body and continues. /// /// At the end of the body, the program jumps back to the start where the condition is checked /// again. Loop(Box), /// A placeholder/default thunk that compiles to nothing. Nop, } impl Thunk { pub fn push(&mut self, append: impl Into) { self.push_thunk(Thunk::Body(vec![append.into()])) } pub fn push_thunk(&mut self, append: impl Into) { let append = append.into(); let inner = mem::replace(self, Thunk::Nop); *self = match (inner, append) { // X + Nop = X (lhs, Thunk::Nop) => lhs, (Thunk::Nop, rhs) => rhs, // Body + Body = Body (Thunk::Body(mut lhs), Thunk::Body(rhs)) => { lhs.extend(rhs); Thunk::Body(lhs) } // List + List = List (Thunk::List(mut lhs), Thunk::List(rhs)) => { lhs.extend(rhs); Thunk::List(lhs) } // List + X = List (Thunk::List(mut lhs), rhs) => { lhs.push(rhs); Thunk::List(lhs) } // X + List = List (lhs, Thunk::List(mut rhs)) => { rhs.insert(0, lhs); Thunk::List(rhs) } // X + X = List (lhs, rhs) => Thunk::List(vec![lhs, rhs]), }; } /// Gets the number of basic blocks that this thunk will produce. /// /// This is necessary for compiling to a basic block, in order to predict the "next block" that /// a thunk will be jumping to. fn basic_block_count(&self) -> usize { match self { Thunk::Body(_) => 1, Thunk::List(thunks) => thunks .iter() .fold(0, |n, thunk| n + thunk.basic_block_count()), Thunk::Branch { thunk_true, thunk_false, // length is true + false block count, + 1 for the branch basic block at the start } => thunk_true.basic_block_count() + thunk_false.basic_block_count() + 1, // length is thunk, + 1 for branch at the start of the loop Thunk::Loop(thunk) => thunk.basic_block_count() + 1, Thunk::Nop => 0, } } pub fn flatten(self) -> BasicBlockList { Flatten::default() .flatten(self) } } impl From for Thunk { fn from(other: Inst) -> Self { Self::from(vec![other]) } } impl From> for Thunk { fn from(other: Vec) -> Self { Thunk::Body(other) } } impl From> for Thunk { fn from(other: Vec) -> Self { Thunk::List(other) } } // // struct Flatten // /// Flattens a thunk into linear list of basic blocks. #[derive(Default)] struct Flatten { // using a btreemap instead of a vec because we can insert things out-of-order blocks: BasicBlockList, } // // impl Flatten // impl Flatten { pub fn flatten(mut self, thunk: Thunk) -> BasicBlockList { // "It's 4pm babe, time for your thunk flattening!" // "Yes, honey..." let last_block = thunk.basic_block_count(); self.flatten_next(last_block, thunk); assert_eq!(self.blocks.len(), last_block); self.blocks } fn flatten_next(&mut self, next_block: usize, thunk: Thunk) { match thunk { Thunk::Body(thunk) => { let this_block = self.this_block(); let prev = self.blocks.insert(this_block, BasicBlock::Block { exit: next_block, block: thunk, }); assert!(prev.is_none()); } Thunk::List(thunks) => { for thunk in thunks.into_iter() { let next_block = self.this_block() + thunk.basic_block_count(); self.flatten_next(next_block, thunk); assert_eq!(next_block, self.this_block()); } assert_eq!(next_block, self.this_block()); } Thunk::Branch { thunk_true, thunk_false, } => { let branch_block = self.this_block(); let block_true = self.this_block() + 1; let block_false = block_true + thunk_true.basic_block_count(); self.blocks.insert(branch_block, BasicBlock::Branch { block_true, block_false, }); self.flatten_next(next_block, *thunk_true); self.flatten_next(next_block, *thunk_false); assert_eq!(self.this_block(), next_block); } Thunk::Loop(_) => todo!(), Thunk::Nop => {} } } fn this_block(&self) -> usize { self.blocks.len() } } // // struct CompileBody // /// Compiles an AST body down to a `Thunk`. /// /// Thunks are the basic building blocks of the IR. Thunks form a chain of decision paths that may /// be taken, which allows an optimizer to remove dead code, detect endless loops, and so on. This /// allows for shrinking blocks of code without having to recalculate jump addresses. pub struct CompileBody<'c> { compile: &'c mut Compile, } impl<'c> CompileBody<'c> { pub fn new(compile: &'c mut Compile) -> Self { CompileBody { compile } } pub fn compile(&mut self, body: &'c Body) -> Result { self.compile.push_scope_layer(); let thunk = self.visit_body(body)?; self.compile.pop_scope_layer(); Ok(thunk) } } // // impl Visit for CompileBody // impl Visit for CompileBody<'_> { // XXX // Trying to "future-proof" by using Result<_> in case there's some reason that an error // may need to be thrown in the future so I don't have to wrap every return value in Ok(_) type Out = Result; fn visit_body(&mut self, body: &Body) -> Self::Out { self.compile.collect_locals(body); let mut thunk = Thunk::Nop; for stmt in body.iter() { thunk.push_thunk(stmt.accept(self)?); } Ok(thunk) } fn visit_stmt(&mut self, stmt: &Stmt) -> Self::Out { DefaultAccept::default_accept(stmt, self) } fn visit_assign_stmt(&mut self, assign: &AssignStmt) -> Self::Out { // - push rhs // - push lhs (which handles the assignment) let mut thunk = self.visit_expr(&assign.rhs)?; thunk.push_thunk(self.visit_lhs_expr(&assign.lhs)?); Ok(thunk) } fn visit_lhs_expr(&mut self, lhs_expr: &LhsExpr) -> Self::Out { // Do different things depending on the LHS let mut thunk; match &lhs_expr { LhsExpr::SetAttr(expr) => { // - push lhs expression (without accessor) // - setattr (access) NOTE : rhs should already be on stack thunk = self.visit_expr(&expr.expr)?; let attr = global_sym(expr.access.to_string()); thunk.push(Inst::SetAttr(attr)); } LhsExpr::Local(local) => { let local = global_sym(local.to_string()); thunk = Inst::Pop(Some(local)).into(); } } Ok(thunk) } fn visit_expr(&mut self, expr: &Expr) -> Self::Out { DefaultAccept::default_accept(expr, self) } fn visit_bin_expr(&mut self, expr: &BinExpr) -> Self::Out { // - push lhs // - push rhs // - call operator's function let mut thunk = self.visit_expr(&expr.lhs)?; thunk.push_thunk(self.visit_expr(&expr.rhs)?); let inst = match expr.op { BinOp::Plus => Inst::BinPlus, BinOp::Minus => Inst::BinMinus, BinOp::Times => Inst::BinMul, BinOp::Div => Inst::BinDiv, BinOp::Eq => Inst::BinEq, BinOp::Neq => Inst::BinNeq, BinOp::Lt => Inst::BinLt, BinOp::Le => Inst::BinLe, BinOp::Gt => Inst::BinGt, BinOp::Ge => Inst::BinGe, BinOp::And => Inst::BinAnd, BinOp::Or => Inst::BinOr, }; thunk.push(inst); Ok(thunk) } fn visit_un_expr(&mut self, expr: &UnExpr) -> Self::Out { // - push expr // - call operator's function let mut thunk = self.visit_expr(&expr.expr)?; match expr.op { UnOp::Plus => thunk.push(Inst::UnPos), UnOp::Minus => thunk.push(Inst::UnNeg), } Ok(thunk) } fn visit_call_expr(&mut self, expr: &CallExpr) -> Self::Out { // - push expr // - push args in order // - call function let mut thunk = self.visit_expr(&expr.expr)?; for arg in expr.args.iter() { thunk.push_thunk(self.visit_expr(&arg)?); } Ok(thunk) } fn visit_index_expr(&mut self, expr: &IndexExpr) -> Self::Out { // - eval expr // - eval index // - index let mut thunk = self.visit_expr(&expr.expr)?; thunk.push_thunk(self.visit_expr(&expr.index)?); thunk.push(Inst::Index); Ok(thunk) } fn visit_access_expr(&mut self, expr: &AccessExpr) -> Self::Out { // - eval expr // - getattr (expr.access) let mut thunk = self.visit_expr(&expr.expr)?; thunk.push_thunk(Thunk::Body(vec![Inst::GetAttr(global_sym( expr.access.to_string(), ))])); Ok(thunk) } fn visit_atom(&mut self, atom: &Atom) -> Self::Out { let thunk = match atom { Atom::Ident(ident) => { let sym = global_sym(ident.to_string()); let local = if let Some(local) = self.compile.lookup_scope(sym) { local } else { // create a global that gets looked up instead, since nothing with this name // has been declared/assigned in this scope self.compile.create_global(sym) }; // get local Inst::PushLocal(local).into() } Atom::Sym(sym) => { // push symbol Inst::PushSym(global_sym(sym.clone())).into() } Atom::Num(num) => { // push const let (hdl, _) = self.compile.const_int(*num); Inst::PushConst(hdl).into() } Atom::String(s) => { // push const let (hdl, _) = self.compile.const_str(s); Inst::PushConst(hdl).into() } }; Ok(thunk) } } // // Tests // #[test] fn test_flatten_thunk() { let init_body = vec![ Inst::PushSym(Sym::new(0)), Inst::PushSym(Sym::new(1)), Inst::Call(1) ]; let true_body = vec![Inst::PushSym(Sym::new(2))]; let false_body = vec![Inst::PushSym(Sym::new(3))]; let end_body = vec![ Inst::PushSym(Sym::new(1)), Inst::Call(1) ]; let thunk = Thunk::List(vec![ // do something before Thunk::Body(init_body.clone()), // branch Thunk::Branch { thunk_true: Thunk::Body(true_body.clone()).into(), thunk_false: Thunk::Body(false_body.clone()).into(), }, // do something after Thunk::Body(end_body.clone()), ]); let block_count = thunk.basic_block_count(); let blocks = thunk.flatten(); assert_eq!(blocks.len(), block_count); let mut iter = blocks.into_iter(); assert_eq!(iter.next().unwrap(), (0, BasicBlock::Block { exit: 1, block: init_body, })); assert_eq!(iter.next().unwrap(), (1, BasicBlock::Branch { block_true: 2, block_false: 3, })); assert_eq!(iter.next().unwrap(), (2, BasicBlock::Block { exit: 4, block: true_body, })); assert_eq!(iter.next().unwrap(), (3, BasicBlock::Block { exit: 4, block: false_body, })); assert_eq!(iter.next().unwrap(), (4, BasicBlock::Block { exit: 5, block: end_body, })); assert!(iter.next().is_none()); }