// not-python/src/compile/thunk.rs

use crate::{
compile::{basic_block::*, error::*, Compile},
obj::prelude::*,
syn::{ast::*, visit::*},
vm::inst::*,
};
use std::{collections::BTreeMap, mem};
/// A tree-shaped chunk of VM code, produced by the compiler before flattening.
///
/// Thunks are precomputed chunks of code that may allow for branching and/or looping.
/// A thunk is not itself a basic block: `Thunk::flatten` turns a thunk into a
/// `BasicBlockList`.
#[derive(Debug, Clone, PartialEq)]
pub enum Thunk {
/// A list of instructions.
///
/// This is the core of all `Thunk` values. Each `Body` counts as exactly one basic block.
Body(Vec<Inst>),
/// A list of thunks, emitted one after another.
List(Vec<Thunk>),
/// Based on the conditional flag in the VM, code for one of these thunks will be executed.
///
/// The conditional flag is expected to be set upon entry to this thunk.
///
/// Only one of these thunks will be executed. At the end of either thunk, the program will
/// continue at the address following this branch.
Branch {
// Arm that flattening wires up as the branch's `block_true` target.
thunk_true: Box<Thunk>,
// Arm that flattening wires up as the branch's `block_false` target.
thunk_false: Box<Thunk>,
},
/// Based on the conditional flag in the VM, code for this loop will continue to execute.
///
/// The conditional flag is expected to be set upon entry to this thunk.
///
/// At the start of the body, the condition flag is initially checked. If it is not true,
/// the program jumps to the end of the body and continues.
///
/// At the end of the body, the program jumps back to the start where the condition is checked
/// again.
Loop(Box<Thunk>),
/// A placeholder/default thunk that compiles to nothing (it contributes zero basic blocks).
Nop,
}
impl Thunk {
    /// Appends a single instruction to the end of this thunk.
    ///
    /// The instruction is wrapped in a one-element `Thunk::Body` and merged via
    /// [`Thunk::push_thunk`].
    pub fn push(&mut self, append: impl Into<Inst>) {
        let inst: Inst = append.into();
        self.push_thunk(Thunk::Body(vec![inst]));
    }

    /// Appends `append` after this thunk, merging where the shapes allow it.
    ///
    /// `Nop` on either side disappears, two bodies are concatenated, lists are spliced
    /// together, and any other pairing becomes a two-element list.
    pub fn push_thunk(&mut self, append: impl Into<Thunk>) {
        let tail = append.into();
        // Temporarily leave a `Nop` behind so the current value can be moved out and rebuilt.
        let head = mem::replace(self, Thunk::Nop);
        *self = match (head, tail) {
            // X + Nop = X, Nop + X = X
            (kept, Thunk::Nop) | (Thunk::Nop, kept) => kept,
            // Body + Body = Body
            (Thunk::Body(mut insts), Thunk::Body(mut more)) => {
                insts.append(&mut more);
                Thunk::Body(insts)
            }
            // List + List = List
            (Thunk::List(mut items), Thunk::List(mut more)) => {
                items.append(&mut more);
                Thunk::List(items)
            }
            // List + X = List
            (Thunk::List(mut items), single) => {
                items.push(single);
                Thunk::List(items)
            }
            // X + List = List
            (single, Thunk::List(items)) => {
                let mut merged = Vec::with_capacity(items.len() + 1);
                merged.push(single);
                merged.extend(items);
                Thunk::List(merged)
            }
            // X + X = List
            (first, second) => Thunk::List(vec![first, second]),
        };
    }

    /// Gets the number of basic blocks that this thunk will produce.
    ///
    /// This is necessary for compiling to a basic block, in order to predict the "next block" that
    /// a thunk will be jumping to.
    fn basic_block_count(&self) -> usize {
        match self {
            Thunk::Nop => 0,
            Thunk::Body(_) => 1,
            Thunk::List(thunks) => thunks.iter().map(Thunk::basic_block_count).sum::<usize>(),
            // one block for the branch itself, followed by both arms
            Thunk::Branch {
                thunk_true,
                thunk_false,
            } => 1 + thunk_true.basic_block_count() + thunk_false.basic_block_count(),
            // one block for the branch at the start of the loop, followed by the loop body
            Thunk::Loop(body) => 1 + body.basic_block_count(),
        }
    }

    /// Consumes this thunk and lowers it to a `BasicBlockList`.
    pub fn flatten(self) -> BasicBlockList {
        let flattener = Flatten::default();
        flattener.flatten(self)
    }
}
impl From<Inst> for Thunk {
fn from(other: Inst) -> Self {
Self::from(vec![other])
}
}
impl From<Vec<Inst>> for Thunk {
fn from(other: Vec<Inst>) -> Self {
Thunk::Body(other)
}
}
impl From<Vec<Thunk>> for Thunk {
fn from(other: Vec<Thunk>) -> Self {
Thunk::List(other)
}
}
//
// struct Flatten
//
/// Flattens a thunk into a linear list of basic blocks.
///
/// Blocks are stored under their block index; the index of the next block to be emitted is
/// always the number of blocks emitted so far.
#[derive(Default)]
struct Flatten {
// Blocks emitted so far, keyed by block index.
//
// using a btreemap instead of a vec because we can insert things out-of-order
// NOTE(review): `BasicBlockList` is declared elsewhere; presumably BTreeMap-backed - confirm.
blocks: BasicBlockList,
}
//
// impl Flatten
//
impl Flatten {
    /// Consumes the flattener and lowers `thunk` into a list of basic blocks.
    ///
    /// The exit of the whole thunk is the index one past its last block, i.e. the total
    /// number of blocks produced.
    ///
    /// # Panics
    ///
    /// Panics if the number of emitted blocks disagrees with `Thunk::basic_block_count`.
    pub fn flatten(mut self, thunk: Thunk) -> BasicBlockList {
        // "It's 4pm babe, time for your thunk flattening!"
        // "Yes, honey..."
        let last_block = thunk.basic_block_count();
        self.flatten_next(last_block, thunk);
        assert_eq!(self.blocks.len(), last_block);
        self.blocks
    }

    /// Emits the blocks for `thunk`, starting at the current block index.
    ///
    /// `next_block` is the index that control transfers to once `thunk` has finished. It is
    /// *not* necessarily the block that follows `thunk` in linear order: for the true arm of
    /// a branch, the linearly following block is the start of the false arm, while
    /// `next_block` is the join point after the whole branch.
    fn flatten_next(&mut self, next_block: usize, thunk: Thunk) {
        // Index one past the last block this thunk is going to emit.
        let end_block = self.this_block() + thunk.basic_block_count();
        match thunk {
            Thunk::Body(block) => {
                let this_block = self.this_block();
                let prev = self.blocks.insert(
                    this_block,
                    BasicBlock::Block {
                        exit: next_block,
                        block,
                    },
                );
                assert!(prev.is_none());
            }
            Thunk::List(thunks) => {
                for thunk in thunks {
                    let following = self.this_block() + thunk.basic_block_count();
                    // An element falls through to the sibling emitted right after it. Once
                    // nothing else in this list will emit a block, control has to leave
                    // through the list's own exit rather than into whatever happens to be
                    // laid out next (e.g. the false arm of an enclosing branch).
                    let exit = if following == end_block {
                        next_block
                    } else {
                        following
                    };
                    self.flatten_next(exit, thunk);
                }
            }
            Thunk::Branch {
                thunk_true,
                thunk_false,
            } => {
                let branch_block = self.this_block();
                let true_start = branch_block + 1;
                let false_start = true_start + thunk_true.basic_block_count();
                // An arm that emits no blocks has nothing to jump into, so the branch goes
                // straight to the join point for that arm.
                let block_true = if thunk_true.basic_block_count() == 0 {
                    next_block
                } else {
                    true_start
                };
                let block_false = if thunk_false.basic_block_count() == 0 {
                    next_block
                } else {
                    false_start
                };
                let prev = self.blocks.insert(
                    branch_block,
                    BasicBlock::Branch {
                        block_true,
                        block_false,
                    },
                );
                assert!(prev.is_none());
                // Both arms rejoin at the same place.
                self.flatten_next(next_block, *thunk_true);
                self.flatten_next(next_block, *thunk_false);
            }
            Thunk::Loop(body) => {
                // NOTE(review): this arm used to be `todo!()`. The layout follows the
                // `Thunk::Loop` docs and `basic_block_count`: one branch block that checks
                // the flag, then the body, which jumps back to that branch block.
                let head_block = self.this_block();
                // With an empty body, "run the body, then re-check" is just "re-check".
                let block_true = if body.basic_block_count() == 0 {
                    head_block
                } else {
                    head_block + 1
                };
                let prev = self.blocks.insert(
                    head_block,
                    BasicBlock::Branch {
                        block_true,
                        block_false: next_block,
                    },
                );
                assert!(prev.is_none());
                self.flatten_next(head_block, *body);
            }
            Thunk::Nop => {}
        }
        // Whatever the shape, the thunk must have emitted exactly as many blocks as predicted.
        assert_eq!(self.this_block(), end_block);
    }

    /// Index that the next emitted block will receive.
    fn this_block(&self) -> usize {
        self.blocks.len()
    }
}
//
// struct CompileBody
//
/// Compiles an AST body down to a `Thunk`.
///
/// Thunks are the basic building blocks of the IR. Thunks form a chain of decision paths that may
/// be taken, which allows an optimizer to remove dead code, detect endless loops, and so on. This
/// allows for shrinking blocks of code without having to recalculate jump addresses.
pub struct CompileBody<'c> {
// Compiler state borrowed for the lifetime of this visitor; the visitor calls into it for
// local/global lookups, scope layers, and constants.
compile: &'c mut Compile,
}
impl<'c> CompileBody<'c> {
    /// Creates a body compiler that works against the given compiler state.
    pub fn new(compile: &'c mut Compile) -> Self {
        Self { compile }
    }

    /// Compiles `body` into a single `Thunk` by visiting it.
    pub fn compile(&mut self, body: &'c Body) -> Result<Thunk> {
        self.visit_body(body)
    }
}
//
// impl Visit for CompileBody
//
impl Visit for CompileBody<'_> {
    // XXX
    // Trying to "future-proof" by using Result<_> in case there's some reason that an error
    // may need to be thrown in the future so I don't have to wrap every return value in Ok(_)
    type Out = Result<Thunk>;

    /// Collects the body's locals, then compiles every statement in order into one thunk.
    fn visit_body(&mut self, body: &Body) -> Self::Out {
        self.compile.collect_locals(body);
        let mut thunk = Thunk::Nop;
        for stmt in body.iter() {
            thunk.push_thunk(stmt.accept(self)?);
        }
        Ok(thunk)
    }

    /// Dispatches to the visitor method for the concrete statement kind.
    fn visit_stmt(&mut self, stmt: &Stmt) -> Self::Out {
        DefaultAccept::default_accept(stmt, self)
    }

    /// Compiles `lhs = rhs`.
    fn visit_assign_stmt(&mut self, assign: &AssignStmt) -> Self::Out {
        // - push rhs
        // - push lhs (which handles the assignment)
        let mut thunk = self.visit_expr(&assign.rhs)?;
        thunk.push_thunk(self.visit_lhs_expr(&assign.lhs)?);
        Ok(thunk)
    }

    /// Compiles the store half of an assignment; the value is expected to already be on the
    /// stack.
    ///
    /// # Panics
    ///
    /// Panics if a plain name is neither a known local nor a known global.
    fn visit_lhs_expr(&mut self, lhs_expr: &LhsExpr) -> Self::Out {
        // Do different things depending on the LHS
        let thunk = match lhs_expr {
            LhsExpr::SetAttr(expr) => {
                // - push lhs expression (without accessor)
                // - setattr (access) NOTE : rhs should already be on stack
                let mut thunk = self.visit_expr(&expr.expr)?;
                let attr = global_sym(expr.access.to_string());
                thunk.push(Inst::SetAttr(attr));
                thunk
            }
            LhsExpr::Name(local_name) => {
                let sym = global_sym(local_name.to_string());
                if let Some(local) = self.compile.lookup_local(sym) {
                    Inst::PopLocal(Some(local)).into()
                } else {
                    let global = self
                        .compile
                        .lookup_global(sym)
                        .expect("name expected to exist someplace(?)");
                    Inst::PopGlobal(Some(global)).into()
                }
            }
        };
        Ok(thunk)
    }

    /// Dispatches to the visitor method for the concrete expression kind.
    fn visit_expr(&mut self, expr: &Expr) -> Self::Out {
        DefaultAccept::default_accept(expr, self)
    }

    /// Compiles a binary expression: both operands, then the operator instruction.
    fn visit_bin_expr(&mut self, expr: &BinExpr) -> Self::Out {
        // - push lhs
        // - push rhs
        // - call operator's function
        let mut thunk = self.visit_expr(&expr.lhs)?;
        thunk.push_thunk(self.visit_expr(&expr.rhs)?);
        let inst = match expr.op {
            BinOp::Plus => Inst::BinPlus,
            BinOp::Minus => Inst::BinMinus,
            BinOp::Times => Inst::BinMul,
            BinOp::Div => Inst::BinDiv,
            BinOp::Eq => Inst::BinEq,
            BinOp::Neq => Inst::BinNeq,
            BinOp::Lt => Inst::BinLt,
            BinOp::Le => Inst::BinLe,
            BinOp::Gt => Inst::BinGt,
            BinOp::Ge => Inst::BinGe,
            BinOp::And => Inst::BinAnd,
            BinOp::Or => Inst::BinOr,
        };
        thunk.push(inst);
        Ok(thunk)
    }

    /// Compiles a unary expression: the operand, then the operator instruction.
    fn visit_un_expr(&mut self, expr: &UnExpr) -> Self::Out {
        // - push expr
        // - call operator's function
        let mut thunk = self.visit_expr(&expr.expr)?;
        let inst = match expr.op {
            UnOp::Plus => Inst::UnPos,
            UnOp::Minus => Inst::UnNeg,
        };
        thunk.push(inst);
        Ok(thunk)
    }

    /// Compiles a call: the callee, each argument in order, then `Call` with the argument count.
    fn visit_call_expr(&mut self, expr: &CallExpr) -> Self::Out {
        // - push expr
        // - push args in order
        // - call function
        let mut thunk = self.visit_expr(&expr.expr)?;
        for arg in expr.args.iter() {
            thunk.push_thunk(self.visit_expr(arg)?);
        }
        thunk.push(Inst::Call(expr.args.len()));
        Ok(thunk)
    }

    /// Compiles `expr[index]`.
    fn visit_index_expr(&mut self, expr: &IndexExpr) -> Self::Out {
        // - eval expr
        // - eval index
        // - index
        let mut thunk = self.visit_expr(&expr.expr)?;
        thunk.push_thunk(self.visit_expr(&expr.index)?);
        thunk.push(Inst::Index);
        Ok(thunk)
    }

    /// Compiles `expr.access` as a read.
    fn visit_access_expr(&mut self, expr: &AccessExpr) -> Self::Out {
        // - eval expr
        // - getattr (expr.access)
        let mut thunk = self.visit_expr(&expr.expr)?;
        let attr = global_sym(expr.access.to_string());
        thunk.push(Inst::GetAttr(attr));
        Ok(thunk)
    }

    /// Compiles a function literal into a constant and returns the code that pushes it.
    ///
    /// The parameters are declared in a fresh scope layer and the body is compiled while that
    /// layer is active.
    ///
    /// # Panics
    ///
    /// Panics if there is no scope layer to pop, or if the popped layer's names are not
    /// numbered contiguously from zero.
    fn visit_fun_expr(&mut self, expr: &FunExpr) -> Self::Out {
        // TODO(fun) : need captures for functions, built dynamically (or statically?)
        // - static is not possible, since captures are *created* at runtime, and there's no
        //   instruction that will look up just one scope level - it's either locals or globals.
        // - an entire "create function" instruction is probably the best way to solve it, don't
        //   try to be clever, just implement it like that (since I mean, python does too...)

        // - push const
        // (functions are unique const values so a new function will be created for every literal
        // function defined in code)

        // This is pretty much the only place where a new scope layer gets pushed beyond the start
        // of the program
        self.compile.push_scope_layer();
        for param in expr.params.iter() {
            let sym = global_sym(param.to_string());
            self.compile.create_local(sym);
        }

        // Compile the body while the function's own layer is still on top, so that parameter
        // names resolve through `lookup_local`.
        // NOTE(review): assumes `collect_locals` (called by `visit_body`) registers the body's
        // locals in the current top layer - confirm against `Compile`.
        let body = self.visit_body(&expr.body);

        // Pop before propagating any error so a failed body does not leave the layer behind.
        // remap (Sym -> Name) to be (Name -> Sym) and make sure it's all in order.
        let scope_locals: BTreeMap<_, _> = self
            .compile
            .pop_scope_layer()
            .expect("function scope layer was pushed above")
            .into_iter()
            .map(|(sym, name)| (name, sym))
            .collect();
        let code = body?.flatten().to_vec();

        // this should be in numeric order since:
        // 1. locals are created exactly once or looked up
        // 2. scope_locals is a btreemap, keyed by names, which are in order from 0..N
        let locals: FunLocals = scope_locals
            .into_iter()
            .enumerate()
            .map(|(index, (name, sym))| {
                assert_eq!(index, name.index());
                sym
            })
            .collect();

        let (hdl, _fun) = self.compile.push_const(UserFun::new_obj(code, locals));
        // TODO(compile) : determine return value at the end of the body (preferably at parse-time)
        Ok(Inst::PushConst(hdl).into())
    }

    /// Compiles an atom (identifier, symbol, or literal) into the instruction that pushes it.
    fn visit_atom(&mut self, atom: &Atom) -> Self::Out {
        let thunk = match atom {
            Atom::Ident(ident) => {
                let sym = global_sym(ident.to_string());
                if let Some(local) = self.compile.lookup_local(sym) {
                    // get local
                    Inst::LoadLocal(local).into()
                } else {
                    // get or create global
                    let global = self.compile.create_global(sym);
                    Inst::LoadGlobal(global).into()
                }
            }
            Atom::Sym(sym) => {
                // push symbol
                Inst::PushSym(global_sym(sym.clone())).into()
            }
            Atom::Num(num) => {
                // push const
                let (hdl, _) = self.compile.const_int(*num);
                Inst::PushConst(hdl).into()
            }
            Atom::String(s) => {
                // push const
                let (hdl, _) = self.compile.const_str(s);
                Inst::PushConst(hdl).into()
            }
        };
        Ok(thunk)
    }
}
//
// Tests
//
#[test]
fn test_flatten_thunk() {
    // Straight-line code before the branch, one block per arm, and straight-line code after.
    let init_body = vec![
        Inst::PushSym(Sym::new(0)),
        Inst::PushSym(Sym::new(1)),
        Inst::Call(1),
    ];
    let true_body = vec![Inst::PushSym(Sym::new(2))];
    let false_body = vec![Inst::PushSym(Sym::new(3))];
    let end_body = vec![Inst::PushSym(Sym::new(1)), Inst::Call(1)];

    let branch = Thunk::Branch {
        thunk_true: Box::new(Thunk::Body(true_body.clone())),
        thunk_false: Box::new(Thunk::Body(false_body.clone())),
    };
    let thunk = Thunk::List(vec![
        // do something before
        Thunk::Body(init_body.clone()),
        // branch
        branch,
        // do something after
        Thunk::Body(end_body.clone()),
    ]);

    let block_count = thunk.basic_block_count();
    let blocks = thunk.flatten();
    assert_eq!(blocks.len(), block_count);

    // Expected layout, in block-index order. Both arms rejoin at block 4, and the final block
    // exits to the index one past the end.
    let expected = vec![
        (
            0,
            BasicBlock::Block {
                exit: 1,
                block: init_body,
            },
        ),
        (
            1,
            BasicBlock::Branch {
                block_true: 2,
                block_false: 3,
            },
        ),
        (
            2,
            BasicBlock::Block {
                exit: 4,
                block: true_body,
            },
        ),
        (
            3,
            BasicBlock::Block {
                exit: 4,
                block: false_body,
            },
        ),
        (
            4,
            BasicBlock::Block {
                exit: 5,
                block: end_body,
            },
        ),
    ];

    let mut iter = blocks.into_iter();
    for want in expected {
        assert_eq!(iter.next().unwrap(), want);
    }
    assert!(iter.next().is_none());
}