Add basic blocks and implementation of flattening thunks -> basic blocks

* Basic blocks are a more linear way of representing code. Thunks beget
  basic blocks, which beget vectors of instructions.
* Basic blocks are also being flattened into a vector of instructions
  (hopefully, no tests done yet)
* OH yeah locals can be collected too (but currently are not being
  collected in the compiler, that should come soon)

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-09-16 17:18:31 -07:00
parent ef38680fe5
commit 582b3a4b73
14 changed files with 540 additions and 124 deletions

View File

@@ -1,18 +1,47 @@
use crate::{
compile::{error::*, Compile},
syn::{ast::*, visit::*},
compile::{basic_block::*, error::*, Compile},
obj::prelude::*,
vm::inst::*
syn::{ast::*, visit::*},
vm::inst::*,
};
use std::mem;
/// A structured (tree-shaped) chunk of VM code.
///
/// Thunks are precomputed chunks of code that may allow for branching and/or looping. A thunk is
/// not itself a basic block: it is turned into a list of basic blocks by `Thunk::flatten`.
#[derive(Debug, Clone, PartialEq)]
pub enum Thunk {
/// A list of instructions.
///
/// This is the core of all `Thunk` values. A body always counts as exactly one basic block.
Body(Vec<Inst>),
/// A list of thunks, laid out one after another in order.
List(Vec<Thunk>),
/// Based on the conditional flag in the VM, code for one of these thunks will be executed.
///
/// The conditional flag is expected to be set upon entry to this thunk.
///
/// Only one of these thunks will be executed. At the end of either thunk, the program will
/// continue at the address following this branch.
Branch {
// Thunk executed when the conditional flag is true.
thunk_true: Box<Thunk>,
// Thunk executed when the conditional flag is false.
thunk_false: Box<Thunk>,
},
/// Based on the conditional flag in the VM, code for this loop will continue to execute.
///
/// The conditional flag is expected to be set upon entry to this thunk.
///
/// At the start of the body, the condition flag is initially checked. If it is not true,
/// the program jumps to the end of the body and continues.
///
/// At the end of the body, the program jumps back to the start where the condition is checked
/// again.
///
/// NOTE: flattening a `Loop` into basic blocks is still a `todo!()` in `Flatten::flatten_next`.
Loop(Box<Thunk>),
/// A placeholder/default thunk that compiles to nothing (it contributes zero basic blocks).
Nop,
}
@@ -54,6 +83,32 @@ impl Thunk {
(lhs, rhs) => Thunk::List(vec![lhs, rhs]),
};
}
/// Counts the basic blocks that flattening this thunk will produce.
///
/// The flattener needs this figure up front so that it can predict the index of the
/// "next block" a thunk jumps to before that block has actually been emitted.
fn basic_block_count(&self) -> usize {
    match self {
        // a placeholder emits nothing at all
        Thunk::Nop => 0,
        // a run of instructions is exactly one block
        Thunk::Body(_) => 1,
        // a list is as long as all of its children put together
        Thunk::List(children) => children.iter().map(Thunk::basic_block_count).sum(),
        // the loop body, plus one block for the branch at the start of the loop
        Thunk::Loop(body) => body.basic_block_count() + 1,
        // both arms, plus one block for the branch that selects between them
        Thunk::Branch {
            thunk_true,
            thunk_false,
        } => {
            let arms = thunk_true.basic_block_count() + thunk_false.basic_block_count();
            arms + 1
        }
    }
}
/// Consumes this thunk and flattens it into a list of basic blocks.
///
/// All of the work is delegated to a freshly-created `Flatten` helper.
pub fn flatten(self) -> BasicBlockList {
    let flattener = Flatten::default();
    flattener.flatten(self)
}
}
impl From<Inst> for Thunk {
@@ -74,6 +129,80 @@ impl From<Vec<Thunk>> for Thunk {
}
}
//
// struct Flatten
//
/// Flattens a thunk into a linear list of basic blocks.
///
/// Blocks are accumulated in `blocks` as the thunk tree is walked; the index of the next block to
/// be emitted is the current length of that list (see `Flatten::this_block`).
#[derive(Default)]
struct Flatten {
// Blocks emitted so far, keyed by block index.
// using a btreemap instead of a vec because we can insert things out-of-order
// NOTE(review): `BasicBlockList` is defined in `compile::basic_block`, not visible here —
// confirm it is the btreemap this comment refers to.
blocks: BasicBlockList,
}
//
// impl Flatten
//
impl Flatten {
    /// Flattens `thunk` into a list of basic blocks, consuming this flattener.
    ///
    /// Blocks are numbered from 0 in the order they are emitted. Control that runs off the end of
    /// the thunk exits to index `thunk.basic_block_count()`, i.e. one past the last block.
    ///
    /// # Panics
    ///
    /// Panics if the number of emitted blocks disagrees with `Thunk::basic_block_count`, or if
    /// the thunk contains a `Thunk::Loop` (flattening loops is not implemented yet).
    pub fn flatten(mut self, thunk: Thunk) -> BasicBlockList {
        // "It's 4pm babe, time for your thunk flattening!"
        // "Yes, honey..."
        let last_block = thunk.basic_block_count();
        self.flatten_next(last_block, thunk);
        assert_eq!(self.blocks.len(), last_block);
        self.blocks
    }

    /// Emits the blocks for `thunk`, starting at the current block index.
    ///
    /// `next_block` is the index that control must continue at once `thunk` has finished. It is
    /// NOT necessarily the index directly after the blocks emitted here: the true arm of a branch,
    /// for example, has to jump over the false arm that is laid out right behind it.
    fn flatten_next(&mut self, next_block: usize, thunk: Thunk) {
        match thunk {
            Thunk::Body(block) => {
                let this_block = self.this_block();
                let prev = self.blocks.insert(
                    this_block,
                    BasicBlock::Block {
                        exit: next_block,
                        block,
                    },
                );
                assert!(prev.is_none());
            }
            Thunk::List(thunks) => {
                // Index one past the last block this list will emit.
                let list_end = self.this_block()
                    + thunks
                        .iter()
                        .map(Thunk::basic_block_count)
                        .sum::<usize>();
                for thunk in thunks {
                    let child_end = self.this_block() + thunk.basic_block_count();
                    // A child followed by more blocks of this list falls through to them. The
                    // last child that emits anything must instead leave for the caller's
                    // `next_block`, which differs from `list_end` when this list is nested in the
                    // true arm of a branch.
                    let exit = if child_end == list_end {
                        next_block
                    } else {
                        child_end
                    };
                    self.flatten_next(exit, thunk);
                    assert_eq!(self.this_block(), child_end);
                }
                assert_eq!(self.this_block(), list_end);
            }
            Thunk::Branch {
                thunk_true,
                thunk_false,
            } => {
                let branch_block = self.this_block();
                let true_count = thunk_true.basic_block_count();
                let false_count = thunk_false.basic_block_count();
                // Layout: [branch] [true arm ...] [false arm ...]
                let first_true = branch_block + 1;
                let first_false = first_true + true_count;
                let branch_end = first_false + false_count;
                // An arm that emits no blocks (e.g. `Thunk::Nop`) has no block of its own to jump
                // to; send that case straight to `next_block` rather than into whatever happens
                // to be laid out after the branch block.
                let block_true = if true_count == 0 {
                    next_block
                } else {
                    first_true
                };
                let block_false = if false_count == 0 {
                    next_block
                } else {
                    first_false
                };
                let prev = self.blocks.insert(
                    branch_block,
                    BasicBlock::Branch {
                        block_true,
                        block_false,
                    },
                );
                assert!(prev.is_none());
                // Both arms rejoin at the same place once they are done.
                self.flatten_next(next_block, *thunk_true);
                self.flatten_next(next_block, *thunk_false);
                assert_eq!(self.this_block(), branch_end);
            }
            Thunk::Loop(_) => todo!(),
            Thunk::Nop => {}
        }
    }

    /// Index that the next emitted block will receive.
    ///
    /// Blocks are emitted contiguously from 0, so this is simply the number of blocks so far.
    fn this_block(&self) -> usize {
        self.blocks.len()
    }
}
//
// struct CompileBody
//
/// Compiles an AST body down to a `Thunk`.
///
/// Thunks are the basic building blocks of the IR. Thunks form a chain of decision paths that may
/// be taken, which allows an optimizer to remove dead code, detect endless loops, and so on. This
/// allows for shrinking blocks of code without having to recalculate jump addresses.
pub struct CompileBody<'c, 't> {
// Mutable borrow of the `Compile` value this visitor compiles against.
compile: &'c mut Compile<'t>,
}
@@ -88,7 +217,14 @@ impl<'c, 't> CompileBody<'c, 't> {
}
}
//
// impl Visit for CompileBody
//
impl Visit for CompileBody<'_, '_> {
// XXX
// Trying to "future-proof" by using Result<_> in case there's some reason that an error
// may need to be thrown in the future so I don't have to wrap every return value in Ok(_)
type Out = Result<Thunk>;
fn visit_body(&mut self, body: &Body) -> Self::Out {
@@ -197,9 +333,9 @@ impl Visit for CompileBody<'_, '_> {
// - eval expr
// - getattr (expr.access)
let mut thunk = self.visit_expr(&expr.expr)?;
thunk.push_thunk(Thunk::Body(vec![
Inst::GetAttr(global_sym(expr.access.to_string())),
]));
thunk.push_thunk(Thunk::Body(vec![Inst::GetAttr(global_sym(
expr.access.to_string(),
))]));
Ok(thunk)
}
@@ -233,3 +369,49 @@ impl Visit for CompileBody<'_, '_> {
Ok(thunk)
}
}
//
// Tests
//
/// Flattens `init; if { true } else { false }; end` and checks every emitted block.
#[test]
fn test_flatten_thunk() {
    let init_body = vec![
        Inst::PushSym(Sym::new(0)),
        Inst::PushSym(Sym::new(1)),
        Inst::Call(1),
    ];
    let true_body = vec![Inst::PushSym(Sym::new(2))];
    let false_body = vec![Inst::PushSym(Sym::new(3))];
    let end_body = vec![Inst::PushSym(Sym::new(1)), Inst::Call(1)];

    // before -> branch -> after
    let branch = Thunk::Branch {
        thunk_true: Box::new(Thunk::Body(true_body.clone())),
        thunk_false: Box::new(Thunk::Body(false_body.clone())),
    };
    let thunk = Thunk::List(vec![
        Thunk::Body(init_body.clone()),
        branch,
        Thunk::Body(end_body.clone()),
    ]);

    // Both arms of the branch rejoin at block 4; the final block exits one past the end.
    let expected = vec![
        (
            0,
            BasicBlock::Block {
                exit: 1,
                block: init_body,
            },
        ),
        (
            1,
            BasicBlock::Branch {
                block_true: 2,
                block_false: 3,
            },
        ),
        (
            2,
            BasicBlock::Block {
                exit: 4,
                block: true_body,
            },
        ),
        (
            3,
            BasicBlock::Block {
                exit: 4,
                block: false_body,
            },
        ),
        (
            4,
            BasicBlock::Block {
                exit: 5,
                block: end_body,
            },
        ),
    ];

    let block_count = thunk.basic_block_count();
    let blocks = thunk.flatten();
    assert_eq!(blocks.len(), block_count);

    let actual: Vec<_> = blocks.into_iter().collect();
    assert_eq!(actual, expected);
}