Use lrpar for parsing, big 'ol syntax overhaul
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
823
Cargo.lock
generated
823
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
12
Cargo.toml
12
Cargo.toml
@@ -11,9 +11,15 @@ build = "build.rs"
|
||||
[dependencies]
|
||||
bitflags = "1"
|
||||
byteorder = "1"
|
||||
lalrpop-util = "0.17.2"
|
||||
regex = "*"
|
||||
lazy_static = "1"
|
||||
snafu = "0.6.2"
|
||||
|
||||
cfgrammar = "0.6"
|
||||
lrlex = "0.6"
|
||||
lrpar = "0.6"
|
||||
regex = "*"
|
||||
|
||||
[build-dependencies]
|
||||
lalrpop = "0.17.2"
|
||||
cfgrammar = "0.6"
|
||||
lrlex = "0.6"
|
||||
lrpar = "0.6"
|
||||
|
||||
14
build.rs
14
build.rs
@@ -1,5 +1,13 @@
|
||||
use lalrpop;
|
||||
use cfgrammar::yacc::YaccKind;
|
||||
use lrlex::LexerBuilder;
|
||||
use lrpar::{CTParserBuilder};
|
||||
|
||||
fn main() {
|
||||
lalrpop::process_root().unwrap();
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let lex_rule_ids_map = CTParserBuilder::new()
|
||||
.yacckind(YaccKind::Grmtools)
|
||||
.process_file_in_src("vm/obj/syn/parser.y")?;
|
||||
LexerBuilder::new()
|
||||
.rule_ids_map(lex_rule_ids_map)
|
||||
.process_file_in_src("vm/obj/syn/lexer.l")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
use std::cmp::Ordering;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
|
||||
pub struct Pos {
|
||||
pub source: usize,
|
||||
pub line: usize,
|
||||
@@ -22,23 +22,20 @@ impl Pos {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_char(c: char, source: usize, line: usize, col: usize, byte: usize) -> Self {
|
||||
Pos::new(source, line, col, byte, c.len_utf8())
|
||||
pub fn from_char(c: char) -> Self {
|
||||
Pos::new(0, 0, 0, 0, c.len_utf8())
|
||||
}
|
||||
|
||||
pub fn adv_char(self, c: char) -> Self {
|
||||
let mut next = self;
|
||||
next.byte += next.len;
|
||||
next.len = c.len_utf8();
|
||||
next.source += 1;
|
||||
next.col += 1;
|
||||
next
|
||||
pub fn adv_char(&mut self, c: char) {
|
||||
self.byte += self.len;
|
||||
self.len = c.len_utf8();
|
||||
self.source += 1;
|
||||
self.col += 1;
|
||||
}
|
||||
|
||||
pub fn adv_line(self) -> Self {
|
||||
let mut next = self;
|
||||
next.line += 1;
|
||||
next
|
||||
pub fn adv_line(&mut self) {
|
||||
self.line += 1;
|
||||
self.col = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,8 +53,8 @@ impl Ord for Pos {
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct Span {
|
||||
start: Pos,
|
||||
end: Pos,
|
||||
pub start: Pos,
|
||||
pub end: Pos,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
|
||||
59
src/main.rs
59
src/main.rs
@@ -3,7 +3,7 @@
|
||||
mod common;
|
||||
mod vm;
|
||||
|
||||
use std::{convert::TryFrom, env, fs, io, process};
|
||||
use std::{env, fs, io, process};
|
||||
|
||||
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
|
||||
|
||||
@@ -21,55 +21,14 @@ fn get_input_string() -> io::Result<String> {
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
use vm::obj::syn::parser::SectionsParser;
|
||||
let contents = get_input_string()?;
|
||||
let ast = match SectionsParser::new().parse(&contents) {
|
||||
Ok(ast) => ast,
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
let obj = vm::obj::obj::Object::try_from(&ast)?;
|
||||
dump(&obj)?;
|
||||
let mut vm = vm::vm::Vm::new();
|
||||
vm.load_object(obj, 1024 * 1024 * 64)?; // 64mb
|
||||
let status = vm.run()?;
|
||||
println!("{}", status);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
fn dump(obj: &vm::obj::obj::Object) -> Result<()> {
|
||||
use vm::obj::obj::Section;
|
||||
use vm::visit::VisitInst;
|
||||
let mut stdout = io::stdout();
|
||||
for section in &obj.sections {
|
||||
match section {
|
||||
Section::Data { start, contents, .. } => {
|
||||
const WIDTH: usize = 4;
|
||||
println!("data section at 0x{:08x}", start);
|
||||
for (i, b) in contents.iter().enumerate() {
|
||||
if i % WIDTH == 0 {
|
||||
print!("{:08x} | ", ((*start as usize) + i));
|
||||
}
|
||||
print!("{:02x} ", b);
|
||||
if i % WIDTH == (WIDTH - 1) {
|
||||
println!();
|
||||
}
|
||||
}
|
||||
println!();
|
||||
}
|
||||
Section::Code { start, contents, .. } => {
|
||||
println!("code section at 0x{:08x}", start);
|
||||
let mut disasm = vm::disassemble::Disassemble::new(&mut stdout, contents, *start);
|
||||
while !disasm.is_done() {
|
||||
disasm.visit_inst()?;
|
||||
}
|
||||
println!();
|
||||
}
|
||||
Section::Meta { .. } => continue,
|
||||
};
|
||||
use vm::obj::syn::{lexer, parser};
|
||||
let text = get_input_string()?;
|
||||
let lexerdef = lexer::lexerdef();
|
||||
let lexer = lexerdef.lexer(&text);
|
||||
let (res, errors) = parser::parse(&lexer);
|
||||
for err in errors {
|
||||
println!("{}", err.pp(&lexer, &parser::token_epp));
|
||||
}
|
||||
println!("{:?}", res);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
73
src/vm/common.rs
Normal file
73
src/vm/common.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
fmt::{self, Formatter, LowerHex},
|
||||
ops::{Add, AddAssign},
|
||||
};
|
||||
|
||||
/// An address in the VM's memory space, stored as a raw `u64`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Addr(pub u64);

impl LowerHex for Addr {
    /// Formats the wrapped integer as lowercase hexadecimal, honouring any
    /// width/fill flags given in the format string.
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        LowerHex::fmt(&self.0, fmt)
    }
}

/// `Addr + T` for every `T` that can be added to a `u64` (and vice versa),
/// yielding a new `Addr`.
impl<T> Add<T> for Addr
where
    T: Add<u64, Output = u64>,
    u64: Add<T, Output = u64>,
{
    type Output = Addr;

    fn add(self, rhs: T) -> Self::Output {
        let Addr(base) = self;
        Addr(base + rhs)
    }
}

/// Generates the integer interoperability impls (`+=`, comparisons and
/// `From`) between `Addr` and each listed primitive integer type. The
/// integer is always converted with an `as u64` cast first.
macro_rules! impl_int_interop {
    ($($ty:ty),+ $(,)?) => {
        $(
            impl AddAssign<$ty> for Addr {
                fn add_assign(&mut self, rhs: $ty) {
                    *self = Addr(self.0 + (rhs as u64));
                }
            }

            impl PartialEq<$ty> for Addr {
                fn eq(&self, other: &$ty) -> bool {
                    let Addr(raw) = *self;
                    raw == (*other as u64)
                }
            }

            impl PartialOrd<$ty> for Addr {
                fn partial_cmp(&self, other: &$ty) -> Option<Ordering> {
                    let widened = *other as u64;
                    Some(self.0.cmp(&widened))
                }
            }

            impl From<$ty> for Addr {
                fn from(other: $ty) -> Self {
                    Addr(other as u64)
                }
            }
        )+
    };
}

impl_int_interop!(usize, u64);
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::vm::{inst::InstOp, reg::Reg, vm::*};
|
||||
use crate::vm::{inst::InstOp, reg::Reg, common::*,};
|
||||
use snafu::Snafu;
|
||||
|
||||
#[derive(Snafu, Debug, Clone)]
|
||||
@@ -9,6 +9,10 @@ pub enum VmError {
|
||||
MemOutOfBounds { addr: Addr },
|
||||
#[snafu(display("illegal instruction opcode: 0x{:04x}", op))]
|
||||
IllegalOp { op: InstOp },
|
||||
#[snafu(display("illegal destination specification: 0b{:08b}", spec))]
|
||||
IllegalDestSpec { spec: u8 },
|
||||
#[snafu(display("illegal source specification: 0b{:08b}", spec))]
|
||||
IllegalSourceSpec { spec: u8 },
|
||||
}
|
||||
|
||||
pub type Result<T, E = VmError> = std::result::Result<T, E>;
|
||||
|
||||
132
src/vm/inst.rs
132
src/vm/inst.rs
@@ -1,3 +1,5 @@
|
||||
use crate::vm::{common::Addr, reg::Reg};
|
||||
|
||||
macro_rules! instructions {
|
||||
{
|
||||
$($variant:ident = $value:expr),* $(,)?
|
||||
@@ -21,42 +23,114 @@ pub type InstOp = u16;
|
||||
|
||||
instructions! {
|
||||
ADD = 0x0000,
|
||||
MUL = 0x0001,
|
||||
DIV = 0x0002,
|
||||
MOD = 0x0003,
|
||||
INEG = 0x0004,
|
||||
SUB = 0x0001,
|
||||
MUL = 0x0002,
|
||||
DIV = 0x0003,
|
||||
MOD = 0x0004,
|
||||
AND = 0x0005,
|
||||
OR = 0x0006,
|
||||
INV = 0x0007,
|
||||
NOT = 0x0008,
|
||||
XOR = 0x0009,
|
||||
SHL = 0x000a,
|
||||
SHR = 0x000b,
|
||||
XOR = 0x0007,
|
||||
SHL = 0x0008,
|
||||
SHR = 0x0009,
|
||||
INEG = 0x000a,
|
||||
INV = 0x000b,
|
||||
NOT = 0x000c,
|
||||
CMPEQ = 0x1000,
|
||||
CMPLT = 0x1001,
|
||||
JMP = 0x1100,
|
||||
JZ = 0x1101,
|
||||
JNZ = 0x1102,
|
||||
LOAD = 0x2000,
|
||||
REGCOPY = 0x2001,
|
||||
STOREIMM64 = 0x2100,
|
||||
STOREIMM32 = 0x2101,
|
||||
MEMCOPY = 0x2200,
|
||||
STORE = 0x2201,
|
||||
JMP = 0x1002,
|
||||
JZ = 0x1003,
|
||||
JNZ = 0x1004,
|
||||
MOV = 0x2000,
|
||||
HALT = 0xF000,
|
||||
NOP = 0xF001,
|
||||
DUMP = 0xF002,
|
||||
}
|
||||
|
||||
pub fn inst_len(op: InstOp) -> usize {
|
||||
match op {
|
||||
// 2 bytes
|
||||
HALT | NOP => 2,
|
||||
// 4 bytes
|
||||
ADD | MUL | DIV | INEG | INV | NOT | MOD | AND | OR | XOR | SHL | SHR | CMPEQ | CMPLT
|
||||
| JMP | JZ | JNZ | LOAD | REGCOPY | MEMCOPY | STORE => 4,
|
||||
// Immediates - 4+ bytes
|
||||
STOREIMM64 => 16,
|
||||
STOREIMM32 => 8,
|
||||
_ => panic!("unknown instruction op 0x{:04x}", op),
|
||||
pub enum Inst {
|
||||
Add(Dest, Source),
|
||||
Sub(Dest, Source),
|
||||
Mul(Dest, Source),
|
||||
Div(Dest, Source),
|
||||
Mod(Dest, Source),
|
||||
And(Dest, Source),
|
||||
Or(Dest, Source),
|
||||
Xor(Dest, Source),
|
||||
Shl(Dest, Source),
|
||||
Shr(Dest, Source),
|
||||
INeg(Dest, Source),
|
||||
Inv(Dest, Source),
|
||||
Not(Dest, Source),
|
||||
CmpEq(Source, Source),
|
||||
CmpLt(Source, Source),
|
||||
Jmp(Source),
|
||||
Jz(Source),
|
||||
Jnz(Source),
|
||||
Mov(Dest, Source),
|
||||
Halt,
|
||||
Nop,
|
||||
Dump,
|
||||
}
|
||||
|
||||
impl Inst {
|
||||
pub fn op(&self) -> InstOp {
|
||||
match self {
|
||||
Inst::Add(_, _) => ADD,
|
||||
Inst::Sub(_, _) => SUB,
|
||||
Inst::Mul(_, _) => MUL,
|
||||
Inst::Div(_, _) => DIV,
|
||||
Inst::Mod(_, _) => MOD,
|
||||
Inst::And(_, _) => AND,
|
||||
Inst::Or(_, _) => OR,
|
||||
Inst::Xor(_, _) => XOR,
|
||||
Inst::Shl(_, _) => SHL,
|
||||
Inst::Shr(_, _) => SHL,
|
||||
Inst::INeg(_, _) => INEG,
|
||||
Inst::Inv(_, _) => INV,
|
||||
Inst::Not(_, _) => NOT,
|
||||
Inst::CmpEq(_, _) => CMPEQ,
|
||||
Inst::CmpLt(_, _) => CMPLT,
|
||||
Inst::Jmp(_) => JMP,
|
||||
Inst::Jz(_) => JZ,
|
||||
Inst::Jnz(_) => JNZ,
|
||||
Inst::Mov(_, _) => MOV,
|
||||
Inst::Halt => HALT,
|
||||
Inst::Nop => NOP,
|
||||
Inst::Dump => DUMP,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Source {
|
||||
Addr64(Addr),
|
||||
Addr32(Addr),
|
||||
Addr16(Addr),
|
||||
Addr8(Addr),
|
||||
Reg(Reg),
|
||||
Imm(u64),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Dest {
|
||||
Addr64(Addr),
|
||||
Addr32(Addr),
|
||||
Addr16(Addr),
|
||||
Addr8(Addr),
|
||||
Reg(Reg),
|
||||
}
|
||||
|
||||
pub const DEST_ADDR64: u8 = 0b0000;
|
||||
pub const DEST_ADDR32: u8 = 0b0001;
|
||||
pub const DEST_ADDR16: u8 = 0b0010;
|
||||
pub const DEST_ADDR8: u8 = 0b0011;
|
||||
pub const DEST_REG: u8 = 0b0100;
|
||||
|
||||
pub const SOURCE_ADDR64: u8 = 0b0000;
|
||||
pub const SOURCE_ADDR32: u8 = 0b0001;
|
||||
pub const SOURCE_ADDR16: u8 = 0b0010;
|
||||
pub const SOURCE_ADDR8: u8 = 0b0011;
|
||||
pub const SOURCE_REG: u8 = 0b0100;
|
||||
pub const SOURCE_IMM64: u8 = 0b0101;
|
||||
pub const SOURCE_IMM32: u8 = 0b0110;
|
||||
pub const SOURCE_IMM16: u8 = 0b0111;
|
||||
pub const SOURCE_IMM8: u8 = 0b1000;
|
||||
|
||||
236
src/vm/mem.rs
236
src/vm/mem.rs
@@ -1,81 +1,219 @@
|
||||
use crate::vm::{error::*, reg::*};
|
||||
use byteorder::{ReadBytesExt, LE};
|
||||
use std::{
|
||||
io::Cursor,
|
||||
ops::{Deref, DerefMut},
|
||||
};
|
||||
use crate::vm::{common::*, error::*, inst::*, reg::*};
|
||||
use std::{convert::TryInto, ops::Index, mem};
|
||||
|
||||
const R1_MASK: u16 = 0b1111_1100_0000_0000;
|
||||
const R2_MASK: u16 = 0b0000_0011_1111_0000;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MemCursor<'mem> {
|
||||
cursor: Cursor<&'mem [u8]>,
|
||||
pub struct MemCursor<T> {
|
||||
pos: Addr,
|
||||
mem: T,
|
||||
}
|
||||
|
||||
impl<'mem> MemCursor<'mem> {
|
||||
pub fn new(mem: &'mem [u8]) -> Self {
|
||||
MemCursor {
|
||||
cursor: Cursor::new(mem),
|
||||
}
|
||||
impl<T> MemCursor<T>
|
||||
where T: AsRef<[u8]>
|
||||
{
|
||||
pub fn new(mem: T) -> Self {
|
||||
MemCursor { pos: Addr(0), mem }
|
||||
}
|
||||
|
||||
pub fn cursor(&self) -> &Cursor<&'mem [u8]> {
|
||||
&self.cursor
|
||||
pub fn position(&self) -> Addr {
|
||||
self.pos
|
||||
}
|
||||
|
||||
pub fn cursor_mut(&mut self) -> &mut Cursor<&'mem [u8]> {
|
||||
&mut self.cursor
|
||||
pub fn set_position<P: Into<Addr>>(&mut self, position: P) {
|
||||
self.pos = position.into();
|
||||
}
|
||||
|
||||
pub fn next_u8_unchecked(&mut self) -> u8 {
|
||||
let val = self[self.pos];
|
||||
self.pos += 1u64;
|
||||
val
|
||||
}
|
||||
|
||||
pub fn next_u8(&mut self) -> Result<u8> {
|
||||
self.read_u8().map_err(|_| VmError::MemOutOfBounds {
|
||||
addr: self.position(),
|
||||
})
|
||||
self.check_addr(self.pos)
|
||||
.map(|_| self.next_u8_unchecked())
|
||||
}
|
||||
|
||||
pub fn next_u16_unchecked(&mut self) -> u16 {
|
||||
let (int_bytes, _) = self.mem.as_ref()
|
||||
.split_at(mem::size_of::<u16>());
|
||||
let val = u16::from_le_bytes(int_bytes.try_into().unwrap());
|
||||
self.pos += 2u64;
|
||||
val
|
||||
}
|
||||
|
||||
pub fn next_u16(&mut self) -> Result<u16> {
|
||||
self.read_u16::<LE>().map_err(|_| VmError::MemOutOfBounds {
|
||||
addr: self.position(),
|
||||
})
|
||||
self.check_addr(self.pos)
|
||||
.map(|_| self.next_u16_unchecked())
|
||||
}
|
||||
|
||||
pub fn next_u32_unchecked(&mut self) -> u32 {
|
||||
let (int_bytes, _) = self.mem.as_ref()
|
||||
.split_at(mem::size_of::<u32>());
|
||||
let val = u32::from_le_bytes(int_bytes.try_into().unwrap());
|
||||
self.pos += 4u64;
|
||||
val
|
||||
}
|
||||
|
||||
pub fn next_u32(&mut self) -> Result<u32> {
|
||||
self.read_u32::<LE>().map_err(|_| VmError::MemOutOfBounds {
|
||||
addr: self.position(),
|
||||
})
|
||||
self.check_addr(self.pos)
|
||||
.map(|_| self.next_u32_unchecked())
|
||||
}
|
||||
|
||||
pub fn next_u64_unchecked(&mut self) -> u64 {
|
||||
let (int_bytes, _) = self.mem.as_ref()
|
||||
.split_at(mem::size_of::<u64>());
|
||||
let val = u64::from_le_bytes(int_bytes.try_into().unwrap());
|
||||
self.pos += 8u64;
|
||||
val
|
||||
}
|
||||
|
||||
pub fn next_u64(&mut self) -> Result<u64> {
|
||||
self.read_u64::<LE>().map_err(|_| VmError::MemOutOfBounds {
|
||||
addr: self.position(),
|
||||
})
|
||||
self.check_addr(self.pos)
|
||||
.map(|_| self.next_u64_unchecked())
|
||||
}
|
||||
|
||||
pub fn next_regs(&mut self) -> Result<(Reg, Reg)> {
|
||||
let next16 = self.next_u16()?;
|
||||
let r1 = ((R1_MASK & next16) >> 10) as Reg;
|
||||
let r2 = ((R2_MASK & next16) >> 4) as Reg;
|
||||
Ok((r1, r2))
|
||||
pub fn next_addr(&mut self) -> Result<Addr> {
|
||||
self.check_addr(self.pos)
|
||||
.map(|_| self.next_addr_unchecked())
|
||||
}
|
||||
|
||||
pub fn next_reg(&mut self) -> Result<Reg> {
|
||||
let next16 = self.next_u16()?;
|
||||
let r1 = ((R1_MASK & next16) >> 10) as Reg;
|
||||
Ok(r1)
|
||||
pub fn next_addr_unchecked(&mut self) -> Addr {
|
||||
Addr(self.next_u64_unchecked())
|
||||
}
|
||||
|
||||
pub fn next_inst(&mut self) -> Result<Inst> {
|
||||
let op = self.next_u16()?;
|
||||
|
||||
macro_rules! dest_source {
|
||||
($variant:ident) => {{
|
||||
let (d, s) = self.next_dest_source()?;
|
||||
Ok(Inst::$variant(d, s))
|
||||
}};
|
||||
}
|
||||
macro_rules! source_source {
|
||||
($variant:ident) => {{
|
||||
let (s1, s2) = self.next_source_source()?;
|
||||
Ok(Inst::$variant(s1, s2))
|
||||
}};
|
||||
}
|
||||
macro_rules! source {
|
||||
($variant:ident) => {{
|
||||
let spec = (self.next_u8()? & 0xF0) >> 4;
|
||||
let source = self.next_source(spec)?;
|
||||
Ok(Inst::$variant(source))
|
||||
}};
|
||||
}
|
||||
match op {
|
||||
ADD => dest_source!(Add),
|
||||
SUB => dest_source!(Sub),
|
||||
MUL => dest_source!(Mul),
|
||||
DIV => dest_source!(Div),
|
||||
MOD => dest_source!(Mod),
|
||||
AND => dest_source!(And),
|
||||
OR => dest_source!(Or),
|
||||
XOR => dest_source!(Xor),
|
||||
SHL => dest_source!(Shl),
|
||||
SHR => dest_source!(Shr),
|
||||
INEG => dest_source!(INeg),
|
||||
INV => dest_source!(Inv),
|
||||
NOT => dest_source!(Not),
|
||||
CMPEQ => source_source!(CmpEq),
|
||||
CMPLT => source_source!(CmpLt),
|
||||
JMP => source!(Jmp),
|
||||
JZ => source!(Jz),
|
||||
JNZ => source!(Jnz),
|
||||
MOV => dest_source!(Mov),
|
||||
HALT => Ok(Inst::Halt),
|
||||
NOP => Ok(Inst::Nop),
|
||||
DUMP => Ok(Inst::Dump),
|
||||
_ => Err(VmError::IllegalOp { op }),
|
||||
}
|
||||
}
|
||||
|
||||
impl<'mem> Deref for MemCursor<'mem> {
|
||||
type Target = Cursor<&'mem [u8]>;
|
||||
/// Decodes two source operands described by a single spec byte: the high
/// nibble selects the first operand's kind, the low nibble the second's.
/// The first operand is decoded before the second.
fn next_source_source(&mut self) -> Result<(Source, Source)> {
    let packed = self.next_u8()?;
    let first = self.next_source(packed >> 4)?;
    let second = self.next_source(packed & 0x0F)?;
    Ok((first, second))
}
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.cursor()
|
||||
/// Decodes a destination followed by a source, both described by a single
/// spec byte: the high nibble selects the destination kind, the low nibble
/// the source kind. The destination is decoded first.
fn next_dest_source(&mut self) -> Result<(Dest, Source)> {
    let packed = self.next_u8()?;
    let dest = self.next_dest(packed >> 4)?;
    let source = self.next_source(packed & 0x0F)?;
    Ok((dest, source))
}
|
||||
|
||||
fn next_dest(&mut self, spec: u8) -> Result<Dest> {
|
||||
match spec {
|
||||
DEST_ADDR64 => Ok(Dest::Addr64(self.next_addr()?)),
|
||||
DEST_ADDR32 => Ok(Dest::Addr32(self.next_addr()?)),
|
||||
DEST_ADDR16 => Ok(Dest::Addr16(self.next_addr()?)),
|
||||
DEST_ADDR8 => Ok(Dest::Addr8(self.next_addr()?)),
|
||||
DEST_REG => Ok(Dest::Reg(self.next_reg()?)),
|
||||
_ => Err(VmError::IllegalDestSpec { spec }),
|
||||
}
|
||||
}
|
||||
|
||||
impl<'mem> DerefMut for MemCursor<'mem> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
self.cursor_mut()
|
||||
fn next_source(&mut self, spec: u8) -> Result<Source> {
|
||||
match spec {
|
||||
SOURCE_ADDR64 => Ok(Source::Addr64(self.next_addr()?)),
|
||||
SOURCE_ADDR32 => Ok(Source::Addr32(self.next_addr()?)),
|
||||
SOURCE_ADDR16 => Ok(Source::Addr16(self.next_addr()?)),
|
||||
SOURCE_ADDR8 => Ok(Source::Addr8(self.next_addr()?)),
|
||||
SOURCE_REG => Ok(Source::Reg(self.next_reg()?)),
|
||||
SOURCE_IMM64 => Ok(Source::Imm(self.next_u64()?)),
|
||||
SOURCE_IMM32 => Ok(Source::Imm(self.next_u32()? as u64)),
|
||||
SOURCE_IMM16 => Ok(Source::Imm(self.next_u16()? as u64)),
|
||||
SOURCE_IMM8 => Ok(Source::Imm(self.next_u8()? as u64)),
|
||||
_ => Err(VmError::IllegalSourceSpec { spec }),
|
||||
}
|
||||
}
|
||||
|
||||
fn next_reg(&mut self) -> Result<Reg> {
|
||||
let reg = self.next_u8()?;
|
||||
if (reg as usize) >= NUM_REGS {
|
||||
Err(VmError::IllegalReg { reg })
|
||||
} else {
|
||||
Ok(reg)
|
||||
}
|
||||
}
|
||||
|
||||
fn check_addr(&self, addr: Addr) -> Result<()> {
|
||||
if addr > self.mem.as_ref().len() {
|
||||
Err(VmError::MemOutOfBounds { addr })
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Index impl
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
impl<T: AsRef<[u8]>> Index<usize> for MemCursor<T> {
|
||||
type Output = u8;
|
||||
|
||||
fn index(&self, addr: usize) -> &Self::Output {
|
||||
self.mem.as_ref().index(addr)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]>> Index<u64> for MemCursor<T> {
|
||||
type Output = u8;
|
||||
|
||||
fn index(&self, addr: u64) -> &Self::Output {
|
||||
self.index(addr as usize)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<[u8]>> Index<Addr> for MemCursor<T> {
|
||||
type Output = u8;
|
||||
|
||||
fn index(&self, addr: Addr) -> &Self::Output {
|
||||
self.index(addr.0)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
pub mod disassemble;
|
||||
pub mod common;
|
||||
pub mod error;
|
||||
pub mod flags;
|
||||
pub mod inst;
|
||||
pub mod mem;
|
||||
pub mod obj;
|
||||
pub mod reg;
|
||||
mod tick;
|
||||
pub mod visit;
|
||||
pub mod vm;
|
||||
pub mod state;
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
use snafu::Snafu;
|
||||
use std::{fmt::Debug, io};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum ParseError {
|
||||
#[snafu(display("IO error: {}", source))]
|
||||
Io { source: io::Error },
|
||||
|
||||
#[snafu(display("wrong magic number"))]
|
||||
WrongMagic,
|
||||
|
||||
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
|
||||
UnknownSectionKind { kind: u8 },
|
||||
|
||||
#[snafu(display("invalid UTF-8 string: {}", source))]
|
||||
InvalidUtf8String { source: std::string::FromUtf8Error },
|
||||
|
||||
#[snafu(display("duplicate symbol name: {}", name))]
|
||||
DuplicateName { name: String },
|
||||
|
||||
#[snafu(display("duplicate exported symbol name: {}", name))]
|
||||
DuplicateExportName { name: String },
|
||||
}
|
||||
|
||||
macro_rules! into_parse_error {
|
||||
(
|
||||
$($type:ty : $variant:ident),* $(,)?
|
||||
) => {
|
||||
$(
|
||||
impl From<$type> for ParseError {
|
||||
fn from(other: $type) -> Self {
|
||||
ParseError::$variant { source: other }
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
into_parse_error! {
|
||||
io::Error: Io,
|
||||
std::string::FromUtf8Error: InvalidUtf8String,
|
||||
}
|
||||
|
||||
pub type Result<T, E = ParseError> = std::result::Result<T, E>;
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
pub mod assemble;
|
||||
pub mod error;
|
||||
pub mod obj;
|
||||
pub mod syn;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::vm::obj::syn::error::{ParseError, Result};
|
||||
use crate::vm::obj::error::{ParseError, Result};
|
||||
use byteorder::{ReadBytesExt, LE};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
|
||||
@@ -1,165 +1,82 @@
|
||||
use crate::vm::{inst::*, reg::Reg};
|
||||
use crate::vm::reg::Reg;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum SectionBlock {
|
||||
Data {
|
||||
org: SectionOrg,
|
||||
body: Vec<Line>,
|
||||
},
|
||||
Code {
|
||||
org: SectionOrg,
|
||||
body: Vec<Line>,
|
||||
},
|
||||
Meta {
|
||||
entries: Vec<(String, ImmValue)>,
|
||||
},
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SectionDef {
|
||||
Meta(MetaSection),
|
||||
Data(DataSection),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MetaSection {
|
||||
pub values: Vec<MetaLine>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MetaLine {
|
||||
pub name: String,
|
||||
pub value: Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DataSection {
|
||||
pub name: String,
|
||||
pub org: Option<SectionOrg>,
|
||||
pub lines: Vec<DataLine>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SectionOrg {
|
||||
Start(u64),
|
||||
Range(u64, u64),
|
||||
StartEnd(u64, u64),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Line {
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum DataLine {
|
||||
ValueDef(ValueDef),
|
||||
Inst(Inst),
|
||||
LabelDef(String),
|
||||
ValueDecl(ValueDecl),
|
||||
Export(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ImmValue {
|
||||
Number(u64),
|
||||
Label(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ValueDecl {
|
||||
U64(u64),
|
||||
U32(u64),
|
||||
U16(u64),
|
||||
U8(u64),
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ValueDef {
|
||||
Int(u64),
|
||||
String(String),
|
||||
ZString(String),
|
||||
}
|
||||
|
||||
impl ValueDecl {
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
ValueDecl::U64(_) => 8,
|
||||
ValueDecl::U32(_) => 4,
|
||||
ValueDecl::U16(_) => 2,
|
||||
ValueDecl::U8(_) => 1,
|
||||
ValueDecl::String(s) => s.as_bytes().len() + 8,
|
||||
ValueDecl::ZString(s) => s.as_bytes().len() + 1,
|
||||
}
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Value {
|
||||
Int(u64),
|
||||
Reg(Reg),
|
||||
Name(String),
|
||||
Here,
|
||||
//Array(Vec<Value>),
|
||||
//Deref(Value, Size
|
||||
}
|
||||
|
||||
pub fn to_bytes(&self) -> Vec<u8> {
|
||||
let len = self.len();
|
||||
let bytes = match self {
|
||||
ValueDecl::U64(v) => v.to_le_bytes().to_vec(),
|
||||
ValueDecl::U32(v) => v.to_le_bytes()[0..4].to_vec(),
|
||||
ValueDecl::U16(v) => v.to_le_bytes()[0..2].to_vec(),
|
||||
ValueDecl::U8(v) => vec![(v & 0xff) as u8],
|
||||
ValueDecl::String(s) => {
|
||||
let mut bytes = Vec::with_capacity(self.len());
|
||||
bytes.extend(&(s.len() as u64).to_le_bytes());
|
||||
bytes.extend(s.as_bytes());
|
||||
bytes
|
||||
}
|
||||
ValueDecl::ZString(s) => {
|
||||
let mut bytes = Vec::with_capacity(self.len());
|
||||
bytes.extend(s.as_bytes());
|
||||
bytes.push(0);
|
||||
bytes
|
||||
}
|
||||
};
|
||||
assert_eq!(bytes.len(), len);
|
||||
bytes
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Inst {
|
||||
Add(Reg, Reg),
|
||||
Mul(Reg, Reg),
|
||||
Div(Reg, Reg),
|
||||
Mod(Reg, Reg),
|
||||
INeg(Reg),
|
||||
And(Reg, Reg),
|
||||
Or(Reg, Reg),
|
||||
Inv(Reg),
|
||||
Not(Reg),
|
||||
Xor(Reg, Reg),
|
||||
Shl(Reg, Reg),
|
||||
Shr(Reg, Reg),
|
||||
|
||||
CmpEq(Reg, Reg),
|
||||
CmpLt(Reg, Reg),
|
||||
Jmp(Reg),
|
||||
Jz(Reg),
|
||||
Jnz(Reg),
|
||||
|
||||
Load(Reg, Reg),
|
||||
Store(Reg, Reg),
|
||||
StoreImm(Reg, ImmValue),
|
||||
StoreImm32(Reg, ImmValue),
|
||||
StoreImm64(Reg, ImmValue),
|
||||
MemCopy(Reg, Reg),
|
||||
RegCopy(Reg, Reg),
|
||||
|
||||
Nop,
|
||||
Add(Value, Value),
|
||||
Sub(Value, Value),
|
||||
Mul(Value, Value),
|
||||
Div(Value, Value),
|
||||
Mod(Value, Value),
|
||||
And(Value, Value),
|
||||
Or(Value, Value),
|
||||
Xor(Value, Value),
|
||||
Shl(Value, Value),
|
||||
Shr(Value, Value),
|
||||
INeg(Value, Value),
|
||||
Inv(Value, Value),
|
||||
Not(Value, Value),
|
||||
CmpEq(Value, Value),
|
||||
CmpLt(Value, Value),
|
||||
Jmp(Value),
|
||||
Jz(Value),
|
||||
Jnz(Value),
|
||||
Mov(Value, Value),
|
||||
Halt,
|
||||
}
|
||||
|
||||
impl Inst {
|
||||
pub fn op(&self) -> InstOp {
|
||||
match self {
|
||||
Inst::Add(_, _) => ADD,
|
||||
Inst::Mul(_, _) => MUL,
|
||||
Inst::Div(_, _) => DIV,
|
||||
Inst::Mod(_, _) => MOD,
|
||||
Inst::INeg(_) => INEG,
|
||||
Inst::And(_, _) => AND,
|
||||
Inst::Or(_, _) => OR,
|
||||
Inst::Inv(_) => INV,
|
||||
Inst::Not(_) => NOT,
|
||||
Inst::Xor(_, _) => XOR,
|
||||
Inst::Shl(_, _) => SHL,
|
||||
Inst::Shr(_, _) => SHR,
|
||||
|
||||
Inst::CmpEq(_, _) => CMPEQ,
|
||||
Inst::CmpLt(_, _) => CMPLT,
|
||||
Inst::Jmp(_) => JMP,
|
||||
Inst::Jz(_) => JZ,
|
||||
Inst::Jnz(_) => JNZ,
|
||||
|
||||
Inst::Load(_, _) => LOAD,
|
||||
Inst::Store(_, _) => STORE,
|
||||
Inst::StoreImm(_, imm) => {
|
||||
if let ImmValue::Number(imm) = imm {
|
||||
if *imm > (u32::max_value() as u64) {
|
||||
STOREIMM64
|
||||
} else {
|
||||
STOREIMM32
|
||||
}
|
||||
} else {
|
||||
STOREIMM64
|
||||
}
|
||||
}
|
||||
Inst::StoreImm32(_, _) => STOREIMM32,
|
||||
Inst::StoreImm64(_, _) => STOREIMM64,
|
||||
Inst::MemCopy(_, _) => MEMCOPY,
|
||||
Inst::RegCopy(_, _) => REGCOPY,
|
||||
|
||||
Inst::Nop => NOP,
|
||||
Inst::Halt => HALT,
|
||||
}
|
||||
}
|
||||
pub fn len(&self) -> usize {
|
||||
inst_len(self.op())
|
||||
}
|
||||
Nop,
|
||||
Dump,
|
||||
}
|
||||
|
||||
@@ -1,44 +1,16 @@
|
||||
use snafu::Snafu;
|
||||
use std::{fmt::Debug, io};
|
||||
//use std::{fmt::Debug, io};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum ParseError {
|
||||
#[snafu(display("IO error: {}", source))]
|
||||
Io { source: io::Error },
|
||||
pub enum SyntaxError {
|
||||
//#[snafu(display("IO error: {}", source))]
|
||||
//Io { source: io::Error },
|
||||
|
||||
#[snafu(display("wrong magic number"))]
|
||||
WrongMagic,
|
||||
#[snafu(display("unexpected {}", what))]
|
||||
Unexpected { what: String },
|
||||
|
||||
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
|
||||
UnknownSectionKind { kind: u8 },
|
||||
|
||||
#[snafu(display("invalid UTF-8 string: {}", source))]
|
||||
InvalidUtf8String { source: std::string::FromUtf8Error },
|
||||
|
||||
#[snafu(display("duplicate symbol name: {}", name))]
|
||||
DuplicateName { name: String },
|
||||
|
||||
#[snafu(display("duplicate exported symbol name: {}", name))]
|
||||
DuplicateExportName { name: String },
|
||||
#[snafu(display("expected {}, but got {} instead", expected, got))]
|
||||
ExpectedGot { expected: String, got: String },
|
||||
}
|
||||
|
||||
macro_rules! into_parse_error {
|
||||
(
|
||||
$($type:ty : $variant:ident),* $(,)?
|
||||
) => {
|
||||
$(
|
||||
impl From<$type> for ParseError {
|
||||
fn from(other: $type) -> Self {
|
||||
ParseError::$variant { source: other }
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
into_parse_error! {
|
||||
io::Error: Io,
|
||||
std::string::FromUtf8Error: InvalidUtf8String,
|
||||
}
|
||||
|
||||
pub type Result<T, E = ParseError> = std::result::Result<T, E>;
|
||||
pub type Result<T, E = SyntaxError> = std::result::Result<T, E>;
|
||||
|
||||
50
src/vm/obj/syn/lexer.l
Normal file
50
src/vm/obj/syn/lexer.l
Normal file
@@ -0,0 +1,50 @@
|
||||
%%
|
||||
\$[0-9]+ "DEC_INT"
|
||||
\$0[Xx][0-9a-fA-F]+ "HEX_INT"
|
||||
\$0[Bb][01]+ "BIN_INT"
|
||||
\.meta "DIR_META"
|
||||
\.section "DIR_SECTION"
|
||||
\.export "DIR_EXPORT"
|
||||
\{ "LBRACE"
|
||||
\} "RBRACE"
|
||||
\.\. "DOTDOT"
|
||||
: "COLON"
|
||||
, "COMMA"
|
||||
\$\$ "BUCKBUCK"
|
||||
[iu](8|16|32|64) "INT_TYPE"
|
||||
\.[iu](8|16|32|64) "INT_DEF"
|
||||
\.string "STR_DEF"
|
||||
\.zstring "ZSTR_DEF"
"([^"\\]|\\[\\nt0"'])*" "STRING"
|
||||
add "ADD"
|
||||
sub "SUB"
|
||||
mul "MUL"
|
||||
div "DIV"
|
||||
mod "MOD"
|
||||
and "AND"
|
||||
or "OR"
|
||||
xor "XOR"
|
||||
shl "SHL"
|
||||
shr "SHR"
|
||||
ineg "INEG"
|
||||
inv "INV"
|
||||
not "NOT"
|
||||
cmpeq "CMPEQ"
|
||||
cmplt "CMPLT"
|
||||
jmp "JMP"
|
||||
jz "JZ"
|
||||
jnz "JNZ"
|
||||
mov "MOV"
|
||||
halt "HALT"
|
||||
nop "NOP"
|
||||
dump "DUMP"
|
||||
%ip "REG_IP"
|
||||
%sp "REG_SP"
|
||||
%fp "REG_FP"
|
||||
%flags "REG_FLAGS"
|
||||
%null "REG_NULL"
|
||||
%status "REG_STATUS"
|
||||
%r[0-9]{1,2} "REG_GENERAL"
|
||||
[a-zA-Z_][a-zA-Z0-9_]* "NAME"
|
||||
;[^\n]* ;
|
||||
[ \n\t]+ ;
|
||||
@@ -1,16 +1,16 @@
|
||||
use lalrpop_util::lalrpop_mod;
|
||||
|
||||
lalrpop_mod!(pub parser, "/vm/obj/syn/parser.rs");
|
||||
pub mod ast;
|
||||
pub mod error;
|
||||
|
||||
pub fn unescape_string(s: impl AsRef<str>) -> String {
|
||||
let s = s.as_ref();
|
||||
s.replace(r"\\", "\\")
|
||||
.replace("\\n", "\n")
|
||||
.replace("\\r", "\r")
|
||||
.replace("\\t", "\t")
|
||||
.replace("\\t", "\t")
|
||||
.replace("\\0", "\0")
|
||||
.replace("\\\"", "\"")
|
||||
use lrlex::lrlex_mod;
|
||||
use lrpar::lrpar_mod;
|
||||
|
||||
lrlex_mod!("vm/obj/syn/lexer.l");
|
||||
lrpar_mod!("vm/obj/syn/parser.y");
|
||||
|
||||
pub mod parser {
|
||||
pub use super::parser_y::*;
|
||||
}
|
||||
|
||||
pub mod lexer {
|
||||
pub use super::lexer_l::*;
|
||||
}
|
||||
|
||||
@@ -1,20 +1,13 @@
|
||||
use std::str::FromStr;
|
||||
use crate::vm::{
|
||||
common::Addr,
|
||||
inst::*,
|
||||
obj::syn::{unescape_string, ast::*},
|
||||
reg::*,
|
||||
};
|
||||
|
||||
grammar;
|
||||
|
||||
LabelDef: String = {
|
||||
<Label> ":" => <>
|
||||
}
|
||||
|
||||
ImmValue: ImmValue = {
|
||||
<Label> => ImmValue::Label(<>),
|
||||
<Number> => ImmValue::Number(<>),
|
||||
}
|
||||
|
||||
Label: String = {
|
||||
r"[a-zA-Z_][a-zA-Z0-9_]*" => String::from(<>),
|
||||
}
|
||||
@@ -33,45 +26,13 @@ String: String = {
|
||||
}
|
||||
|
||||
Reg: Reg = {
|
||||
r"%ip" => IP,
|
||||
r"%sp" => SP,
|
||||
r"%fp" => FP,
|
||||
r"%flags" => FLAGS,
|
||||
r"%null" => NULL,
|
||||
r"%status" => STATUS,
|
||||
r"%r[0-9]{2}" => {
|
||||
let offset = (&<>[2..]).parse::<u8>().unwrap();
|
||||
let reg = R00 + offset;
|
||||
assert!(reg < LAST_REG, "invalid register");
|
||||
reg
|
||||
}
|
||||
}
|
||||
|
||||
Inst: Inst = {
|
||||
"add" <d:Reg> "," <s:Reg> => Inst::Add(d, s),
|
||||
"mul" <d:Reg> "," <s:Reg> => Inst::Mul(d, s),
|
||||
"div" <d:Reg> "," <s:Reg> =>Inst::Div(d, s),
|
||||
"mod" <d:Reg> "," <s:Reg> => Inst::Mod(d, s),
|
||||
"ineg" <d:Reg> => Inst::INeg(d),
|
||||
"and" <d:Reg> "," <s:Reg> => Inst::And(d, s),
|
||||
"or" <d:Reg> "," <s:Reg> => Inst::Or(d, s),
|
||||
"xor" <d:Reg> "," <s:Reg> => Inst::Xor(d, s),
|
||||
"shl" <d:Reg> "," <s:Reg> => Inst::Shl(d, s),
|
||||
"shr" <d:Reg> "," <s:Reg> => Inst::Shr(d, s),
|
||||
"cmpeq" <d:Reg> "," <s:Reg> => Inst::CmpEq(d, s),
|
||||
"cmplt" <d:Reg> "," <s:Reg> => Inst::CmpLt(d, s),
|
||||
"jmp" <d:Reg> => Inst::Jmp(d),
|
||||
"jz" <d:Reg> => Inst::Jz(d),
|
||||
"jnz" <d:Reg> => Inst::Jnz(d),
|
||||
"load" <d:Reg> "," <s:Reg> => Inst::Load(d, s),
|
||||
"store" <d:Reg> "," <s:Reg> => Inst::Store(d, s),
|
||||
"storeimm" <d:Reg> "," <s:ImmValue> => Inst::StoreImm(d, s),
|
||||
"storeimm32" <d:Reg> "," <s:ImmValue> => Inst::StoreImm32(d, s),
|
||||
"storeimm64" <d:Reg> "," <s:ImmValue> => Inst::StoreImm64(d, s),
|
||||
"memcopy" <d:Reg> "," <s:Reg> => Inst::MemCopy(d, s),
|
||||
"regcopy" <d:Reg> "," <s:Reg> => Inst::RegCopy(d, s),
|
||||
"nop" => Inst::Nop,
|
||||
"halt" => Inst::Halt,
|
||||
r"%ip" => todo!(),
|
||||
r"%sp" => todo!(),
|
||||
r"%fp" => todo!(),
|
||||
r"%flags" => todo!(),
|
||||
r"%nil" => todo!(),
|
||||
r"%status" => todo!(),
|
||||
r"%r[0-9]{1,2}" => todo!(),
|
||||
}
|
||||
|
||||
ValueDecl: ValueDecl = {
|
||||
@@ -83,34 +44,13 @@ ValueDecl: ValueDecl = {
|
||||
r"\.zstring" <String> => ValueDecl::ZString(<>),
|
||||
}
|
||||
|
||||
Line: Line = {
|
||||
<Inst> => Line::Inst(<>),
|
||||
<LabelDef> => Line::LabelDef(<>),
|
||||
<ValueDecl> => Line::ValueDecl(<>),
|
||||
r"\.export" <Label> => Line::Export(<>),
|
||||
}
|
||||
|
||||
MetaLine: (String, ImmValue) = {
|
||||
<name:Label> ":" <value:ImmValue> => (name, value),
|
||||
}
|
||||
|
||||
SectionOrg: SectionOrg = {
|
||||
<start:Number> => SectionOrg::Start(start),
|
||||
<start:Number> r"\.\." <end:Number> => SectionOrg::Range(start, end),
|
||||
}
|
||||
|
||||
Section: SectionBlock = {
|
||||
"data" <org:SectionOrg> "{" <body:Line*> "}" => {
|
||||
SectionBlock::Data { org, body }
|
||||
},
|
||||
|
||||
"code" <org:SectionOrg> "{" <body:Line*> "}" => {
|
||||
SectionBlock::Code { org, body }
|
||||
},
|
||||
|
||||
"meta" "{" <entries:MetaLine*> "}" => {
|
||||
SectionBlock::Meta { entries, }
|
||||
}
|
||||
}
|
||||
|
||||
pub Sections: Vec<SectionBlock> = {
|
||||
|
||||
204
src/vm/obj/syn/parser.y
Normal file
204
src/vm/obj/syn/parser.y
Normal file
@@ -0,0 +1,204 @@
|
||||
%start SectionDefs
|
||||
%%
|
||||
|
||||
SectionDefs -> Vec<SectionDef>:
|
||||
SectionDefs SectionDef { $1.push($2); $1 }
|
||||
| { Vec::new() }
|
||||
;
|
||||
|
||||
SectionDef -> SectionDef:
|
||||
'DIR_META' MetaBlock { SectionDef::Meta(MetaSection { values: $2 }) }
|
||||
| 'DIR_SECTION' Name MaybeSectionOrg DataBlock {
|
||||
SectionDef::Data(DataSection {
|
||||
name: $2,
|
||||
org: $3,
|
||||
lines: $4,
|
||||
})
|
||||
}
|
||||
;
|
||||
|
||||
MetaBlock -> Vec<MetaLine>: 'LBRACE' MetaLines 'RBRACE' { $2 };
|
||||
|
||||
MetaLines -> Vec<MetaLine>:
|
||||
MetaLines MetaLine { $1.push($2); $1 }
|
||||
| { Vec::new() }
|
||||
;
|
||||
|
||||
|
||||
MetaLine -> MetaLine: Name 'COLON' Value { MetaLine { name: $1, value: $3 } };
|
||||
|
||||
MaybeSectionOrg -> Option<SectionOrg>:
|
||||
SectionOrg { Some($1) }
|
||||
| { None }
|
||||
;
|
||||
|
||||
SectionOrg -> SectionOrg:
|
||||
Int { SectionOrg::Start($1) }
|
||||
| Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) }
|
||||
;
|
||||
|
||||
DataBlock -> Vec<DataLine>: 'LBRACE' DataLines 'RBRACE' { $2 };
|
||||
|
||||
DataLines -> Vec<DataLine>:
|
||||
DataLines DataLine { $1.push($2); $1 }
|
||||
| { Vec::new() }
|
||||
;
|
||||
|
||||
DataLine -> DataLine:
|
||||
ValueDef { DataLine::ValueDef($1) }
|
||||
| Inst { DataLine::Inst($1) }
|
||||
| 'DIR_EXPORT' Name { DataLine::Export($2) }
|
||||
| Name 'COLON' { DataLine::Label($1) }
|
||||
;
|
||||
|
||||
ValueDef -> ValueDef:
|
||||
'INT_DEF' Int { ValueDef::Int($2) }
|
||||
| 'STR_DEF' String { ValueDef::String($2) }
|
||||
| 'ZSTR_DEF' String { ValueDef::ZString($2) }
|
||||
;
|
||||
|
||||
Value -> Value:
|
||||
Int { Value::Int($1) }
|
||||
| Reg { Value::Reg($1) }
|
||||
| Name { Value::Name($1) }
|
||||
| 'BUCKBUCK' { Value::Here }
|
||||
;
|
||||
|
||||
Inst -> Inst:
|
||||
'ADD' Value 'COMMA' Value { Inst::Add($2, $4) }
|
||||
| 'SUB' Value 'COMMA' Value { Inst::Sub($2, $4) }
|
||||
| 'MUL' Value 'COMMA' Value { Inst::Mul($2, $4) }
|
||||
| 'DIV' Value 'COMMA' Value { Inst::Div($2, $4) }
|
||||
| 'MOD' Value 'COMMA' Value { Inst::Mod($2, $4) }
|
||||
| 'AND' Value 'COMMA' Value { Inst::And($2, $4) }
|
||||
| 'OR' Value 'COMMA' Value { Inst::Or($2, $4) }
|
||||
| 'XOR' Value 'COMMA' Value { Inst::Xor($2, $4) }
|
||||
| 'SHL' Value 'COMMA' Value { Inst::Shl($2, $4) }
|
||||
| 'SHR' Value 'COMMA' Value { Inst::Shr($2, $4) }
|
||||
| 'INEG' Value 'COMMA' Value { Inst::INeg($2, $4) }
|
||||
| 'INV' Value 'COMMA' Value { Inst::Inv($2, $4) }
|
||||
| 'NOT' Value 'COMMA' Value { Inst::Not($2, $4) }
|
||||
| 'CMPEQ' Value 'COMMA' Value { Inst::CmpEq($2, $4) }
|
||||
| 'CMPLT' Value 'COMMA' Value { Inst::CmpLt($2, $4) }
|
||||
| 'JMP' Value { Inst::Jmp($2) }
|
||||
| 'JZ' Value { Inst::Jz($2) }
|
||||
| 'JNZ' Value { Inst::Jnz($2) }
|
||||
| 'MOV' Value 'COMMA' Value { Inst::Mov($2, $4) }
|
||||
| 'HALT' { Inst::Halt }
|
||||
| 'NOP' { Inst::Nop }
|
||||
| 'DUMP' { Inst::Dump }
|
||||
;
|
||||
|
||||
Name -> String:
    'NAME' {
        // A NAME token is taken verbatim: copy its source text out of the lexer.
        let lexeme = $1.expect("could not parse name");
        let text = $lexer.span_str(lexeme.span());
        text.to_string()
    }
  ;
|
||||
|
||||
Int -> u64:
    'DEC_INT' {
        let span = $1.expect("could not parse dec_int").span();
        // Skip the first byte of the token before parsing the digits.
        // NOTE(review): presumably a one-character sigil in front of the number; confirm
        // against the DEC_INT rule in lexer.l.
        let s = &$lexer.span_str(span)[1..];
        // Panic with a description instead of a bare unwrap: this text comes from user input.
        s.parse().expect("invalid or out-of-range decimal integer literal")
    }
  | 'HEX_INT' {
        let span = $1.expect("could not parse hex_int").span();
        // Skip the first three bytes of the token.
        // NOTE(review): presumably the sigil plus a two-character radix prefix; confirm
        // against the HEX_INT rule in lexer.l.
        let s = &$lexer.span_str(span)[3..];
        u64::from_str_radix(s, 16).expect("invalid or out-of-range hexadecimal integer literal")
    }
  | 'BIN_INT' {
        let span = $1.expect("could not parse bin_int").span();
        // Same three-byte prefix skip as the hexadecimal form.
        // NOTE(review): confirm against the BIN_INT rule in lexer.l.
        let s = &$lexer.span_str(span)[3..];
        u64::from_str_radix(s, 2).expect("invalid or out-of-range binary integer literal")
    }
  ;
|
||||
|
||||
Reg -> Reg:
    'REG_IP' { IP }
  | 'REG_SP' { SP }
  | 'REG_FP' { FP }
  | 'REG_FLAGS' { FLAGS }
  | 'REG_NULL' { NULL }
  | 'REG_STATUS' { STATUS }
  | 'REG_GENERAL' {
        let v = $1.expect("could not parse reg");
        // parse_reg yields None for indices past R31, which the two-digit REG_GENERAL
        // pattern still lets through; report that instead of panicking on a bare unwrap.
        parse_reg($lexer.span_str(v.span()))
            .expect("general register index out of range (expected %r0 to %r31)")
    }
  ;
|
||||
|
||||
String -> String:
    'STRING' {
        // Hand the raw literal, delimiters included, to parse_string for unescaping.
        let lexeme = $1.expect("could not parse string");
        let literal = $lexer.span_str(lexeme.span());
        parse_string(literal)
    }
  ;
|
||||
%%
|
||||
|
||||
use crate::vm::{
|
||||
obj::syn::ast::*,
|
||||
reg::*,
|
||||
};
|
||||
|
||||
/// Converts a delimited string literal into the text it denotes.
///
/// `input` is the whole literal, including one delimiter byte at each end. The delimiters are
/// removed and the escapes `\\`, `\n`, `\t`, `\"`, `\'` and `\0` are replaced by the characters
/// they stand for; every other character is copied through unchanged.
///
/// # Panics
///
/// Panics if `input` is shorter than two bytes, if it ends in a lone backslash, or if it
/// contains an escape other than the ones listed above.
// NOTE(review): the panics assume the lexer has already validated the literal — confirm against
// the STRING rule in lexer.l.
fn parse_string(input: &str) -> String {
    // Strip exactly one delimiter from each end. The previous `len - 2` upper bound also cut
    // off the last character of the content and panicked on the empty literal `""`.
    let inner = &input[1..input.len() - 1];
    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        // A backslash always consumes the character that follows it.
        let escaped = chars
            .next()
            .expect("string literal ends in an unfinished escape sequence");
        out.push(match escaped {
            '\\' => '\\',
            'n' => '\n',
            't' => '\t',
            '"' => '"',
            '\'' => '\'',
            '0' => '\0',
            other => unreachable!("unsupported escape sequence `\\{}`", other),
        });
    }
    out
}
|
||||
|
||||
fn parse_reg(input: &str) -> Option<Reg> {
|
||||
use regex::Regex;
|
||||
use lazy_static::lazy_static;
|
||||
lazy_static! {
|
||||
static ref REG_RE: Regex = Regex::new(r"^%r([0-9]{1,2})$").unwrap();
|
||||
}
|
||||
let captures = REG_RE.captures(input)?;
|
||||
let reg_no: Reg = captures.get(1)?
|
||||
.as_str()
|
||||
.parse()
|
||||
.unwrap();
|
||||
let reg = R00 + reg_no;
|
||||
if reg > R31 {
|
||||
None
|
||||
} else {
|
||||
Some(reg)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::parse_reg;
    use crate::vm::reg::*;

    /// Checks accepted spellings, the upper register bound, and malformed names.
    #[test]
    fn test_parse_reg() {
        // One- and two-digit spellings name the same register.
        assert_eq!(parse_reg("%r0"), Some(R00));
        assert_eq!(parse_reg("%r00"), Some(R00));
        assert_eq!(parse_reg("%r1"), Some(R01));
        assert_eq!(parse_reg("%r01"), Some(R01));
        assert_eq!(parse_reg("%r9"), Some(R09));
        assert_eq!(parse_reg("%r12"), Some(R12));

        // R31 is the last register that is accepted.
        assert_eq!(parse_reg("%r31"), Some(R31));
        assert_eq!(parse_reg("%r32"), None);

        // Too few or too many digits.
        assert_eq!(parse_reg("%r"), None);
        assert_eq!(parse_reg("%r0000"), None);
    }
}
|
||||
@@ -20,9 +20,6 @@ macro_rules! registers {
|
||||
}
|
||||
|
||||
registers! {
|
||||
// https://crates.io/crates/packed_struct
|
||||
// TODO : check this muffugin shit out!!
|
||||
|
||||
// Instruction pointer
|
||||
IP = 0,
|
||||
|
||||
@@ -38,68 +35,42 @@ registers! {
|
||||
// Zero
|
||||
NULL = 4,
|
||||
|
||||
UNUSED01 = 5,
|
||||
UNUSED02 = 6,
|
||||
UNUSED03 = 7,
|
||||
UNUSED04 = 8,
|
||||
UNUSED05 = 9,
|
||||
UNUSED06 = 10,
|
||||
UNUSED07 = 11,
|
||||
UNUSED08 = 12,
|
||||
|
||||
// General status code
|
||||
STATUS = 13,
|
||||
STATUS = 5,
|
||||
|
||||
R00 = 14,
|
||||
R01 = 15,
|
||||
R02 = 16,
|
||||
R03 = 17,
|
||||
R04 = 18,
|
||||
R05 = 19,
|
||||
R06 = 20,
|
||||
R07 = 21,
|
||||
R08 = 22,
|
||||
R09 = 23,
|
||||
R10 = 24,
|
||||
R11 = 25,
|
||||
R12 = 26,
|
||||
R13 = 27,
|
||||
R14 = 28,
|
||||
R15 = 29,
|
||||
R16 = 30,
|
||||
R17 = 31,
|
||||
R18 = 32,
|
||||
R19 = 33,
|
||||
R20 = 34,
|
||||
R21 = 35,
|
||||
R22 = 36,
|
||||
R23 = 37,
|
||||
R24 = 38,
|
||||
R25 = 39,
|
||||
R26 = 40,
|
||||
R27 = 41,
|
||||
R28 = 42,
|
||||
R29 = 43,
|
||||
R30 = 44,
|
||||
R31 = 45,
|
||||
R32 = 46,
|
||||
R33 = 47,
|
||||
R34 = 48,
|
||||
R35 = 49,
|
||||
R36 = 50,
|
||||
R37 = 51,
|
||||
R38 = 52,
|
||||
R39 = 53,
|
||||
R40 = 54,
|
||||
R41 = 55,
|
||||
R42 = 56,
|
||||
R43 = 57,
|
||||
R44 = 58,
|
||||
R45 = 59,
|
||||
R46 = 60,
|
||||
R47 = 61,
|
||||
R48 = 62,
|
||||
R49 = 63,
|
||||
R00 = 6,
|
||||
R01 = 7,
|
||||
R02 = 8,
|
||||
R03 = 9,
|
||||
R04 = 10,
|
||||
R05 = 11,
|
||||
R06 = 12,
|
||||
R07 = 13,
|
||||
R08 = 14,
|
||||
R09 = 15,
|
||||
R10 = 16,
|
||||
R11 = 17,
|
||||
R12 = 18,
|
||||
R13 = 19,
|
||||
R14 = 20,
|
||||
R15 = 21,
|
||||
R16 = 22,
|
||||
R17 = 23,
|
||||
R18 = 24,
|
||||
R19 = 25,
|
||||
R20 = 26,
|
||||
R21 = 27,
|
||||
R22 = 28,
|
||||
R23 = 29,
|
||||
R24 = 30,
|
||||
R25 = 31,
|
||||
R26 = 32,
|
||||
R27 = 33,
|
||||
R28 = 34,
|
||||
R29 = 35,
|
||||
R30 = 36,
|
||||
R31 = 37,
|
||||
}
|
||||
|
||||
pub const LAST_REG: Reg = R49;
|
||||
pub const LAST_REG: Reg = 63;
|
||||
pub const NUM_REGS: usize = 64;
|
||||
|
||||
78
src/vm/state.rs
Normal file
78
src/vm/state.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
use crate::vm::{common::*, error::*, flags::*, mem::*, reg::*};
|
||||
|
||||
/// The mutable machine state: a register file plus a flat byte memory.
pub struct State {
    /// One 64-bit slot per register (`NUM_REGS` in total), indexed by register number.
    regs: [u64; NUM_REGS],
    /// Byte memory; `State::new` starts it out empty.
    mem: Vec<u8>,
}
|
||||
|
||||
impl State {
|
||||
pub fn new() -> Self {
|
||||
State {
|
||||
regs: [0; NUM_REGS],
|
||||
mem: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mem_cursor(&self, addr: Addr) -> MemCursor<&[u8]> {
|
||||
let mut cursor = MemCursor::new(self.mem.as_slice());
|
||||
cursor.set_position(addr);
|
||||
cursor
|
||||
}
|
||||
|
||||
pub fn run(&mut self) -> Result<u64> {
|
||||
|
||||
Ok(self.get_reg_unchecked(STATUS))
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Registers
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
pub fn get_reg_unchecked(&self, reg: Reg) -> u64 {
|
||||
self.regs[reg as usize]
|
||||
}
|
||||
|
||||
pub fn get_reg(&self, reg: Reg) -> Result<u64> {
|
||||
if (reg as usize) >= NUM_REGS {
|
||||
Err(VmError::IllegalReg { reg })
|
||||
} else {
|
||||
Ok(self.get_reg_unchecked(reg))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_reg_unchecked(&mut self, reg: Reg, value: u64) {
|
||||
self.regs[reg as usize] = value;
|
||||
}
|
||||
|
||||
pub fn set_reg(&mut self, reg: Reg, value: u64) -> Result<()> {
|
||||
if (reg as usize) >= NUM_REGS {
|
||||
Err(VmError::IllegalReg { reg })
|
||||
} else {
|
||||
Ok(self.set_reg_unchecked(reg, value))
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
pub fn flags(&self) -> Flags {
|
||||
// this is safe because it's OK if there are random bits flipped - this shouldn't happen
|
||||
// anyway, but if it does, they're ignored
|
||||
unsafe { Flags::from_bits_unchecked(self.get_reg_unchecked(FLAGS)) }
|
||||
}
|
||||
|
||||
pub fn insert_flags(&mut self, flags: Flags) {
|
||||
let mut new_flags = self.flags();
|
||||
new_flags.insert(flags);
|
||||
self.set_flags(new_flags);
|
||||
}
|
||||
|
||||
pub fn remove_flags(&mut self, flags: Flags) {
|
||||
let mut new_flags = self.flags();
|
||||
new_flags.remove(flags);
|
||||
self.set_flags(new_flags);
|
||||
}
|
||||
|
||||
pub fn set_flags(&mut self, flags: Flags) {
|
||||
self.set_reg_unchecked(FLAGS, flags.bits());
|
||||
}
|
||||
}
|
||||
208
src/vm/vm.rs
208
src/vm/vm.rs
@@ -1,208 +0,0 @@
|
||||
use crate::vm::{error::*, flags::*, inst::InstOp, mem::*, obj::obj::*, reg::*};
|
||||
use byteorder::{WriteBytesExt, LE};
|
||||
use std::{io::Cursor, mem};
|
||||
|
||||
/// A machine word: 64 bits.
pub type Word = u64;
/// Half of a machine word: 32 bits.
pub type HalfWord = u32;
/// The register file: 64 word-sized slots.
pub type Registers = [Word; 64];
/// A byte address into VM memory.
pub type Addr = u64;
|
||||
|
||||
/// The virtual machine: a flat byte memory plus a fixed register file.
pub struct Vm {
    /// Byte memory; replaced wholesale each time `load_object` runs.
    pub(super) mem: Vec<u8>,
    /// Register values, indexed by register number.
    pub(super) registers: Registers,
}
|
||||
|
||||
impl Vm {
|
||||
pub fn new() -> Self {
|
||||
Vm {
|
||||
mem: Default::default(),
|
||||
registers: [0; 64],
|
||||
}
|
||||
}
|
||||
|
||||
/// Loads an object into this VM, clearing out all previous memory and resetting the registers.
|
||||
pub fn load_object(&mut self, object: Object, max_mem: usize) -> Result<()> {
|
||||
self.registers = [0; 64];
|
||||
// determine memory spread
|
||||
let mem_size = object
|
||||
.sections
|
||||
.iter()
|
||||
.filter_map(|s| match s {
|
||||
Section::Data { end, .. } | Section::Code { end, .. } => Some(*end),
|
||||
Section::Meta { .. } => None,
|
||||
})
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
if mem_size > (max_mem as u64) {
|
||||
todo!("raise max memory error");
|
||||
}
|
||||
self.mem = vec![0; mem_size as usize];
|
||||
|
||||
let mut entry = 0;
|
||||
// write sections to memory
|
||||
for section in object.sections.into_iter() {
|
||||
match section {
|
||||
Section::Data {
|
||||
start,
|
||||
contents,
|
||||
..
|
||||
}
|
||||
| Section::Code {
|
||||
start,
|
||||
contents,
|
||||
..
|
||||
} => {
|
||||
let start = start as usize;
|
||||
for (value, dest) in contents.into_iter().zip(&mut self.mem[start..])
|
||||
{
|
||||
*dest = value;
|
||||
}
|
||||
}
|
||||
Section::Meta { entries } => {
|
||||
if let Some(e) = entries.get("entry") {
|
||||
// set the entry point
|
||||
entry = *e;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.set_reg(IP, entry);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn mem_cursor(&self, at: usize) -> MemCursor {
|
||||
let mut cursor = MemCursor::new(&self.mem);
|
||||
cursor.set_position(at as u64);
|
||||
cursor
|
||||
}
|
||||
|
||||
pub fn run(&mut self) -> Result<u64> {
|
||||
while !self.is_halted() {
|
||||
self.tick()?;
|
||||
}
|
||||
Ok(self.get_reg(STATUS))
|
||||
}
|
||||
|
||||
pub fn resume(&mut self) {
|
||||
self.remove_flags(Flags::HALT);
|
||||
}
|
||||
|
||||
pub fn is_halted(&self) -> bool {
|
||||
self.flags().contains(Flags::HALT)
|
||||
}
|
||||
|
||||
pub fn get_word(&self, addr: Addr) -> Result<Word> {
|
||||
self.check_read(addr, 8)?;
|
||||
Ok(self.mem_cursor(addr as usize).next_u64().unwrap())
|
||||
}
|
||||
|
||||
pub fn get_halfword(&self, addr: Addr) -> Result<HalfWord> {
|
||||
self.check_read(addr, 4)?;
|
||||
Ok(self.mem_cursor(addr as usize).next_u32().unwrap())
|
||||
}
|
||||
|
||||
pub fn get_inst_op(&self, addr: Addr) -> Result<InstOp> {
|
||||
self.check_read(addr, 2)?;
|
||||
Ok(self.mem_cursor(addr as usize).next_u16().unwrap())
|
||||
}
|
||||
|
||||
pub fn get_byte(&self, addr: Addr) -> Result<u8> {
|
||||
self.check_addr(addr)?;
|
||||
Ok(self.mem_cursor(addr as usize).next_u8().unwrap())
|
||||
}
|
||||
|
||||
pub fn set_word(&mut self, addr: Addr, value: Word) -> Result<()> {
|
||||
self.check_read(addr, 8)?;
|
||||
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
|
||||
Ok(cursor.write_u64::<LE>(value).unwrap())
|
||||
}
|
||||
|
||||
pub fn set_halfword(&mut self, addr: Addr, value: HalfWord) -> Result<()> {
|
||||
self.check_read(addr, 4)?;
|
||||
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
|
||||
Ok(cursor.write_u32::<LE>(value).unwrap())
|
||||
}
|
||||
|
||||
pub fn set_byte(&mut self, addr: Addr, value: u8) -> Result<()> {
|
||||
self.check_addr(addr)?;
|
||||
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
|
||||
Ok(cursor.write_u8(value).unwrap())
|
||||
}
|
||||
|
||||
pub fn load(&self, reg: Reg) -> Result<Word> {
|
||||
self.get_word(self.get_reg_checked(reg)?)
|
||||
}
|
||||
|
||||
pub fn store(&mut self, reg: Reg, value: Word) -> Result<()> {
|
||||
let addr = self.get_reg_checked(reg)?;
|
||||
self.set_word(addr, value)
|
||||
}
|
||||
|
||||
pub fn get_reg_checked(&self, reg: Reg) -> Result<Word> {
|
||||
self.check_reg(reg)?;
|
||||
Ok(self.get_reg(reg))
|
||||
}
|
||||
|
||||
pub fn get_reg(&self, reg: Reg) -> Word {
|
||||
self.registers[reg as usize]
|
||||
}
|
||||
|
||||
pub fn set_reg_checked(&mut self, reg: Reg, value: Word) -> Result<Word> {
|
||||
self.check_reg(reg)?;
|
||||
Ok(self.set_reg(reg, value))
|
||||
}
|
||||
|
||||
pub fn set_reg(&mut self, reg: Reg, value: Word) -> Word {
|
||||
if reg == NULL {
|
||||
return 0;
|
||||
}
|
||||
mem::replace(&mut self.registers[reg as usize], value)
|
||||
}
|
||||
|
||||
pub fn ip(&self) -> Word {
|
||||
self.get_reg(IP)
|
||||
}
|
||||
|
||||
pub fn flags(&self) -> Flags {
|
||||
// this is safe because it's OK if there are random bits flipped - this shouldn't happen
|
||||
// anyway, but if it does, they're ignored
|
||||
unsafe { Flags::from_bits_unchecked(self.get_reg(FLAGS)) }
|
||||
}
|
||||
|
||||
pub fn insert_flags(&mut self, flags: Flags) {
|
||||
let mut new_flags = self.flags();
|
||||
new_flags.insert(flags);
|
||||
self.set_flags(new_flags);
|
||||
}
|
||||
|
||||
pub fn remove_flags(&mut self, flags: Flags) {
|
||||
let mut new_flags = self.flags();
|
||||
new_flags.remove(flags);
|
||||
self.set_flags(new_flags);
|
||||
}
|
||||
|
||||
pub fn set_flags(&mut self, flags: Flags) {
|
||||
self.set_reg(FLAGS, flags.bits());
|
||||
}
|
||||
|
||||
fn check_addr(&self, addr: Addr) -> Result<()> {
|
||||
if addr >= (self.mem.len() as u64) {
|
||||
Err(VmError::MemOutOfBounds { addr })
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn check_read(&self, addr: Addr, len: Word) -> Result<()> {
|
||||
self.check_addr(addr)
|
||||
.and_then(|_| self.check_addr(addr + len - 1))
|
||||
}
|
||||
|
||||
fn check_reg(&self, reg: Reg) -> Result<()> {
|
||||
if reg > LAST_REG {
|
||||
Err(VmError::IllegalReg { reg })
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
443
vm.md
443
vm.md
@@ -4,7 +4,7 @@ This is an outline of the VM that drives this language.
|
||||
|
||||
# Primitives
|
||||
|
||||
* Numbers may be big endian (BE) or little endian (LE) at the byte level. This guide will use LE.
|
||||
* Numbers are little endian (LE) at the byte level.
|
||||
* Addresses point to single bytes.
|
||||
* Signed numbers use two's complement.
|
||||
|
||||
@@ -23,10 +23,10 @@ CPU registers are addressed by a value between 0-63 (6 bits). All registers are
|
||||
* SP - Stack pointer
|
||||
* FP - Frame pointer
|
||||
* FLAGS - CPU flags
|
||||
* NULL - Always zero for reading and will never change after writing.
|
||||
* (8 unused registers)
|
||||
* STATUS - Generic status code
|
||||
* R0-R49
|
||||
* NIL - Always reads as zero; values written to it are discarded.
|
||||
* R0-R31
|
||||
* (26 unused registers)
|
||||
|
||||
## CPU Flags
|
||||
|
||||
@@ -42,14 +42,75 @@ CPU flags are addressed by bit index, going from right to left.
|
||||
* Overwriting a register without its value being used
|
||||
* Mixing arithmetic with bit twiddling on the same target
|
||||
|
||||
## Register ideas
|
||||
|
||||
* Other possible names: Z, NIL
|
||||
|
||||
# Instructions
|
||||
|
||||
Instructions attempt to be as small as possible while conforming to 8-bit, 16-bit, 32-bit, or 64-bit
|
||||
alignment. All instructions have 16-bit opcodes.
|
||||
All instructions have 16-bit opcodes. Instructions fall into the following categories:
|
||||
|
||||
* Those whose operations require a source and a destination.
|
||||
* Those whose operations require two sources
|
||||
* The destination of these instructions is implied by the instruction itself; e.g. the `CMPEQ`
|
||||
instruction implicitly sets a bit in the `FLAGS` register.
|
||||
* Those whose operations require a source, but no destination.
|
||||
* Those whose operations require a destination, but no source.
|
||||
* There aren't any of these instructions yet
|
||||
* Those whose operations require neither a source nor a destination.
|
||||
|
||||
Destinations may be:
|
||||
|
||||
* A 64-bit address pointing at a 64-bit or 8-bit value
|
||||
* A 6-bit register
|
||||
|
||||
Sources may be one of:
|
||||
|
||||
* A 64-bit address pointing at a 64-bit or 8-bit value
|
||||
* A 6-bit register
|
||||
* A 64-bit immediate value
|
||||
|
||||
Counting all source and destination value sizes as their own configuration, there are:
|
||||
|
||||
* 3 possible destination types
|
||||
* 4 possible source types
|
||||
|
||||
Instructions have different layouts depending on whether their operations take a source and/or
|
||||
destination. For example, the `ADD` instruction takes a source and a destination, the `JMP`
|
||||
instruction takes a source, and the `NOP` instruction takes neither a source nor a destination.
|
||||
|
||||
For instructions that take neither a source nor a destination, they are simply 16 bits long and
|
||||
that's that. In all other instructions, the opcode is followed by a byte describing the source and/or
|
||||
destination.
|
||||
|
||||
An instruction that has a source and destination looks like this:
|
||||
|
||||
```
|
||||
| XXXXXXXX | XXXXXXXX | DDDDSSSS | ...source and destination |
|
||||
```
|
||||
|
||||
An instruction that has either a source or a destination (but not both) looks like this:
|
||||
|
||||
```
|
||||
| XXXXXXXX | XXXXXXXX | YYYY0000 | ...source or destination |
|
||||
```
|
||||
|
||||
An instruction that has neither a source nor a destination looks like this:
|
||||
|
||||
```
|
||||
| XXXXXXXX | XXXXXXXX |
|
||||
```
|
||||
|
||||
## Source/destination flags
|
||||
|
||||
| Bits | Source/destination |
|
||||
| - | - |
|
||||
| 0b0000 | Address (64 bit value) |
|
||||
| 0b0001 | Address (32 bit value) |
|
||||
| 0b0010 | Address (16 bit value) |
|
||||
| 0b0011 | Address (8 bit value) |
|
||||
| 0b0100 | 6-bit register |
|
||||
| 0b0101 | Immediate (64 bits, source only) |
|
||||
| 0b0110 | Immediate (32 bits, source only) |
|
||||
| 0b0111 | Immediate (16 bits, source only) |
|
||||
| 0b1000 | Immediate (8 bits, source only) |
|
||||
|
||||
|
||||
## Arithmetic
|
||||
|
||||
@@ -58,160 +119,43 @@ wrapping around to 0.
|
||||
|
||||
* Add
|
||||
* Opcode: 0x0000
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 + REG2`
|
||||
* Unsigned addition
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000000000 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Mul
|
||||
* Params: Destination, source
|
||||
* Sub
|
||||
* Opcode: 0x0001
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 * REG2`
|
||||
* Unsigned multiplication
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000000001 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Div
|
||||
* Params: Destination, source
|
||||
* Mul
|
||||
* Opcode: 0x0002
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 / REG2`
|
||||
* Unsigned division
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000000010 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Mod
|
||||
* Params: Destination, source
|
||||
* Div
|
||||
* Opcode: 0x0003
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 % REG2` (exact semantics TBD)
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000000011 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* INeg
|
||||
* Params: Destination, source
|
||||
* Mod
|
||||
* Opcode: 0x0004
|
||||
* **Params**: REG1
|
||||
* `REG1 = REG1 * -1`
|
||||
* Signed negative
|
||||
* ```
|
||||
32 16 10 0
|
||||
opcode reg1 unused
|
||||
/ / /
|
||||
+----------------------------------------+
|
||||
| 0000000000000100 | ...... | XXXXXXXXXX |
|
||||
+----------------------------------------+
|
||||
```
|
||||
* Params: Destination, source
|
||||
* And
|
||||
* Opcode: 0x0005
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 & REG2`
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000000101 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Params: Destination, source
|
||||
* Or
|
||||
* Opcode: 0x0006
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 | REG2`
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000000110 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Inv
|
||||
* Opcode: 0x0007
|
||||
* **Params**: REG1
|
||||
* `REG1 = ~REG1`
|
||||
* ```
|
||||
32 16 10 0
|
||||
opcode reg1 unused
|
||||
/ / /
|
||||
+----------------------------------------+
|
||||
| 0000000000000111 | ...... | XXXXXXXXXX |
|
||||
+----------------------------------------+
|
||||
```
|
||||
* Not
|
||||
* Opcode: 0x0008
|
||||
* **Params**: REG1
|
||||
* ```
|
||||
if REG1 == 0 {
|
||||
REG1 = 0;
|
||||
} else {
|
||||
REG1 = 1;
|
||||
}
|
||||
```
|
||||
* Boolean NOT; equivalent of C's `!` unary operator
|
||||
* ```
|
||||
32 16 10 0
|
||||
opcode reg1 unused
|
||||
/ / /
|
||||
+----------------------------------------+
|
||||
| 0000000000001000 | ...... | XXXXXXXXXX |
|
||||
+----------------------------------------+
|
||||
```
|
||||
* Params: Destination, source
|
||||
* Xor
|
||||
* Opcode: 0x0009
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 ^ REG2`
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000001001 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Opcode: 0x0007
|
||||
* Params: Destination, source
|
||||
* Shl
|
||||
* Opcode: 0x000A
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 << REG2`
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000001010 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Opcode: 0x0008
|
||||
* Params: Destination, source
|
||||
* Shr
|
||||
* Opcode: 0x000B
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG1 >> REG2`
|
||||
* Does not sign extend
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0000000000001011 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Opcode: 0x0009
|
||||
* Params: Destination, source
|
||||
* INeg
|
||||
* Opcode: 0x000a
|
||||
* Params: Destination, source
|
||||
* Inv
|
||||
* Opcode: 0x000b
|
||||
* Params: Destination, source
|
||||
* Not
|
||||
* Opcode: 0x000c
|
||||
* Params: Destination, source
|
||||
|
||||
### TODO
|
||||
|
||||
@@ -223,196 +167,33 @@ wrapping around to 0.
|
||||
|
||||
* CmpEq
|
||||
* Opcode: 0x1000
|
||||
* **Params**: REG1, REG2
|
||||
* ```
|
||||
if REG1 == REG2 {
|
||||
FLAGS[1] = 1;
|
||||
} else {
|
||||
FLAGS[1] = 0;
|
||||
}
|
||||
```
|
||||
* Sets the COMPARE flag to 1 if REG1 == REG2
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0001000000000000 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Params: Source, source
|
||||
* CmpLt
|
||||
* Opcode: 0x1001
|
||||
* **Params**: REG1, REG2
|
||||
* ```
|
||||
if REG1 < REG2 {
|
||||
FLAGS[1] = 1;
|
||||
} else {
|
||||
FLAGS[1] = 0;
|
||||
}
|
||||
```
|
||||
* Sets the COMPARE flag to 1 if REG1 < REG2
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0001000000000001 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Params: Source, source
|
||||
* Jmp
|
||||
* Opcode: 0x1100
|
||||
* **Params**: REG1
|
||||
* `IP = REG1;`
|
||||
* Jumps to the address in REG1 unconditionally.
|
||||
* ```
|
||||
32 16 10 0
|
||||
opcode reg1 unused
|
||||
/ / /
|
||||
+----------------------------------------+
|
||||
| 0001000100000000 | ...... | XXXXXXXXXX |
|
||||
+----------------------------------------+
|
||||
```
|
||||
|
||||
* Jz
|
||||
* Opcode: 0x1101
|
||||
* **Params**: REG1
|
||||
* ```
|
||||
if FLAGS[1] == 0 {
|
||||
IP = REG1;
|
||||
}
|
||||
```
|
||||
* Jumps to the address in REG1 if COMPARE flag is 0.
|
||||
* ```
|
||||
32 16 10 0
|
||||
opcode reg1 unused
|
||||
/ / /
|
||||
+----------------------------------------+
|
||||
| 0001000100000001 | ...... | XXXXXXXXXX |
|
||||
+----------------------------------------+
|
||||
```
|
||||
* Jnz
|
||||
* Opcode: 0x1002
|
||||
* **Params**: REG1
|
||||
* ```
|
||||
if FLAGS[1] != 0 {
|
||||
IP = REG1;
|
||||
}
|
||||
```
|
||||
* Jumps to the address in REG1 if COMPARE flag is 1.
|
||||
* ```
|
||||
32 16 10 0
|
||||
opcode reg1 unused
|
||||
/ / /
|
||||
+----------------------------------------+
|
||||
| 0001000100000002 | ...... | XXXXXXXXXX |
|
||||
+----------------------------------------+
|
||||
```
|
||||
* Params: Source
|
||||
* Jz
|
||||
* Opcode: 0x1003
|
||||
* Params: Source
|
||||
* Jnz
|
||||
* Opcode: 0x1004
|
||||
* Params: Source
|
||||
|
||||
## Data movement
|
||||
|
||||
* Load
|
||||
* Mov
|
||||
* Opcode: 0x2000
|
||||
* **Params**: REG1, REG2
|
||||
* ```
|
||||
REG1 = MEM[REG2];
|
||||
```
|
||||
* Sets REG1 to the value at the memory address in REG2.
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0010000000000000 | ...... | ...... | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* RegCopy
|
||||
* Opcode: 0x2001
|
||||
* **Params**: REG1, REG2
|
||||
* `REG1 = REG2`
|
||||
* Copies the value in REG2 into REG1.
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0010000000000001 | REG1.. | REG2.. | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* StoreImm64
|
||||
* Opcode: 0x2100
|
||||
* **Params**: REG1, IMM_64
|
||||
* `REG1 = IMM_64`
|
||||
* Sets REG1 to the specified 64-bit number.
|
||||
* StoreImm32
|
||||
* Opcode: 0x2101
|
||||
* **Params**: REG1, IMM_32
|
||||
* `REG1 = IMM_32`
|
||||
* Sets REG1 to the specified 32-bit number.
|
||||
* ```
|
||||
64 48 42 36 32 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / / immediate 32 bit value
|
||||
/ / / / /
|
||||
+------------------------------------------------------------------------------+
|
||||
| 0010000100000001 | REG1.. | REG2.. | XXXX | IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII |
|
||||
+------------------------------------------------------------------------------+
|
||||
```
|
||||
* MemCopy
|
||||
* Opcode: 0x2200
|
||||
* **Params**: REG1, REG2
|
||||
* `MEM[REG1] = MEM[REG2]`
|
||||
* Copies the value at the memory address in REG2 to the memory address in REG1.
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0010001000000000 | REG1.. | REG2.. | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
* Store
|
||||
* Opcode: 0x2201
|
||||
* **Params**: REG1, REG2
|
||||
* ```
|
||||
MEM[REG2] = REG1;
|
||||
```
|
||||
* Sets the value at the memory address in REG2 to the value in REG1.
|
||||
* ```
|
||||
32 16 10 4 0
|
||||
opcode reg1 reg2 unused
|
||||
/ / / /
|
||||
+-------------------------------------------+
|
||||
| 0010001000000001 | REG1.. | REG2.. | XXXX |
|
||||
+-------------------------------------------+
|
||||
```
|
||||
|
||||
## Miscellaneous
|
||||
|
||||
* Halt
|
||||
* Opcode: 0xF000
|
||||
* **Params**: (none)
|
||||
* `FLAGS[0] = 1`
|
||||
* Halts the machine
|
||||
* ```
|
||||
16
|
||||
opcode
|
||||
/
|
||||
+------------------+
|
||||
| 1111000000000000 |
|
||||
+------------------+
|
||||
```
|
||||
* Nop
|
||||
* Opcode: 0xF001
|
||||
* **Params**: (none)
|
||||
* Does nothing
|
||||
* ```
|
||||
16
|
||||
opcode
|
||||
/
|
||||
+------------------+
|
||||
| 1111000000000001 |
|
||||
+------------------+
|
||||
```
|
||||
* Dump
|
||||
* Opcode: 0xF002
|
||||
|
||||
## Other instructions TODO
|
||||
|
||||
@@ -423,8 +204,6 @@ wrapping around to 0.
|
||||
* Uses FP to determine previous SP, FP, and IP and restores them
|
||||
* Push
|
||||
* Pop
|
||||
* More immediate stores?
|
||||
* Idea: Store42 (or whatever number of bits) that maximizes the usage of a 64-bit instruction
|
||||
|
||||
# Binary object format
|
||||
|
||||
@@ -435,7 +214,7 @@ the object.
|
||||
|
||||
The header is composed of:
|
||||
|
||||
* 64 bits - A magic number (0xDEAD_BEA7_BA5E_BA11).
|
||||
* 64 bits - A magic number (0xDEAD\_BEA7\_BA5E\_BA11).
|
||||
* 32 bits - Version of the file
|
||||
* 32 bits - The number of sections in the file
|
||||
* section descriptions detailed below
|
||||
@@ -458,7 +237,7 @@ the section contents.
|
||||
The data section contains static data that is initialized to some known value.
|
||||
|
||||
* 64 bits - section load start - where in memory the content of this section begins
|
||||
* 64 bits - section load end - where in memory the content of this section ends
|
||||
* 64 bits - section length - how long the memory content is
|
||||
|
||||
### Code section
|
||||
|
||||
|
||||
Reference in New Issue
Block a user