Use lrpar for parsing, big 'ol syntax overhaul

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-02-17 16:15:06 -05:00
parent cf9ba376aa
commit 2c4b56e362
23 changed files with 1394 additions and 1494 deletions

823
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -11,9 +11,15 @@ build = "build.rs"
[dependencies]
bitflags = "1"
byteorder = "1"
lalrpop-util = "0.17.2"
regex = "*"
lazy_static = "1"
snafu = "0.6.2"
cfgrammar = "0.6"
lrlex = "0.6"
lrpar = "0.6"
regex = "*"
[build-dependencies]
lalrpop = "0.17.2"
cfgrammar = "0.6"
lrlex = "0.6"
lrpar = "0.6"

View File

@@ -1,5 +1,13 @@
use lalrpop;
use cfgrammar::yacc::YaccKind;
use lrlex::LexerBuilder;
use lrpar::{CTParserBuilder};
fn main() {
lalrpop::process_root().unwrap();
/// Build-script entry point: generates the parser and lexer sources at build time.
///
/// The grammar `vm/obj/syn/parser.y` is processed first (as a Grmtools-flavoured yacc file);
/// the rule-ID map it returns is then handed to the lexer builder so that the token names
/// used in `vm/obj/syn/lexer.l` line up with the grammar's token IDs.
///
/// # Errors
///
/// Propagates any error reported by either builder.
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Parser first: its token-name -> ID map is required by the lexer builder below.
let lex_rule_ids_map = CTParserBuilder::new()
.yacckind(YaccKind::Grmtools)
.process_file_in_src("vm/obj/syn/parser.y")?;
LexerBuilder::new()
.rule_ids_map(lex_rule_ids_map)
.process_file_in_src("vm/obj/syn/lexer.l")?;
Ok(())
}

View File

@@ -2,7 +2,7 @@
use std::cmp::Ordering;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Pos {
pub source: usize,
pub line: usize,
@@ -22,23 +22,20 @@ impl Pos {
}
}
pub fn from_char(c: char, source: usize, line: usize, col: usize, byte: usize) -> Self {
Pos::new(source, line, col, byte, c.len_utf8())
pub fn from_char(c: char) -> Self {
Pos::new(0, 0, 0, 0, c.len_utf8())
}
pub fn adv_char(self, c: char) -> Self {
let mut next = self;
next.byte += next.len;
next.len = c.len_utf8();
next.source += 1;
next.col += 1;
next
pub fn adv_char(&mut self, c: char) {
self.byte += self.len;
self.len = c.len_utf8();
self.source += 1;
self.col += 1;
}
pub fn adv_line(self) -> Self {
let mut next = self;
next.line += 1;
next
pub fn adv_line(&mut self) {
self.line += 1;
self.col = 0;
}
}
@@ -56,8 +53,8 @@ impl Ord for Pos {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
start: Pos,
end: Pos,
pub start: Pos,
pub end: Pos,
}
impl Span {

View File

@@ -3,7 +3,7 @@
mod common;
mod vm;
use std::{convert::TryFrom, env, fs, io, process};
use std::{env, fs, io, process};
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
@@ -21,55 +21,14 @@ fn get_input_string() -> io::Result<String> {
}
fn main() -> Result<()> {
use vm::obj::syn::parser::SectionsParser;
let contents = get_input_string()?;
let ast = match SectionsParser::new().parse(&contents) {
Ok(ast) => ast,
Err(err) => {
eprintln!("{}", err);
process::exit(1);
}
};
let obj = vm::obj::obj::Object::try_from(&ast)?;
dump(&obj)?;
let mut vm = vm::vm::Vm::new();
vm.load_object(obj, 1024 * 1024 * 64)?; // 64mb
let status = vm.run()?;
println!("{}", status);
Ok(())
}
fn dump(obj: &vm::obj::obj::Object) -> Result<()> {
use vm::obj::obj::Section;
use vm::visit::VisitInst;
let mut stdout = io::stdout();
for section in &obj.sections {
match section {
Section::Data { start, contents, .. } => {
const WIDTH: usize = 4;
println!("data section at 0x{:08x}", start);
for (i, b) in contents.iter().enumerate() {
if i % WIDTH == 0 {
print!("{:08x} | ", ((*start as usize) + i));
}
print!("{:02x} ", b);
if i % WIDTH == (WIDTH - 1) {
println!();
}
}
println!();
}
Section::Code { start, contents, .. } => {
println!("code section at 0x{:08x}", start);
let mut disasm = vm::disassemble::Disassemble::new(&mut stdout, contents, *start);
while !disasm.is_done() {
disasm.visit_inst()?;
}
println!();
}
Section::Meta { .. } => continue,
};
use vm::obj::syn::{lexer, parser};
let text = get_input_string()?;
let lexerdef = lexer::lexerdef();
let lexer = lexerdef.lexer(&text);
let (res, errors) = parser::parse(&lexer);
for err in errors {
println!("{}", err.pp(&lexer, &parser::token_epp));
}
println!("{:?}", res);
Ok(())
}

73
src/vm/common.rs Normal file
View File

@@ -0,0 +1,73 @@
use std::{
cmp::Ordering,
fmt::{self, Formatter, LowerHex},
ops::{Add, AddAssign},
};
/// A VM memory address: a thin newtype around the raw 64-bit value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Addr(pub u64);

/// Lowercase-hex formatting simply delegates to the wrapped `u64`, so width, fill and `#`
/// flags behave exactly as they do for a plain integer.
impl LowerHex for Addr {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        LowerHex::fmt(&self.0, fmt)
    }
}

/// Offsetting an address by anything that can be added to a `u64` yields a new address.
impl<T> Add<T> for Addr
where
    T: Add<u64, Output = u64>,
    u64: Add<T, Output = u64>,
{
    type Output = Addr;

    fn add(self, rhs: T) -> Self::Output {
        let Addr(base) = self;
        Addr(base + rhs)
    }
}

/// Implements `Addr += $int` for each listed integer type (the operand is cast to `u64`).
macro_rules! impl_add_assign {
    ($($int:ty),+ $(,)?) => {
        $(
            impl AddAssign<$int> for Addr {
                fn add_assign(&mut self, rhs: $int) {
                    self.0 += rhs as u64;
                }
            }
        )+
    };
}

impl_add_assign!(usize, u64);

/// Implements equality and ordering between `Addr` and each listed integer type by casting
/// the integer to `u64` and comparing it with the wrapped value.
macro_rules! impl_cmp {
    ($($int:ty),+ $(,)?) => {
        $(
            impl PartialEq<$int> for Addr {
                fn eq(&self, other: &$int) -> bool {
                    let rhs = *other as u64;
                    self.0 == rhs
                }
            }

            impl PartialOrd<$int> for Addr {
                fn partial_cmp(&self, other: &$int) -> Option<Ordering> {
                    let rhs = *other as u64;
                    Some(self.0.cmp(&rhs))
                }
            }
        )+
    };
}

impl_cmp!(usize, u64);

/// Implements `From<$int> for Addr` for each listed integer type via an `as u64` cast.
macro_rules! impl_from {
    ($($int:ty),+ $(,)?) => {
        $(
            impl From<$int> for Addr {
                fn from(other: $int) -> Self {
                    Self(other as u64)
                }
            }
        )+
    };
}

impl_from!(usize, u64);

View File

@@ -1,4 +1,4 @@
use crate::vm::{inst::InstOp, reg::Reg, vm::*};
use crate::vm::{inst::InstOp, reg::Reg, common::*,};
use snafu::Snafu;
#[derive(Snafu, Debug, Clone)]
@@ -9,6 +9,10 @@ pub enum VmError {
MemOutOfBounds { addr: Addr },
#[snafu(display("illegal instruction opcode: 0x{:04x}", op))]
IllegalOp { op: InstOp },
#[snafu(display("illegal destination specification: 0b{:08b}", spec))]
IllegalDestSpec { spec: u8 },
#[snafu(display("illegal source specification: 0b{:08b}", spec))]
IllegalSourceSpec { spec: u8 },
}
pub type Result<T, E = VmError> = std::result::Result<T, E>;

View File

@@ -1,3 +1,5 @@
use crate::vm::{common::Addr, reg::Reg};
macro_rules! instructions {
{
$($variant:ident = $value:expr),* $(,)?
@@ -21,42 +23,114 @@ pub type InstOp = u16;
instructions! {
ADD = 0x0000,
MUL = 0x0001,
DIV = 0x0002,
MOD = 0x0003,
INEG = 0x0004,
SUB = 0x0001,
MUL = 0x0002,
DIV = 0x0003,
MOD = 0x0004,
AND = 0x0005,
OR = 0x0006,
INV = 0x0007,
NOT = 0x0008,
XOR = 0x0009,
SHL = 0x000a,
SHR = 0x000b,
XOR = 0x0007,
SHL = 0x0008,
SHR = 0x0009,
INEG = 0x000a,
INV = 0x000b,
NOT = 0x000c,
CMPEQ = 0x1000,
CMPLT = 0x1001,
JMP = 0x1100,
JZ = 0x1101,
JNZ = 0x1102,
LOAD = 0x2000,
REGCOPY = 0x2001,
STOREIMM64 = 0x2100,
STOREIMM32 = 0x2101,
MEMCOPY = 0x2200,
STORE = 0x2201,
JMP = 0x1002,
JZ = 0x1003,
JNZ = 0x1004,
MOV = 0x2000,
HALT = 0xF000,
NOP = 0xF001,
DUMP = 0xF002,
}
pub fn inst_len(op: InstOp) -> usize {
match op {
// 2 bytes
HALT | NOP => 2,
// 4 bytes
ADD | MUL | DIV | INEG | INV | NOT | MOD | AND | OR | XOR | SHL | SHR | CMPEQ | CMPLT
| JMP | JZ | JNZ | LOAD | REGCOPY | MEMCOPY | STORE => 4,
// Immediates - 4+ bytes
STOREIMM64 => 16,
STOREIMM32 => 8,
_ => panic!("unknown instruction op 0x{:04x}", op),
/// A decoded VM instruction together with its operands.
///
/// Two-operand arithmetic/logic instructions and `Mov` carry a destination and a source;
/// the comparisons carry two sources; the jumps carry a single source; `Halt`, `Nop` and
/// `Dump` carry no operands.
///
/// The derives mirror those on [`Dest`] and [`Source`] so that instructions can be printed,
/// copied and compared like their operands.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Inst {
    Add(Dest, Source),
    Sub(Dest, Source),
    Mul(Dest, Source),
    Div(Dest, Source),
    Mod(Dest, Source),
    And(Dest, Source),
    Or(Dest, Source),
    Xor(Dest, Source),
    Shl(Dest, Source),
    Shr(Dest, Source),
    INeg(Dest, Source),
    Inv(Dest, Source),
    Not(Dest, Source),
    CmpEq(Source, Source),
    CmpLt(Source, Source),
    Jmp(Source),
    Jz(Source),
    Jnz(Source),
    Mov(Dest, Source),
    Halt,
    Nop,
    Dump,
}
impl Inst {
pub fn op(&self) -> InstOp {
match self {
Inst::Add(_, _) => ADD,
Inst::Sub(_, _) => SUB,
Inst::Mul(_, _) => MUL,
Inst::Div(_, _) => DIV,
Inst::Mod(_, _) => MOD,
Inst::And(_, _) => AND,
Inst::Or(_, _) => OR,
Inst::Xor(_, _) => XOR,
Inst::Shl(_, _) => SHL,
Inst::Shr(_, _) => SHL,
Inst::INeg(_, _) => INEG,
Inst::Inv(_, _) => INV,
Inst::Not(_, _) => NOT,
Inst::CmpEq(_, _) => CMPEQ,
Inst::CmpLt(_, _) => CMPLT,
Inst::Jmp(_) => JMP,
Inst::Jz(_) => JZ,
Inst::Jnz(_) => JNZ,
Inst::Mov(_, _) => MOV,
Inst::Halt => HALT,
Inst::Nop => NOP,
Inst::Dump => DUMP,
}
}
}
/// Source operand of an instruction, as produced by the decoder (`MemCursor::next_source`).
///
/// NOTE(review): the `Addr64`/`Addr32`/`Addr16`/`Addr8` names presumably give the width of
/// the memory access performed at the address — confirm against the executor. The decoder
/// itself reads a full 64-bit address for all four variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Source {
Addr64(Addr),
Addr32(Addr),
Addr16(Addr),
Addr8(Addr),
Reg(Reg),
// Immediate value; the 8/16/32-bit encodings are zero-extended to `u64` when decoded.
Imm(u64),
}
/// Destination operand of an instruction, as produced by the decoder (`MemCursor::next_dest`).
///
/// Unlike `Source` there is no immediate variant: a destination is either a memory address
/// or a register.
///
/// NOTE(review): the `Addr64`/`Addr32`/`Addr16`/`Addr8` names presumably give the width of
/// the memory access performed at the address — confirm against the executor. The decoder
/// itself reads a full 64-bit address for all four variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dest {
Addr64(Addr),
Addr32(Addr),
Addr16(Addr),
Addr8(Addr),
Reg(Reg),
}
// Destination operand specifier codes. Each fits in 4 bits: the decoder
// (`MemCursor::next_dest_source`) extracts the destination code from the high nibble of the
// operand-spec byte that follows the opcode.
pub const DEST_ADDR64: u8 = 0b0000;
pub const DEST_ADDR32: u8 = 0b0001;
pub const DEST_ADDR16: u8 = 0b0010;
pub const DEST_ADDR8: u8 = 0b0011;
pub const DEST_REG: u8 = 0b0100;
// Source operand specifier codes, also 4 bits each. For dest/source instructions the source
// code is the low nibble of the spec byte; for source/source instructions the first source
// is the high nibble and the second the low nibble; single-source instructions use the
// high nibble. The first five values coincide with the DEST_* codes; the IMM* codes select
// an immediate of the given bit width.
pub const SOURCE_ADDR64: u8 = 0b0000;
pub const SOURCE_ADDR32: u8 = 0b0001;
pub const SOURCE_ADDR16: u8 = 0b0010;
pub const SOURCE_ADDR8: u8 = 0b0011;
pub const SOURCE_REG: u8 = 0b0100;
pub const SOURCE_IMM64: u8 = 0b0101;
pub const SOURCE_IMM32: u8 = 0b0110;
pub const SOURCE_IMM16: u8 = 0b0111;
pub const SOURCE_IMM8: u8 = 0b1000;

View File

@@ -1,81 +1,219 @@
use crate::vm::{error::*, reg::*};
use byteorder::{ReadBytesExt, LE};
use std::{
io::Cursor,
ops::{Deref, DerefMut},
};
use crate::vm::{common::*, error::*, inst::*, reg::*};
use std::{convert::TryInto, ops::Index, mem};
const R1_MASK: u16 = 0b1111_1100_0000_0000;
const R2_MASK: u16 = 0b0000_0011_1111_0000;
#[derive(Debug, Clone)]
pub struct MemCursor<'mem> {
cursor: Cursor<&'mem [u8]>,
pub struct MemCursor<T> {
pos: Addr,
mem: T,
}
impl<'mem> MemCursor<'mem> {
pub fn new(mem: &'mem [u8]) -> Self {
MemCursor {
cursor: Cursor::new(mem),
}
impl<T> MemCursor<T>
where T: AsRef<[u8]>
{
pub fn new(mem: T) -> Self {
MemCursor { pos: Addr(0), mem }
}
pub fn cursor(&self) -> &Cursor<&'mem [u8]> {
&self.cursor
pub fn position(&self) -> Addr {
self.pos
}
pub fn cursor_mut(&mut self) -> &mut Cursor<&'mem [u8]> {
&mut self.cursor
pub fn set_position<P: Into<Addr>>(&mut self, position: P) {
self.pos = position.into();
}
pub fn next_u8_unchecked(&mut self) -> u8 {
let val = self[self.pos];
self.pos += 1u64;
val
}
pub fn next_u8(&mut self) -> Result<u8> {
self.read_u8().map_err(|_| VmError::MemOutOfBounds {
addr: self.position(),
})
self.check_addr(self.pos)
.map(|_| self.next_u8_unchecked())
}
/// Reads a little-endian `u16` at the cursor position and advances the cursor by 2 bytes,
/// without validating the position first.
///
/// # Panics
///
/// Panics if fewer than 2 bytes are available at the current position.
pub fn next_u16_unchecked(&mut self) -> u16 {
    const SIZE: usize = mem::size_of::<u16>();
    // Decode from the cursor position. Splitting the whole buffer at `SIZE` would always
    // decode the first bytes of memory, no matter where the cursor is.
    let start = self.pos.0 as usize;
    let int_bytes = &self.mem.as_ref()[start..start + SIZE];
    // The slice is exactly `SIZE` bytes long, so the array conversion cannot fail.
    let val = u16::from_le_bytes(int_bytes.try_into().unwrap());
    self.pos += 2u64;
    val
}
pub fn next_u16(&mut self) -> Result<u16> {
self.read_u16::<LE>().map_err(|_| VmError::MemOutOfBounds {
addr: self.position(),
})
self.check_addr(self.pos)
.map(|_| self.next_u16_unchecked())
}
/// Reads a little-endian `u32` at the cursor position and advances the cursor by 4 bytes,
/// without validating the position first.
///
/// # Panics
///
/// Panics if fewer than 4 bytes are available at the current position.
pub fn next_u32_unchecked(&mut self) -> u32 {
    const SIZE: usize = mem::size_of::<u32>();
    // Decode from the cursor position. Splitting the whole buffer at `SIZE` would always
    // decode the first bytes of memory, no matter where the cursor is.
    let start = self.pos.0 as usize;
    let int_bytes = &self.mem.as_ref()[start..start + SIZE];
    // The slice is exactly `SIZE` bytes long, so the array conversion cannot fail.
    let val = u32::from_le_bytes(int_bytes.try_into().unwrap());
    self.pos += 4u64;
    val
}
pub fn next_u32(&mut self) -> Result<u32> {
self.read_u32::<LE>().map_err(|_| VmError::MemOutOfBounds {
addr: self.position(),
})
self.check_addr(self.pos)
.map(|_| self.next_u32_unchecked())
}
/// Reads a little-endian `u64` at the cursor position and advances the cursor by 8 bytes,
/// without validating the position first.
///
/// # Panics
///
/// Panics if fewer than 8 bytes are available at the current position.
pub fn next_u64_unchecked(&mut self) -> u64 {
    const SIZE: usize = mem::size_of::<u64>();
    // Decode from the cursor position. Splitting the whole buffer at `SIZE` would always
    // decode the first bytes of memory, no matter where the cursor is.
    let start = self.pos.0 as usize;
    let int_bytes = &self.mem.as_ref()[start..start + SIZE];
    // The slice is exactly `SIZE` bytes long, so the array conversion cannot fail.
    let val = u64::from_le_bytes(int_bytes.try_into().unwrap());
    self.pos += 8u64;
    val
}
pub fn next_u64(&mut self) -> Result<u64> {
self.read_u64::<LE>().map_err(|_| VmError::MemOutOfBounds {
addr: self.position(),
})
self.check_addr(self.pos)
.map(|_| self.next_u64_unchecked())
}
pub fn next_regs(&mut self) -> Result<(Reg, Reg)> {
let next16 = self.next_u16()?;
let r1 = ((R1_MASK & next16) >> 10) as Reg;
let r2 = ((R2_MASK & next16) >> 4) as Reg;
Ok((r1, r2))
pub fn next_addr(&mut self) -> Result<Addr> {
self.check_addr(self.pos)
.map(|_| self.next_addr_unchecked())
}
pub fn next_reg(&mut self) -> Result<Reg> {
let next16 = self.next_u16()?;
let r1 = ((R1_MASK & next16) >> 10) as Reg;
Ok(r1)
pub fn next_addr_unchecked(&mut self) -> Addr {
Addr(self.next_u64_unchecked())
}
pub fn next_inst(&mut self) -> Result<Inst> {
let op = self.next_u16()?;
macro_rules! dest_source {
($variant:ident) => {{
let (d, s) = self.next_dest_source()?;
Ok(Inst::$variant(d, s))
}};
}
macro_rules! source_source {
($variant:ident) => {{
let (s1, s2) = self.next_source_source()?;
Ok(Inst::$variant(s1, s2))
}};
}
macro_rules! source {
($variant:ident) => {{
let spec = (self.next_u8()? & 0xF0) >> 4;
let source = self.next_source(spec)?;
Ok(Inst::$variant(source))
}};
}
match op {
ADD => dest_source!(Add),
SUB => dest_source!(Sub),
MUL => dest_source!(Mul),
DIV => dest_source!(Div),
MOD => dest_source!(Mod),
AND => dest_source!(And),
OR => dest_source!(Or),
XOR => dest_source!(Xor),
SHL => dest_source!(Shl),
SHR => dest_source!(Shr),
INEG => dest_source!(INeg),
INV => dest_source!(Inv),
NOT => dest_source!(Not),
CMPEQ => source_source!(CmpEq),
CMPLT => source_source!(CmpLt),
JMP => source!(Jmp),
JZ => source!(Jz),
JNZ => source!(Jnz),
MOV => dest_source!(Mov),
HALT => Ok(Inst::Halt),
NOP => Ok(Inst::Nop),
DUMP => Ok(Inst::Dump),
_ => Err(VmError::IllegalOp { op }),
}
}
impl<'mem> Deref for MemCursor<'mem> {
type Target = Cursor<&'mem [u8]>;
fn next_source_source(&mut self) -> Result<(Source, Source)> {
let spec = self.next_u8()?;
let s1_spec = (spec & 0xF0) >> 4;
let s2_spec = spec & 0x0F;
let s1 = self.next_source(s1_spec)?;
let s2 = self.next_source(s2_spec)?;
Ok((s1, s2))
}
fn deref(&self) -> &Self::Target {
self.cursor()
fn next_dest_source(&mut self) -> Result<(Dest, Source)> {
let spec = self.next_u8()?;
let dest_spec = (spec & 0xF0) >> 4;
let source_spec = spec & 0x0F;
let dest = self.next_dest(dest_spec)?;
let source = self.next_source(source_spec)?;
Ok((dest, source))
}
fn next_dest(&mut self, spec: u8) -> Result<Dest> {
match spec {
DEST_ADDR64 => Ok(Dest::Addr64(self.next_addr()?)),
DEST_ADDR32 => Ok(Dest::Addr32(self.next_addr()?)),
DEST_ADDR16 => Ok(Dest::Addr16(self.next_addr()?)),
DEST_ADDR8 => Ok(Dest::Addr8(self.next_addr()?)),
DEST_REG => Ok(Dest::Reg(self.next_reg()?)),
_ => Err(VmError::IllegalDestSpec { spec }),
}
}
impl<'mem> DerefMut for MemCursor<'mem> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.cursor_mut()
fn next_source(&mut self, spec: u8) -> Result<Source> {
match spec {
SOURCE_ADDR64 => Ok(Source::Addr64(self.next_addr()?)),
SOURCE_ADDR32 => Ok(Source::Addr32(self.next_addr()?)),
SOURCE_ADDR16 => Ok(Source::Addr16(self.next_addr()?)),
SOURCE_ADDR8 => Ok(Source::Addr8(self.next_addr()?)),
SOURCE_REG => Ok(Source::Reg(self.next_reg()?)),
SOURCE_IMM64 => Ok(Source::Imm(self.next_u64()?)),
SOURCE_IMM32 => Ok(Source::Imm(self.next_u32()? as u64)),
SOURCE_IMM16 => Ok(Source::Imm(self.next_u16()? as u64)),
SOURCE_IMM8 => Ok(Source::Imm(self.next_u8()? as u64)),
_ => Err(VmError::IllegalSourceSpec { spec }),
}
}
fn next_reg(&mut self) -> Result<Reg> {
let reg = self.next_u8()?;
if (reg as usize) >= NUM_REGS {
Err(VmError::IllegalReg { reg })
} else {
Ok(reg)
}
}
/// Verifies that `addr` refers to a byte inside the backing memory.
///
/// Only the single byte at `addr` is validated; a caller that goes on to read a wider value
/// is not protected against the value running past the end of memory.
///
/// # Errors
///
/// Returns `VmError::MemOutOfBounds` when `addr` is at or beyond the end of memory.
fn check_addr(&self, addr: Addr) -> Result<()> {
    // `addr == len` is one past the last valid byte, so it has to be rejected as well;
    // accepting it lets the subsequent unchecked read panic on an out-of-range index.
    if addr >= self.mem.as_ref().len() {
        Err(VmError::MemOutOfBounds { addr })
    } else {
        Ok(())
    }
}
}
////////////////////////////////////////////////////////////////////////////////
// Index impl
////////////////////////////////////////////////////////////////////////////////
impl<T: AsRef<[u8]>> Index<usize> for MemCursor<T> {
type Output = u8;
fn index(&self, addr: usize) -> &Self::Output {
self.mem.as_ref().index(addr)
}
}
impl<T: AsRef<[u8]>> Index<u64> for MemCursor<T> {
type Output = u8;
fn index(&self, addr: u64) -> &Self::Output {
self.index(addr as usize)
}
}
impl<T: AsRef<[u8]>> Index<Addr> for MemCursor<T> {
type Output = u8;
fn index(&self, addr: Addr) -> &Self::Output {
self.index(addr.0)
}
}

View File

@@ -1,10 +1,8 @@
pub mod disassemble;
pub mod common;
pub mod error;
pub mod flags;
pub mod inst;
pub mod mem;
pub mod obj;
pub mod reg;
mod tick;
pub mod visit;
pub mod vm;
pub mod state;

View File

@@ -0,0 +1,45 @@
use snafu::Snafu;
use std::{fmt::Debug, io};
/// Errors reported by the `vm::obj` module.
///
/// NOTE(review): judging by the variants (magic number, section kind, UTF-8 strings) these
/// are presumably raised while reading a binary object file — confirm against the callers.
/// The user-facing message of each variant is given by its `snafu(display(...))` attribute.
#[derive(Debug, Snafu)]
pub enum ParseError {
#[snafu(display("IO error: {}", source))]
Io { source: io::Error },
#[snafu(display("wrong magic number"))]
WrongMagic,
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
UnknownSectionKind { kind: u8 },
#[snafu(display("invalid UTF-8 string: {}", source))]
InvalidUtf8String { source: std::string::FromUtf8Error },
#[snafu(display("duplicate symbol name: {}", name))]
DuplicateName { name: String },
#[snafu(display("duplicate exported symbol name: {}", name))]
DuplicateExportName { name: String },
}
macro_rules! into_parse_error {
(
$($type:ty : $variant:ident),* $(,)?
) => {
$(
impl From<$type> for ParseError {
fn from(other: $type) -> Self {
ParseError::$variant { source: other }
}
}
)*
}
}
into_parse_error! {
io::Error: Io,
std::string::FromUtf8Error: InvalidUtf8String,
}
pub type Result<T, E = ParseError> = std::result::Result<T, E>;

View File

@@ -1,3 +1,3 @@
pub mod assemble;
pub mod error;
pub mod obj;
pub mod syn;

View File

@@ -1,4 +1,4 @@
use crate::vm::obj::syn::error::{ParseError, Result};
use crate::vm::obj::error::{ParseError, Result};
use byteorder::{ReadBytesExt, LE};
use std::{
collections::HashMap,

View File

@@ -1,165 +1,82 @@
use crate::vm::{inst::*, reg::Reg};
use crate::vm::reg::Reg;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SectionBlock {
Data {
org: SectionOrg,
body: Vec<Line>,
},
Code {
org: SectionOrg,
body: Vec<Line>,
},
Meta {
entries: Vec<(String, ImmValue)>,
},
#[derive(Debug, Clone)]
pub enum SectionDef {
Meta(MetaSection),
Data(DataSection),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone)]
pub struct MetaSection {
pub values: Vec<MetaLine>,
}
#[derive(Debug, Clone)]
pub struct MetaLine {
pub name: String,
pub value: Value,
}
#[derive(Debug, Clone)]
pub struct DataSection {
pub name: String,
pub org: Option<SectionOrg>,
pub lines: Vec<DataLine>,
}
#[derive(Debug, Clone)]
pub enum SectionOrg {
Start(u64),
Range(u64, u64),
StartEnd(u64, u64),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Line {
#[derive(Debug, Clone)]
pub enum DataLine {
ValueDef(ValueDef),
Inst(Inst),
LabelDef(String),
ValueDecl(ValueDecl),
Export(String),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ImmValue {
Number(u64),
Label(String),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValueDecl {
U64(u64),
U32(u64),
U16(u64),
U8(u64),
#[derive(Debug, Clone)]
pub enum ValueDef {
Int(u64),
String(String),
ZString(String),
}
impl ValueDecl {
pub fn len(&self) -> usize {
match self {
ValueDecl::U64(_) => 8,
ValueDecl::U32(_) => 4,
ValueDecl::U16(_) => 2,
ValueDecl::U8(_) => 1,
ValueDecl::String(s) => s.as_bytes().len() + 8,
ValueDecl::ZString(s) => s.as_bytes().len() + 1,
}
#[derive(Debug, Clone)]
pub enum Value {
Int(u64),
Reg(Reg),
Name(String),
Here,
//Array(Vec<Value>),
//Deref(Value, Size
}
pub fn to_bytes(&self) -> Vec<u8> {
let len = self.len();
let bytes = match self {
ValueDecl::U64(v) => v.to_le_bytes().to_vec(),
ValueDecl::U32(v) => v.to_le_bytes()[0..4].to_vec(),
ValueDecl::U16(v) => v.to_le_bytes()[0..2].to_vec(),
ValueDecl::U8(v) => vec![(v & 0xff) as u8],
ValueDecl::String(s) => {
let mut bytes = Vec::with_capacity(self.len());
bytes.extend(&(s.len() as u64).to_le_bytes());
bytes.extend(s.as_bytes());
bytes
}
ValueDecl::ZString(s) => {
let mut bytes = Vec::with_capacity(self.len());
bytes.extend(s.as_bytes());
bytes.push(0);
bytes
}
};
assert_eq!(bytes.len(), len);
bytes
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone)]
pub enum Inst {
Add(Reg, Reg),
Mul(Reg, Reg),
Div(Reg, Reg),
Mod(Reg, Reg),
INeg(Reg),
And(Reg, Reg),
Or(Reg, Reg),
Inv(Reg),
Not(Reg),
Xor(Reg, Reg),
Shl(Reg, Reg),
Shr(Reg, Reg),
CmpEq(Reg, Reg),
CmpLt(Reg, Reg),
Jmp(Reg),
Jz(Reg),
Jnz(Reg),
Load(Reg, Reg),
Store(Reg, Reg),
StoreImm(Reg, ImmValue),
StoreImm32(Reg, ImmValue),
StoreImm64(Reg, ImmValue),
MemCopy(Reg, Reg),
RegCopy(Reg, Reg),
Nop,
Add(Value, Value),
Sub(Value, Value),
Mul(Value, Value),
Div(Value, Value),
Mod(Value, Value),
And(Value, Value),
Or(Value, Value),
Xor(Value, Value),
Shl(Value, Value),
Shr(Value, Value),
INeg(Value, Value),
Inv(Value, Value),
Not(Value, Value),
CmpEq(Value, Value),
CmpLt(Value, Value),
Jmp(Value),
Jz(Value),
Jnz(Value),
Mov(Value, Value),
Halt,
}
impl Inst {
pub fn op(&self) -> InstOp {
match self {
Inst::Add(_, _) => ADD,
Inst::Mul(_, _) => MUL,
Inst::Div(_, _) => DIV,
Inst::Mod(_, _) => MOD,
Inst::INeg(_) => INEG,
Inst::And(_, _) => AND,
Inst::Or(_, _) => OR,
Inst::Inv(_) => INV,
Inst::Not(_) => NOT,
Inst::Xor(_, _) => XOR,
Inst::Shl(_, _) => SHL,
Inst::Shr(_, _) => SHR,
Inst::CmpEq(_, _) => CMPEQ,
Inst::CmpLt(_, _) => CMPLT,
Inst::Jmp(_) => JMP,
Inst::Jz(_) => JZ,
Inst::Jnz(_) => JNZ,
Inst::Load(_, _) => LOAD,
Inst::Store(_, _) => STORE,
Inst::StoreImm(_, imm) => {
if let ImmValue::Number(imm) = imm {
if *imm > (u32::max_value() as u64) {
STOREIMM64
} else {
STOREIMM32
}
} else {
STOREIMM64
}
}
Inst::StoreImm32(_, _) => STOREIMM32,
Inst::StoreImm64(_, _) => STOREIMM64,
Inst::MemCopy(_, _) => MEMCOPY,
Inst::RegCopy(_, _) => REGCOPY,
Inst::Nop => NOP,
Inst::Halt => HALT,
}
}
pub fn len(&self) -> usize {
inst_len(self.op())
}
Nop,
Dump,
}

View File

@@ -1,44 +1,16 @@
use snafu::Snafu;
use std::{fmt::Debug, io};
//use std::{fmt::Debug, io};
#[derive(Debug, Snafu)]
pub enum ParseError {
#[snafu(display("IO error: {}", source))]
Io { source: io::Error },
pub enum SyntaxError {
//#[snafu(display("IO error: {}", source))]
//Io { source: io::Error },
#[snafu(display("wrong magic number"))]
WrongMagic,
#[snafu(display("unexpected {}", what))]
Unexpected { what: String },
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
UnknownSectionKind { kind: u8 },
#[snafu(display("invalid UTF-8 string: {}", source))]
InvalidUtf8String { source: std::string::FromUtf8Error },
#[snafu(display("duplicate symbol name: {}", name))]
DuplicateName { name: String },
#[snafu(display("duplicate exported symbol name: {}", name))]
DuplicateExportName { name: String },
#[snafu(display("expected {}, but got {} instead", expected, got))]
ExpectedGot { expected: String, got: String },
}
macro_rules! into_parse_error {
(
$($type:ty : $variant:ident),* $(,)?
) => {
$(
impl From<$type> for ParseError {
fn from(other: $type) -> Self {
ParseError::$variant { source: other }
}
}
)*
}
}
into_parse_error! {
io::Error: Io,
std::string::FromUtf8Error: InvalidUtf8String,
}
pub type Result<T, E = ParseError> = std::result::Result<T, E>;
pub type Result<T, E = SyntaxError> = std::result::Result<T, E>;

50
src/vm/obj/syn/lexer.l Normal file
View File

@@ -0,0 +1,50 @@
%%
\$[0-9]+ "DEC_INT"
\$0[Xx][0-9a-fA-F]+ "HEX_INT"
\$0[Bb][01]+ "BIN_INT"
\.meta "DIR_META"
\.section "DIR_SECTION"
\.export "DIR_EXPORT"
\{ "LBRACE"
\} "RBRACE"
\.\. "DOTDOT"
: "COLON"
, "COMMA"
\$\$ "BUCKBUCK"
[iu](8|16|32|64) "INT_TYPE"
\.[iu](8|16|32|64) "INT_DEF"
\.string "STR_DEF"
\.zstring "ZSTR_DEF"
"([^"]|\\[\\nt0"'])*" "STRING"
add "ADD"
sub "SUB"
mul "MUL"
div "DIV"
mod "MOD"
and "AND"
or "OR"
xor "XOR"
shl "SHL"
shr "SHR"
ineg "INEG"
inv "INV"
not "NOT"
cmpeq "CMPEQ"
cmplt "CMPLT"
jmp "JMP"
jz "JZ"
jnz "JNZ"
mov "MOV"
halt "HALT"
nop "NOP"
dump "DUMP"
%ip "REG_IP"
%sp "REG_SP"
%fp "REG_FP"
%flags "REG_FLAGS"
%null "REG_NULL"
%status "REG_STATUS"
%r[0-9]{1,2} "REG_GENERAL"
[a-zA-Z_][a-zA-Z0-9_]* "NAME"
;[^\n]* ;
[ \n\t]+ ;

View File

@@ -1,16 +1,16 @@
use lalrpop_util::lalrpop_mod;
lalrpop_mod!(pub parser, "/vm/obj/syn/parser.rs");
pub mod ast;
pub mod error;
pub fn unescape_string(s: impl AsRef<str>) -> String {
let s = s.as_ref();
s.replace(r"\\", "\\")
.replace("\\n", "\n")
.replace("\\r", "\r")
.replace("\\t", "\t")
.replace("\\t", "\t")
.replace("\\0", "\0")
.replace("\\\"", "\"")
use lrlex::lrlex_mod;
use lrpar::lrpar_mod;
lrlex_mod!("vm/obj/syn/lexer.l");
lrpar_mod!("vm/obj/syn/parser.y");
pub mod parser {
pub use super::parser_y::*;
}
pub mod lexer {
pub use super::lexer_l::*;
}

View File

@@ -1,20 +1,13 @@
use std::str::FromStr;
use crate::vm::{
common::Addr,
inst::*,
obj::syn::{unescape_string, ast::*},
reg::*,
};
grammar;
LabelDef: String = {
<Label> ":" => <>
}
ImmValue: ImmValue = {
<Label> => ImmValue::Label(<>),
<Number> => ImmValue::Number(<>),
}
Label: String = {
r"[a-zA-Z_][a-zA-Z0-9_]*" => String::from(<>),
}
@@ -33,45 +26,13 @@ String: String = {
}
Reg: Reg = {
r"%ip" => IP,
r"%sp" => SP,
r"%fp" => FP,
r"%flags" => FLAGS,
r"%null" => NULL,
r"%status" => STATUS,
r"%r[0-9]{2}" => {
let offset = (&<>[2..]).parse::<u8>().unwrap();
let reg = R00 + offset;
assert!(reg < LAST_REG, "invalid register");
reg
}
}
Inst: Inst = {
"add" <d:Reg> "," <s:Reg> => Inst::Add(d, s),
"mul" <d:Reg> "," <s:Reg> => Inst::Mul(d, s),
"div" <d:Reg> "," <s:Reg> =>Inst::Div(d, s),
"mod" <d:Reg> "," <s:Reg> => Inst::Mod(d, s),
"ineg" <d:Reg> => Inst::INeg(d),
"and" <d:Reg> "," <s:Reg> => Inst::And(d, s),
"or" <d:Reg> "," <s:Reg> => Inst::Or(d, s),
"xor" <d:Reg> "," <s:Reg> => Inst::Xor(d, s),
"shl" <d:Reg> "," <s:Reg> => Inst::Shl(d, s),
"shr" <d:Reg> "," <s:Reg> => Inst::Shr(d, s),
"cmpeq" <d:Reg> "," <s:Reg> => Inst::CmpEq(d, s),
"cmplt" <d:Reg> "," <s:Reg> => Inst::CmpLt(d, s),
"jmp" <d:Reg> => Inst::Jmp(d),
"jz" <d:Reg> => Inst::Jz(d),
"jnz" <d:Reg> => Inst::Jnz(d),
"load" <d:Reg> "," <s:Reg> => Inst::Load(d, s),
"store" <d:Reg> "," <s:Reg> => Inst::Store(d, s),
"storeimm" <d:Reg> "," <s:ImmValue> => Inst::StoreImm(d, s),
"storeimm32" <d:Reg> "," <s:ImmValue> => Inst::StoreImm32(d, s),
"storeimm64" <d:Reg> "," <s:ImmValue> => Inst::StoreImm64(d, s),
"memcopy" <d:Reg> "," <s:Reg> => Inst::MemCopy(d, s),
"regcopy" <d:Reg> "," <s:Reg> => Inst::RegCopy(d, s),
"nop" => Inst::Nop,
"halt" => Inst::Halt,
r"%ip" => todo!(),
r"%sp" => todo!(),
r"%fp" => todo!(),
r"%flags" => todo!(),
r"%nil" => todo!(),
r"%status" => todo!(),
r"%r[0-9]{1,2}" => todo!(),
}
ValueDecl: ValueDecl = {
@@ -83,34 +44,13 @@ ValueDecl: ValueDecl = {
r"\.zstring" <String> => ValueDecl::ZString(<>),
}
Line: Line = {
<Inst> => Line::Inst(<>),
<LabelDef> => Line::LabelDef(<>),
<ValueDecl> => Line::ValueDecl(<>),
r"\.export" <Label> => Line::Export(<>),
}
MetaLine: (String, ImmValue) = {
<name:Label> ":" <value:ImmValue> => (name, value),
}
SectionOrg: SectionOrg = {
<start:Number> => SectionOrg::Start(start),
<start:Number> r"\.\." <end:Number> => SectionOrg::Range(start, end),
}
Section: SectionBlock = {
"data" <org:SectionOrg> "{" <body:Line*> "}" => {
SectionBlock::Data { org, body }
},
"code" <org:SectionOrg> "{" <body:Line*> "}" => {
SectionBlock::Code { org, body }
},
"meta" "{" <entries:MetaLine*> "}" => {
SectionBlock::Meta { entries, }
}
}
pub Sections: Vec<SectionBlock> = {

204
src/vm/obj/syn/parser.y Normal file
View File

@@ -0,0 +1,204 @@
%start SectionDefs
%%
SectionDefs -> Vec<SectionDef>:
SectionDefs SectionDef { $1.push($2); $1 }
| { Vec::new() }
;
SectionDef -> SectionDef:
'DIR_META' MetaBlock { SectionDef::Meta(MetaSection { values: $2 }) }
| 'DIR_SECTION' Name MaybeSectionOrg DataBlock {
SectionDef::Data(DataSection {
name: $2,
org: $3,
lines: $4,
})
}
;
MetaBlock -> Vec<MetaLine>: 'LBRACE' MetaLines 'RBRACE' { $2 };
MetaLines -> Vec<MetaLine>:
MetaLines MetaLine { $1.push($2); $1 }
| { Vec::new() }
;
MetaLine -> MetaLine: Name 'COLON' Value { MetaLine { name: $1, value: $3 } };
MaybeSectionOrg -> Option<SectionOrg>:
SectionOrg { Some($1) }
| { None }
;
SectionOrg -> SectionOrg:
Int { SectionOrg::Start($1) }
| Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) }
;
DataBlock -> Vec<DataLine>: 'LBRACE' DataLines 'RBRACE' { $2 };
DataLines -> Vec<DataLine>:
DataLines DataLine { $1.push($2); $1 }
| { Vec::new() }
;
DataLine -> DataLine:
    ValueDef { DataLine::ValueDef($1) }
  | Inst { DataLine::Inst($1) }
  | 'DIR_EXPORT' Name { DataLine::Export($2) }
  | Name 'COLON' {
        // The AST variant for a label definition is named `LabelDef`; `DataLine` has no
        // `Label` variant.
        DataLine::LabelDef($1)
    }
  ;
ValueDef -> ValueDef:
'INT_DEF' Int { ValueDef::Int($2) }
| 'STR_DEF' String { ValueDef::String($2) }
| 'ZSTR_DEF' String { ValueDef::ZString($2) }
;
Value -> Value:
Int { Value::Int($1) }
| Reg { Value::Reg($1) }
| Name { Value::Name($1) }
| 'BUCKBUCK' { Value::Here }
;
Inst -> Inst:
'ADD' Value 'COMMA' Value { Inst::Add($2, $4) }
| 'SUB' Value 'COMMA' Value { Inst::Sub($2, $4) }
| 'MUL' Value 'COMMA' Value { Inst::Mul($2, $4) }
| 'DIV' Value 'COMMA' Value { Inst::Div($2, $4) }
| 'MOD' Value 'COMMA' Value { Inst::Mod($2, $4) }
| 'AND' Value 'COMMA' Value { Inst::And($2, $4) }
| 'OR' Value 'COMMA' Value { Inst::Or($2, $4) }
| 'XOR' Value 'COMMA' Value { Inst::Xor($2, $4) }
| 'SHL' Value 'COMMA' Value { Inst::Shl($2, $4) }
| 'SHR' Value 'COMMA' Value { Inst::Shr($2, $4) }
| 'INEG' Value 'COMMA' Value { Inst::INeg($2, $4) }
| 'INV' Value 'COMMA' Value { Inst::Inv($2, $4) }
| 'NOT' Value 'COMMA' Value { Inst::Not($2, $4) }
| 'CMPEQ' Value 'COMMA' Value { Inst::CmpEq($2, $4) }
| 'CMPLT' Value 'COMMA' Value { Inst::CmpLt($2, $4) }
| 'JMP' Value { Inst::Jmp($2) }
| 'JZ' Value { Inst::Jz($2) }
| 'JNZ' Value { Inst::Jnz($2) }
| 'MOV' Value 'COMMA' Value { Inst::Mov($2, $4) }
| 'HALT' { Inst::Halt }
| 'NOP' { Inst::Nop }
| 'DUMP' { Inst::Dump }
;
Name -> String:
'NAME' {
let v = $1.expect("could not parse name");
$lexer.span_str(v.span()).to_string()
}
;
Int -> u64:
'DEC_INT' {
let span = $1.expect("could not parse dec_int").span();
let s = &$lexer.span_str(span)[1..];
s.parse().unwrap()
}
| 'HEX_INT' {
let span = $1.expect("could not parse hex_int").span();
let s = &$lexer.span_str(span)[3..];
u64::from_str_radix(s, 16).unwrap()
}
| 'BIN_INT' {
let span = $1.expect("could not parse bin_int").span();
let s = &$lexer.span_str(span)[3..];
u64::from_str_radix(s, 2).unwrap()
}
;
Reg -> Reg:
'REG_IP' { IP }
| 'REG_SP' { SP }
| 'REG_FP' { FP }
| 'REG_FLAGS' { FLAGS }
| 'REG_NULL' { NULL }
| 'REG_STATUS' { STATUS }
| 'REG_GENERAL' {
let v = $1.expect("could not parse reg");
parse_reg($lexer.span_str(v.span())).unwrap()
}
;
String -> String:
'STRING' {
let v = $1.expect("could not parse string");
parse_string($lexer.span_str(v.span()))
}
;
%%
use crate::vm::{
obj::syn::ast::*,
reg::*,
};
/// Converts the source text of a `STRING` token into the string value it denotes.
///
/// `input` is the complete token, including the opening and closing double quote. The
/// quotes are stripped and the escapes `\\`, `\n`, `\t`, `\"`, `\'` and `\0` are replaced by
/// the characters they stand for.
///
/// The lexer's `STRING` pattern also lets through backslashes that do not start one of
/// those escapes (its `[^"]` alternative matches a backslash). Such a backslash is kept
/// literally, together with the character after it, instead of aborting the process.
///
/// # Panics
///
/// Panics if `input` is shorter than the two surrounding quote characters.
fn parse_string(input: &str) -> String {
    // Drop exactly one quote on each side. Both quotes are single-byte ASCII, so byte
    // offsets 1 and `len - 1` are always valid char boundaries.
    let inner = &input[1..input.len() - 1];
    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('\\') => out.push('\\'),
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('"') => out.push('"'),
            Some('\'') => out.push('\''),
            Some('0') => out.push('\0'),
            // Unknown escape: keep it verbatim.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // Backslash as the very last character of the string body.
            None => out.push('\\'),
        }
    }
    out
}
/// Parses the text of a general-purpose register token (`%r` followed by one or two
/// decimal digits) into its register number.
///
/// Returns `None` when `input` does not have that shape or when the resulting register
/// would lie beyond `R31`.
fn parse_reg(input: &str) -> Option<Reg> {
    use lazy_static::lazy_static;
    use regex::Regex;

    lazy_static! {
        static ref REG_RE: Regex = Regex::new(r"^%r([0-9]{1,2})$").unwrap();
    }

    let index_text = REG_RE.captures(input)?.get(1)?.as_str();
    // At most two decimal digits were matched, so the value always fits the register type.
    let index: Reg = index_text.parse().unwrap();
    let reg = R00 + index;
    if reg <= R31 {
        Some(reg)
    } else {
        None
    }
}
#[cfg(test)]
mod test {
use crate::vm::reg::*;
use super::parse_reg;
#[test]
fn test_parse_reg() {
assert_eq!(parse_reg("%r00"), Some(R00));
assert_eq!(parse_reg("%r0"), Some(R00));
assert_eq!(parse_reg("%r1"), Some(R01));
assert_eq!(parse_reg("%r01"), Some(R01));
assert_eq!(parse_reg("%r31"), Some(R31));
assert_eq!(parse_reg("%r32"), None);
assert_eq!(parse_reg("%r0000"), None);
assert_eq!(parse_reg("%r9"), Some(R09));
assert_eq!(parse_reg("%r"), None);
assert_eq!(parse_reg("%r12"), Some(R12));
}
}

View File

@@ -20,9 +20,6 @@ macro_rules! registers {
}
registers! {
// https://crates.io/crates/packed_struct
// TODO : check this muffugin shit out!!
// Instruction pointer
IP = 0,
@@ -38,68 +35,42 @@ registers! {
// Zero
NULL = 4,
UNUSED01 = 5,
UNUSED02 = 6,
UNUSED03 = 7,
UNUSED04 = 8,
UNUSED05 = 9,
UNUSED06 = 10,
UNUSED07 = 11,
UNUSED08 = 12,
// General status code
STATUS = 13,
STATUS = 5,
R00 = 14,
R01 = 15,
R02 = 16,
R03 = 17,
R04 = 18,
R05 = 19,
R06 = 20,
R07 = 21,
R08 = 22,
R09 = 23,
R10 = 24,
R11 = 25,
R12 = 26,
R13 = 27,
R14 = 28,
R15 = 29,
R16 = 30,
R17 = 31,
R18 = 32,
R19 = 33,
R20 = 34,
R21 = 35,
R22 = 36,
R23 = 37,
R24 = 38,
R25 = 39,
R26 = 40,
R27 = 41,
R28 = 42,
R29 = 43,
R30 = 44,
R31 = 45,
R32 = 46,
R33 = 47,
R34 = 48,
R35 = 49,
R36 = 50,
R37 = 51,
R38 = 52,
R39 = 53,
R40 = 54,
R41 = 55,
R42 = 56,
R43 = 57,
R44 = 58,
R45 = 59,
R46 = 60,
R47 = 61,
R48 = 62,
R49 = 63,
R00 = 6,
R01 = 7,
R02 = 8,
R03 = 9,
R04 = 10,
R05 = 11,
R06 = 12,
R07 = 13,
R08 = 14,
R09 = 15,
R10 = 16,
R11 = 17,
R12 = 18,
R13 = 19,
R14 = 20,
R15 = 21,
R16 = 22,
R17 = 23,
R18 = 24,
R19 = 25,
R20 = 26,
R21 = 27,
R22 = 28,
R23 = 29,
R24 = 30,
R25 = 31,
R26 = 32,
R27 = 33,
R28 = 34,
R29 = 35,
R30 = 36,
R31 = 37,
}
pub const LAST_REG: Reg = R49;
pub const LAST_REG: Reg = 63;
pub const NUM_REGS: usize = 64;

78
src/vm/state.rs Normal file
View File

@@ -0,0 +1,78 @@
use crate::vm::{common::*, error::*, flags::*, mem::*, reg::*};
/// The VM's mutable machine state: its registers and its memory.
pub struct State {
/// Register values, indexed by register number (`reg as usize`).
regs: [u64; NUM_REGS],
/// Byte storage exposed through `State::mem_cursor`.
mem: Vec<u8>,
}
impl State {
    /// Creates a state with every register set to zero and empty memory.
    pub fn new() -> Self {
        State {
            regs: [0; NUM_REGS],
            mem: Default::default(),
        }
    }

    /// Returns a cursor over the memory bytes, positioned at `addr`.
    pub fn mem_cursor(&self, addr: Addr) -> MemCursor<&[u8]> {
        let mut cursor = MemCursor::new(self.mem.as_slice());
        cursor.set_position(addr);
        cursor
    }

    /// Returns the current value of the `STATUS` register.
    ///
    /// No instructions are executed here; the body only reads the register.
    pub fn run(&mut self) -> Result<u64> {
        Ok(self.get_reg_unchecked(STATUS))
    }

    ////////////////////////////////////////////////////////////////////////////////
    // Registers
    ////////////////////////////////////////////////////////////////////////////////

    /// Reads register `reg` without validating the register number first.
    ///
    /// # Panics
    ///
    /// Panics if `reg as usize` is not less than `NUM_REGS`.
    pub fn get_reg_unchecked(&self, reg: Reg) -> u64 {
        self.regs[reg as usize]
    }

    /// Reads register `reg`.
    ///
    /// # Errors
    ///
    /// Returns `VmError::IllegalReg` if `reg as usize` is not less than `NUM_REGS`.
    pub fn get_reg(&self, reg: Reg) -> Result<u64> {
        if (reg as usize) >= NUM_REGS {
            return Err(VmError::IllegalReg { reg });
        }
        Ok(self.get_reg_unchecked(reg))
    }

    /// Writes `value` to register `reg` without validating the register number first.
    ///
    /// # Panics
    ///
    /// Panics if `reg as usize` is not less than `NUM_REGS`.
    pub fn set_reg_unchecked(&mut self, reg: Reg, value: u64) {
        self.regs[reg as usize] = value;
    }

    /// Writes `value` to register `reg`.
    ///
    /// # Errors
    ///
    /// Returns `VmError::IllegalReg` if `reg as usize` is not less than `NUM_REGS`;
    /// no register is modified in that case.
    pub fn set_reg(&mut self, reg: Reg, value: u64) -> Result<()> {
        if (reg as usize) >= NUM_REGS {
            return Err(VmError::IllegalReg { reg });
        }
        // Run the write as its own statement rather than passing `()` into `Ok(..)`.
        self.set_reg_unchecked(reg, value);
        Ok(())
    }

    ////////////////////////////////////////////////////////////////////////////////
    // Flags
    ////////////////////////////////////////////////////////////////////////////////

    /// Returns the contents of the `FLAGS` register interpreted as `Flags`.
    pub fn flags(&self) -> Flags {
        // SAFETY: it's OK if the register holds bits that no defined flag uses - this
        // shouldn't happen anyway, but if it does, they're ignored.
        unsafe { Flags::from_bits_unchecked(self.get_reg_unchecked(FLAGS)) }
    }

    /// Sets every flag in `flags`, leaving the other flags as they are.
    pub fn insert_flags(&mut self, flags: Flags) {
        let mut new_flags = self.flags();
        new_flags.insert(flags);
        self.set_flags(new_flags);
    }

    /// Clears every flag in `flags`, leaving the other flags as they are.
    pub fn remove_flags(&mut self, flags: Flags) {
        let mut new_flags = self.flags();
        new_flags.remove(flags);
        self.set_flags(new_flags);
    }

    /// Overwrites the `FLAGS` register with the bits of `flags`.
    pub fn set_flags(&mut self, flags: Flags) {
        self.set_reg_unchecked(FLAGS, flags.bits());
    }
}

impl Default for State {
    /// Equivalent to [`State::new`].
    fn default() -> Self {
        Self::new()
    }
}

View File

@@ -1,208 +0,0 @@
use crate::vm::{error::*, flags::*, inst::InstOp, mem::*, obj::obj::*, reg::*};
use byteorder::{WriteBytesExt, LE};
use std::{io::Cursor, mem};
/// A full machine word; each register holds one `Word`.
pub type Word = u64;
/// A value half the width of a `Word`.
pub type HalfWord = u32;
/// The register file: 64 `Word`-sized registers.
pub type Registers = [Word; 64];
/// An address of a single byte in VM memory.
pub type Addr = u64;
/// A virtual machine instance: a byte-addressed memory plus a register file.
pub struct Vm {
/// VM memory; replaced wholesale by `Vm::load_object`.
pub(super) mem: Vec<u8>,
/// Register values, indexed by register number (`reg as usize`).
pub(super) registers: Registers,
}
impl Vm {
/// Creates a VM with empty memory and all 64 registers set to zero.
pub fn new() -> Self {
Vm {
mem: Default::default(),
registers: [0; 64],
}
}
/// Loads an object into this VM, clearing out all previous memory and resetting the registers.
///
/// Memory is sized to the largest `end` among the object's data and code sections
/// (0 if there are none). After loading, `IP` is set to the `"entry"` value from a
/// meta section, or to 0 if no meta section provides one.
///
/// NOTE: exceeding `max_mem` currently hits `todo!` (a panic) rather than returning an error.
pub fn load_object(&mut self, object: Object, max_mem: usize) -> Result<()> {
self.registers = [0; 64];
// determine memory spread
let mem_size = object
.sections
.iter()
.filter_map(|s| match s {
Section::Data { end, .. } | Section::Code { end, .. } => Some(*end),
Section::Meta { .. } => None,
})
.max()
.unwrap_or(0);
if mem_size > (max_mem as u64) {
todo!("raise max memory error");
}
self.mem = vec![0; mem_size as usize];
let mut entry = 0;
// write sections to memory
for section in object.sections.into_iter() {
match section {
Section::Data {
start,
contents,
..
}
| Section::Code {
start,
contents,
..
} => {
let start = start as usize;
// `zip` stops at the shorter side, so any contents that would run past the
// end of memory are dropped without an error. Slicing at `start` panics if
// `start` is greater than the memory length.
for (value, dest) in contents.into_iter().zip(&mut self.mem[start..])
{
*dest = value;
}
}
Section::Meta { entries } => {
if let Some(e) = entries.get("entry") {
// set the entry point
entry = *e;
}
}
}
}
// The previous IP value returned by `set_reg` is intentionally discarded.
self.set_reg(IP, entry);
Ok(())
}
/// Returns a `MemCursor` over VM memory, positioned at byte offset `at`.
pub fn mem_cursor(&self, at: usize) -> MemCursor {
let mut cursor = MemCursor::new(&self.mem);
cursor.set_position(at as u64);
cursor
}
/// Calls `tick` repeatedly until the `HALT` flag is set, then returns the `STATUS` register.
///
/// The first error returned by `tick` is propagated. (`tick` is defined outside this block.)
pub fn run(&mut self) -> Result<u64> {
while !self.is_halted() {
self.tick()?;
}
Ok(self.get_reg(STATUS))
}
/// Clears the `HALT` flag.
pub fn resume(&mut self) {
self.remove_flags(Flags::HALT);
}
/// Returns `true` if the `HALT` flag is set.
pub fn is_halted(&self) -> bool {
self.flags().contains(Flags::HALT)
}
/// Reads the 8-byte word at `addr`, failing with `MemOutOfBounds` if any of those bytes
/// lies outside memory.
pub fn get_word(&self, addr: Addr) -> Result<Word> {
self.check_read(addr, 8)?;
// presumably cannot fail: the whole range was bounds-checked above
Ok(self.mem_cursor(addr as usize).next_u64().unwrap())
}
/// Reads the 4-byte halfword at `addr`, failing with `MemOutOfBounds` if any of those bytes
/// lies outside memory.
pub fn get_halfword(&self, addr: Addr) -> Result<HalfWord> {
self.check_read(addr, 4)?;
Ok(self.mem_cursor(addr as usize).next_u32().unwrap())
}
/// Reads the 2-byte instruction opcode at `addr`, failing with `MemOutOfBounds` if either
/// byte lies outside memory.
pub fn get_inst_op(&self, addr: Addr) -> Result<InstOp> {
self.check_read(addr, 2)?;
Ok(self.mem_cursor(addr as usize).next_u16().unwrap())
}
/// Reads the byte at `addr`, failing with `MemOutOfBounds` if `addr` is outside memory.
pub fn get_byte(&self, addr: Addr) -> Result<u8> {
self.check_addr(addr)?;
Ok(self.mem_cursor(addr as usize).next_u8().unwrap())
}
/// Writes `value` at `addr` as 8 little-endian bytes.
///
/// Despite its name, `check_read` is only a range check and is reused here for the write.
pub fn set_word(&mut self, addr: Addr, value: Word) -> Result<()> {
self.check_read(addr, 8)?;
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
Ok(cursor.write_u64::<LE>(value).unwrap())
}
/// Writes `value` at `addr` as 4 little-endian bytes.
pub fn set_halfword(&mut self, addr: Addr, value: HalfWord) -> Result<()> {
self.check_read(addr, 4)?;
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
Ok(cursor.write_u32::<LE>(value).unwrap())
}
/// Writes the single byte `value` at `addr`.
pub fn set_byte(&mut self, addr: Addr, value: u8) -> Result<()> {
self.check_addr(addr)?;
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
Ok(cursor.write_u8(value).unwrap())
}
/// Reads the word stored at the memory address held in register `reg`.
pub fn load(&self, reg: Reg) -> Result<Word> {
self.get_word(self.get_reg_checked(reg)?)
}
/// Writes `value` to the memory address held in register `reg`.
pub fn store(&mut self, reg: Reg, value: Word) -> Result<()> {
let addr = self.get_reg_checked(reg)?;
self.set_word(addr, value)
}
/// Reads register `reg`, failing with `IllegalReg` if `reg` is greater than `LAST_REG`.
pub fn get_reg_checked(&self, reg: Reg) -> Result<Word> {
self.check_reg(reg)?;
Ok(self.get_reg(reg))
}
/// Reads register `reg` without validating it; indexing panics if `reg` is not below 64.
pub fn get_reg(&self, reg: Reg) -> Word {
self.registers[reg as usize]
}
/// Writes register `reg` and returns its previous value, failing with `IllegalReg` if `reg`
/// is greater than `LAST_REG`.
pub fn set_reg_checked(&mut self, reg: Reg, value: Word) -> Result<Word> {
self.check_reg(reg)?;
Ok(self.set_reg(reg, value))
}
/// Writes register `reg` without validating it and returns the value it held before.
///
/// Writes to `NULL` are discarded and report a previous value of 0.
pub fn set_reg(&mut self, reg: Reg, value: Word) -> Word {
if reg == NULL {
return 0;
}
mem::replace(&mut self.registers[reg as usize], value)
}
/// Returns the value of the `IP` register.
pub fn ip(&self) -> Word {
self.get_reg(IP)
}
/// Returns the contents of the `FLAGS` register interpreted as `Flags`.
pub fn flags(&self) -> Flags {
// this is safe because it's OK if there are random bits flipped - this shouldn't happen
// anyway, but if it does, they're ignored
unsafe { Flags::from_bits_unchecked(self.get_reg(FLAGS)) }
}
/// Sets every flag in `flags`, leaving the other flags as they are.
pub fn insert_flags(&mut self, flags: Flags) {
let mut new_flags = self.flags();
new_flags.insert(flags);
self.set_flags(new_flags);
}
/// Clears every flag in `flags`, leaving the other flags as they are.
pub fn remove_flags(&mut self, flags: Flags) {
let mut new_flags = self.flags();
new_flags.remove(flags);
self.set_flags(new_flags);
}
/// Overwrites the `FLAGS` register with the bits of `flags`.
pub fn set_flags(&mut self, flags: Flags) {
self.set_reg(FLAGS, flags.bits());
}
/// Fails with `MemOutOfBounds` unless `addr` is a valid index into memory.
fn check_addr(&self, addr: Addr) -> Result<()> {
if addr >= (self.mem.len() as u64) {
Err(VmError::MemOutOfBounds { addr })
} else {
Ok(())
}
}
/// Checks that both the first and the last byte of the `len`-byte range starting at `addr`
/// are inside memory. The last byte is only examined once the first has passed.
fn check_read(&self, addr: Addr, len: Word) -> Result<()> {
self.check_addr(addr)
.and_then(|_| self.check_addr(addr + len - 1))
}
/// Fails with `IllegalReg` if `reg` is greater than `LAST_REG`.
fn check_reg(&self, reg: Reg) -> Result<()> {
if reg > LAST_REG {
Err(VmError::IllegalReg { reg })
} else {
Ok(())
}
}
}

443
vm.md
View File

@@ -4,7 +4,7 @@ This is an outline of the VM that drives this language.
# Primitives
* Numbers may be big endian (BE) or little endian (LE) at the byte level. This guide will use LE.
* Numbers are little endian (LE) at the byte level.
* Addresses point to single bytes.
* Signed numbers use two's complement.
@@ -23,10 +23,10 @@ CPU registers are addressed by a value between 0-63 (6 bits). All registers are
* SP - Stack pointer
* FP - Frame pointer
* FLAGS - CPU flags
* NULL - Always zero for reading and will never change after writing.
* (8 unused registers)
* STATUS - Generic status code
* R0-R49
* NIL - Always zero for reading and will never change after writing.
* R0-R31
* (26 unused registers)
## CPU Flags
@@ -42,14 +42,75 @@ CPU flags are addressed by bit index, going from right to left.
* Overwriting a register without its value being used
* Mixing arithmetic with bit twiddling on the same target
## Register ideas
* Other possible names: Z, NIL
# Instructions
Instructions attempt to be as small as possible while conforming to 8-bit, 16-bit, 32-bit, or 64-bit
alignment. All instructions have 16-bit opcodes.
All instructions have 16-bit opcodes. Instructions are grouped by the operands their operations require:
* Those whose operations require a source and a destination.
* Those whose operations require two sources
* The destination of these instructions is implied by the instruction itself; e.g. the `CMPEQ`
instruction implicitly sets a bit in the `FLAGS` register.
* Those whose operations require a source, but no destination.
* Those whose operations require a destination, but no source.
* There aren't any of these instructions yet
* Those whose operations require neither a source nor a destination.
Destinations may be:
* A 64-bit address pointing at a 64-bit or 8-bit value
* A 6-bit register
Sources may be one of:
* A 64-bit address pointing at a 64-bit or 8-bit value
* A 6-bit register
* A 64-bit immediate value
Counting all source and destination value sizes as their own configuration, there are:
* 3 possible destination types
* 4 possible source types
Instructions have different layouts depending on whether their operations take a source and/or
destination. For example, the `ADD` instruction takes a source and a destination, the `JMP`
instruction takes a source, and the `NOP` instruction takes neither a source nor a destination.
Instructions that take neither a source nor a destination are simply 16 bits long.
All other instructions are followed by a byte determining their source and/or
destination.
An instruction that has a source and destination looks like this:
```
| XXXXXXXX | XXXXXXXX | DDDDSSSS | ...source and destination |
```
An instruction that has either a source or a destination (but not both) looks like this:
```
| XXXXXXXX | XXXXXXXX | YYYY0000 | ...source or destination |
```
An instruction that has neither a source nor a destination looks like this:
```
| XXXXXXXX | XXXXXXXX |
```
## Source/destination flags
| Bits | Source/destination |
| - | - |
| 0b0000 | Address (64 bit value) |
| 0b0001 | Address (32 bit value) |
| 0b0010 | Address (16 bit value) |
| 0b0011 | Address (8 bit value) |
| 0b0100 | 6-bit register |
| 0b0101 | Immediate (64 bits, source only) |
| 0b0110 | Immediate (32 bits, source only) |
| 0b0111 | Immediate (16 bits, source only) |
| 0b1000 | Immediate (8 bits, source only) |
## Arithmetic
@@ -58,160 +119,43 @@ wrapping around to 0.
* Add
* Opcode: 0x0000
* **Params**: REG1, REG2
* `REG1 = REG1 + REG2`
* Unsigned addition
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000000 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Mul
* Params: Destination, source
* Sub
* Opcode: 0x0001
* **Params**: REG1, REG2
* `REG1 = REG1 * REG2`
* Unsigned multiplication
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000001 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Div
* Params: Destination, source
* Mul
* Opcode: 0x0002
* **Params**: REG1, REG2
* `REG1 = REG1 / REG2`
* Unsigned division
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000010 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Mod
* Params: Destination, source
* Div
* Opcode: 0x0003
* **Params**: REG1, REG2
* `REG1 = REG1 % REG2` (exact semantics TBD)
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000011 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* INeg
* Params: Destination, source
* Mod
* Opcode: 0x0004
* **Params**: REG1
* `REG1 = REG1 * -1`
* Signed negative
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0000000000000100 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Params: Destination, source
* And
* Opcode: 0x0005
* **Params**: REG1, REG2
* `REG1 = REG1 & REG2`
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000101 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Params: Destination, source
* Or
* Opcode: 0x0006
* **Params**: REG1, REG2
* `REG1 = REG1 | REG2`
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000110 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Inv
* Opcode: 0x0007
* **Params**: REG1
* `REG1 = ~REG1`
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0000000000000111 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Not
* Opcode: 0x0008
* **Params**: REG1
* ```
if REG1 == 0 {
REG1 = 0;
} else {
REG1 = 1;
}
```
* Boolean NOT; equivalent of C's `!` unary operator
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0000000000001000 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Params: Destination, source
* Xor
* Opcode: 0x0009
* **Params**: REG1, REG2
* `REG1 = REG1 ^ REG2`
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000001001 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Opcode: 0x0007
* Params: Destination, source
* Shl
* Opcode: 0x000A
* **Params**: REG1, REG2
* `REG1 = REG1 << REG2`
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000001010 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Opcode: 0x0008
* Params: Destination, source
* Shr
* Opcode: 0x000B
* **Params**: REG1, REG2
* `REG1 = REG1 >> REG2`
* Does not sign extend
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000001011 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Opcode: 0x0009
* Params: Destination, source
* INeg
* Opcode: 0x000a
* Params: Destination, source
* Inv
* Opcode: 0x000b
* Params: Destination, source
* Not
* Opcode: 0x000c
* Params: Destination, source
### TODO
@@ -223,196 +167,33 @@ wrapping around to 0.
* CmpEq
* Opcode: 0x1000
* **Params**: REG1, REG2
* ```
if REG1 == REG2 {
FLAGS[1] = 1;
} else {
FLAGS[1] = 0;
}
```
* Sets the COMPARE flag to 1 if REG1 == REG2
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0001000000000000 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Params: Source, source
* CmpLt
* Opcode: 0x1001
* **Params**: REG1, REG2
* ```
if REG1 < REG2 {
FLAGS[1] = 1;
} else {
FLAGS[1] = 0;
}
```
* Sets the COMPARE flag to 1 if REG1 < REG2
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0001000000000001 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Params: Source, source
* Jmp
* Opcode: 0x1100
* **Params**: REG1
* `IP = REG1;`
* Jumps to the address in REG1 unconditionally.
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0001000100000000 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Jz
* Opcode: 0x1101
* **Params**: REG1
* ```
if FLAGS[1] == 0 {
IP = REG1;
}
```
* Jumps to the address in REG1 if COMPARE flag is 0.
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0001000100000001 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Jnz
* Opcode: 0x1002
* **Params**: REG1
* ```
if FLAGS[1] != 0 {
IP = REG1;
}
```
* Jumps to the address in REG1 if COMPARE flag is 1.
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0001000100000002 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Params: Source
* Jz
* Opcode: 0x1003
* Params: Source
* Jnz
* Opcode: 0x1004
* Params: Source
## Data movement
* Load
* Mov
* Opcode: 0x2000
* **Params**: REG1, REG2
* ```
REG1 = MEM[REG2];
```
* Sets REG1 to the value at the memory address in REG2.
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0010000000000000 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* RegCopy
* Opcode: 0x2001
* **Params**: REG1, REG2
* `REG1 = REG2`
* Copies the value in REG2 into REG1.
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0010000000000001 | REG1.. | REG2.. | XXXX |
+-------------------------------------------+
```
* StoreImm64
* Opcode: 0x2100
* **Params**: REG1, IMM_64
* `REG1 = IMM_64`
* Sets REG1 to the specified 64-bit number.
* StoreImm32
* Opcode: 0x2101
* **Params**: REG1, IMM_32
* `REG1 = IMM_32`
* Sets REG1 to the specified 32-bit number.
* ```
64 48 42 36 32 0
opcode reg1 reg2 unused
/ / / / immediate 32 bit value
/ / / / /
+------------------------------------------------------------------------------+
| 0010000100000001 | REG1.. | REG2.. | XXXX | IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII |
+------------------------------------------------------------------------------+
```
* MemCopy
* Opcode: 0x2200
* **Params**: REG1, REG2
* `MEM[REG1] = MEM[REG2]`
* Copies the value at the memory address in REG2 to the memory address in REG1.
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0010001000000000 | REG1.. | REG2.. | XXXX |
+-------------------------------------------+
```
* Store
* Opcode: 0x2201
* **Params**: REG1, REG2
* ```
MEM[REG2] = REG1;
```
* Sets the value at the memory address in REG2 to the value in REG1.
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0010001000000001 | REG1.. | REG2.. | XXXX |
+-------------------------------------------+
```
## Miscellaneous
* Halt
* Opcode: 0xF000
* **Params**: (none)
* `FLAGS[0] = 1`
* Halts the machine
* ```
16
opcode
/
+------------------+
| 1111000000000000 |
+------------------+
```
* Nop
* Opcode: 0xF001
* **Params**: (none)
* Does nothing
* ```
16
opcode
/
+------------------+
| 1111000000000001 |
+------------------+
```
* Dump
* Opcode: 0xF002
## Other instructions TODO
@@ -423,8 +204,6 @@ wrapping around to 0.
* Uses FP to determine previous SP, FP, and IP and restores them
* Push
* Pop
* More immediate stores?
* Idea: Store42 (or whatever number of bits) that maximizes the usage of a 64-bit instruction
# Binary object format
@@ -435,7 +214,7 @@ the object.
The header is composed of:
* 64 bits - A magic number (0xDEAD_BEA7_BA5E_BA11).
* 64 bits - A magic number (0xDEAD\_BEA7\_BA5E\_BA11).
* 32 bits - Version of the file
* 32 bits - The number of sections in the file
* section descriptions detailed below
@@ -458,7 +237,7 @@ the section contents.
The data section contains static data that is initialized to some known value.
* 64 bits - section load start - where in memory the content of this section begins
* 64 bits - section load end - where in memory the content of this section ends
* 64 bits - section length - how long the memory content is
### Code section