Use lrpar for parsing, big 'ol syntax overhaul

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-02-17 16:15:06 -05:00
parent cf9ba376aa
commit 2c4b56e362
23 changed files with 1394 additions and 1494 deletions

823
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -11,9 +11,15 @@ build = "build.rs"
[dependencies] [dependencies]
bitflags = "1" bitflags = "1"
byteorder = "1" byteorder = "1"
lalrpop-util = "0.17.2" lazy_static = "1"
regex = "*"
snafu = "0.6.2" snafu = "0.6.2"
cfgrammar = "0.6"
lrlex = "0.6"
lrpar = "0.6"
regex = "*"
[build-dependencies] [build-dependencies]
lalrpop = "0.17.2" cfgrammar = "0.6"
lrlex = "0.6"
lrpar = "0.6"

View File

@@ -1,5 +1,13 @@
use lalrpop; use cfgrammar::yacc::YaccKind;
use lrlex::LexerBuilder;
use lrpar::{CTParserBuilder};
fn main() { fn main() -> Result<(), Box<dyn std::error::Error>> {
lalrpop::process_root().unwrap(); let lex_rule_ids_map = CTParserBuilder::new()
.yacckind(YaccKind::Grmtools)
.process_file_in_src("vm/obj/syn/parser.y")?;
LexerBuilder::new()
.rule_ids_map(lex_rule_ids_map)
.process_file_in_src("vm/obj/syn/lexer.l")?;
Ok(())
} }

View File

@@ -2,7 +2,7 @@
use std::cmp::Ordering; use std::cmp::Ordering;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Pos { pub struct Pos {
pub source: usize, pub source: usize,
pub line: usize, pub line: usize,
@@ -22,23 +22,20 @@ impl Pos {
} }
} }
pub fn from_char(c: char, source: usize, line: usize, col: usize, byte: usize) -> Self { pub fn from_char(c: char) -> Self {
Pos::new(source, line, col, byte, c.len_utf8()) Pos::new(0, 0, 0, 0, c.len_utf8())
} }
pub fn adv_char(self, c: char) -> Self { pub fn adv_char(&mut self, c: char) {
let mut next = self; self.byte += self.len;
next.byte += next.len; self.len = c.len_utf8();
next.len = c.len_utf8(); self.source += 1;
next.source += 1; self.col += 1;
next.col += 1;
next
} }
pub fn adv_line(self) -> Self { pub fn adv_line(&mut self) {
let mut next = self; self.line += 1;
next.line += 1; self.col = 0;
next
} }
} }
@@ -56,8 +53,8 @@ impl Ord for Pos {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span { pub struct Span {
start: Pos, pub start: Pos,
end: Pos, pub end: Pos,
} }
impl Span { impl Span {

View File

@@ -3,7 +3,7 @@
mod common; mod common;
mod vm; mod vm;
use std::{convert::TryFrom, env, fs, io, process}; use std::{env, fs, io, process};
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
@@ -21,55 +21,14 @@ fn get_input_string() -> io::Result<String> {
} }
fn main() -> Result<()> { fn main() -> Result<()> {
use vm::obj::syn::parser::SectionsParser; use vm::obj::syn::{lexer, parser};
let contents = get_input_string()?; let text = get_input_string()?;
let ast = match SectionsParser::new().parse(&contents) { let lexerdef = lexer::lexerdef();
Ok(ast) => ast, let lexer = lexerdef.lexer(&text);
Err(err) => { let (res, errors) = parser::parse(&lexer);
eprintln!("{}", err); for err in errors {
process::exit(1); println!("{}", err.pp(&lexer, &parser::token_epp));
}
};
let obj = vm::obj::obj::Object::try_from(&ast)?;
dump(&obj)?;
let mut vm = vm::vm::Vm::new();
vm.load_object(obj, 1024 * 1024 * 64)?; // 64mb
let status = vm.run()?;
println!("{}", status);
Ok(())
}
fn dump(obj: &vm::obj::obj::Object) -> Result<()> {
use vm::obj::obj::Section;
use vm::visit::VisitInst;
let mut stdout = io::stdout();
for section in &obj.sections {
match section {
Section::Data { start, contents, .. } => {
const WIDTH: usize = 4;
println!("data section at 0x{:08x}", start);
for (i, b) in contents.iter().enumerate() {
if i % WIDTH == 0 {
print!("{:08x} | ", ((*start as usize) + i));
}
print!("{:02x} ", b);
if i % WIDTH == (WIDTH - 1) {
println!();
}
}
println!();
}
Section::Code { start, contents, .. } => {
println!("code section at 0x{:08x}", start);
let mut disasm = vm::disassemble::Disassemble::new(&mut stdout, contents, *start);
while !disasm.is_done() {
disasm.visit_inst()?;
}
println!();
}
Section::Meta { .. } => continue,
};
} }
println!("{:?}", res);
Ok(()) Ok(())
} }

73
src/vm/common.rs Normal file
View File

@@ -0,0 +1,73 @@
use std::{
cmp::Ordering,
fmt::{self, Formatter, LowerHex},
ops::{Add, AddAssign},
};
/// An address in VM memory, stored as a plain `u64`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Addr(pub u64);

/// Hex formatting is delegated to the wrapped integer, so width, fill and
/// `#` flags behave exactly as they do for `u64`.
impl LowerHex for Addr {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        LowerHex::fmt(&self.0, fmt)
    }
}

/// `Addr + T` for every `T` that `u64` can be added to, yielding an `Addr`.
impl<T> Add<T> for Addr
where
    T: Add<u64, Output = u64>,
    u64: Add<T, Output = u64>,
{
    type Output = Addr;

    fn add(self, rhs: T) -> Self::Output {
        let Addr(base) = self;
        Addr(base + rhs)
    }
}

// Generates `Addr += $ty`, widening the right-hand side to `u64` first.
macro_rules! impl_add_assign {
    ($ty:ty) => {
        impl AddAssign<$ty> for Addr {
            fn add_assign(&mut self, rhs: $ty) {
                self.0 += rhs as u64;
            }
        }
    };
}

impl_add_assign!(usize);
impl_add_assign!(u64);

// Generates equality and ordering between `Addr` and a primitive integer by
// comparing the wrapped value against the integer widened to `u64`.
macro_rules! impl_cmp {
    ($ty:ty) => {
        impl PartialEq<$ty> for Addr {
            fn eq(&self, other: &$ty) -> bool {
                (*other as u64) == self.0
            }
        }

        impl PartialOrd<$ty> for Addr {
            fn partial_cmp(&self, other: &$ty) -> Option<Ordering> {
                Some(self.0.cmp(&(*other as u64)))
            }
        }
    };
}

impl_cmp!(usize);
impl_cmp!(u64);

// Generates `From<$ty> for Addr` by widening the integer to `u64`.
macro_rules! impl_from {
    ($ty:ty) => {
        impl From<$ty> for Addr {
            fn from(other: $ty) -> Self {
                Addr(other as u64)
            }
        }
    };
}

impl_from!(usize);
impl_from!(u64);

View File

@@ -1,4 +1,4 @@
use crate::vm::{inst::InstOp, reg::Reg, vm::*}; use crate::vm::{inst::InstOp, reg::Reg, common::*,};
use snafu::Snafu; use snafu::Snafu;
#[derive(Snafu, Debug, Clone)] #[derive(Snafu, Debug, Clone)]
@@ -9,6 +9,10 @@ pub enum VmError {
MemOutOfBounds { addr: Addr }, MemOutOfBounds { addr: Addr },
#[snafu(display("illegal instruction opcode: 0x{:04x}", op))] #[snafu(display("illegal instruction opcode: 0x{:04x}", op))]
IllegalOp { op: InstOp }, IllegalOp { op: InstOp },
#[snafu(display("illegal destination specification: 0b{:08b}", spec))]
IllegalDestSpec { spec: u8 },
#[snafu(display("illegal source specification: 0b{:08b}", spec))]
IllegalSourceSpec { spec: u8 },
} }
pub type Result<T, E = VmError> = std::result::Result<T, E>; pub type Result<T, E = VmError> = std::result::Result<T, E>;

View File

@@ -1,3 +1,5 @@
use crate::vm::{common::Addr, reg::Reg};
macro_rules! instructions { macro_rules! instructions {
{ {
$($variant:ident = $value:expr),* $(,)? $($variant:ident = $value:expr),* $(,)?
@@ -21,42 +23,114 @@ pub type InstOp = u16;
instructions! { instructions! {
ADD = 0x0000, ADD = 0x0000,
MUL = 0x0001, SUB = 0x0001,
DIV = 0x0002, MUL = 0x0002,
MOD = 0x0003, DIV = 0x0003,
INEG = 0x0004, MOD = 0x0004,
AND = 0x0005, AND = 0x0005,
OR = 0x0006, OR = 0x0006,
INV = 0x0007, XOR = 0x0007,
NOT = 0x0008, SHL = 0x0008,
XOR = 0x0009, SHR = 0x0009,
SHL = 0x000a, INEG = 0x000a,
SHR = 0x000b, INV = 0x000b,
NOT = 0x000c,
CMPEQ = 0x1000, CMPEQ = 0x1000,
CMPLT = 0x1001, CMPLT = 0x1001,
JMP = 0x1100, JMP = 0x1002,
JZ = 0x1101, JZ = 0x1003,
JNZ = 0x1102, JNZ = 0x1004,
LOAD = 0x2000, MOV = 0x2000,
REGCOPY = 0x2001,
STOREIMM64 = 0x2100,
STOREIMM32 = 0x2101,
MEMCOPY = 0x2200,
STORE = 0x2201,
HALT = 0xF000, HALT = 0xF000,
NOP = 0xF001, NOP = 0xF001,
DUMP = 0xF002,
} }
pub fn inst_len(op: InstOp) -> usize { pub enum Inst {
match op { Add(Dest, Source),
// 2 bytes Sub(Dest, Source),
HALT | NOP => 2, Mul(Dest, Source),
// 4 bytes Div(Dest, Source),
ADD | MUL | DIV | INEG | INV | NOT | MOD | AND | OR | XOR | SHL | SHR | CMPEQ | CMPLT Mod(Dest, Source),
| JMP | JZ | JNZ | LOAD | REGCOPY | MEMCOPY | STORE => 4, And(Dest, Source),
// Immediates - 4+ bytes Or(Dest, Source),
STOREIMM64 => 16, Xor(Dest, Source),
STOREIMM32 => 8, Shl(Dest, Source),
_ => panic!("unknown instruction op 0x{:04x}", op), Shr(Dest, Source),
INeg(Dest, Source),
Inv(Dest, Source),
Not(Dest, Source),
CmpEq(Source, Source),
CmpLt(Source, Source),
Jmp(Source),
Jz(Source),
Jnz(Source),
Mov(Dest, Source),
Halt,
Nop,
Dump,
}
impl Inst {
pub fn op(&self) -> InstOp {
match self {
Inst::Add(_, _) => ADD,
Inst::Sub(_, _) => SUB,
Inst::Mul(_, _) => MUL,
Inst::Div(_, _) => DIV,
Inst::Mod(_, _) => MOD,
Inst::And(_, _) => AND,
Inst::Or(_, _) => OR,
Inst::Xor(_, _) => XOR,
Inst::Shl(_, _) => SHL,
Inst::Shr(_, _) => SHL,
Inst::INeg(_, _) => INEG,
Inst::Inv(_, _) => INV,
Inst::Not(_, _) => NOT,
Inst::CmpEq(_, _) => CMPEQ,
Inst::CmpLt(_, _) => CMPLT,
Inst::Jmp(_) => JMP,
Inst::Jz(_) => JZ,
Inst::Jnz(_) => JNZ,
Inst::Mov(_, _) => MOV,
Inst::Halt => HALT,
Inst::Nop => NOP,
Inst::Dump => DUMP,
}
} }
} }
/// A source operand of an instruction: a memory address, a register, or an
/// immediate value.
// NOTE(review): the AddrNN variants presumably select the access width in
// bits; nothing in this file establishes that - confirm against the executor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Source {
Addr64(Addr),
Addr32(Addr),
Addr16(Addr),
Addr8(Addr),
Reg(Reg),
// Immediates of every encoded width are stored widened to u64.
Imm(u64),
}
/// A destination operand of an instruction: a memory address or a register.
/// Unlike `Source`, there is no immediate variant.
// NOTE(review): the AddrNN variants presumably select the access width in
// bits; nothing in this file establishes that - confirm against the executor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dest {
Addr64(Addr),
Addr32(Addr),
Addr16(Addr),
Addr8(Addr),
Reg(Reg),
}
// Destination operand specifiers. Each value fits in four bits; the decoder
// in `mem.rs` reads it from the high nibble of the operand spec byte.
pub const DEST_ADDR64: u8 = 0b0000;
pub const DEST_ADDR32: u8 = 0b0001;
pub const DEST_ADDR16: u8 = 0b0010;
pub const DEST_ADDR8: u8 = 0b0011;
pub const DEST_REG: u8 = 0b0100;
// Source operand specifiers, also four-bit values. The four SOURCE_IMM*
// values differ only in how many immediate bytes follow in the instruction
// stream; all of them decode to `Source::Imm`.
pub const SOURCE_ADDR64: u8 = 0b0000;
pub const SOURCE_ADDR32: u8 = 0b0001;
pub const SOURCE_ADDR16: u8 = 0b0010;
pub const SOURCE_ADDR8: u8 = 0b0011;
pub const SOURCE_REG: u8 = 0b0100;
pub const SOURCE_IMM64: u8 = 0b0101;
pub const SOURCE_IMM32: u8 = 0b0110;
pub const SOURCE_IMM16: u8 = 0b0111;
pub const SOURCE_IMM8: u8 = 0b1000;

View File

@@ -1,81 +1,219 @@
use crate::vm::{error::*, reg::*}; use crate::vm::{common::*, error::*, inst::*, reg::*};
use byteorder::{ReadBytesExt, LE}; use std::{convert::TryInto, ops::Index, mem};
use std::{
io::Cursor,
ops::{Deref, DerefMut},
};
const R1_MASK: u16 = 0b1111_1100_0000_0000; pub struct MemCursor<T> {
const R2_MASK: u16 = 0b0000_0011_1111_0000; pos: Addr,
mem: T,
#[derive(Debug, Clone)]
pub struct MemCursor<'mem> {
cursor: Cursor<&'mem [u8]>,
} }
impl<'mem> MemCursor<'mem> { impl<T> MemCursor<T>
pub fn new(mem: &'mem [u8]) -> Self { where T: AsRef<[u8]>
MemCursor { {
cursor: Cursor::new(mem), pub fn new(mem: T) -> Self {
} MemCursor { pos: Addr(0), mem }
} }
pub fn cursor(&self) -> &Cursor<&'mem [u8]> { pub fn position(&self) -> Addr {
&self.cursor self.pos
} }
pub fn cursor_mut(&mut self) -> &mut Cursor<&'mem [u8]> { pub fn set_position<P: Into<Addr>>(&mut self, position: P) {
&mut self.cursor self.pos = position.into();
}
/// Reads the byte at the current position and advances the cursor by one.
///
/// No bounds check is made here; indexing panics if the position is outside
/// the backing memory.
pub fn next_u8_unchecked(&mut self) -> u8 {
// Index first so that a panic leaves the position untouched.
let val = self[self.pos];
self.pos += 1u64;
val
} }
pub fn next_u8(&mut self) -> Result<u8> { pub fn next_u8(&mut self) -> Result<u8> {
self.read_u8().map_err(|_| VmError::MemOutOfBounds { self.check_addr(self.pos)
addr: self.position(), .map(|_| self.next_u8_unchecked())
}) }
pub fn next_u16_unchecked(&mut self) -> u16 {
let (int_bytes, _) = self.mem.as_ref()
.split_at(mem::size_of::<u16>());
let val = u16::from_le_bytes(int_bytes.try_into().unwrap());
self.pos += 2u64;
val
} }
pub fn next_u16(&mut self) -> Result<u16> { pub fn next_u16(&mut self) -> Result<u16> {
self.read_u16::<LE>().map_err(|_| VmError::MemOutOfBounds { self.check_addr(self.pos)
addr: self.position(), .map(|_| self.next_u16_unchecked())
}) }
pub fn next_u32_unchecked(&mut self) -> u32 {
let (int_bytes, _) = self.mem.as_ref()
.split_at(mem::size_of::<u32>());
let val = u32::from_le_bytes(int_bytes.try_into().unwrap());
self.pos += 4u64;
val
} }
pub fn next_u32(&mut self) -> Result<u32> { pub fn next_u32(&mut self) -> Result<u32> {
self.read_u32::<LE>().map_err(|_| VmError::MemOutOfBounds { self.check_addr(self.pos)
addr: self.position(), .map(|_| self.next_u32_unchecked())
}) }
pub fn next_u64_unchecked(&mut self) -> u64 {
let (int_bytes, _) = self.mem.as_ref()
.split_at(mem::size_of::<u64>());
let val = u64::from_le_bytes(int_bytes.try_into().unwrap());
self.pos += 8u64;
val
} }
pub fn next_u64(&mut self) -> Result<u64> { pub fn next_u64(&mut self) -> Result<u64> {
self.read_u64::<LE>().map_err(|_| VmError::MemOutOfBounds { self.check_addr(self.pos)
addr: self.position(), .map(|_| self.next_u64_unchecked())
})
} }
pub fn next_regs(&mut self) -> Result<(Reg, Reg)> { pub fn next_addr(&mut self) -> Result<Addr> {
let next16 = self.next_u16()?; self.check_addr(self.pos)
let r1 = ((R1_MASK & next16) >> 10) as Reg; .map(|_| self.next_addr_unchecked())
let r2 = ((R2_MASK & next16) >> 4) as Reg;
Ok((r1, r2))
} }
pub fn next_reg(&mut self) -> Result<Reg> { pub fn next_addr_unchecked(&mut self) -> Addr {
let next16 = self.next_u16()?; Addr(self.next_u64_unchecked())
let r1 = ((R1_MASK & next16) >> 10) as Reg; }
Ok(r1)
/// Decodes the instruction at the current position.
///
/// Reads a `u16` opcode, then whatever operands that opcode takes, and
/// returns the matching `Inst` variant.
///
/// # Errors
///
/// Returns `VmError::IllegalOp` for an opcode that is not in the table
/// below, and propagates any error from reading the opcode or operands.
pub fn next_inst(&mut self) -> Result<Inst> {
let op = self.next_u16()?;
// Decodes a (destination, source) operand pair and wraps it in $variant.
macro_rules! dest_source {
($variant:ident) => {{
let (d, s) = self.next_dest_source()?;
Ok(Inst::$variant(d, s))
}};
}
// Decodes a (source, source) operand pair and wraps it in $variant.
macro_rules! source_source {
($variant:ident) => {{
let (s1, s2) = self.next_source_source()?;
Ok(Inst::$variant(s1, s2))
}};
}
// Decodes a single source operand. Its specifier is the high nibble of the
// spec byte; the low nibble is not examined.
macro_rules! source {
($variant:ident) => {{
let spec = (self.next_u8()? & 0xF0) >> 4;
let source = self.next_source(spec)?;
Ok(Inst::$variant(source))
}};
}
match op {
ADD => dest_source!(Add),
SUB => dest_source!(Sub),
MUL => dest_source!(Mul),
DIV => dest_source!(Div),
MOD => dest_source!(Mod),
AND => dest_source!(And),
OR => dest_source!(Or),
XOR => dest_source!(Xor),
SHL => dest_source!(Shl),
SHR => dest_source!(Shr),
INEG => dest_source!(INeg),
INV => dest_source!(Inv),
NOT => dest_source!(Not),
CMPEQ => source_source!(CmpEq),
CMPLT => source_source!(CmpLt),
JMP => source!(Jmp),
JZ => source!(Jz),
JNZ => source!(Jnz),
MOV => dest_source!(Mov),
// These opcodes carry no operands.
HALT => Ok(Inst::Halt),
NOP => Ok(Inst::Nop),
DUMP => Ok(Inst::Dump),
_ => Err(VmError::IllegalOp { op }),
}
}
/// Decodes two source operands that share one spec byte: the high nibble
/// describes the first operand, the low nibble the second. The operands are
/// read from the stream in that order.
fn next_source_source(&mut self) -> Result<(Source, Source)> {
    let packed = self.next_u8()?;
    let first = self.next_source(packed >> 4)?;
    let second = self.next_source(packed & 0x0F)?;
    Ok((first, second))
}
/// Decodes a destination and a source operand that share one spec byte: the
/// high nibble describes the destination, the low nibble the source. The
/// destination is read from the stream first.
fn next_dest_source(&mut self) -> Result<(Dest, Source)> {
    let packed = self.next_u8()?;
    let target = self.next_dest(packed >> 4)?;
    let origin = self.next_source(packed & 0x0F)?;
    Ok((target, origin))
}
/// Decodes the destination operand selected by `spec`.
///
/// Address destinations are followed by an address in the stream, register
/// destinations by a register byte. Any other specifier yields
/// `VmError::IllegalDestSpec`.
fn next_dest(&mut self, spec: u8) -> Result<Dest> {
    let dest = match spec {
        DEST_ADDR64 => Dest::Addr64(self.next_addr()?),
        DEST_ADDR32 => Dest::Addr32(self.next_addr()?),
        DEST_ADDR16 => Dest::Addr16(self.next_addr()?),
        DEST_ADDR8 => Dest::Addr8(self.next_addr()?),
        DEST_REG => Dest::Reg(self.next_reg()?),
        _ => return Err(VmError::IllegalDestSpec { spec }),
    };
    Ok(dest)
}
/// Decodes the source operand selected by `spec`.
///
/// Address sources are followed by an address in the stream, register
/// sources by a register byte, and immediates by 8, 4, 2 or 1 bytes that are
/// zero-extended to `u64`. Any other specifier yields
/// `VmError::IllegalSourceSpec`.
fn next_source(&mut self, spec: u8) -> Result<Source> {
    let source = match spec {
        SOURCE_ADDR64 => Source::Addr64(self.next_addr()?),
        SOURCE_ADDR32 => Source::Addr32(self.next_addr()?),
        SOURCE_ADDR16 => Source::Addr16(self.next_addr()?),
        SOURCE_ADDR8 => Source::Addr8(self.next_addr()?),
        SOURCE_REG => Source::Reg(self.next_reg()?),
        SOURCE_IMM64 => Source::Imm(self.next_u64()?),
        SOURCE_IMM32 => Source::Imm(u64::from(self.next_u32()?)),
        SOURCE_IMM16 => Source::Imm(u64::from(self.next_u16()?)),
        SOURCE_IMM8 => Source::Imm(u64::from(self.next_u8()?)),
        _ => return Err(VmError::IllegalSourceSpec { spec }),
    };
    Ok(source)
}
/// Reads a one-byte register number and validates it against `NUM_REGS`.
///
/// Returns `VmError::IllegalReg` when the number is `NUM_REGS` or greater.
fn next_reg(&mut self) -> Result<Reg> {
    let reg = self.next_u8()?;
    if (reg as usize) < NUM_REGS {
        Ok(reg)
    } else {
        Err(VmError::IllegalReg { reg })
    }
}
fn check_addr(&self, addr: Addr) -> Result<()> {
if addr > self.mem.as_ref().len() {
Err(VmError::MemOutOfBounds { addr })
} else {
Ok(())
}
} }
} }
impl<'mem> Deref for MemCursor<'mem> { ////////////////////////////////////////////////////////////////////////////////
type Target = Cursor<&'mem [u8]>; // Index impl
////////////////////////////////////////////////////////////////////////////////
impl<T: AsRef<[u8]>> Index<usize> for MemCursor<T> {
type Output = u8;
fn deref(&self) -> &Self::Target { fn index(&self, addr: usize) -> &Self::Output {
self.cursor() self.mem.as_ref().index(addr)
} }
} }
impl<'mem> DerefMut for MemCursor<'mem> { impl<T: AsRef<[u8]>> Index<u64> for MemCursor<T> {
fn deref_mut(&mut self) -> &mut Self::Target { type Output = u8;
self.cursor_mut()
fn index(&self, addr: u64) -> &Self::Output {
self.index(addr as usize)
}
}
impl<T: AsRef<[u8]>> Index<Addr> for MemCursor<T> {
type Output = u8;
fn index(&self, addr: Addr) -> &Self::Output {
self.index(addr.0)
} }
} }

View File

@@ -1,10 +1,8 @@
pub mod disassemble; pub mod common;
pub mod error; pub mod error;
pub mod flags; pub mod flags;
pub mod inst; pub mod inst;
pub mod mem; pub mod mem;
pub mod obj; pub mod obj;
pub mod reg; pub mod reg;
mod tick; pub mod state;
pub mod visit;
pub mod vm;

View File

@@ -0,0 +1,45 @@
use snafu::Snafu;
use std::{fmt::Debug, io};
/// Error type of the `vm::obj` module. The `display` attribute on each
/// variant gives the message shown to the user.
#[derive(Debug, Snafu)]
pub enum ParseError {
// Wraps an underlying I/O failure.
#[snafu(display("IO error: {}", source))]
Io { source: io::Error },
#[snafu(display("wrong magic number"))]
WrongMagic,
// `kind` is the unrecognised section kind byte.
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
UnknownSectionKind { kind: u8 },
// Wraps the error from a failed bytes-to-String conversion.
#[snafu(display("invalid UTF-8 string: {}", source))]
InvalidUtf8String { source: std::string::FromUtf8Error },
#[snafu(display("duplicate symbol name: {}", name))]
DuplicateName { name: String },
#[snafu(display("duplicate exported symbol name: {}", name))]
DuplicateExportName { name: String },
}
// Generates `From<$type> for ParseError` for each `type: Variant` pair, so
// that `?` can convert the listed error types automatically. The target
// variant must have a single field named `source`.
macro_rules! into_parse_error {
(
$($type:ty : $variant:ident),* $(,)?
) => {
$(
impl From<$type> for ParseError {
fn from(other: $type) -> Self {
ParseError::$variant { source: other }
}
}
)*
}
}
// Conversions for the two wrapped error types.
into_parse_error! {
io::Error: Io,
std::string::FromUtf8Error: InvalidUtf8String,
}
pub type Result<T, E = ParseError> = std::result::Result<T, E>;

View File

@@ -1,3 +1,3 @@
pub mod assemble; pub mod error;
pub mod obj; pub mod obj;
pub mod syn; pub mod syn;

View File

@@ -1,4 +1,4 @@
use crate::vm::obj::syn::error::{ParseError, Result}; use crate::vm::obj::error::{ParseError, Result};
use byteorder::{ReadBytesExt, LE}; use byteorder::{ReadBytesExt, LE};
use std::{ use std::{
collections::HashMap, collections::HashMap,

View File

@@ -1,165 +1,82 @@
use crate::vm::{inst::*, reg::Reg}; use crate::vm::reg::Reg;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone)]
pub enum SectionBlock { pub enum SectionDef {
Data { Meta(MetaSection),
org: SectionOrg, Data(DataSection),
body: Vec<Line>,
},
Code {
org: SectionOrg,
body: Vec<Line>,
},
Meta {
entries: Vec<(String, ImmValue)>,
},
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone)]
pub struct MetaSection {
pub values: Vec<MetaLine>,
}
#[derive(Debug, Clone)]
pub struct MetaLine {
pub name: String,
pub value: Value,
}
#[derive(Debug, Clone)]
pub struct DataSection {
pub name: String,
pub org: Option<SectionOrg>,
pub lines: Vec<DataLine>,
}
#[derive(Debug, Clone)]
pub enum SectionOrg { pub enum SectionOrg {
Start(u64), Start(u64),
Range(u64, u64), StartEnd(u64, u64),
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone)]
pub enum Line { pub enum DataLine {
ValueDef(ValueDef),
Inst(Inst), Inst(Inst),
LabelDef(String),
ValueDecl(ValueDecl),
Export(String), Export(String),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ImmValue {
Number(u64),
Label(String), Label(String),
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone)]
pub enum ValueDecl { pub enum ValueDef {
U64(u64), Int(u64),
U32(u64),
U16(u64),
U8(u64),
String(String), String(String),
ZString(String), ZString(String),
} }
impl ValueDecl { #[derive(Debug, Clone)]
pub fn len(&self) -> usize { pub enum Value {
match self { Int(u64),
ValueDecl::U64(_) => 8, Reg(Reg),
ValueDecl::U32(_) => 4, Name(String),
ValueDecl::U16(_) => 2, Here,
ValueDecl::U8(_) => 1, //Array(Vec<Value>),
ValueDecl::String(s) => s.as_bytes().len() + 8, //Deref(Value, Size
ValueDecl::ZString(s) => s.as_bytes().len() + 1,
}
}
pub fn to_bytes(&self) -> Vec<u8> {
let len = self.len();
let bytes = match self {
ValueDecl::U64(v) => v.to_le_bytes().to_vec(),
ValueDecl::U32(v) => v.to_le_bytes()[0..4].to_vec(),
ValueDecl::U16(v) => v.to_le_bytes()[0..2].to_vec(),
ValueDecl::U8(v) => vec![(v & 0xff) as u8],
ValueDecl::String(s) => {
let mut bytes = Vec::with_capacity(self.len());
bytes.extend(&(s.len() as u64).to_le_bytes());
bytes.extend(s.as_bytes());
bytes
}
ValueDecl::ZString(s) => {
let mut bytes = Vec::with_capacity(self.len());
bytes.extend(s.as_bytes());
bytes.push(0);
bytes
}
};
assert_eq!(bytes.len(), len);
bytes
}
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone)]
pub enum Inst { pub enum Inst {
Add(Reg, Reg), Add(Value, Value),
Mul(Reg, Reg), Sub(Value, Value),
Div(Reg, Reg), Mul(Value, Value),
Mod(Reg, Reg), Div(Value, Value),
INeg(Reg), Mod(Value, Value),
And(Reg, Reg), And(Value, Value),
Or(Reg, Reg), Or(Value, Value),
Inv(Reg), Xor(Value, Value),
Not(Reg), Shl(Value, Value),
Xor(Reg, Reg), Shr(Value, Value),
Shl(Reg, Reg), INeg(Value, Value),
Shr(Reg, Reg), Inv(Value, Value),
Not(Value, Value),
CmpEq(Reg, Reg), CmpEq(Value, Value),
CmpLt(Reg, Reg), CmpLt(Value, Value),
Jmp(Reg), Jmp(Value),
Jz(Reg), Jz(Value),
Jnz(Reg), Jnz(Value),
Mov(Value, Value),
Load(Reg, Reg),
Store(Reg, Reg),
StoreImm(Reg, ImmValue),
StoreImm32(Reg, ImmValue),
StoreImm64(Reg, ImmValue),
MemCopy(Reg, Reg),
RegCopy(Reg, Reg),
Nop,
Halt, Halt,
} Nop,
Dump,
impl Inst {
pub fn op(&self) -> InstOp {
match self {
Inst::Add(_, _) => ADD,
Inst::Mul(_, _) => MUL,
Inst::Div(_, _) => DIV,
Inst::Mod(_, _) => MOD,
Inst::INeg(_) => INEG,
Inst::And(_, _) => AND,
Inst::Or(_, _) => OR,
Inst::Inv(_) => INV,
Inst::Not(_) => NOT,
Inst::Xor(_, _) => XOR,
Inst::Shl(_, _) => SHL,
Inst::Shr(_, _) => SHR,
Inst::CmpEq(_, _) => CMPEQ,
Inst::CmpLt(_, _) => CMPLT,
Inst::Jmp(_) => JMP,
Inst::Jz(_) => JZ,
Inst::Jnz(_) => JNZ,
Inst::Load(_, _) => LOAD,
Inst::Store(_, _) => STORE,
Inst::StoreImm(_, imm) => {
if let ImmValue::Number(imm) = imm {
if *imm > (u32::max_value() as u64) {
STOREIMM64
} else {
STOREIMM32
}
} else {
STOREIMM64
}
}
Inst::StoreImm32(_, _) => STOREIMM32,
Inst::StoreImm64(_, _) => STOREIMM64,
Inst::MemCopy(_, _) => MEMCOPY,
Inst::RegCopy(_, _) => REGCOPY,
Inst::Nop => NOP,
Inst::Halt => HALT,
}
}
pub fn len(&self) -> usize {
inst_len(self.op())
}
} }

View File

@@ -1,44 +1,16 @@
use snafu::Snafu; use snafu::Snafu;
use std::{fmt::Debug, io}; //use std::{fmt::Debug, io};
#[derive(Debug, Snafu)] #[derive(Debug, Snafu)]
pub enum ParseError { pub enum SyntaxError {
#[snafu(display("IO error: {}", source))] //#[snafu(display("IO error: {}", source))]
Io { source: io::Error }, //Io { source: io::Error },
#[snafu(display("wrong magic number"))] #[snafu(display("unexpected {}", what))]
WrongMagic, Unexpected { what: String },
#[snafu(display("unknown section kind: 0x{:02x}", kind))] #[snafu(display("expected {}, but got {} instead", expected, got))]
UnknownSectionKind { kind: u8 }, ExpectedGot { expected: String, got: String },
#[snafu(display("invalid UTF-8 string: {}", source))]
InvalidUtf8String { source: std::string::FromUtf8Error },
#[snafu(display("duplicate symbol name: {}", name))]
DuplicateName { name: String },
#[snafu(display("duplicate exported symbol name: {}", name))]
DuplicateExportName { name: String },
} }
macro_rules! into_parse_error { pub type Result<T, E = SyntaxError> = std::result::Result<T, E>;
(
$($type:ty : $variant:ident),* $(,)?
) => {
$(
impl From<$type> for ParseError {
fn from(other: $type) -> Self {
ParseError::$variant { source: other }
}
}
)*
}
}
into_parse_error! {
io::Error: Io,
std::string::FromUtf8Error: InvalidUtf8String,
}
pub type Result<T, E = ParseError> = std::result::Result<T, E>;

50
src/vm/obj/syn/lexer.l Normal file
View File

@@ -0,0 +1,50 @@
%%
\$[0-9]+ "DEC_INT"
\$0[Xx][0-9a-fA-F]+ "HEX_INT"
\$0[Bb][01]+ "BIN_INT"
\.meta "DIR_META"
\.section "DIR_SECTION"
\.export "DIR_EXPORT"
\{ "LBRACE"
\} "RBRACE"
\.\. "DOTDOT"
: "COLON"
, "COMMA"
\$\$ "BUCKBUCK"
[iu](8|16|32|64) "INT_TYPE"
\.[iu](8|16|32|64) "INT_DEF"
\.string "STR_DEF"
\.zstring "ZSTR_DEF"
"([^"\\]|\\[\\nt0"'])*" "STRING"
add "ADD"
sub "SUB"
mul "MUL"
div "DIV"
mod "MOD"
and "AND"
or "OR"
xor "XOR"
shl "SHL"
shr "SHR"
ineg "INEG"
inv "INV"
not "NOT"
cmpeq "CMPEQ"
cmplt "CMPLT"
jmp "JMP"
jz "JZ"
jnz "JNZ"
mov "MOV"
halt "HALT"
nop "NOP"
dump "DUMP"
%ip "REG_IP"
%sp "REG_SP"
%fp "REG_FP"
%flags "REG_FLAGS"
%null "REG_NULL"
%status "REG_STATUS"
%r[0-9]{1,2} "REG_GENERAL"
[a-zA-Z_][a-zA-Z0-9_]* "NAME"
;[^\n]* ;
[ \n\t]+ ;

View File

@@ -1,16 +1,16 @@
use lalrpop_util::lalrpop_mod;
lalrpop_mod!(pub parser, "/vm/obj/syn/parser.rs");
pub mod ast; pub mod ast;
pub mod error; pub mod error;
pub fn unescape_string(s: impl AsRef<str>) -> String { use lrlex::lrlex_mod;
let s = s.as_ref(); use lrpar::lrpar_mod;
s.replace(r"\\", "\\")
.replace("\\n", "\n") lrlex_mod!("vm/obj/syn/lexer.l");
.replace("\\r", "\r") lrpar_mod!("vm/obj/syn/parser.y");
.replace("\\t", "\t")
.replace("\\t", "\t") pub mod parser {
.replace("\\0", "\0") pub use super::parser_y::*;
.replace("\\\"", "\"") }
pub mod lexer {
pub use super::lexer_l::*;
} }

View File

@@ -1,20 +1,13 @@
use std::str::FromStr; use std::str::FromStr;
use crate::vm::{ use crate::vm::{
common::Addr,
inst::*,
obj::syn::{unescape_string, ast::*}, obj::syn::{unescape_string, ast::*},
reg::*, reg::*,
}; };
grammar; grammar;
LabelDef: String = {
<Label> ":" => <>
}
ImmValue: ImmValue = {
<Label> => ImmValue::Label(<>),
<Number> => ImmValue::Number(<>),
}
Label: String = { Label: String = {
r"[a-zA-Z_][a-zA-Z0-9_]*" => String::from(<>), r"[a-zA-Z_][a-zA-Z0-9_]*" => String::from(<>),
} }
@@ -33,45 +26,13 @@ String: String = {
} }
Reg: Reg = { Reg: Reg = {
r"%ip" => IP, r"%ip" => todo!(),
r"%sp" => SP, r"%sp" => todo!(),
r"%fp" => FP, r"%fp" => todo!(),
r"%flags" => FLAGS, r"%flags" => todo!(),
r"%null" => NULL, r"%nil" => todo!(),
r"%status" => STATUS, r"%status" => todo!(),
r"%r[0-9]{2}" => { r"%r[0-9]{1,2}" => todo!(),
let offset = (&<>[2..]).parse::<u8>().unwrap();
let reg = R00 + offset;
assert!(reg < LAST_REG, "invalid register");
reg
}
}
Inst: Inst = {
"add" <d:Reg> "," <s:Reg> => Inst::Add(d, s),
"mul" <d:Reg> "," <s:Reg> => Inst::Mul(d, s),
"div" <d:Reg> "," <s:Reg> =>Inst::Div(d, s),
"mod" <d:Reg> "," <s:Reg> => Inst::Mod(d, s),
"ineg" <d:Reg> => Inst::INeg(d),
"and" <d:Reg> "," <s:Reg> => Inst::And(d, s),
"or" <d:Reg> "," <s:Reg> => Inst::Or(d, s),
"xor" <d:Reg> "," <s:Reg> => Inst::Xor(d, s),
"shl" <d:Reg> "," <s:Reg> => Inst::Shl(d, s),
"shr" <d:Reg> "," <s:Reg> => Inst::Shr(d, s),
"cmpeq" <d:Reg> "," <s:Reg> => Inst::CmpEq(d, s),
"cmplt" <d:Reg> "," <s:Reg> => Inst::CmpLt(d, s),
"jmp" <d:Reg> => Inst::Jmp(d),
"jz" <d:Reg> => Inst::Jz(d),
"jnz" <d:Reg> => Inst::Jnz(d),
"load" <d:Reg> "," <s:Reg> => Inst::Load(d, s),
"store" <d:Reg> "," <s:Reg> => Inst::Store(d, s),
"storeimm" <d:Reg> "," <s:ImmValue> => Inst::StoreImm(d, s),
"storeimm32" <d:Reg> "," <s:ImmValue> => Inst::StoreImm32(d, s),
"storeimm64" <d:Reg> "," <s:ImmValue> => Inst::StoreImm64(d, s),
"memcopy" <d:Reg> "," <s:Reg> => Inst::MemCopy(d, s),
"regcopy" <d:Reg> "," <s:Reg> => Inst::RegCopy(d, s),
"nop" => Inst::Nop,
"halt" => Inst::Halt,
} }
ValueDecl: ValueDecl = { ValueDecl: ValueDecl = {
@@ -83,34 +44,13 @@ ValueDecl: ValueDecl = {
r"\.zstring" <String> => ValueDecl::ZString(<>), r"\.zstring" <String> => ValueDecl::ZString(<>),
} }
Line: Line = {
<Inst> => Line::Inst(<>),
<LabelDef> => Line::LabelDef(<>),
<ValueDecl> => Line::ValueDecl(<>),
r"\.export" <Label> => Line::Export(<>),
}
MetaLine: (String, ImmValue) = {
<name:Label> ":" <value:ImmValue> => (name, value),
}
SectionOrg: SectionOrg = { SectionOrg: SectionOrg = {
<start:Number> => SectionOrg::Start(start), <start:Number> => SectionOrg::Start(start),
<start:Number> r"\.\." <end:Number> => SectionOrg::Range(start, end), <start:Number> r"\.\." <end:Number> => SectionOrg::Range(start, end),
} }
Section: SectionBlock = { Section: SectionBlock = {
"data" <org:SectionOrg> "{" <body:Line*> "}" => {
SectionBlock::Data { org, body }
},
"code" <org:SectionOrg> "{" <body:Line*> "}" => {
SectionBlock::Code { org, body }
},
"meta" "{" <entries:MetaLine*> "}" => {
SectionBlock::Meta { entries, }
}
} }
pub Sections: Vec<SectionBlock> = { pub Sections: Vec<SectionBlock> = {

204
src/vm/obj/syn/parser.y Normal file
View File

@@ -0,0 +1,204 @@
%start SectionDefs
%%
SectionDefs -> Vec<SectionDef>:
SectionDefs SectionDef { $1.push($2); $1 }
| { Vec::new() }
;
SectionDef -> SectionDef:
'DIR_META' MetaBlock { SectionDef::Meta(MetaSection { values: $2 }) }
| 'DIR_SECTION' Name MaybeSectionOrg DataBlock {
SectionDef::Data(DataSection {
name: $2,
org: $3,
lines: $4,
})
}
;
MetaBlock -> Vec<MetaLine>: 'LBRACE' MetaLines 'RBRACE' { $2 };
MetaLines -> Vec<MetaLine>:
MetaLines MetaLine { $1.push($2); $1 }
| { Vec::new() }
;
MetaLine -> MetaLine: Name 'COLON' Value { MetaLine { name: $1, value: $3 } };
MaybeSectionOrg -> Option<SectionOrg>:
SectionOrg { Some($1) }
| { None }
;
SectionOrg -> SectionOrg:
Int { SectionOrg::Start($1) }
| Int 'DOTDOT' Int { SectionOrg::StartEnd($1, $3) }
;
DataBlock -> Vec<DataLine>: 'LBRACE' DataLines 'RBRACE' { $2 };
DataLines -> Vec<DataLine>:
DataLines DataLine { $1.push($2); $1 }
| { Vec::new() }
;
DataLine -> DataLine:
ValueDef { DataLine::ValueDef($1) }
| Inst { DataLine::Inst($1) }
| 'DIR_EXPORT' Name { DataLine::Export($2) }
| Name 'COLON' { DataLine::Label($1) }
;
ValueDef -> ValueDef:
'INT_DEF' Int { ValueDef::Int($2) }
| 'STR_DEF' String { ValueDef::String($2) }
| 'ZSTR_DEF' String { ValueDef::ZString($2) }
;
Value -> Value:
Int { Value::Int($1) }
| Reg { Value::Reg($1) }
| Name { Value::Name($1) }
| 'BUCKBUCK' { Value::Here }
;
Inst -> Inst:
'ADD' Value 'COMMA' Value { Inst::Add($2, $4) }
| 'SUB' Value 'COMMA' Value { Inst::Sub($2, $4) }
| 'MUL' Value 'COMMA' Value { Inst::Mul($2, $4) }
| 'DIV' Value 'COMMA' Value { Inst::Div($2, $4) }
| 'MOD' Value 'COMMA' Value { Inst::Mod($2, $4) }
| 'AND' Value 'COMMA' Value { Inst::And($2, $4) }
| 'OR' Value 'COMMA' Value { Inst::Or($2, $4) }
| 'XOR' Value 'COMMA' Value { Inst::Xor($2, $4) }
| 'SHL' Value 'COMMA' Value { Inst::Shl($2, $4) }
| 'SHR' Value 'COMMA' Value { Inst::Shr($2, $4) }
| 'INEG' Value 'COMMA' Value { Inst::INeg($2, $4) }
| 'INV' Value 'COMMA' Value { Inst::Inv($2, $4) }
| 'NOT' Value 'COMMA' Value { Inst::Not($2, $4) }
| 'CMPEQ' Value 'COMMA' Value { Inst::CmpEq($2, $4) }
| 'CMPLT' Value 'COMMA' Value { Inst::CmpLt($2, $4) }
| 'JMP' Value { Inst::Jmp($2) }
| 'JZ' Value { Inst::Jz($2) }
| 'JNZ' Value { Inst::Jnz($2) }
| 'MOV' Value 'COMMA' Value { Inst::Mov($2, $4) }
| 'HALT' { Inst::Halt }
| 'NOP' { Inst::Nop }
| 'DUMP' { Inst::Dump }
;
Name -> String:
'NAME' {
let v = $1.expect("could not parse name");
$lexer.span_str(v.span()).to_string()
}
;
Int -> u64:
'DEC_INT' {
let span = $1.expect("could not parse dec_int").span();
let s = &$lexer.span_str(span)[1..];
s.parse().unwrap()
}
| 'HEX_INT' {
let span = $1.expect("could not parse hex_int").span();
let s = &$lexer.span_str(span)[3..];
u64::from_str_radix(s, 16).unwrap()
}
| 'BIN_INT' {
let span = $1.expect("could not parse bin_int").span();
let s = &$lexer.span_str(span)[3..];
u64::from_str_radix(s, 2).unwrap()
}
;
Reg -> Reg:
'REG_IP' { IP }
| 'REG_SP' { SP }
| 'REG_FP' { FP }
| 'REG_FLAGS' { FLAGS }
| 'REG_NULL' { NULL }
| 'REG_STATUS' { STATUS }
| 'REG_GENERAL' {
let v = $1.expect("could not parse reg");
parse_reg($lexer.span_str(v.span())).unwrap()
}
;
String -> String:
'STRING' {
let v = $1.expect("could not parse string");
parse_string($lexer.span_str(v.span()))
}
;
%%
use crate::vm::{
obj::syn::ast::*,
reg::*,
};
/// Converts the source text of a string literal into the string it denotes.
///
/// `input` is the full token text including the opening and closing quote.
/// The two quotes are removed and the escape sequences `\\`, `\n`, `\t`,
/// `\"`, `\'` and `\0` are replaced by the characters they stand for.
///
/// A backslash followed by any other character, or a backslash at the very
/// end of the literal, is kept verbatim instead of panicking: the lexer's
/// STRING pattern can let both forms through.
fn parse_string(input: &str) -> String {
    // Drop exactly one delimiter from each end. The previous code removed
    // two bytes from the tail and so lost the last character of every string.
    let inner = input
        .get(1..input.len().saturating_sub(1))
        .unwrap_or("");

    let mut s = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            s.push(c);
            continue;
        }
        match chars.next() {
            Some('\\') => s.push('\\'),
            Some('n') => s.push('\n'),
            Some('t') => s.push('\t'),
            Some('"') => s.push('"'),
            Some('\'') => s.push('\''),
            Some('0') => s.push('\0'),
            // Unknown escape: keep both characters as written.
            Some(other) => {
                s.push('\\');
                s.push(other);
            }
            // Lone backslash at the end of the literal.
            None => s.push('\\'),
        }
    }
    s
}
/// Parses a general-purpose register name of the form `%rN` or `%rNN`.
///
/// Returns `None` when `input` does not have that form or when the register
/// number lies beyond `R31`.
fn parse_reg(input: &str) -> Option<Reg> {
    use lazy_static::lazy_static;
    use regex::Regex;

    lazy_static! {
        static ref REG_RE: Regex = Regex::new(r"^%r([0-9]{1,2})$").unwrap();
    }

    let digits = REG_RE.captures(input)?.get(1)?.as_str();
    // The pattern admits at most two decimal digits, so this always fits.
    let index: Reg = digits.parse().unwrap();
    match R00 + index {
        reg if reg > R31 => None,
        reg => Some(reg),
    }
}
#[cfg(test)]
mod test {
    use super::parse_reg;
    use crate::vm::reg::*;

    /// Checks accepted spellings (with and without a leading zero), the upper bound at `R31`,
    /// and rejection of malformed register text.
    #[test]
    fn test_parse_reg() {
        let cases = [
            ("%r00", Some(R00)),
            ("%r0", Some(R00)),
            ("%r1", Some(R01)),
            ("%r01", Some(R01)),
            ("%r31", Some(R31)),
            ("%r32", None),
            ("%r0000", None),
            ("%r9", Some(R09)),
            ("%r", None),
            ("%r12", Some(R12)),
        ];
        for (text, expected) in cases.iter() {
            assert_eq!(parse_reg(text), *expected, "input: {}", text);
        }
    }
}

View File

@@ -20,9 +20,6 @@ macro_rules! registers {
} }
registers! { registers! {
// https://crates.io/crates/packed_struct
// TODO : check this muffugin shit out!!
// Instruction pointer // Instruction pointer
IP = 0, IP = 0,
@@ -38,68 +35,42 @@ registers! {
// Zero // Zero
NULL = 4, NULL = 4,
UNUSED01 = 5,
UNUSED02 = 6,
UNUSED03 = 7,
UNUSED04 = 8,
UNUSED05 = 9,
UNUSED06 = 10,
UNUSED07 = 11,
UNUSED08 = 12,
// General status code // General status code
STATUS = 13, STATUS = 5,
R00 = 14, R00 = 6,
R01 = 15, R01 = 7,
R02 = 16, R02 = 8,
R03 = 17, R03 = 9,
R04 = 18, R04 = 10,
R05 = 19, R05 = 11,
R06 = 20, R06 = 12,
R07 = 21, R07 = 13,
R08 = 22, R08 = 14,
R09 = 23, R09 = 15,
R10 = 24, R10 = 16,
R11 = 25, R11 = 17,
R12 = 26, R12 = 18,
R13 = 27, R13 = 19,
R14 = 28, R14 = 20,
R15 = 29, R15 = 21,
R16 = 30, R16 = 22,
R17 = 31, R17 = 23,
R18 = 32, R18 = 24,
R19 = 33, R19 = 25,
R20 = 34, R20 = 26,
R21 = 35, R21 = 27,
R22 = 36, R22 = 28,
R23 = 37, R23 = 29,
R24 = 38, R24 = 30,
R25 = 39, R25 = 31,
R26 = 40, R26 = 32,
R27 = 41, R27 = 33,
R28 = 42, R28 = 34,
R29 = 43, R29 = 35,
R30 = 44, R30 = 36,
R31 = 45, R31 = 37,
R32 = 46,
R33 = 47,
R34 = 48,
R35 = 49,
R36 = 50,
R37 = 51,
R38 = 52,
R39 = 53,
R40 = 54,
R41 = 55,
R42 = 56,
R43 = 57,
R44 = 58,
R45 = 59,
R46 = 60,
R47 = 61,
R48 = 62,
R49 = 63,
} }
pub const LAST_REG: Reg = R49; pub const LAST_REG: Reg = 63;
pub const NUM_REGS: usize = 64;

78
src/vm/state.rs Normal file
View File

@@ -0,0 +1,78 @@
use crate::vm::{common::*, error::*, flags::*, mem::*, reg::*};
/// The state of the virtual machine: its registers and its memory.
pub struct State {
/// Register values, one `u64` per register, indexed by the register number cast to `usize`.
regs: [u64; NUM_REGS],
/// Memory contents as raw bytes; starts out empty in `State::new`.
mem: Vec<u8>,
}
impl State {
pub fn new() -> Self {
State {
regs: [0; NUM_REGS],
mem: Default::default(),
}
}
pub fn mem_cursor(&self, addr: Addr) -> MemCursor<&[u8]> {
let mut cursor = MemCursor::new(self.mem.as_slice());
cursor.set_position(addr);
cursor
}
pub fn run(&mut self) -> Result<u64> {
Ok(self.get_reg_unchecked(STATUS))
}
////////////////////////////////////////////////////////////////////////////////
// Registers
////////////////////////////////////////////////////////////////////////////////
pub fn get_reg_unchecked(&self, reg: Reg) -> u64 {
self.regs[reg as usize]
}
pub fn get_reg(&self, reg: Reg) -> Result<u64> {
if (reg as usize) >= NUM_REGS {
Err(VmError::IllegalReg { reg })
} else {
Ok(self.get_reg_unchecked(reg))
}
}
pub fn set_reg_unchecked(&mut self, reg: Reg, value: u64) {
self.regs[reg as usize] = value;
}
pub fn set_reg(&mut self, reg: Reg, value: u64) -> Result<()> {
if (reg as usize) >= NUM_REGS {
Err(VmError::IllegalReg { reg })
} else {
Ok(self.set_reg_unchecked(reg, value))
}
}
////////////////////////////////////////////////////////////////////////////////
// Flags
////////////////////////////////////////////////////////////////////////////////
pub fn flags(&self) -> Flags {
// this is safe because it's OK if there are random bits flipped - this shouldn't happen
// anyway, but if it does, they're ignored
unsafe { Flags::from_bits_unchecked(self.get_reg_unchecked(FLAGS)) }
}
pub fn insert_flags(&mut self, flags: Flags) {
let mut new_flags = self.flags();
new_flags.insert(flags);
self.set_flags(new_flags);
}
pub fn remove_flags(&mut self, flags: Flags) {
let mut new_flags = self.flags();
new_flags.remove(flags);
self.set_flags(new_flags);
}
pub fn set_flags(&mut self, flags: Flags) {
self.set_reg_unchecked(FLAGS, flags.bits());
}
}

View File

@@ -1,208 +0,0 @@
use crate::vm::{error::*, flags::*, inst::InstOp, mem::*, obj::obj::*, reg::*};
use byteorder::{WriteBytesExt, LE};
use std::{io::Cursor, mem};
/// A full machine word (64 bits).
pub type Word = u64;
/// Half of a machine word (32 bits).
pub type HalfWord = u32;
/// The register file: 64 words, indexed by register number.
pub type Registers = [Word; 64];
/// A memory address; `Vm` uses it as a byte index into its memory.
pub type Addr = u64;
/// A virtual machine: a block of memory plus a register file.
pub struct Vm {
/// Memory contents as raw bytes; replaced wholesale by `load_object`.
pub(super) mem: Vec<u8>,
/// Current register values, indexed by register number.
pub(super) registers: Registers,
}
impl Vm {
/// Creates a VM with no memory and all registers set to zero.
pub fn new() -> Self {
Vm {
mem: Default::default(),
registers: [0; 64],
}
}
/// Loads an object into this VM, clearing out all previous memory and resetting the registers.
///
/// Memory is sized to the largest `end` of any data or code section (zero if there are none).
/// Each such section's contents are copied in starting at its `start`. If a meta section has
/// an `"entry"` value, `IP` is set to it; otherwise `IP` is set to zero.
///
/// # Panics
///
/// Panics (via `todo!`) if the required memory size exceeds `max_mem`, and panics on the
/// slice index if a section's `start` lies beyond the allocated memory.
pub fn load_object(&mut self, object: Object, max_mem: usize) -> Result<()> {
self.registers = [0; 64];
// determine memory spread
let mem_size = object
.sections
.iter()
.filter_map(|s| match s {
Section::Data { end, .. } | Section::Code { end, .. } => Some(*end),
Section::Meta { .. } => None,
})
.max()
.unwrap_or(0);
if mem_size > (max_mem as u64) {
todo!("raise max memory error");
}
self.mem = vec![0; mem_size as usize];
let mut entry = 0;
// write sections to memory
for section in object.sections.into_iter() {
match section {
Section::Data {
start,
contents,
..
}
| Section::Code {
start,
contents,
..
} => {
let start = start as usize;
// `zip` stops at the shorter side, so contents that would run past the end of
// memory are silently dropped rather than causing an error.
for (value, dest) in contents.into_iter().zip(&mut self.mem[start..])
{
*dest = value;
}
}
Section::Meta { entries } => {
if let Some(e) = entries.get("entry") {
// set the entry point
entry = *e;
}
}
}
}
self.set_reg(IP, entry);
Ok(())
}
/// Returns a cursor over the VM's memory, positioned at byte offset `at`.
pub fn mem_cursor(&self, at: usize) -> MemCursor {
let mut cursor = MemCursor::new(&self.mem);
cursor.set_position(at as u64);
cursor
}
/// Calls `tick` (defined outside this block) until the machine is halted, then returns the
/// value of the `STATUS` register. An error from `tick` is returned immediately.
pub fn run(&mut self) -> Result<u64> {
while !self.is_halted() {
self.tick()?;
}
Ok(self.get_reg(STATUS))
}
/// Clears the `HALT` flag.
pub fn resume(&mut self) {
self.remove_flags(Flags::HALT);
}
/// Returns `true` if the `HALT` flag is set.
pub fn is_halted(&self) -> bool {
self.flags().contains(Flags::HALT)
}
/// Reads the 8-byte word at `addr`, or fails with `MemOutOfBounds` if any of those bytes lies
/// outside memory.
pub fn get_word(&self, addr: Addr) -> Result<Word> {
self.check_read(addr, 8)?;
Ok(self.mem_cursor(addr as usize).next_u64().unwrap())
}
/// Reads the 4-byte half word at `addr`, or fails with `MemOutOfBounds` if any of those bytes
/// lies outside memory.
pub fn get_halfword(&self, addr: Addr) -> Result<HalfWord> {
self.check_read(addr, 4)?;
Ok(self.mem_cursor(addr as usize).next_u32().unwrap())
}
/// Reads the 2-byte instruction opcode at `addr`, or fails with `MemOutOfBounds` if either
/// byte lies outside memory.
pub fn get_inst_op(&self, addr: Addr) -> Result<InstOp> {
self.check_read(addr, 2)?;
Ok(self.mem_cursor(addr as usize).next_u16().unwrap())
}
/// Reads the byte at `addr`, or fails with `MemOutOfBounds` if `addr` lies outside memory.
pub fn get_byte(&self, addr: Addr) -> Result<u8> {
self.check_addr(addr)?;
Ok(self.mem_cursor(addr as usize).next_u8().unwrap())
}
/// Writes `value` as 8 little-endian bytes starting at `addr`.
///
/// The range is validated with `check_read`, which despite its name only checks bounds.
pub fn set_word(&mut self, addr: Addr, value: Word) -> Result<()> {
self.check_read(addr, 8)?;
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
Ok(cursor.write_u64::<LE>(value).unwrap())
}
/// Writes `value` as 4 little-endian bytes starting at `addr`.
pub fn set_halfword(&mut self, addr: Addr, value: HalfWord) -> Result<()> {
self.check_read(addr, 4)?;
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
Ok(cursor.write_u32::<LE>(value).unwrap())
}
/// Writes the single byte `value` at `addr`.
pub fn set_byte(&mut self, addr: Addr, value: u8) -> Result<()> {
self.check_addr(addr)?;
let mut cursor = Cursor::new(&mut self.mem[addr as usize..]);
Ok(cursor.write_u8(value).unwrap())
}
/// Reads the word stored in memory at the address currently held in `reg`.
pub fn load(&self, reg: Reg) -> Result<Word> {
self.get_word(self.get_reg_checked(reg)?)
}
/// Writes `value` to memory at the address currently held in `reg`.
pub fn store(&mut self, reg: Reg, value: Word) -> Result<()> {
let addr = self.get_reg_checked(reg)?;
self.set_word(addr, value)
}
/// Reads `reg`, failing with `IllegalReg` if it is greater than `LAST_REG`.
pub fn get_reg_checked(&self, reg: Reg) -> Result<Word> {
self.check_reg(reg)?;
Ok(self.get_reg(reg))
}
/// Reads `reg` without validating it; panics if it is not a valid index into the register
/// file.
pub fn get_reg(&self, reg: Reg) -> Word {
self.registers[reg as usize]
}
/// Writes `value` to `reg` and returns what `set_reg` returns, failing with `IllegalReg` if
/// `reg` is greater than `LAST_REG`.
pub fn set_reg_checked(&mut self, reg: Reg, value: Word) -> Result<Word> {
self.check_reg(reg)?;
Ok(self.set_reg(reg, value))
}
/// Writes `value` to `reg` without validating it and returns the register's previous value.
///
/// Writes to `NULL` are ignored and return zero.
pub fn set_reg(&mut self, reg: Reg, value: Word) -> Word {
if reg == NULL {
return 0;
}
mem::replace(&mut self.registers[reg as usize], value)
}
/// Returns the current value of the `IP` register.
pub fn ip(&self) -> Word {
self.get_reg(IP)
}
/// Returns the contents of the `FLAGS` register interpreted as a `Flags` set.
pub fn flags(&self) -> Flags {
// this is safe because it's OK if there are random bits flipped - this shouldn't happen
// anyway, but if it does, they're ignored
unsafe { Flags::from_bits_unchecked(self.get_reg(FLAGS)) }
}
/// Sets every flag in `flags`, leaving the other flags as they are.
pub fn insert_flags(&mut self, flags: Flags) {
let mut new_flags = self.flags();
new_flags.insert(flags);
self.set_flags(new_flags);
}
/// Clears every flag in `flags`, leaving the other flags as they are.
pub fn remove_flags(&mut self, flags: Flags) {
let mut new_flags = self.flags();
new_flags.remove(flags);
self.set_flags(new_flags);
}
/// Overwrites the `FLAGS` register with the bits of `flags`.
pub fn set_flags(&mut self, flags: Flags) {
self.set_reg(FLAGS, flags.bits());
}
/// Fails with `MemOutOfBounds` if `addr` is not a valid index into memory.
fn check_addr(&self, addr: Addr) -> Result<()> {
if addr >= (self.mem.len() as u64) {
Err(VmError::MemOutOfBounds { addr })
} else {
Ok(())
}
}
/// Checks that both the first and the last byte of the `len`-byte range starting at `addr`
/// lie inside memory.
///
/// NOTE(review): `addr + len - 1` assumes `len >= 1`; every caller in this impl passes 2, 4
/// or 8.
fn check_read(&self, addr: Addr, len: Word) -> Result<()> {
self.check_addr(addr)
.and_then(|_| self.check_addr(addr + len - 1))
}
/// Fails with `IllegalReg` if `reg` is greater than `LAST_REG`.
fn check_reg(&self, reg: Reg) -> Result<()> {
if reg > LAST_REG {
Err(VmError::IllegalReg { reg })
} else {
Ok(())
}
}
}

443
vm.md
View File

@@ -4,7 +4,7 @@ This is an outline of the VM that drives this language.
# Primitives # Primitives
* Numbers may be big endian (BE) or little endian (LE) at the byte level. This guide will use LE. * Numbers are little endian (LE) at the byte level.
* Addresses point to single bytes. * Addresses point to single bytes.
* Signed numbers use two's complement. * Signed numbers use two's complement.
@@ -23,10 +23,10 @@ CPU registers are addressed by a value between 0-63 (6 bits). All registers are
* SP - Stack pointer * SP - Stack pointer
* FP - Frame pointer * FP - Frame pointer
* FLAGS - CPU flags * FLAGS - CPU flags
* NULL - Always zero for reading and will never change after writing.
* (8 unused registers)
* STATUS - Generic status code * STATUS - Generic status code
* R0-R49 * NIL - Always zero for reading and will never change after writing.
* R0-R31
* (26 unused registers)
## CPU Flags ## CPU Flags
@@ -42,14 +42,75 @@ CPU flags are addressed by bit index, going from right to left.
* Overwriting a register without its value being used * Overwriting a register without its value being used
* Mixing arithmetic with bit twiddling on the same target * Mixing arithmetic with bit twiddling on the same target
## Register ideas
* Other possible names: Z, NIL
# Instructions # Instructions
Instructions attempt to be as small as possible while conforming to 8-bit, 16-bit, 32-bit, or 64-bit All instructions have 16-bit opcodes. There are three types of instructions:
alignment. All instructions have 16-bit opcodes.
* Those whose operations require a source and a destination.
* Those whose operations require two sources
* The destination of these instructions is implied by the instruction itself; e.g. the `CMPEQ`
instruction implicitly sets a bit in the `FLAGS` register.
* Those whose operations require a source, but no destination.
* Those whose operations require a destination, but no source.
* There aren't any of these instructions yet
* Those whose operations require neither a source nor a destination.
Destinations may be:
* A 64-bit address pointing at a 64-bit or 8-bit value
* A 6-bit register
Sources may be one of:
* A 64-bit address pointing at a 64-bit or 8-bit value
* A 6-bit register
* A 64-bit immediate value
Counting all source and destination value sizes as their own configuration, there are:
* 3 possible destination types
* 4 possible source types
Instructions have different layouts depending on whether their operations take a source and/or
destination. For example, the `ADD` instruction takes a source and a destination, the `JMP`
instruction takes a source, and the `NOP` instruction takes neither a source nor a destination.
Instructions that take neither a source nor a destination are simply 16 bits long. Every other
instruction is followed by a byte that determines its source and/or
destination.
An instruction that has a source and destination looks like this:
```
| XXXXXXXX | XXXXXXXX | DDDDSSSS | ...source and destination |
```
An instruction that has either a source or a destination (but not both) looks like this:
```
| XXXXXXXX | XXXXXXXX | YYYY0000 | ...source or destination |
```
An instruction that has neither a source nor a destination looks like this:
```
| XXXXXXXX | XXXXXXXX |
```
## Source/destination flags
| Bits | Source/destination |
| - | - |
| 0b0000 | Address (64 bit value) |
| 0b0001 | Address (32 bit value) |
| 0b0010 | Address (16 bit value) |
| 0b0011 | Address (8 bit value) |
| 0b0100 | 6-bit register |
| 0b0101 | Immediate (64 bits, source only) |
| 0b0110 | Immediate (32 bits, source only) |
| 0b0111 | Immediate (16 bits, source only) |
| 0b1000 | Immediate (8 bits, source only) |
## Arithmetic ## Arithmetic
@@ -58,160 +119,43 @@ wrapping around to 0.
* Add * Add
* Opcode: 0x0000 * Opcode: 0x0000
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 + REG2` * Sub
* Unsigned addition
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000000 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Mul
* Opcode: 0x0001 * Opcode: 0x0001
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 * REG2` * Mul
* Unsigned multiplication
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000001 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Div
* Opcode: 0x0002 * Opcode: 0x0002
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 / REG2` * Div
* Unsigned division
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000010 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Mod
* Opcode: 0x0003 * Opcode: 0x0003
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 % REG2` (exact semantics TBD) * Mod
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000011 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* INeg
* Opcode: 0x0004 * Opcode: 0x0004
* **Params**: REG1 * Params: Destination, source
* `REG1 = REG1 * -1`
* Signed negative
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0000000000000100 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* And * And
* Opcode: 0x0005 * Opcode: 0x0005
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 & REG2`
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000101 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Or * Or
* Opcode: 0x0006 * Opcode: 0x0006
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 | REG2`
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000000110 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Inv
* Opcode: 0x0007
* **Params**: REG1
* `REG1 = ~REG1`
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0000000000000111 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Not
* Opcode: 0x0008
* **Params**: REG1
* ```
if REG1 == 0 {
REG1 = 0;
} else {
REG1 = 1;
}
```
* Boolean NOT; equivalent of C's `!` unary operator
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0000000000001000 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Xor * Xor
* Opcode: 0x0009 * Opcode: 0x0007
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 ^ REG2`
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000001001 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Shl * Shl
* Opcode: 0x000A * Opcode: 0x0008
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 << REG2`
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0000000000001010 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Shr * Shr
* Opcode: 0x000B * Opcode: 0x0009
* **Params**: REG1, REG2 * Params: Destination, source
* `REG1 = REG1 >> REG2` * INeg
* Does not sign extend * Opcode: 0x000a
* ``` * Params: Destination, source
32 16 10 4 0 * Inv
opcode reg1 reg2 unused * Opcode: 0x000b
/ / / / * Params: Destination, source
+-------------------------------------------+ * Not
| 0000000000001011 | ...... | ...... | XXXX | * Opcode: 0x000c
+-------------------------------------------+ * Params: Destination, source
```
### TODO ### TODO
@@ -223,196 +167,33 @@ wrapping around to 0.
* CmpEq * CmpEq
* Opcode: 0x1000 * Opcode: 0x1000
* **Params**: REG1, REG2 * Params: Source, source
* ```
if REG1 == REG2 {
FLAGS[1] = 1;
} else {
FLAGS[1] = 0;
}
```
* Sets the COMPARE flag to 1 if REG1 == REG2
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0001000000000000 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* CmpLt * CmpLt
* Opcode: 0x1001 * Opcode: 0x1001
* **Params**: REG1, REG2 * Params: Source, source
* ```
if REG1 < REG2 {
FLAGS[1] = 1;
} else {
FLAGS[1] = 0;
}
```
* Sets the COMPARE flag to 1 if REG1 < REG2
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0001000000000001 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* Jmp * Jmp
* Opcode: 0x1100
* **Params**: REG1
* `IP = REG1;`
* Jumps to the address in REG1 unconditionally.
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0001000100000000 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Jz
* Opcode: 0x1101
* **Params**: REG1
* ```
if FLAGS[1] == 0 {
IP = REG1;
}
```
* Jumps to the address in REG1 if COMPARE flag is 0.
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0001000100000001 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
* Jnz
* Opcode: 0x1002 * Opcode: 0x1002
* **Params**: REG1 * Params: Source
* ``` * Jz
if FLAGS[1] != 0 { * Opcode: 0x1003
IP = REG1; * Params: Source
} * Jnz
``` * Opcode: 0x1004
* Jumps to the address in REG1 if COMPARE flag is 1. * Params: Source
* ```
32 16 10 0
opcode reg1 unused
/ / /
+----------------------------------------+
| 0001000100000002 | ...... | XXXXXXXXXX |
+----------------------------------------+
```
## Data movement ## Data movement
* Load * Mov
* Opcode: 0x2000 * Opcode: 0x2000
* **Params**: REG1, REG2
* ```
REG1 = MEM[REG2];
```
* Sets REG1 to the value at the memory address in REG2.
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0010000000000000 | ...... | ...... | XXXX |
+-------------------------------------------+
```
* RegCopy
* Opcode: 0x2001
* **Params**: REG1, REG2
* `REG1 = REG2`
* Copies the value in REG2 into REG1.
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0010000000000001 | REG1.. | REG2.. | XXXX |
+-------------------------------------------+
```
* StoreImm64
* Opcode: 0x2100
* **Params**: REG1, IMM_64
* `REG1 = IMM_64`
* Sets REG1 to the specified 64-bit number.
* StoreImm32
* Opcode: 0x2101
* **Params**: REG1, IMM_32
* `REG1 = IMM_32`
* Sets REG1 to the specified 32-bit number.
* ```
64 48 42 36 32 0
opcode reg1 reg2 unused
/ / / / immediate 32 bit value
/ / / / /
+------------------------------------------------------------------------------+
| 0010000100000001 | REG1.. | REG2.. | XXXX | IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII |
+------------------------------------------------------------------------------+
```
* MemCopy
* Opcode: 0x2200
* **Params**: REG1, REG2
* `MEM[REG1] = MEM[REG2]`
* Copies the value at the memory address in REG2 to the memory address in REG1.
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0010001000000000 | REG1.. | REG2.. | XXXX |
+-------------------------------------------+
```
* Store
* Opcode: 0x2201
* **Params**: REG1, REG2
* ```
MEM[REG2] = REG1;
```
* Sets the value at the memory address in REG2 to the value in REG1.
* ```
32 16 10 4 0
opcode reg1 reg2 unused
/ / / /
+-------------------------------------------+
| 0010001000000001 | REG1.. | REG2.. | XXXX |
+-------------------------------------------+
```
## Miscellaneous ## Miscellaneous
* Halt * Halt
* Opcode: 0xF000 * Opcode: 0xF000
* **Params**: (none)
* `FLAGS[0] = 1`
* Halts the machine
* ```
16
opcode
/
+------------------+
| 1111000000000000 |
+------------------+
```
* Nop * Nop
* Opcode: 0xF001 * Opcode: 0xF001
* **Params**: (none) * Dump
* Does nothing * Opcode: 0xF002
* ```
16
opcode
/
+------------------+
| 1111000000000001 |
+------------------+
```
## Other instructions TODO ## Other instructions TODO
@@ -423,8 +204,6 @@ wrapping around to 0.
* Uses FP to determine previous SP, FP, and IP and restores them * Uses FP to determine previous SP, FP, and IP and restores them
* Push * Push
* Pop * Pop
* More immediate stores?
* Idea: Store42 (or whatever number of bits) that maximizes the usage of a 64-bit instruction
# Binary object format # Binary object format
@@ -435,7 +214,7 @@ the object.
The header is composed of: The header is composed of:
* 64 bits - A magic number (0xDEAD_BEA7_BA5E_BA11). * 64 bits - A magic number (0xDEAD\_BEA7\_BA5E\_BA11).
* 32 bits - Version of the file * 32 bits - Version of the file
* 32 bits - The number of sections in the file * 32 bits - The number of sections in the file
* section descriptions detailed below * section descriptions detailed below
@@ -458,7 +237,7 @@ the section contents.
The data section contains static data that is initialized to some known value. The data section contains static data that is initialized to some known value.
* 64 bits - section load start - where in memory the content of this section begins * 64 bits - section load start - where in memory the content of this section begins
* 64 bits - section load end - where in memory the content of this section ends * 64 bits - section length - how long the memory content is
### Code section ### Code section