Finish up parser and assembler with more-or-less complete syntax

Major changes include:

* Bit the bullet and now instructions have their length hard-coded
* Move from_utf8 object parsing to be done by their objects (instead of
  a Parser god object)
* A list of AST sections are assembled into an Object using the new
  vm::obj::assemble module.
* Changed the object layout some in the spec, and adjusted code to match
  this.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-02-09 13:04:56 -05:00
parent 329e61e087
commit e198da5825
19 changed files with 739 additions and 311 deletions

View File

@@ -5,6 +5,7 @@ mod common;
mod vm;
use std::{
convert::TryFrom,
io::{self, Read},
process,
};
@@ -18,15 +19,16 @@ fn get_input_string() -> io::Result<String> {
}
fn main() -> Result<()> {
use vm::syn::parser::ProgramParser;
use vm::obj::syn::parser::SectionsParser;
let contents = get_input_string()?;
let ast = match ProgramParser::new().parse(&contents) {
let ast = match SectionsParser::new().parse(&contents) {
Ok(ast) => ast,
Err(err) => {
eprintln!("{}", err);
process::exit(1);
},
};
println!("{:#?}", ast);
let obj = vm::obj::obj::Object::try_from(&ast)?;
println!("{:#?}", obj);
Ok(())
}

View File

@@ -2,13 +2,14 @@ macro_rules! instructions {
{
$($variant:ident = $value:expr),* $(,)?
} => {
pub type InstOp = u16;
$(
pub const $variant: InstOp = $value;
)*
};
}
pub type InstOp = u16;
instructions! {
ADD = 0x0000,
@@ -37,3 +38,37 @@ instructions! {
HALT = 0xF000,
NOP = 0xF001,
}
/// Returns the encoded length, in bytes, of the instruction with opcode `op`.
///
/// The lengths mirror what the assembler's instruction encoder emits:
///
/// * 2 bytes - the 16-bit opcode alone (`HALT`, `NOP`).
/// * 4 bytes - the opcode followed by a 16-bit register word. This covers
///   every instruction that names one or two registers, including the
///   single-register `INEG`, `INV` and `NOT`: their register cannot be
///   carried inside the 16-bit opcode, so they are not 2-byte instructions.
/// * 8 bytes - `STOREIMM32`: opcode, register word, 32-bit immediate.
/// * 16 bytes - `STOREIMM64`: opcode, register word, 32 bits of padding,
///   64-bit immediate.
///
/// # Panics
///
/// Panics if `op` is not a known opcode.
pub fn inst_len(op: InstOp) -> usize {
    match op {
        // Opcode only.
        HALT | NOP => 2,
        // Opcode + register word (one or two registers).
        ADD | MUL | DIV | MOD | INEG | AND | OR | INV | NOT | XOR | SHL | SHR | CMPEQ
        | CMPLT | JMP | JZ | JNZ | LOAD | REGCOPY | MEMCOPY | STORE => 4,
        // Opcode + register word + immediate payload.
        STOREIMM32 => 8,
        STOREIMM64 => 16,
        _ => panic!("unknown instruction op 0x{:04x}", op),
    }
}

View File

@@ -4,6 +4,5 @@ pub mod inst;
pub mod mem;
pub mod obj;
pub mod reg;
pub mod syn;
mod tick;
pub mod vm;

View File

@@ -0,0 +1,19 @@
use snafu::Snafu;
use std::{
fmt::Debug,
io,
};
/// Errors that can occur while assembling an AST into an object.
#[derive(Debug, Snafu)]
pub enum AssembleError {
/// An underlying I/O operation failed.
#[snafu(display("IO error: {}", source))]
Io { source: io::Error },
/// A label was defined more than once inside a single section block.
#[snafu(display("duplicate symbol name: {}", name))]
DuplicateName { name: String },
/// A section block exports a label name that an earlier block already exported.
#[snafu(display("duplicate exported symbol name: {}", name))]
DuplicateExportName { name: String },
}
pub type Result<T, E = AssembleError> = std::result::Result<T, E>;

View File

344
src/vm/obj/assemble/mod.rs Normal file
View File

@@ -0,0 +1,344 @@
pub mod error;
use crate::vm::{
inst::*,
obj::{assemble::error::*, obj::*, syn::ast::*},
reg::Reg,
};
use byteorder::{WriteBytesExt, LE};
use std::{
collections::{HashMap, HashSet},
convert::TryFrom,
io::Cursor,
mem,
};
pub const LAYOUT_VERSION: u32 = 0;
impl TryFrom<&'_ Vec<SectionBlock>> for Object {
    type Error = AssembleError;

    /// Assembles a parsed list of section blocks into an [`Object`].
    ///
    /// This is a thin wrapper that runs a fresh [`Assemble`] over `other`
    /// and returns whatever it produces.
    fn try_from(other: &Vec<SectionBlock>) -> Result<Self, Self::Error> {
        let mut assembler = Assemble::new(other);
        assembler.assemble()
    }
}
/// Assembler state used to turn parsed section blocks into an `Object`.
pub struct Assemble<'a> {
/// The parsed section blocks being assembled.
ast: &'a Vec<SectionBlock>,
/// Exported (global) labels plus the labels local to the block currently being assembled.
symbols: SymbolTable,
}
impl<'a> Assemble<'a> {
pub fn new(ast: &'a Vec<SectionBlock>) -> Self {
Assemble {
ast,
symbols: Default::default(),
}
}
/// Assembles every section block of the AST into an [`Object`].
///
/// Assembly runs in two passes over the blocks:
///
/// 1. The exported labels of every block are collected into the global
///    symbol table.
/// 2. Each block is assembled in order. Before a block is assembled its own
///    labels replace the local symbol table, so a label lookup sees the
///    block's labels first and the exported labels second.
///
/// A data/code block without an explicit origin is placed directly after the
/// end of the previous data/code section (the first one starts at 0).
///
/// # Errors
///
/// * [`AssembleError::DuplicateName`] if a block defines a label twice.
/// * [`AssembleError::DuplicateExportName`] if a block exports a name that an
///   earlier block already exported.
///
/// # Panics
///
/// Panics if an immediate value or a meta entry names a label that is neither
/// local to its block nor exported.
pub fn assemble(&mut self) -> Result<Object> {
    // Pass 1: gather the exported symbols of every block.
    for block in self.ast.iter() {
        let exports = Self::gather_symbols(block, true)?;
        // Probe the global table per exported name rather than collecting
        // every global key into a fresh set for each block.
        let duplicate = exports
            .keys()
            .find(|name| self.symbols.globals().contains_key(*name));
        if let Some(name) = duplicate {
            return Err(AssembleError::DuplicateExportName {
                name: name.to_string(),
            });
        }
        self.symbols.globals_mut().extend(exports);
    }

    // Pass 2: emit the sections.
    let mut pos: u64 = 0;
    let mut sections = Vec::new();
    for block in self.ast.iter() {
        let locals = Self::gather_symbols(block, false)?;
        self.symbols.replace_locals(locals);
        match block {
            SectionBlock::Data { org, body } | SectionBlock::Code { org, body } => {
                let mut contents = Vec::new();
                for line in body {
                    match line {
                        Line::Inst(inst) => contents.extend(self.assemble_inst(inst)),
                        Line::ImmValue(value) => {
                            let resolved = self.get_value(value).unwrap_or_else(|| {
                                panic!("TODO : value label not found: {:?}", value)
                            });
                            contents.extend(&resolved.to_le_bytes());
                        }
                        // Label definitions and exports emit no bytes; they
                        // were consumed while gathering symbols.
                        Line::LabelDef(_) | Line::Export(_) => {}
                    }
                }
                let (start, end) = match org {
                    Some(SectionOrg::Start(start)) => (*start, start + contents.len() as u64),
                    Some(SectionOrg::Range(start, end)) => (*start, *end),
                    None => (pos, pos + contents.len() as u64),
                };
                pos = end;
                // Only Data and Code reach this arm, so anything that is
                // not Data is Code.
                let section = if let SectionBlock::Data { .. } = block {
                    Section::Data {
                        start,
                        end,
                        contents,
                    }
                } else {
                    Section::Code {
                        start,
                        end,
                        contents,
                    }
                };
                sections.push(section);
            }
            SectionBlock::Meta { entries } => {
                let entries = entries
                    .iter()
                    .map(|(name, value)| {
                        let resolved = self.get_value(value).unwrap_or_else(|| {
                            panic!("TODO : value label not found: {:?}", value)
                        });
                        (name.to_string(), resolved)
                    })
                    .collect();
                sections.push(Section::Meta { entries });
            }
        }
    }
    Ok(Object {
        version: LAYOUT_VERSION,
        sections,
    })
}
/// Builds the label table of a single section block.
///
/// Offsets are counted in bytes from the start of the block's body: every
/// instruction advances the offset by `Inst::len` and every bare immediate
/// value by 8. When `export` is true only labels named by an export line of
/// the same block are returned; otherwise all labels are. Meta blocks define
/// no labels.
///
/// # Errors
///
/// Returns [`AssembleError::DuplicateName`] if a label is defined more than
/// once in the block.
///
/// NOTE(review): offsets are relative to the block body and ignore the
/// section origin - confirm this is the intended addressing.
fn gather_symbols(block: &SectionBlock, export: bool) -> Result<HashMap<String, u64>> {
    let body = match block {
        SectionBlock::Data { body, .. } | SectionBlock::Code { body, .. } => body,
        SectionBlock::Meta { .. } => return Ok(HashMap::new()),
    };

    let mut offset = 0;
    let mut labels = HashMap::new();
    let mut exported = HashSet::new();
    for line in body.iter() {
        match line {
            Line::Inst(inst) => offset += inst.len(),
            Line::ImmValue(_) => offset += 8,
            Line::LabelDef(label) => {
                if labels.contains_key(label) {
                    return Err(AssembleError::DuplicateName {
                        name: label.to_string(),
                    });
                }
                labels.insert(label.to_string(), offset as u64);
            }
            Line::Export(name) => {
                if export {
                    exported.insert(name);
                }
            }
        }
    }

    // Narrow the table down to the exported names only when asked to.
    if export {
        labels.retain(|name, _| exported.contains(name));
    }
    Ok(labels)
}
/// Resolves an immediate value to a number.
///
/// Literal numbers are returned as-is; labels are looked up in the symbol
/// table, yielding `None` when the label is unknown.
fn get_value(&self, value: &ImmValue) -> Option<u64> {
    match value {
        ImmValue::Label(name) => self.symbols.get(name),
        ImmValue::Number(number) => Some(*number),
    }
}
/// Encodes a single instruction into its byte representation.
///
/// # Panics
///
/// Panics if a `storeimm` operand names a label that is not in the symbol
/// table.
fn assemble_inst(&self, inst: &Inst) -> Vec<u8> {
    let builder = InstBuilder::default();
    let builder = match inst {
        Inst::Add(r1, r2) => builder.op(ADD).r1(*r1).r2(*r2),
        Inst::Mul(r1, r2) => builder.op(MUL).r1(*r1).r2(*r2),
        Inst::Div(r1, r2) => builder.op(DIV).r1(*r1).r2(*r2),
        Inst::Mod(r1, r2) => builder.op(MOD).r1(*r1).r2(*r2),
        Inst::INeg(r1) => builder.op(INEG).r1(*r1),
        Inst::And(r1, r2) => builder.op(AND).r1(*r1).r2(*r2),
        Inst::Or(r1, r2) => builder.op(OR).r1(*r1).r2(*r2),
        Inst::Inv(r1) => builder.op(INV).r1(*r1),
        Inst::Not(r1) => builder.op(NOT).r1(*r1),
        Inst::Xor(r1, r2) => builder.op(XOR).r1(*r1).r2(*r2),
        Inst::Shl(r1, r2) => builder.op(SHL).r1(*r1).r2(*r2),
        Inst::Shr(r1, r2) => builder.op(SHR).r1(*r1).r2(*r2),
        Inst::CmpEq(r1, r2) => builder.op(CMPEQ).r1(*r1).r2(*r2),
        Inst::CmpLt(r1, r2) => builder.op(CMPLT).r1(*r1).r2(*r2),
        Inst::Jmp(r1) => builder.op(JMP).r1(*r1),
        Inst::Jz(r1) => builder.op(JZ).r1(*r1),
        Inst::Jnz(r1) => builder.op(JNZ).r1(*r1),
        Inst::Load(r1, r2) => builder.op(LOAD).r1(*r1).r2(*r2),
        Inst::Store(r1, r2) => builder.op(STORE).r1(*r1).r2(*r2),
        Inst::StoreImm(r1, imm) => {
            let value = self.get_value(imm).expect("TODO: value label not found");
            // The encoding must agree with `Inst::op()`, which is what sizes
            // this instruction when label offsets are computed. Labels are
            // therefore always emitted in the 64-bit form, even when the
            // resolved address would fit in 32 bits; choosing by resolved
            // value would shift every label that follows.
            match inst.op() {
                // `op()` only reports STOREIMM32 for literals that fit in
                // 32 bits, so this cast is lossless.
                STOREIMM32 => builder.op(STOREIMM32).r1(*r1).imm32(value as u32),
                _ => builder.op(STOREIMM64).r1(*r1).imm64(value),
            }
        }
        Inst::MemCopy(r1, r2) => builder.op(MEMCOPY).r1(*r1).r2(*r2),
        Inst::RegCopy(r1, r2) => builder.op(REGCOPY).r1(*r1).r2(*r2),
        Inst::Nop => builder.op(NOP),
        Inst::Halt => builder.op(HALT),
    };
    builder.finish()
}
}
/// Collects the parts of one instruction before it is encoded to bytes.
#[derive(Debug, Default)]
struct InstBuilder {
    /// Opcode; must be set before `finish` is called.
    op: Option<InstOp>,
    /// First register operand (shifted left by 10 in the register word).
    r1: Option<Reg>,
    /// Second register operand (shifted left by 4 in the register word).
    r2: Option<Reg>,
    /// 32-bit immediate payload.
    imm32: Option<u32>,
    /// 64-bit immediate payload.
    imm64: Option<u64>,
}

impl InstBuilder {
    /// Sets the opcode.
    fn op(self, op: InstOp) -> Self {
        InstBuilder {
            op: Some(op),
            ..self
        }
    }

    /// Sets the first register operand.
    fn r1(self, r1: Reg) -> Self {
        InstBuilder {
            r1: Some(r1),
            ..self
        }
    }

    /// Sets the second register operand.
    fn r2(self, r2: Reg) -> Self {
        InstBuilder {
            r2: Some(r2),
            ..self
        }
    }

    /// Sets the 32-bit immediate.
    fn imm32(self, imm32: u32) -> Self {
        InstBuilder {
            imm32: Some(imm32),
            ..self
        }
    }

    /// Sets the 64-bit immediate.
    fn imm64(self, imm64: u64) -> Self {
        InstBuilder {
            imm64: Some(imm64),
            ..self
        }
    }

    /// Encodes the collected parts as little-endian bytes.
    ///
    /// The opcode is always written first. Depending on which operands were
    /// set it is followed by a 16-bit register word and, for immediates,
    /// either a 32-bit value or 32 zero bits plus a 64-bit value. `HALT` and
    /// `NOP` consist of the opcode alone.
    ///
    /// # Panics
    ///
    /// Panics if no opcode was set, or if the operand combination is not one
    /// of the supported shapes.
    fn finish(self) -> Vec<u8> {
        let opcode = self.op.expect("no op specified");
        let mut out = Cursor::new(Vec::new());
        // The sink is an in-memory buffer, so these writes are not expected
        // to fail.
        out.write_u16::<LE>(opcode).unwrap();
        match (self.r1, self.r2, self.imm32, self.imm64) {
            (Some(first), Some(second), None, None) => {
                let regs = ((first as u16) << 10) | ((second as u16) << 4);
                out.write_u16::<LE>(regs).unwrap();
            }
            (Some(first), None, None, None) => {
                out.write_u16::<LE>((first as u16) << 10).unwrap();
            }
            (Some(first), None, Some(value), None) => {
                out.write_u16::<LE>((first as u16) << 10).unwrap();
                out.write_u32::<LE>(value).unwrap();
            }
            (Some(first), None, None, Some(value)) => {
                out.write_u16::<LE>((first as u16) << 10).unwrap();
                // 32 zero bits sit between the register word and the
                // 64-bit immediate.
                out.write_u32::<LE>(0).unwrap();
                out.write_u64::<LE>(value).unwrap();
            }
            _ if opcode == HALT || opcode == NOP => {}
            (r1, r2, imm32, imm64) => {
                panic!(
                    r#"invalid instruction combo for opcode 0x{:04x}:
r1 : {:?}
r2 : {:?}
imm32 : {:?}
imm64 : {:?}"#,
                    opcode, r1, r2, imm32, imm64
                );
            }
        }
        out.into_inner()
    }
}
/// Label values known to the assembler, split into two scopes.
///
/// Lookups through [`SymbolTable::get`] consult `locals` first and fall back
/// to `globals`, so a local label shadows a global one of the same name.
#[derive(Debug, Clone, Default)]
struct SymbolTable {
    /// Labels visible from every section block.
    globals: HashMap<String, u64>,
    /// Labels of the section block currently being processed.
    locals: HashMap<String, u64>,
}

impl SymbolTable {
    /// Creates an empty symbol table.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the global labels.
    pub fn globals(&self) -> &HashMap<String, u64> {
        &self.globals
    }

    /// Returns the local labels.
    pub fn locals(&self) -> &HashMap<String, u64> {
        &self.locals
    }

    /// Returns the global labels for modification.
    pub fn globals_mut(&mut self) -> &mut HashMap<String, u64> {
        &mut self.globals
    }

    /// Returns the local labels for modification.
    pub fn locals_mut(&mut self) -> &mut HashMap<String, u64> {
        &mut self.locals
    }

    /// Inserts a global label, returning the value it previously had, if any.
    pub fn insert_global(&mut self, name: String, value: u64) -> Option<u64> {
        self.globals.insert(name, value)
    }

    /// Inserts a local label, returning the value it previously had, if any.
    pub fn insert_local(&mut self, name: String, value: u64) -> Option<u64> {
        self.locals.insert(name, value)
    }

    /// Swaps in a new set of local labels and returns the previous set.
    pub fn replace_locals(&mut self, locals: HashMap<String, u64>) -> HashMap<String, u64> {
        mem::replace(&mut self.locals, locals)
    }

    /// Looks up `name`, preferring a local label over a global one.
    ///
    /// Takes `&str` so callers can pass string literals as well as
    /// `&String` (which coerces automatically).
    pub fn get(&self, name: &str) -> Option<u64> {
        self.locals
            .get(name)
            .or_else(|| self.globals.get(name))
            .copied()
    }
}

View File

@@ -1,42 +0,0 @@
use snafu::Snafu;
use std::{
fmt::Debug,
io,
};
#[derive(Debug, Snafu)]
pub enum ParseError {
#[snafu(display("IO error: {}", source))]
Io { source: io::Error },
#[snafu(display("wrong magic number"))]
WrongMagic,
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
UnknownSectionKind { kind: u8 },
#[snafu(display("invalid UTF-8 string: {}", source))]
InvalidUtf8String { source: std::string::FromUtf8Error },
}
macro_rules! into_parse_error {
(
$($type:ty : $variant:ident),* $(,)?
) => {
$(
impl From<$type> for ParseError {
fn from(other: $type) -> Self {
ParseError::$variant { source: other }
}
}
)*
}
}
into_parse_error! {
io::Error: Io,
std::string::FromUtf8Error: InvalidUtf8String,
}
pub type Result<T> = std::result::Result<T, ParseError>;

View File

@@ -1,3 +1,3 @@
pub mod assemble;
pub mod syn;
pub mod obj;
pub mod parser;
pub mod error;

View File

@@ -1,21 +1,38 @@
use crate::vm::obj::error::ParseError;
use byteorder::{ReadBytesExt, LE};
use crate::vm::obj::syn::error::{Result, ParseError};
use std::{
convert::TryFrom,
collections::HashMap,
convert::{TryFrom, TryInto},
fmt::Debug,
io::{Cursor, Read},
};
pub const MAGIC: u64 = 0xDEAD_BEA7_BA5E_BA11;
const OBJECT_HEADER_LEN: usize = 16; // 8 + 4 + 4
#[derive(Debug)]
pub struct Object {
pub header: Header,
pub sections: Vec<Box<dyn Section>>,
pub version: u32,
pub sections: Vec<Section>,
}
#[derive(Debug, Clone, Copy)]
pub struct Header {
pub version: u16,
pub sections: u16,
impl Object {
pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
let mut cursor = Cursor::new(bytes);
let magic = cursor.read_u64::<LE>()?;
if magic != magic {
return Err(ParseError::WrongMagic);
}
let version = cursor.read_u32::<LE>()?;
let section_count = cursor.read_u32::<LE>()?;
let mut sections = Vec::new();
for _ in 0 .. section_count {
let section = Section::from_bytes(&mut cursor)?;
sections.push(section);
}
Ok(Object { version, sections })
}
}
macro_rules! section_kind {
@@ -25,7 +42,7 @@ macro_rules! section_kind {
}
) => {
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum $enum_name {
$($name = $value),*
@@ -54,54 +71,79 @@ section_kind! {
}
}
pub trait Section: Debug {
fn header(&self) -> SectionHeader;
}
#[derive(Debug, Clone, Copy)]
pub struct SectionHeader {
pub kind: SectionKind,
pub checksum: u32,
pub len: u64,
}
#[derive(Debug, Clone)]
pub struct DataSection {
pub header: SectionHeader,
pub load_location: u64,
pub contents: Vec<u8>,
/// One section of an object.
pub enum Section {
/// A data section: a start and end value plus the raw contents.
Data {
start: u64,
end: u64,
contents: Vec<u8>,
},
/// A code section: a start and end value plus the raw contents.
Code {
start: u64,
end: u64,
contents: Vec<u8>,
},
/// A meta section: a mapping from string keys to 64-bit values.
Meta {
entries: HashMap<String, u64>,
},
}
impl Section for DataSection {
fn header(&self) -> SectionHeader {
self.header
impl Section {
fn from_bytes(cursor: &mut Cursor<&[u8]>) -> Result<Self> {
let len = cursor.read_u64::<LE>()?;
let start = cursor.position() as usize;
let end = start + len as usize;
let bytes = &cursor.get_ref()[start .. end];
let kind: SectionKind = cursor.read_u8()?.try_into()?;
match kind {
SectionKind::Data => Section::data_section_from_bytes(bytes),
SectionKind::Code => Section::code_section_from_bytes(bytes),
SectionKind::Meta => Section::meta_section_from_bytes(bytes),
}
}
fn data_section_from_bytes(bytes: &[u8]) -> Result<Self> {
let mut cursor = Cursor::new(bytes);
let start = cursor.read_u64::<LE>()?;
let end = cursor.read_u64::<LE>()?;
let contents = &bytes[cursor.position() as usize..];
Ok(Section::Data {
start,
end,
contents: From::from(contents),
})
}
fn code_section_from_bytes(bytes: &[u8]) -> Result<Self> {
let mut cursor = Cursor::new(bytes);
let start = cursor.read_u64::<LE>()?;
let end = cursor.read_u64::<LE>()?;
let contents = &bytes[cursor.position() as usize..];
Ok(Section::Code {
start,
end,
contents: From::from(contents),
})
}
fn meta_section_from_bytes(bytes: &[u8]) -> Result<Self> {
let mut cursor = Cursor::new(bytes);
let entry_count = cursor.read_u64::<LE>()?;
let mut entries = HashMap::new();
for _ in 0 .. entry_count {
// key
let key_len = cursor.read_u64::<LE>()?;
let mut key_bytes = vec![0u8; key_len as usize];
cursor.read_exact(&mut key_bytes)?;
let key = String::from_utf8(key_bytes)?;
// value
let value = cursor.read_u64::<LE>()?;
entries.insert(key, value);
}
Ok(Section::Meta {
entries
})
}
}
#[derive(Debug, Clone)]
pub struct CodeSection {
pub header: SectionHeader,
pub load_location: u64,
pub contents: Vec<u8>,
}
impl Section for CodeSection {
fn header(&self) -> SectionHeader {
self.header
}
}
#[derive(Debug, Clone)]
pub struct MetaSection {
pub header: SectionHeader,
pub entry_count: u64,
pub entries: Vec<(String, Vec<u8>)>,
}
impl Section for MetaSection {
fn header(&self) -> SectionHeader {
self.header
}
}

View File

@@ -1,108 +0,0 @@
use byteorder::{ReadBytesExt, LE};
use crate::vm::obj::{
obj::*,
error::*,
};
use std::{
convert::TryInto,
fmt::Debug,
io::{Cursor, Read},
};
#[derive(Debug, Clone)]
pub struct ObjectParser {
bytes: Cursor<Vec<u8>>,
}
impl ObjectParser {
pub fn parse(&mut self) -> Result<Object> {
let header = self.parse_header()?;
let sections = self.parse_sections(header)?;
Ok(Object { header, sections })
}
fn parse_header(&mut self) -> Result<Header> {
let magic = self.bytes.read_u64::<LE>()?;
if magic != magic {
return Err(ParseError::WrongMagic);
}
let version = self.bytes.read_u16::<LE>()?;
let sections = self.bytes.read_u16::<LE>()?;
Ok(Header { version, sections })
}
fn parse_sections(&mut self, header: Header) -> Result<Vec<Box<dyn Section>>> {
(0..header.sections)
.map(|_| self.parse_section())
.collect()
}
fn parse_section(&mut self) -> Result<Box<dyn Section>> {
let header = self.parse_section_header()?;
let section: Box<dyn Section> = match header.kind {
SectionKind::Data => self.parse_data_section(header).map(Box::new)?,
SectionKind::Code => self.parse_code_section(header).map(Box::new)?,
SectionKind::Meta => self.parse_meta_section(header).map(Box::new)?,
};
Ok(section)
}
fn parse_section_header(&mut self) -> Result<SectionHeader> {
let kind: SectionKind = self.bytes.read_u8()?.try_into()?;
let checksum = self.bytes.read_u32::<LE>()?;
let len = self.bytes.read_u64::<LE>()?;
Ok(SectionHeader {
kind,
checksum,
len,
})
}
fn parse_data_section(&mut self, header: SectionHeader) -> Result<DataSection> {
let load_location = self.bytes.read_u64::<LE>()?;
let contents = self.take_bytes(header.len)?;
Ok(DataSection {
header,
load_location,
contents,
})
}
fn parse_code_section(&mut self, header: SectionHeader) -> Result<CodeSection> {
let load_location = self.bytes.read_u64::<LE>()?;
let contents = self.take_bytes(header.len)?;
Ok(CodeSection {
header,
load_location,
contents,
})
}
fn parse_meta_section(&mut self, header: SectionHeader) -> Result<MetaSection> {
let entry_count = self.bytes.read_u64::<LE>()?;
let mut entries = Vec::with_capacity(entry_count as usize);
for _ in 0 .. entry_count {
let key_bytes = self.parse_sized_data()?;
let key = String::from_utf8(key_bytes)?;
let value = self.parse_sized_data()?;
entries.push((key, value));
}
Ok(MetaSection {
header,
entry_count,
entries,
})
}
fn parse_sized_data(&mut self) -> Result<Vec<u8>> {
let size = self.bytes.read_u64::<LE>()?;
self.take_bytes(size)
}
fn take_bytes(&mut self, count: u64) -> Result<Vec<u8>> {
let mut contents = vec!(0u8; count as usize);
self.bytes.read_exact(&mut contents)?;
Ok(contents)
}
}

114
src/vm/obj/syn/ast.rs Normal file
View File

@@ -0,0 +1,114 @@
use crate::vm::{reg::Reg, inst::*};
/// A top-level section block of the assembly source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SectionBlock {
/// A data block: an optional origin and the lines of its body.
Data {
org: Option<SectionOrg>,
body: Vec<Line>,
},
/// A code block: an optional origin and the lines of its body.
Code {
org: Option<SectionOrg>,
body: Vec<Line>,
},
/// A meta block: a list of (name, value) entries.
Meta {
entries: Vec<(String, ImmValue)>,
},
}
/// The origin given to a data or code block.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionOrg {
/// Only a start value was given.
Start(u64),
/// Both a start and an end value were given.
Range(u64, u64),
}
/// A single line inside the body of a data or code block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Line {
/// An instruction.
Inst(Inst),
/// A label definition, holding the label's name.
LabelDef(String),
/// A bare immediate value.
ImmValue(ImmValue),
/// An export declaration, holding the exported name.
Export(String),
}
/// An immediate operand: either a literal number or a reference to a label.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ImmValue {
/// A literal 64-bit number.
Number(u64),
/// The name of a label.
Label(String),
}
/// A parsed instruction together with its operands.
///
/// Each variant corresponds to one opcode, except `StoreImm`, whose opcode
/// (`STOREIMM32` or `STOREIMM64`) is chosen by `Inst::op` from its operand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Inst {
Add(Reg, Reg),
Mul(Reg, Reg),
Div(Reg, Reg),
Mod(Reg, Reg),
INeg(Reg),
And(Reg, Reg),
Or(Reg, Reg),
Inv(Reg),
Not(Reg),
Xor(Reg, Reg),
Shl(Reg, Reg),
Shr(Reg, Reg),
CmpEq(Reg, Reg),
CmpLt(Reg, Reg),
Jmp(Reg),
Jz(Reg),
Jnz(Reg),
Load(Reg, Reg),
Store(Reg, Reg),
// The second operand may be a literal number or a label.
StoreImm(Reg, ImmValue),
MemCopy(Reg, Reg),
RegCopy(Reg, Reg),
Nop,
Halt,
}
impl Inst {
pub fn op(&self) -> InstOp {
match self {
Inst::Add(_, _) => { ADD }
Inst::Mul(_, _) => { MUL }
Inst::Div(_, _) => { DIV }
Inst::Mod(_, _) => { MOD }
Inst::INeg(_) => { INEG }
Inst::And(_, _) => { AND }
Inst::Or(_, _) => { OR }
Inst::Inv(_) => { INV }
Inst::Not(_) => { NOT }
Inst::Xor(_, _) => { XOR }
Inst::Shl(_, _) => { SHL }
Inst::Shr(_, _) => { SHR }
Inst::CmpEq(_, _) => { CMPEQ }
Inst::CmpLt(_, _) => { CMPLT }
Inst::Jmp(_) => { JMP }
Inst::Jz(_) => { JZ }
Inst::Jnz(_) => { JNZ }
Inst::Load(_, _) => { LOAD }
Inst::Store(_, _) => { STORE }
Inst::StoreImm(_, imm) => {
if let ImmValue::Number(imm) = imm {
if *imm > (u32::max_value() as u64) {
STOREIMM64
} else {
STOREIMM32
}
} else {
STOREIMM64
}
}
Inst::MemCopy(_, _) => { MEMCOPY }
Inst::RegCopy(_, _) => { REGCOPY }
Inst::Nop => { NOP }
Inst::Halt => { HALT }
}
}
pub fn len(&self) -> usize {
inst_len(self.op())
}
}

View File

@@ -0,0 +1,2 @@
/// Empty struct with no fields.
// NOTE(review): looks like a stub for upcoming work - confirm its intended use.
struct GetLayout {
}

47
src/vm/obj/syn/error.rs Normal file
View File

@@ -0,0 +1,47 @@
use snafu::Snafu;
use std::{
fmt::Debug,
io,
};
/// Errors that can occur while parsing an object.
#[derive(Debug, Snafu)]
pub enum ParseError {
/// An underlying I/O operation failed.
#[snafu(display("IO error: {}", source))]
Io { source: io::Error },
/// The magic number at the start of the object was wrong.
#[snafu(display("wrong magic number"))]
WrongMagic,
/// A section kind byte did not name a known section kind.
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
UnknownSectionKind { kind: u8 },
/// A byte string was not valid UTF-8.
#[snafu(display("invalid UTF-8 string: {}", source))]
InvalidUtf8String { source: std::string::FromUtf8Error },
/// A symbol name was defined more than once.
#[snafu(display("duplicate symbol name: {}", name))]
DuplicateName { name: String },
/// An exported symbol name was defined more than once.
#[snafu(display("duplicate exported symbol name: {}", name))]
DuplicateExportName { name: String },
}
// Generates `From<$type> for ParseError` impls. Each `type : Variant` pair
// produces an impl that wraps the source error in `ParseError::Variant`, which
// lets `?` convert these error types automatically.
macro_rules! into_parse_error {
(
$($type:ty : $variant:ident),* $(,)?
) => {
$(
impl From<$type> for ParseError {
fn from(other: $type) -> Self {
ParseError::$variant { source: other }
}
}
)*
}
}
// Conversions for the error types that carry a `source` field.
into_parse_error! {
io::Error: Io,
std::string::FromUtf8Error: InvalidUtf8String,
}
pub type Result<T, E = ParseError> = std::result::Result<T, E>;

5
src/vm/obj/syn/mod.rs Normal file
View File

@@ -0,0 +1,5 @@
use lalrpop_util::lalrpop_mod;
lalrpop_mod!(pub parser, "/vm/obj/syn/parser.rs");
pub mod ast;
pub mod error;

View File

@@ -1,6 +1,6 @@
use std::str::FromStr;
use crate::vm::{
syn::ast::*,
obj::syn::ast::*,
reg::*,
};
@@ -10,9 +10,11 @@ LabelDef: String = {
<Label> ":" => <>
}
Value: Value = {
<Label> => Value::Label(<>),
<Number> => Value::Number(<>),
// TODO : Value (ImmValue, String)
ImmValue: ImmValue = {
<Label> => ImmValue::Label(<>),
<Number> => ImmValue::Number(<>),
}
Label: String = {
@@ -56,24 +58,43 @@ Inst: Inst = {
"jnz" <d:Reg> => Inst::Jnz(d),
"load" <d:Reg> "," <s:Reg> => Inst::Load(d, s),
"store" <d:Reg> "," <s:Reg> => Inst::Store(d, s),
"storeimm" <d:Reg> "," <s:Value> => Inst::StoreImm(d, s),
"storeimm" <d:Reg> "," <s:ImmValue> => Inst::StoreImm(d, s),
"memcopy" <d:Reg> "," <s:Reg> => Inst::MemCopy(d, s),
"regcopy" <d:Reg> "," <s:Reg> => Inst::RegCopy(d, s),
"nop" => Inst::Nop,
"halt" => Inst::Halt,
}
Directive: Directive = {
r"\.section" <s:Label> => Directive::Section(s.to_string()),
r"\.org" <v:Value> => Directive::Org(v),
}
Line: Line = {
<Directive> => Line::Directive(<>),
<Inst> => Line::Inst(<>),
<LabelDef> => Line::LabelDef(<>),
<ImmValue> => Line::ImmValue(<>),
r"\.export" <Label> => Line::Export(<>),
}
pub Program: Vec<Line> = {
<Line*> => <>,
MetaLine: (String, ImmValue) = {
<name:Label> ":" <value:ImmValue> => (name, value),
}
SectionOrg: SectionOrg = {
<start:Number> => SectionOrg::Start(start),
<start:Number> r"\.\." <end:Number> => SectionOrg::Range(start, end),
}
Section: SectionBlock = {
"data" <org:SectionOrg?> "{" <body:Line*> "}" => {
SectionBlock::Data { org, body }
},
"code" <org:SectionOrg?> "{" <body:Line*> "}" => {
SectionBlock::Code { org, body }
},
"meta" "{" <entries:MetaLine*> "}" => {
SectionBlock::Meta { entries, }
}
}
pub Sections: Vec<SectionBlock> = {
<Section*> => <>,
}

View File

@@ -1,49 +0,0 @@
use crate::vm::reg::Reg;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Line {
Directive(Directive),
Inst(Inst),
LabelDef(String),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Directive {
Section(String),
Org(Value),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Value {
Number(u64),
Label(String),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Inst {
Add(Reg, Reg),
Mul(Reg, Reg),
Div(Reg, Reg),
Mod(Reg, Reg),
INeg(Reg),
And(Reg, Reg),
Or(Reg, Reg),
Xor(Reg, Reg),
Shl(Reg, Reg),
Shr(Reg, Reg),
CmpEq(Reg, Reg),
CmpLt(Reg, Reg),
Jmp(Reg),
Jz(Reg),
Jnz(Reg),
Load(Reg, Reg),
Store(Reg, Reg),
StoreImm(Reg, Value),
MemCopy(Reg, Reg),
RegCopy(Reg, Reg),
Nop,
Halt,
}

View File

@@ -1,4 +0,0 @@
use lalrpop_util::lalrpop_mod;
lalrpop_mod!(pub parser, "/vm/syn/parser.rs");
pub mod ast;

View File

@@ -1,28 +1,35 @@
.section data
data $0x1000 .. $0x2000 {
}
.section code
code $0x0 {
main:
storeimm %r00, $0xDEAD
storeimm %r01, $16
main:
storeimm %r00, $0xDEAD
storeimm %r01, $16
shl %r00, %r01
shl %r00, %r01
storeimm %r01, $0xBEEF
or %r00, %r01
storeimm %r01, $0xBEEF
or %r00, %r01
storeimm %r01, $0xDEADBEEF
storeimm %r01, $0xDEADBEEF
storeimm %r00, failure
storeimm %r01, ok
cmpeq %r00, %r01
storeimm %r00, failure
storeimm %r01, ok
cmpeq %r00, %r01
jz %r00
jz %r00
jmp %r01
jmp %r01
failure:
storeimm %status, $1
halt
failure:
storeimm %status, $1
halt
ok:
ok:
.export main
}
meta {
entry: main
}

24
vm.md
View File

@@ -436,13 +436,10 @@ the object.
The header is composed of:
* 64 bits - A magic number (0xDEAD_BEA7_BA5E_BA11).
* 16 bits - Version of the file
* 16 bits - The number of sections in the file
* 32 bits - Unused
* 32 bits - Version of the file
* 32 bits - The number of sections in the file
* section descriptions detailed below
Total length: 128 bits
## Sections
The rest of the object is a list of sections. A section's layout is a section header, followed by
@@ -454,23 +451,21 @@ the section contents.
* 0x00 - Data
* 0x10 - Code
* 0xFF - Meta
* 24 bits - Unused
* 32 bits - Checksum of the section
* 64 bits - Length of the section
Total length: 128 bits
### Data section
The data section contains static data that is initialized to some known value.
* 64 bits - load location - where in memory the contents of this section are put.
* 64 bits - section load start - where in memory the content of this section begins
* 64 bits - section load end - where in memory the content of this section ends
### Code section
The code section contains executable code.
* 64 bits - load location - where in memory the contents of this section are put.
* 64 bits - section load start - where in memory the content of this section begins
* 64 bits - section load end - where in memory the content of this section ends
The remaining length of the section is the code itself.
@@ -483,10 +478,9 @@ to other strings. All strings are UTF-8 encoded.
The remainder of the section consists of the key-value pairs.
The layout for a key-value pair is the key, followed immediately by the value. The key is always a
string, and the value may be any type of data. A key starts with the length of the string, followed
by the key string itself. A value starts with the length of the data, followed by the value data
itself.
The layout for a key-value pair is the key, followed immediately by the value. The key is a string,
and the value is a 64-bit value. A key starts with the length of the string, followed by the key
string itself. A value is just the 8 bytes of the number.
The meta section should be used to place data that's readable by the VM, but is not used by the
executing program. Data in the meta section is not copied to the program memory.