Initial binary object layout spec and matching impl (sans code)
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
28
Cargo.lock
generated
28
Cargo.lock
generated
@@ -154,6 +154,11 @@ dependencies = [
|
||||
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "doc-comment"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "docopt"
|
||||
version = "1.1.0"
|
||||
@@ -446,6 +451,7 @@ dependencies = [
|
||||
"lalrpop 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lalrpop-util 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -532,6 +538,25 @@ name = "siphasher"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "snafu"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "snafu-derive"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.7.5"
|
||||
@@ -658,6 +683,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum diff 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499"
|
||||
"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
|
||||
"checksum dirs 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
|
||||
"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97"
|
||||
"checksum docopt 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f525a586d310c87df72ebcd98009e57f1cc030c8c268305287a476beb653969"
|
||||
"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
||||
"checksum ena 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8944dc8fa28ce4a38f778bd46bf7d923fe73eed5a439398507246c8e017e6f36"
|
||||
@@ -703,6 +729,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum serde_derive 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)" = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64"
|
||||
"checksum sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0"
|
||||
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
|
||||
"checksum snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "546db9181bce2aa22ed883c33d65603b76335b4c2533a98289f54265043de7a1"
|
||||
"checksum snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bdc75da2e0323f297402fd9c8fdba709bb04e4c627cbe31d19a2c91fc8d9f0e2"
|
||||
"checksum string_cache 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "89c058a82f9fd69b1becf8c274f412281038877c553182f1d02eb027045a2d67"
|
||||
"checksum string_cache_codegen 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f45ed1b65bf9a4bf2f7b7dc59212d1926e9eaf00fa998988e420fd124467c6"
|
||||
"checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
|
||||
|
||||
@@ -13,6 +13,7 @@ bitflags = "1"
|
||||
byteorder = "1"
|
||||
lalrpop-util = "0.17.2"
|
||||
regex = "*"
|
||||
snafu = "0.6.2"
|
||||
|
||||
[build-dependencies]
|
||||
lalrpop = "0.17.2"
|
||||
|
||||
232
src/vm/bin.rs
Normal file
232
src/vm/bin.rs
Normal file
@@ -0,0 +1,232 @@
|
||||
use crate::vm::inst::Inst;
|
||||
use byteorder::{ReadBytesExt, LE};
|
||||
use snafu::{ensure, Snafu};
|
||||
use std::{
|
||||
convert::{TryFrom, TryInto},
|
||||
fmt::Debug,
|
||||
io::{self, Cursor, Read},
|
||||
};
|
||||
|
||||
/// Magic number expected in the first 64 bits of a binary object; `BinParser::parse_header`
/// rejects input whose leading value differs from it.
pub const MAGIC: u64 = 0xDEAD_BEA7_BA5E_BA11;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Bin {
|
||||
header: Header,
|
||||
sections: Vec<Box<dyn Section>>,
|
||||
}
|
||||
|
||||
/// File header fields retained after parsing.
///
/// The magic number is validated while parsing and is not stored here.
#[derive(Debug, Clone, Copy)]
pub struct Header {
    /// Format version read from the file.
    version: u16,
    /// Number of sections the file declares.
    sections: u16,
}
|
||||
|
||||
macro_rules! section_kind {
|
||||
(
|
||||
pub enum $enum_name:ident {
|
||||
$($name:ident = $value:expr),* $(,)?
|
||||
}
|
||||
) => {
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[repr(u8)]
|
||||
pub enum $enum_name {
|
||||
$($name = $value),*
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for SectionKind {
|
||||
type Error = ParseError;
|
||||
|
||||
fn try_from(other: u8) -> std::result::Result<Self, Self::Error> {
|
||||
match other {
|
||||
$(
|
||||
$value => Ok($enum_name::$name),
|
||||
)*
|
||||
_ => Err(ParseError::UnknownSectionKind { kind: other }),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
section_kind! {
|
||||
pub enum SectionKind {
|
||||
Data = 0x00,
|
||||
Code = 0x10,
|
||||
Meta = 0xFF,
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Section: Debug {
|
||||
fn header(&self) -> SectionHeader;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct SectionHeader {
|
||||
kind: SectionKind,
|
||||
checksum: u32,
|
||||
len: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DataSection {
|
||||
header: SectionHeader,
|
||||
load_location: u64,
|
||||
contents: Vec<u8>,
|
||||
}
|
||||
|
||||
impl Section for DataSection {
|
||||
fn header(&self) -> SectionHeader {
|
||||
self.header
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CodeSection {
|
||||
header: SectionHeader,
|
||||
load_location: u64,
|
||||
contents: Vec<Inst>,
|
||||
}
|
||||
|
||||
impl Section for CodeSection {
|
||||
fn header(&self) -> SectionHeader {
|
||||
self.header
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MetaSection {
|
||||
header: SectionHeader,
|
||||
entry_count: u64,
|
||||
entries: Vec<(String, Vec<u8>)>,
|
||||
}
|
||||
|
||||
impl Section for MetaSection {
|
||||
fn header(&self) -> SectionHeader {
|
||||
self.header
|
||||
}
|
||||
}
|
||||
|
||||
/// Parser for the binary object format, reading from an in-memory byte buffer.
#[derive(Debug, Clone)]
pub struct BinParser {
    /// Input bytes plus the current read position.
    bytes: Cursor<Vec<u8>>,
}
|
||||
|
||||
impl BinParser {
|
||||
pub fn parse(&mut self) -> Result<Bin> {
|
||||
let header = self.parse_header()?;
|
||||
let sections = self.parse_sections()?;
|
||||
Ok(Bin { header, sections })
|
||||
}
|
||||
|
||||
fn parse_header(&mut self) -> Result<Header> {
|
||||
let magic = self.bytes.read_u64::<LE>()?;
|
||||
ensure!(magic == MAGIC, WrongMagic);
|
||||
let version = self.bytes.read_u16::<LE>()?;
|
||||
let sections = self.bytes.read_u16::<LE>()?;
|
||||
Ok(Header { version, sections })
|
||||
}
|
||||
|
||||
fn parse_sections(&mut self) -> Result<Vec<Box<dyn Section>>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn parse_section(&mut self) -> Result<Box<dyn Section>> {
|
||||
let header = self.parse_section_header()?;
|
||||
let section: Box<dyn Section> = match header.kind {
|
||||
SectionKind::Data => self.parse_data_section(header).map(Box::new)?,
|
||||
SectionKind::Code => self.parse_code_section(header).map(Box::new)?,
|
||||
SectionKind::Meta => self.parse_meta_section(header).map(Box::new)?,
|
||||
};
|
||||
Ok(section)
|
||||
}
|
||||
|
||||
fn parse_section_header(&mut self) -> Result<SectionHeader> {
|
||||
let kind: SectionKind = self.bytes.read_u8()?.try_into()?;
|
||||
let checksum = self.bytes.read_u32::<LE>()?;
|
||||
let len = self.bytes.read_u64::<LE>()?;
|
||||
Ok(SectionHeader {
|
||||
kind,
|
||||
checksum,
|
||||
len,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_data_section(&mut self, header: SectionHeader) -> Result<DataSection> {
|
||||
let load_location = self.bytes.read_u64::<LE>()?;
|
||||
let contents = self.take_bytes(header.len)?;
|
||||
Ok(DataSection {
|
||||
header,
|
||||
load_location,
|
||||
contents,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_code_section(&mut self, _header: SectionHeader) -> Result<CodeSection> {
|
||||
let _load_location = self.bytes.read_u64::<LE>()?;
|
||||
todo!("instruction parsing")
|
||||
}
|
||||
|
||||
fn parse_meta_section(&mut self, header: SectionHeader) -> Result<MetaSection> {
|
||||
let entry_count = self.bytes.read_u64::<LE>()?;
|
||||
let mut entries = Vec::with_capacity(entry_count as usize);
|
||||
for offset in 0 .. entry_count {
|
||||
let key_bytes = self.parse_sized_data()?;
|
||||
let key = String::from_utf8(key_bytes)?;
|
||||
let value = self.parse_sized_data()?;
|
||||
entries.push((key, value));
|
||||
}
|
||||
Ok(MetaSection {
|
||||
header,
|
||||
entry_count,
|
||||
entries,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_sized_data(&mut self) -> Result<Vec<u8>> {
|
||||
let size = self.bytes.read_u64::<LE>()?;
|
||||
self.take_bytes(size)
|
||||
}
|
||||
|
||||
fn take_bytes(&mut self, count: u64) -> Result<Vec<u8>> {
|
||||
let mut contents = vec!(0u8; count as usize);
|
||||
self.bytes.read_exact(&mut contents)?;
|
||||
Ok(contents)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum ParseError {
|
||||
#[snafu(display("IO error: {}", source))]
|
||||
Io { source: io::Error },
|
||||
|
||||
#[snafu(display("wrong magic number"))]
|
||||
WrongMagic,
|
||||
|
||||
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
|
||||
UnknownSectionKind { kind: u8 },
|
||||
|
||||
#[snafu(display("invalid UTF-8 string: {}", source))]
|
||||
InvalidUtf8String { source: std::string::FromUtf8Error },
|
||||
}
|
||||
|
||||
macro_rules! into_parse_error {
|
||||
(
|
||||
$($type:ty : $variant:ident),* $(,)?
|
||||
) => {
|
||||
$(
|
||||
impl From<$type> for ParseError {
|
||||
fn from(other: $type) -> Self {
|
||||
ParseError::$variant { source: other }
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
into_parse_error! {
|
||||
io::Error: Io,
|
||||
std::string::FromUtf8Error: InvalidUtf8String,
|
||||
}
|
||||
|
||||
/// Result alias used by the parser in this module, with `ParseError` as the error type.
pub type Result<T> = std::result::Result<T, ParseError>;
|
||||
@@ -3,3 +3,4 @@ pub mod inst;
|
||||
pub mod reg;
|
||||
pub mod vm;
|
||||
pub mod flags;
|
||||
pub mod bin;
|
||||
|
||||
68
vm.md
68
vm.md
@@ -195,6 +195,74 @@ wrapping around to 0.
|
||||
* Pop
|
||||
* More immediate stores?
|
||||
|
||||
# Binary format
|
||||
|
||||
The binary format is composed of a header followed by sections that make up the content of the blob. All multi-byte integer fields are stored in little-endian byte order.
|
||||
|
||||
## Header
|
||||
|
||||
The header is composed of:
|
||||
|
||||
* 64 bits - A magic number (0xDEAD_BEA7_BA5E_BA11).
|
||||
* 16 bits - Version of the file
|
||||
* 16 bits - The number of sections in the file
|
||||
* 32 bits - Unused
|
||||
* section descriptions detailed below
|
||||
|
||||
Total length: 128 bits
|
||||
|
||||
## Sections
|
||||
|
||||
The rest of the content is a list of sections. A section's layout is a section header, followed by
|
||||
the section contents.
|
||||
|
||||
### Section header
|
||||
|
||||
* 8 bits - Section kind
|
||||
* 0x00 - Data
|
||||
* 0x10 - Code
|
||||
* 0xFF - Meta
|
||||
* 24 bits - Unused
|
||||
* 32 bits - Checksum of the section
|
||||
* 64 bits - Length of the section
|
||||
|
||||
Total length: 128 bits
|
||||
|
||||
### Data section
|
||||
|
||||
The data section contains static data that is initialized to some known value.
|
||||
|
||||
* 64 bits - load location - where in memory the contents of this section are put.
|
||||
|
||||
### Code section
|
||||
|
||||
The code section contains executable code.
|
||||
|
||||
* 64 bits - load location - where in memory the contents of this section are put.
|
||||
|
||||
The remaining length of the section is the code itself.
|
||||
|
||||
### Meta section
|
||||
|
||||
The meta section holds a table of metadata about the binary in a key-value format: string keys
mapping to values of arbitrary data. All strings are UTF-8 encoded.
|
||||
|
||||
* 64 bits - the number of key-value entries
|
||||
|
||||
The remainder of the section consists of the key-value pairs.
|
||||
|
||||
The layout for a key-value pair is the key, followed immediately by the value. The key is always a
string, and the value may be any type of data. A key starts with a 64-bit length (in bytes), followed
by the key string itself. A value starts with a 64-bit length (in bytes), followed by the value data
itself.
|
||||
|
||||
The meta section should be used to place data that's readable by the VM, but is not used by the
|
||||
executing program. Data in the meta section is not copied to the program memory.
|
||||
|
||||
A VM must provide support for the following meta-values:
|
||||
|
||||
* `entry` - a 64-bit address for where the VM should begin executing code.
|
||||
|
||||
# General TODO
|
||||
|
||||
* Interrupts
|
||||
|
||||
Reference in New Issue
Block a user