Initial binary object layout spec and matching impl (sans code)
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
28
Cargo.lock
generated
28
Cargo.lock
generated
@@ -154,6 +154,11 @@ dependencies = [
|
|||||||
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "doc-comment"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docopt"
|
name = "docopt"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
@@ -446,6 +451,7 @@ dependencies = [
|
|||||||
"lalrpop 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"lalrpop 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"lalrpop-util 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"lalrpop-util 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"regex 1.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"regex 1.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -532,6 +538,25 @@ name = "siphasher"
|
|||||||
version = "0.2.3"
|
version = "0.2.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "snafu"
|
||||||
|
version = "0.6.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "snafu-derive"
|
||||||
|
version = "0.6.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "string_cache"
|
name = "string_cache"
|
||||||
version = "0.7.5"
|
version = "0.7.5"
|
||||||
@@ -658,6 +683,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
"checksum diff 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499"
|
"checksum diff 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499"
|
||||||
"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
|
"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
|
||||||
"checksum dirs 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
|
"checksum dirs 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
|
||||||
|
"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97"
|
||||||
"checksum docopt 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f525a586d310c87df72ebcd98009e57f1cc030c8c268305287a476beb653969"
|
"checksum docopt 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f525a586d310c87df72ebcd98009e57f1cc030c8c268305287a476beb653969"
|
||||||
"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
||||||
"checksum ena 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8944dc8fa28ce4a38f778bd46bf7d923fe73eed5a439398507246c8e017e6f36"
|
"checksum ena 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8944dc8fa28ce4a38f778bd46bf7d923fe73eed5a439398507246c8e017e6f36"
|
||||||
@@ -703,6 +729,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
"checksum serde_derive 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)" = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64"
|
"checksum serde_derive 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)" = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64"
|
||||||
"checksum sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0"
|
"checksum sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0"
|
||||||
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
|
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
|
||||||
|
"checksum snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "546db9181bce2aa22ed883c33d65603b76335b4c2533a98289f54265043de7a1"
|
||||||
|
"checksum snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bdc75da2e0323f297402fd9c8fdba709bb04e4c627cbe31d19a2c91fc8d9f0e2"
|
||||||
"checksum string_cache 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "89c058a82f9fd69b1becf8c274f412281038877c553182f1d02eb027045a2d67"
|
"checksum string_cache 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "89c058a82f9fd69b1becf8c274f412281038877c553182f1d02eb027045a2d67"
|
||||||
"checksum string_cache_codegen 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f45ed1b65bf9a4bf2f7b7dc59212d1926e9eaf00fa998988e420fd124467c6"
|
"checksum string_cache_codegen 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f45ed1b65bf9a4bf2f7b7dc59212d1926e9eaf00fa998988e420fd124467c6"
|
||||||
"checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
|
"checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ bitflags = "1"
|
|||||||
byteorder = "1"
|
byteorder = "1"
|
||||||
lalrpop-util = "0.17.2"
|
lalrpop-util = "0.17.2"
|
||||||
regex = "*"
|
regex = "*"
|
||||||
|
snafu = "0.6.2"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
lalrpop = "0.17.2"
|
lalrpop = "0.17.2"
|
||||||
|
|||||||
232
src/vm/bin.rs
Normal file
232
src/vm/bin.rs
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
use crate::vm::inst::Inst;
|
||||||
|
use byteorder::{ReadBytesExt, LE};
|
||||||
|
use snafu::{ensure, Snafu};
|
||||||
|
use std::{
|
||||||
|
convert::{TryFrom, TryInto},
|
||||||
|
fmt::Debug,
|
||||||
|
io::{self, Cursor, Read},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Magic number identifying a binary object; `BinParser::parse_header` rejects input whose
/// first 64 bits (read little-endian) differ from this value.
pub const MAGIC: u64 = 0xDEAD_BEA7_BA5E_BA11;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Bin {
|
||||||
|
header: Header,
|
||||||
|
sections: Vec<Box<dyn Section>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// File header fields that follow the magic number.
#[derive(Clone, Copy, Debug)]
pub struct Header {
    /// Version of the file format.
    version: u16,
    /// Number of sections the file declares.
    sections: u16,
}
|
||||||
|
|
||||||
|
macro_rules! section_kind {
|
||||||
|
(
|
||||||
|
pub enum $enum_name:ident {
|
||||||
|
$($name:ident = $value:expr),* $(,)?
|
||||||
|
}
|
||||||
|
) => {
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
#[repr(u8)]
|
||||||
|
pub enum $enum_name {
|
||||||
|
$($name = $value),*
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<u8> for SectionKind {
|
||||||
|
type Error = ParseError;
|
||||||
|
|
||||||
|
fn try_from(other: u8) -> std::result::Result<Self, Self::Error> {
|
||||||
|
match other {
|
||||||
|
$(
|
||||||
|
$value => Ok($enum_name::$name),
|
||||||
|
)*
|
||||||
|
_ => Err(ParseError::UnknownSectionKind { kind: other }),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
section_kind! {
|
||||||
|
pub enum SectionKind {
|
||||||
|
Data = 0x00,
|
||||||
|
Code = 0x10,
|
||||||
|
Meta = 0xFF,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Section: Debug {
|
||||||
|
fn header(&self) -> SectionHeader;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct SectionHeader {
|
||||||
|
kind: SectionKind,
|
||||||
|
checksum: u32,
|
||||||
|
len: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct DataSection {
|
||||||
|
header: SectionHeader,
|
||||||
|
load_location: u64,
|
||||||
|
contents: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Section for DataSection {
|
||||||
|
fn header(&self) -> SectionHeader {
|
||||||
|
self.header
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct CodeSection {
|
||||||
|
header: SectionHeader,
|
||||||
|
load_location: u64,
|
||||||
|
contents: Vec<Inst>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Section for CodeSection {
|
||||||
|
fn header(&self) -> SectionHeader {
|
||||||
|
self.header
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct MetaSection {
|
||||||
|
header: SectionHeader,
|
||||||
|
entry_count: u64,
|
||||||
|
entries: Vec<(String, Vec<u8>)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Section for MetaSection {
|
||||||
|
fn header(&self) -> SectionHeader {
|
||||||
|
self.header
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parser that decodes a [`Bin`] from an in-memory byte buffer.
#[derive(Clone, Debug)]
pub struct BinParser {
    /// Cursor over the raw input; its position advances as fields are read.
    bytes: Cursor<Vec<u8>>,
}
|
||||||
|
|
||||||
|
impl BinParser {
|
||||||
|
pub fn parse(&mut self) -> Result<Bin> {
|
||||||
|
let header = self.parse_header()?;
|
||||||
|
let sections = self.parse_sections()?;
|
||||||
|
Ok(Bin { header, sections })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_header(&mut self) -> Result<Header> {
|
||||||
|
let magic = self.bytes.read_u64::<LE>()?;
|
||||||
|
ensure!(magic == MAGIC, WrongMagic);
|
||||||
|
let version = self.bytes.read_u16::<LE>()?;
|
||||||
|
let sections = self.bytes.read_u16::<LE>()?;
|
||||||
|
Ok(Header { version, sections })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_sections(&mut self) -> Result<Vec<Box<dyn Section>>> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_section(&mut self) -> Result<Box<dyn Section>> {
|
||||||
|
let header = self.parse_section_header()?;
|
||||||
|
let section: Box<dyn Section> = match header.kind {
|
||||||
|
SectionKind::Data => self.parse_data_section(header).map(Box::new)?,
|
||||||
|
SectionKind::Code => self.parse_code_section(header).map(Box::new)?,
|
||||||
|
SectionKind::Meta => self.parse_meta_section(header).map(Box::new)?,
|
||||||
|
};
|
||||||
|
Ok(section)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_section_header(&mut self) -> Result<SectionHeader> {
|
||||||
|
let kind: SectionKind = self.bytes.read_u8()?.try_into()?;
|
||||||
|
let checksum = self.bytes.read_u32::<LE>()?;
|
||||||
|
let len = self.bytes.read_u64::<LE>()?;
|
||||||
|
Ok(SectionHeader {
|
||||||
|
kind,
|
||||||
|
checksum,
|
||||||
|
len,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_data_section(&mut self, header: SectionHeader) -> Result<DataSection> {
|
||||||
|
let load_location = self.bytes.read_u64::<LE>()?;
|
||||||
|
let contents = self.take_bytes(header.len)?;
|
||||||
|
Ok(DataSection {
|
||||||
|
header,
|
||||||
|
load_location,
|
||||||
|
contents,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_code_section(&mut self, _header: SectionHeader) -> Result<CodeSection> {
|
||||||
|
let _load_location = self.bytes.read_u64::<LE>()?;
|
||||||
|
todo!("instruction parsing")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_meta_section(&mut self, header: SectionHeader) -> Result<MetaSection> {
|
||||||
|
let entry_count = self.bytes.read_u64::<LE>()?;
|
||||||
|
let mut entries = Vec::with_capacity(entry_count as usize);
|
||||||
|
for offset in 0 .. entry_count {
|
||||||
|
let key_bytes = self.parse_sized_data()?;
|
||||||
|
let key = String::from_utf8(key_bytes)?;
|
||||||
|
let value = self.parse_sized_data()?;
|
||||||
|
entries.push((key, value));
|
||||||
|
}
|
||||||
|
Ok(MetaSection {
|
||||||
|
header,
|
||||||
|
entry_count,
|
||||||
|
entries,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_sized_data(&mut self) -> Result<Vec<u8>> {
|
||||||
|
let size = self.bytes.read_u64::<LE>()?;
|
||||||
|
self.take_bytes(size)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn take_bytes(&mut self, count: u64) -> Result<Vec<u8>> {
|
||||||
|
let mut contents = vec!(0u8; count as usize);
|
||||||
|
self.bytes.read_exact(&mut contents)?;
|
||||||
|
Ok(contents)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Snafu)]
|
||||||
|
pub enum ParseError {
|
||||||
|
#[snafu(display("IO error: {}", source))]
|
||||||
|
Io { source: io::Error },
|
||||||
|
|
||||||
|
#[snafu(display("wrong magic number"))]
|
||||||
|
WrongMagic,
|
||||||
|
|
||||||
|
#[snafu(display("unknown section kind: 0x{:02x}", kind))]
|
||||||
|
UnknownSectionKind { kind: u8 },
|
||||||
|
|
||||||
|
#[snafu(display("invalid UTF-8 string: {}", source))]
|
||||||
|
InvalidUtf8String { source: std::string::FromUtf8Error },
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! into_parse_error {
|
||||||
|
(
|
||||||
|
$($type:ty : $variant:ident),* $(,)?
|
||||||
|
) => {
|
||||||
|
$(
|
||||||
|
impl From<$type> for ParseError {
|
||||||
|
fn from(other: $type) -> Self {
|
||||||
|
ParseError::$variant { source: other }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
into_parse_error! {
|
||||||
|
io::Error: Io,
|
||||||
|
std::string::FromUtf8Error: InvalidUtf8String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result alias used by this module; the error type is always [`ParseError`].
pub type Result<T> = std::result::Result<T, ParseError>;
|
||||||
@@ -3,3 +3,4 @@ pub mod inst;
|
|||||||
pub mod reg;
|
pub mod reg;
|
||||||
pub mod vm;
|
pub mod vm;
|
||||||
pub mod flags;
|
pub mod flags;
|
||||||
|
pub mod bin;
|
||||||
|
|||||||
68
vm.md
68
vm.md
@@ -195,6 +195,74 @@ wrapping around to 0.
|
|||||||
* Pop
|
* Pop
|
||||||
* More immediate stores?
|
* More immediate stores?
|
||||||
|
|
||||||
|
# Binary format
|
||||||
|
|
||||||
|
The binary format is composed of a header followed by sections that make up the content of the blob.
|
||||||
|
|
||||||
|
## Header
|
||||||
|
|
||||||
|
The header is composed of:
|
||||||
|
|
||||||
|
* 64 bits - A magic number (0xDEAD_BEA7_BA5E_BA11).
|
||||||
|
* 16 bits - Version of the file
|
||||||
|
* 16 bits - The number of sections in the file
|
||||||
|
* 32 bits - Unused
|
||||||
|
* section descriptions detailed below
|
||||||
|
|
||||||
|
Total length: 128 bits
|
||||||
|
|
||||||
|
## Sections
|
||||||
|
|
||||||
|
The rest of the content is a list of sections. A section's layout is a section header, followed by
|
||||||
|
the section contents.
|
||||||
|
|
||||||
|
### Section header
|
||||||
|
|
||||||
|
* 8 bits - Section kind
|
||||||
|
* 0x00 - Data
|
||||||
|
* 0x10 - Code
|
||||||
|
* 0xFF - Meta
|
||||||
|
* 24 bits - Unused
|
||||||
|
* 32 bits - Checksum of the section
|
||||||
|
* 64 bits - Length of the section
|
||||||
|
|
||||||
|
Total length: 128 bits
|
||||||
|
|
||||||
|
### Data section
|
||||||
|
|
||||||
|
The data section contains static data that is initialized to some known value.
|
||||||
|
|
||||||
|
* 64 bits - load location - where in memory the contents of this section are put.
|
||||||
|
|
||||||
|
### Code section
|
||||||
|
|
||||||
|
The code section contains executable code.
|
||||||
|
|
||||||
|
* 64 bits - load location - where in memory the contents of this section are put.
|
||||||
|
|
||||||
|
The remaining length of the section is the code itself.
|
||||||
|
|
||||||
|
### Meta section
|
||||||
|
|
||||||
|
The meta section holds a table of metadata about the binary in a key-value format of strings mapping
|
||||||
|
to arbitrary data values. All strings are UTF-8 encoded.
|
||||||
|
|
||||||
|
* 64 bits - the number of key-value entries
|
||||||
|
|
||||||
|
The remainder of the section consists of the key-value pairs.
|
||||||
|
|
||||||
|
The layout for a key-value pair is the key, followed immediately by the value. The key is always a
|
||||||
|
string, and the value may be any type of data. A key starts with the length of the string, followed
|
||||||
|
by the key string itself. A value starts with the length of the data, followed by the value data
|
||||||
|
itself.
|
||||||
|
|
||||||
|
The meta section should be used to place data that's readable by the VM, but is not used by the
|
||||||
|
executing program. Data in the meta section is not copied to the program memory.
|
||||||
|
|
||||||
|
A VM must provide support for the following meta-values:
|
||||||
|
|
||||||
|
* `entry` - a 64-bit address for where the VM should begin executing code.
|
||||||
|
|
||||||
# General TODO
|
# General TODO
|
||||||
|
|
||||||
* Interrupts
|
* Interrupts
|
||||||
|
|||||||
Reference in New Issue
Block a user