From 25f89bbc73807c85411f98049b843dc4960dbf23 Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Tue, 28 Jan 2020 18:12:31 -0500 Subject: [PATCH] Initial binary object layout spec and matching impl (sans code) Signed-off-by: Alek Ratzloff --- Cargo.lock | 28 ++++++ Cargo.toml | 1 + src/vm/bin.rs | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/vm/mod.rs | 1 + vm.md | 68 +++++++++++++++ 5 files changed, 330 insertions(+) create mode 100644 src/vm/bin.rs diff --git a/Cargo.lock b/Cargo.lock index c25461c..a0a25ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,6 +154,11 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "doc-comment" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "docopt" version = "1.1.0" @@ -446,6 +451,7 @@ dependencies = [ "lalrpop 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)", "lalrpop-util 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -532,6 +538,25 @@ name = "siphasher" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "snafu" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "snafu-derive" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] 
name = "string_cache" version = "0.7.5" @@ -658,6 +683,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum diff 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" "checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" "checksum dirs 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901" +"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" "checksum docopt 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f525a586d310c87df72ebcd98009e57f1cc030c8c268305287a476beb653969" "checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" "checksum ena 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8944dc8fa28ce4a38f778bd46bf7d923fe73eed5a439398507246c8e017e6f36" @@ -703,6 +729,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum serde_derive 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)" = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64" "checksum sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0" "checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" +"checksum snafu 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "546db9181bce2aa22ed883c33d65603b76335b4c2533a98289f54265043de7a1" +"checksum snafu-derive 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"bdc75da2e0323f297402fd9c8fdba709bb04e4c627cbe31d19a2c91fc8d9f0e2" "checksum string_cache 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "89c058a82f9fd69b1becf8c274f412281038877c553182f1d02eb027045a2d67" "checksum string_cache_codegen 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f45ed1b65bf9a4bf2f7b7dc59212d1926e9eaf00fa998988e420fd124467c6" "checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" diff --git a/Cargo.toml b/Cargo.toml index 11509a8..7227cbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ bitflags = "1" byteorder = "1" lalrpop-util = "0.17.2" regex = "*" +snafu = "0.6.2" [build-dependencies] lalrpop = "0.17.2" diff --git a/src/vm/bin.rs b/src/vm/bin.rs new file mode 100644 index 0000000..1e01b21 --- /dev/null +++ b/src/vm/bin.rs @@ -0,0 +1,232 @@ +use crate::vm::inst::Inst; +use byteorder::{ReadBytesExt, LE}; +use snafu::{ensure, Snafu}; +use std::{ + convert::{TryFrom, TryInto}, + fmt::Debug, + io::{self, Cursor, Read}, +}; + +pub const MAGIC: u64 = 0xDEAD_BEA7_BA5E_BA11; + +#[derive(Debug)] +pub struct Bin { + header: Header, + sections: Vec>, +} + +#[derive(Debug, Clone, Copy)] +pub struct Header { + version: u16, + sections: u16, +} + +macro_rules! section_kind { + ( + pub enum $enum_name:ident { + $($name:ident = $value:expr),* $(,)? + } + ) => { + + #[derive(Debug, Clone, Copy)] + #[repr(u8)] + pub enum $enum_name { + $($name = $value),* + } + + impl TryFrom for SectionKind { + type Error = ParseError; + + fn try_from(other: u8) -> std::result::Result { + match other { + $( + $value => Ok($enum_name::$name), + )* + _ => Err(ParseError::UnknownSectionKind { kind: other }), + } + } + } + }; +} + +section_kind! 
{ + pub enum SectionKind { + Data = 0x00, + Code = 0x10, + Meta = 0xFF, + } +} + +pub trait Section: Debug { + fn header(&self) -> SectionHeader; +} + +#[derive(Debug, Clone, Copy)] +pub struct SectionHeader { + kind: SectionKind, + checksum: u32, + len: u64, +} + +#[derive(Debug, Clone)] +pub struct DataSection { + header: SectionHeader, + load_location: u64, + contents: Vec, +} + +impl Section for DataSection { + fn header(&self) -> SectionHeader { + self.header + } +} + +#[derive(Debug, Clone)] +pub struct CodeSection { + header: SectionHeader, + load_location: u64, + contents: Vec, +} + +impl Section for CodeSection { + fn header(&self) -> SectionHeader { + self.header + } +} + +#[derive(Debug, Clone)] +pub struct MetaSection { + header: SectionHeader, + entry_count: u64, + entries: Vec<(String, Vec)>, +} + +impl Section for MetaSection { + fn header(&self) -> SectionHeader { + self.header + } +} + +#[derive(Debug, Clone)] +pub struct BinParser { + bytes: Cursor>, +} + +impl BinParser { + pub fn parse(&mut self) -> Result { + let header = self.parse_header()?; + let sections = self.parse_sections()?; + Ok(Bin { header, sections }) + } + + fn parse_header(&mut self) -> Result
{ + let magic = self.bytes.read_u64::()?; + ensure!(magic == MAGIC, WrongMagic); + let version = self.bytes.read_u16::()?; + let sections = self.bytes.read_u16::()?; + Ok(Header { version, sections }) + } + + fn parse_sections(&mut self) -> Result>> { + todo!() + } + + fn parse_section(&mut self) -> Result> { + let header = self.parse_section_header()?; + let section: Box = match header.kind { + SectionKind::Data => self.parse_data_section(header).map(Box::new)?, + SectionKind::Code => self.parse_code_section(header).map(Box::new)?, + SectionKind::Meta => self.parse_meta_section(header).map(Box::new)?, + }; + Ok(section) + } + + fn parse_section_header(&mut self) -> Result { + let kind: SectionKind = self.bytes.read_u8()?.try_into()?; + let checksum = self.bytes.read_u32::()?; + let len = self.bytes.read_u64::()?; + Ok(SectionHeader { + kind, + checksum, + len, + }) + } + + fn parse_data_section(&mut self, header: SectionHeader) -> Result { + let load_location = self.bytes.read_u64::()?; + let contents = self.take_bytes(header.len)?; + Ok(DataSection { + header, + load_location, + contents, + }) + } + + fn parse_code_section(&mut self, _header: SectionHeader) -> Result { + let _load_location = self.bytes.read_u64::()?; + todo!("instruction parsing") + } + + fn parse_meta_section(&mut self, header: SectionHeader) -> Result { + let entry_count = self.bytes.read_u64::()?; + let mut entries = Vec::with_capacity(entry_count as usize); + for offset in 0 .. 
entry_count { + let key_bytes = self.parse_sized_data()?; + let key = String::from_utf8(key_bytes)?; + let value = self.parse_sized_data()?; + entries.push((key, value)); + } + Ok(MetaSection { + header, + entry_count, + entries, + }) + } + + fn parse_sized_data(&mut self) -> Result> { + let size = self.bytes.read_u64::()?; + self.take_bytes(size) + } + + fn take_bytes(&mut self, count: u64) -> Result> { + let mut contents = vec!(0u8; count as usize); + self.bytes.read_exact(&mut contents)?; + Ok(contents) + } +} + +#[derive(Debug, Snafu)] +pub enum ParseError { + #[snafu(display("IO error: {}", source))] + Io { source: io::Error }, + + #[snafu(display("wrong magic number"))] + WrongMagic, + + #[snafu(display("unknown section kind: 0x{:02x}", kind))] + UnknownSectionKind { kind: u8 }, + + #[snafu(display("invalid UTF-8 string: {}", source))] + InvalidUtf8String { source: std::string::FromUtf8Error }, +} + +macro_rules! into_parse_error { + ( + $($type:ty : $variant:ident),* $(,)? + ) => { + $( + impl From<$type> for ParseError { + fn from(other: $type) -> Self { + ParseError::$variant { source: other } + } + } + )* + } +} + +into_parse_error! { + io::Error: Io, + std::string::FromUtf8Error: InvalidUtf8String, +} + +pub type Result = std::result::Result; diff --git a/src/vm/mod.rs b/src/vm/mod.rs index f57b9b4..42394b9 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -3,3 +3,4 @@ pub mod inst; pub mod reg; pub mod vm; pub mod flags; +pub mod bin; diff --git a/vm.md b/vm.md index e0e6e0d..f33ea75 100644 --- a/vm.md +++ b/vm.md @@ -195,6 +195,74 @@ wrapping around to 0. * Pop * More immediate stores? +# Binary format + +The binary format is composed of a header followed by sections that make up the content of the blob. + +## Header + +The header is composed of: + +* 64 bits - A magic number (0xDEAD_BEA7_BA5E_BA11). 
+* 16 bits - Version of the file +* 16 bits - The number of sections in the file +* 32 bits - Unused +* section descriptions detailed below + +Total length: 128 bits + +## Sections + +The rest of the content is a list of sections. A section's layout is a section header, followed by +the section contents. + +### Section header + +* 8 bits - Section kind + * 0x00 - Data + * 0x10 - Code + * 0xFF - Meta +* 24 bits - Unused +* 32 bits - Checksum of the section +* 64 bits - Length of the section + +Total length: 128 bits + +### Data section + +The data section contains static data that is initialized to some known value. + +* 64 bits - load location - where in memory the contents of this section are put. + +### Code section + +The code section contains executable code. + +* 64 bits - load location - where in memory the contents of this section are put. + +The remaining length of the section is the code itself. + +### Meta section + +The meta section holds a table of metadata about the binary in a key-value format of strings mapping +to arbitrary data. All strings are UTF-8 encoded. + +* 64 bits - the number of key-value entries + +The remainder of the section is the list of key-value pairs. + +The layout for a key-value pair is the key, followed immediately by the value. The key is always a +string, and the value may be any type of data. A key starts with the length of the string, followed +by the key string itself. A value starts with the length of the data, followed by the value data +itself. + +The meta section should be used to place data that's readable by the VM, but is not used by the +executing program. Data in the meta section is not copied to the program memory. + +A VM must provide support for the following meta-values: + +* `entry` - a 64-bit address for where the VM should begin executing code. + # General TODO * Interrupts