Add address deref, syntax, and deref sizes

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-02-25 12:07:24 -05:00
parent bdd08c6c5b
commit 5619c9dc87
11 changed files with 308 additions and 99 deletions

View File

@@ -1,31 +1,32 @@
code $0x0 {
.section data $0x1000 {
beef: .u16 $0xBEEF
; TODO(syntax)
; bytes: .u8 [
; $0xEF,
; $0xBE,
; $0xAD,
; $0xDE,
; ]
.export beef
}
.section code $0x0 {
main:
storeimm32 %r00, $0xDEAD
storeimm64 %r01, $16
shl %r00, %r01
storeimm32 %r01, $0xBEEF
or %r00, %r01
storeimm64 %r01, $0xDEADBEEF
cmpeq %r00, %r01
storeimm32 %r00, failure
storeimm64 %r01, ok
jz %r00
jmp %r01
failure:
storeimm32 %status, $1
ok: halt
mov %r0, $0xDEAD
shl %r0, $16
; move 32 bits at 'beef' to %r01
; TODO(syntax)
mov %r1, (beef)u32
or %r0, %r01
cmpeq %r0, $0xDEADBEEF
; jump to the address 'end'
jz end
mov %status, $1
end:
halt
.export main
}
meta {
.meta {
entry: main
}

View File

@@ -1,45 +1,30 @@
data $0x1000 .. $0x1100 {
zstr: .zstring "This is a zero-terminated string"
.section data $0x1000 .. $0x1100 {
zstr: .zstring "This is a string"
str: .string "This is a string"
.export zstr
.export str
}
code $0x0 {
zstr_len:
storeimm32 %r15, $0xFF
storeimm32 %r16, $1
storeimm64 %r20, zstr_next
storeimm64 %r21, exit_zstr
.section code $0x0 {
; Take the length of a zstr without those fancy "function calls" and "stack frames"
; %r0: the input string (in)
; %r1: the return address (in)
; %r2: the length of the string (out)
regcopy %r10, %r00
zstr_next:
load %r11, %r10
and %r11, %r15
cmpeq %r11, %null
jnz %r21
add %r10, %r16
jmp %r20
zstrlen:
mov %r2, %r0
ztrlen_loop:
cmpeq (%r2)u8, $0
zstrlen_end:
jmp %r1
main:
storeimm64 %r05, zstr_len
storeimm64 %r00, zstr
jmp %r05
exit_zstr:
regcopy %status, %r10
halt
ineg %r00
add %r10, %r00
regcopy %status, %r10
end:
halt
.export main
}
meta {
.meta {
entry: main
}

View File

@@ -44,6 +44,6 @@ fn main() -> Result<()> {
let mut state = State::new();
state.load_object(object, 64 * 1024 * 1024)?;
let status = state.exec()?;
println!("exit status: {}", status);
println!("exit status: {:#04x}", status);
Ok(())
}

View File

@@ -134,6 +134,10 @@ pub enum Source {
Addr32(Addr),
Addr16(Addr),
Addr8(Addr),
RegAddr64(Reg),
RegAddr32(Reg),
RegAddr16(Reg),
RegAddr8(Reg),
Reg(Reg),
Imm(u64),
}
@@ -142,6 +146,7 @@ impl Source {
pub fn len(&self) -> usize {
match self {
Source::Addr64(_) | Source::Addr32(_) | Source::Addr16(_) | Source::Addr8(_) => 8,
Source::RegAddr64(_) | Source::RegAddr32(_) | Source::RegAddr16(_) | Source::RegAddr8(_) => 1,
Source::Reg(_) => 1,
Source::Imm(_) => 8,
}
@@ -154,6 +159,10 @@ pub enum Dest {
Addr32(Addr),
Addr16(Addr),
Addr8(Addr),
RegAddr64(Reg),
RegAddr32(Reg),
RegAddr16(Reg),
RegAddr8(Reg),
Reg(Reg),
}
@@ -161,23 +170,35 @@ impl Dest {
pub fn len(&self) -> usize {
match self {
Dest::Addr64(_) | Dest::Addr32(_) | Dest::Addr16(_) | Dest::Addr8(_) => 8,
Dest::RegAddr64(_) | Dest::RegAddr32(_) | Dest::RegAddr16(_) | Dest::RegAddr8(_) => 1,
Dest::Reg(_) => 1,
}
}
}
// TODO : make this an enum
pub const DEST_ADDR64: u8 = 0b0000;
pub const DEST_ADDR32: u8 = 0b0001;
pub const DEST_ADDR16: u8 = 0b0010;
pub const DEST_ADDR8: u8 = 0b0011;
pub const DEST_REG: u8 = 0b0100;
pub const DEST_REG_ADDR64: u8 = 0b0100;
pub const DEST_REG_ADDR32: u8 = 0b0101;
pub const DEST_REG_ADDR16: u8 = 0b0110;
pub const DEST_REG_ADDR8: u8 = 0b0111;
/* immediates - not used, invalid */
pub const DEST_REG: u8 = 0b1100;
pub const SOURCE_ADDR64: u8 = 0b0000;
pub const SOURCE_ADDR32: u8 = 0b0001;
pub const SOURCE_ADDR16: u8 = 0b0010;
pub const SOURCE_ADDR8: u8 = 0b0011;
pub const SOURCE_REG: u8 = 0b0100;
pub const SOURCE_IMM64: u8 = 0b0101;
pub const SOURCE_IMM32: u8 = 0b0110;
pub const SOURCE_IMM16: u8 = 0b0111;
pub const SOURCE_IMM8: u8 = 0b1000;
pub const SOURCE_REG_ADDR64: u8 = 0b0100;
pub const SOURCE_REG_ADDR32: u8 = 0b0101;
pub const SOURCE_REG_ADDR16: u8 = 0b0110;
pub const SOURCE_REG_ADDR8: u8 = 0b0111;
pub const SOURCE_IMM64: u8 = 0b1000;
pub const SOURCE_IMM32: u8 = 0b1001;
pub const SOURCE_IMM16: u8 = 0b1010;
pub const SOURCE_IMM8: u8 = 0b1011;
pub const SOURCE_REG: u8 = 0b1100;

View File

@@ -145,6 +145,10 @@ impl<T> MemCursor<T>
DEST_ADDR32 => Ok(Dest::Addr32(self.next_addr()?)),
DEST_ADDR16 => Ok(Dest::Addr16(self.next_addr()?)),
DEST_ADDR8 => Ok(Dest::Addr8(self.next_addr()?)),
DEST_REG_ADDR64 => Ok(Dest::RegAddr64(self.next_reg()?)),
DEST_REG_ADDR32 => Ok(Dest::RegAddr32(self.next_reg()?)),
DEST_REG_ADDR16 => Ok(Dest::RegAddr16(self.next_reg()?)),
DEST_REG_ADDR8 => Ok(Dest::RegAddr8(self.next_reg()?)),
DEST_REG => Ok(Dest::Reg(self.next_reg()?)),
_ => Err(VmError::IllegalDestSpec { spec }),
}
@@ -156,6 +160,10 @@ impl<T> MemCursor<T>
SOURCE_ADDR32 => Ok(Source::Addr32(self.next_addr()?)),
SOURCE_ADDR16 => Ok(Source::Addr16(self.next_addr()?)),
SOURCE_ADDR8 => Ok(Source::Addr8(self.next_addr()?)),
SOURCE_REG_ADDR64 => Ok(Source::RegAddr64(self.next_reg()?)),
SOURCE_REG_ADDR32 => Ok(Source::RegAddr32(self.next_reg()?)),
SOURCE_REG_ADDR16 => Ok(Source::RegAddr16(self.next_reg()?)),
SOURCE_REG_ADDR8 => Ok(Source::RegAddr8(self.next_reg()?)),
SOURCE_REG => Ok(Source::Reg(self.next_reg()?)),
SOURCE_IMM64 => Ok(Source::Imm(self.next_u64()?)),
SOURCE_IMM32 => Ok(Source::Imm(self.next_u32()? as u64)),

View File

@@ -22,7 +22,7 @@ impl Asm {
/// Gets all names defined in a data section, their positions, and puts them into a hashmap.
fn gather_names(&self, section: &DataSection) -> Result<HashMap<String, Addr>> {
let mut names = HashMap::new();
let mut addr = Addr(0);
let mut addr = Addr(section.org.start());
for line in section.lines.iter() {
match line {
DataLine::ValueDef(v) => addr += v.len(),
@@ -37,7 +37,7 @@ impl Asm {
}
}
}
assert_eq!(addr, section.len());
assert_eq!(addr, Addr(section.org.start() + (section.len() as u64)));
Ok(names)
}
@@ -48,7 +48,9 @@ impl Asm {
.rev()
.filter_map(|names| names.get(name).copied())
.next()
.ok_or_else(|| AssembleError::UnknownName { name: name.to_string() })
.ok_or_else(|| AssembleError::UnknownName {
name: name.to_string(),
})
}
}
@@ -83,10 +85,14 @@ impl Assemble for Vec<SectionDef> {
asm.names.clear();
asm.names.push(globals);
let sections = self.iter()
let sections = self
.iter()
.map(|section| section.assemble(asm))
.collect::<Result<_>>()?;
Ok(Object { version: OBJ_VERSION, sections, })
Ok(Object {
version: OBJ_VERSION,
sections,
})
}
}
@@ -107,14 +113,14 @@ impl Assemble for DataSection {
fn assemble(&self, asm: &mut Asm) -> Result<Self::Out> {
let names = asm.gather_names(self)?;
asm.names.push(names);
asm.pos = Addr(0);
let section_len = self.len() as u64;
let (start, end) = match self.org {
SectionOrg::Start(start) => (start, start + (section_len as u64)),
SectionOrg::StartEnd(start, end) => (start, end),
};
asm.pos = Addr(start);
if start > end {
return Err(AssembleError::StartGreaterThanEnd { start, end, });
return Err(AssembleError::StartGreaterThanEnd { start, end });
}
let len = end - start - 1;
if len > section_len {
@@ -129,7 +135,12 @@ impl Assemble for DataSection {
contents.extend(line.assemble(asm)?);
asm.pos += line.len();
}
assert_eq!(contents.len() as u64, section_len, "in section {}", self.name);
assert_eq!(
contents.len() as u64,
section_len,
"in section {}",
self.name
);
asm.names.pop();
Ok(Section::Data {
start,
@@ -146,15 +157,21 @@ impl Assemble for MetaSection {
let mut entries = HashMap::new();
for line in self.lines.iter() {
if entries.contains_key(&line.name) {
return Err(AssembleError::DuplicateMetaName { name: line.name.to_string() });
return Err(AssembleError::DuplicateMetaName {
name: line.name.to_string(),
});
}
let value = match &line.value {
Value::Int(i) => *i,
Value::Name(s) => asm.lookup_name(s.as_str())?.0,
Value::Reg(_) | Value::Here => return Err(AssembleError::IllegalMetaValue {
Value::Reg(_) | Value::Here | Value::Addr(_, _) => {
return Err(AssembleError::IllegalMetaValue {
name: line.name.to_string(),
value: line.value.clone(),
}),
})
} // TODO :
// * deref constexpr?
// * pre-startup static init?
};
entries.insert(line.name.to_string(), value);
}
@@ -207,17 +224,25 @@ impl Assemble for Inst {
let mut bytes = Vec::with_capacity(len);
bytes.write_u16::<LE>($op).unwrap();
let dest = $dest;
let dest_encoding = dest.dest_encoding()
.ok_or_else(|| AssembleError::IllegalDestValue { value: dest.clone(), })?;
let dest_encoding =
dest.dest_encoding()
.ok_or_else(|| AssembleError::IllegalDestValue {
value: dest.clone(),
})?;
let source = $source;
let source_encoding = source.source_encoding();
bytes.write_u8((dest_encoding << 4) | source_encoding).unwrap();
bytes
.write_u8((dest_encoding << 4) | source_encoding)
.unwrap();
bytes.extend(dest.assemble(asm)?);
bytes.extend(source.assemble(asm)?);
assert_eq!(
self.len(), bytes.len(),
self.len(),
bytes.len(),
"instruction size mismatch in {} instruction - {:?} produces these bytes {:?}",
stringify!($op), self, bytes
stringify!($op),
self,
bytes
);
Ok(bytes)
}};
@@ -244,9 +269,12 @@ impl Assemble for Inst {
let mut bytes = Vec::with_capacity(len);
bytes.write_u16::<LE>($op).unwrap();
assert_eq!(
self.len(), bytes.len(),
self.len(),
bytes.len(),
"instruction size mismatch in {} instruction - {:?} produces these bytes {:?}",
stringify!($op), self, bytes
stringify!($op),
self,
bytes
);
Ok(bytes)
}};
@@ -291,6 +319,12 @@ impl Assemble for Value {
Ok(value.0.to_le_bytes().to_vec())
}
Value::Here => Ok(asm.pos.0.to_le_bytes().to_vec()),
Value::Addr(v, _) => match &**v {
    // A deref of a deref is not allowed: report it through the dedicated
    // assemble error so the caller gets a diagnostic instead of a panic.
    Value::Addr(_, _) => Err(AssembleError::DoubleDeref {
        value: self.clone(),
    }),
    // Single deref: emit the bytes of the inner operand. The access width
    // is carried by the dest/source encoding nibble, not by these bytes.
    inner => inner.assemble(asm),
},
}
}
}
@@ -318,11 +352,78 @@ pub enum AssembleError {
#[snafu(display("section start ({:#x}) is greater than end ({:#x})", start, end))]
StartGreaterThanEnd { start: u64, end: u64 },
#[snafu(display("section end ({:#x}) too short for section content size ({:#x})", section_end, section_size))]
#[snafu(display(
"section end ({:#x}) too short for section content size ({:#x})",
section_end,
section_size
))]
SectionTooShort { section_end: u64, section_size: u64 },
#[snafu(display("illegal instruction destination value: {:?}", value))]
IllegalDestValue { value: Value, },
IllegalDestValue { value: Value },
#[snafu(display("deref of a deref value is not allowed"))]
DoubleDeref { value: Value },
}
pub type Result<T, E = AssembleError> = std::result::Result<T, E>;
#[cfg(test)]
mod test {
    use super::*;

    /// Checks that `Inst::len()` agrees with the number of bytes actually
    /// produced by `assemble` for every dest/source operand combination of
    /// the two-operand instructions listed below.
    #[test]
    fn test_inst_len() {
        let mut asm = Asm::default();
        // A single scope containing the name `test`, so that `Value::Name`
        // operands can be resolved during assembly.
        asm.names
            .push(vec![("test".to_string(), Addr(0u64))].into_iter().collect());

        macro_rules! assert_len {
            ($inst:expr) => {{
                // Evaluate the instruction expression exactly once and reuse
                // it for both the assembly and the length comparison.
                let inst = $inst;
                let asm_size = inst.assemble(&mut asm).unwrap().len();
                assert_eq!(
                    inst.len(),
                    asm_size,
                    "Instruction {:?}.len() indicates it should be {} bytes long but was assembled as {} bytes",
                    inst,
                    inst.len(),
                    asm_size
                );
            }};
        }

        use Inst::*;

        // Every operand form that has a destination encoding, one per
        // deref width.
        let dummy_dests = &[
            Value::Reg(0),
            Value::Addr(Box::new(Value::Reg(0)), IntSize::U8),
            Value::Addr(Box::new(Value::Here), IntSize::U16),
            Value::Addr(Box::new(Value::Name("test".to_string())), IntSize::U32),
            Value::Addr(Box::new(Value::Int(0)), IntSize::U64),
        ];
        // Every operand form usable as a source; the deref entries cover all
        // four widths, mirroring the destination list.
        let dummy_sources = &[
            Value::Int(0),
            Value::Reg(0),
            Value::Name("test".to_string()),
            Value::Here,
            Value::Addr(Box::new(Value::Reg(0)), IntSize::U8),
            Value::Addr(Box::new(Value::Here), IntSize::U16),
            Value::Addr(Box::new(Value::Name("test".to_string())), IntSize::U32),
            Value::Addr(Box::new(Value::Int(0)), IntSize::U64),
        ];

        for v1 in dummy_dests {
            for v2 in dummy_sources {
                assert_len!(Add(v1.clone(), v2.clone()));
                assert_len!(Sub(v1.clone(), v2.clone()));
                assert_len!(Mul(v1.clone(), v2.clone()));
                assert_len!(Div(v1.clone(), v2.clone()));
                assert_len!(Mod(v1.clone(), v2.clone()));
                assert_len!(And(v1.clone(), v2.clone()));
                assert_len!(Or(v1.clone(), v2.clone()));
                assert_len!(Xor(v1.clone(), v2.clone()));
                assert_len!(Shl(v1.clone(), v2.clone()));
                assert_len!(Shr(v1.clone(), v2.clone()));
                assert_len!(INeg(v1.clone(), v2.clone()));
                assert_len!(Inv(v1.clone(), v2.clone()));
                assert_len!(Not(v1.clone(), v2.clone()));
                assert_len!(Mov(v1.clone(), v2.clone()));
                // TODO more length tests
            }
        }
    }
}

View File

@@ -50,6 +50,14 @@ pub enum SectionOrg {
StartEnd(u64, u64),
}
impl SectionOrg {
pub fn start(&self) -> u64 {
match self {
SectionOrg::Start(start) | SectionOrg::StartEnd(start, _) => *start,
}
}
}
#[derive(Debug, Clone)]
pub enum DataLine {
ValueDef(ValueDef),
@@ -87,22 +95,24 @@ impl ValueDef {
#[derive(Debug, Clone)]
pub enum Value {
// TODO : immediate int sizes
// Int(u64, IntSize)
Int(u64),
Reg(Reg),
Name(String),
Here,
Addr(Box<Value>, IntSize),
//Array(Vec<Value>),
//Deref(Value, IntSize),
}
impl Value {
pub fn len(&self) -> usize {
match self {
// TODO : immediate int sizes
Value::Int(_) => 8,
Value::Reg(_) => 1,
Value::Name(_) => 8,
Value::Here => 8,
Value::Addr(v, _) => v.len(),
}
}
@@ -110,6 +120,24 @@ impl Value {
match self {
Value::Int(_) | Value::Name(_) | Value::Here => None,
Value::Reg(_) => Some(inst::DEST_REG),
// TODO : check reg vs int value, and use dest_reg_addr8/16/32/64 values
Value::Addr(v, size) => {
if let Value::Reg(_) = &**v {
match size {
IntSize::U64 => Some(inst::DEST_REG_ADDR64),
IntSize::U32 => Some(inst::DEST_REG_ADDR32),
IntSize::U16 => Some(inst::DEST_REG_ADDR16),
IntSize::U8 => Some(inst::DEST_REG_ADDR8),
}
} else {
match size {
IntSize::U64 => Some(inst::DEST_ADDR64),
IntSize::U32 => Some(inst::DEST_ADDR32),
IntSize::U16 => Some(inst::DEST_ADDR16),
IntSize::U8 => Some(inst::DEST_ADDR8),
}
}
}
}
}
@@ -117,10 +145,36 @@ impl Value {
match self {
Value::Int(_) => inst::SOURCE_IMM64,
Value::Reg(_) => inst::SOURCE_REG,
// TODO : check reg vs int value, and use source_reg_addr8/16/32/64 values
Value::Name(_) | Value::Here => inst::SOURCE_IMM64,
Value::Addr(v, size) => {
if let Value::Reg(_) = &**v {
match size {
IntSize::U64 => inst::SOURCE_REG_ADDR64,
IntSize::U32 => inst::SOURCE_REG_ADDR32,
IntSize::U16 => inst::SOURCE_REG_ADDR16,
IntSize::U8 => inst::SOURCE_REG_ADDR8,
}
} else {
match size {
IntSize::U64 => inst::SOURCE_ADDR64,
IntSize::U32 => inst::SOURCE_ADDR32,
IntSize::U16 => inst::SOURCE_ADDR16,
IntSize::U8 => inst::SOURCE_ADDR8,
}
}
}
}
}
}
/// Size tag attached to a dereference operand (`Value::Addr`).
///
/// It selects which of the 8/16/32/64 address encodings is emitted for the
/// operand. The type is a plain fieldless tag, so it is cheap to copy and
/// can be compared and hashed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntSize {
    /// Selects the `*_ADDR8` encodings.
    U8,
    /// Selects the `*_ADDR16` encodings.
    U16,
    /// Selects the `*_ADDR32` encodings.
    U32,
    /// Selects the `*_ADDR64` encodings.
    U64,
}
#[derive(Debug, Clone)]
pub enum Inst {

View File

@@ -5,13 +5,18 @@
\.meta "DIR_META"
\.section "DIR_SECTION"
\.export "DIR_EXPORT"
\( "LPAREN"
\) "RPAREN"
\{ "LBRACE"
\} "RBRACE"
\.\. "DOTDOT"
: "COLON"
, "COMMA"
\$\$ "BUCKBUCK"
[iu](8|16|32|64) "INT_TYPE"
u8 "U8"
u16 "U16"
u32 "U32"
u64 "U64"
\.[iu](8|16|32|64) "INT_DEF"
\.string "STR_DEF"
\.zstring "ZSTR_DEF"

View File

@@ -57,6 +57,11 @@ Value -> Value:
| Reg { Value::Reg($1) }
| Name { Value::Name($1) }
| 'BUCKBUCK' { Value::Here }
| 'LPAREN' Value 'RPAREN' { Value::Addr(Box::new($2), IntSize::U64) }
| 'LPAREN' Value 'RPAREN' 'U8' { Value::Addr(Box::new($2), IntSize::U8) }
| 'LPAREN' Value 'RPAREN' 'U16' { Value::Addr(Box::new($2), IntSize::U16) }
| 'LPAREN' Value 'RPAREN' 'U32' { Value::Addr(Box::new($2), IntSize::U32) }
| 'LPAREN' Value 'RPAREN' 'U64' { Value::Addr(Box::new($2), IntSize::U64) }
;
Inst -> Inst:

View File

@@ -232,6 +232,9 @@ impl State {
// TODO - dump
}
}
if next_ip > 100 {
panic!("{:?}", next_ip);
}
self.set_reg_unchecked(IP, next_ip);
Ok(())
}
@@ -244,6 +247,23 @@ impl State {
.write_u32((value & 0xffff_ffff) as u32),
Dest::Addr16(a) => self.mem_cursor_mut(a).write_u16((value & 0xffff) as u16),
Dest::Addr8(a) => self.mem_cursor_mut(a).write_u8((value & 0xff) as u8),
Dest::RegAddr64(r) => {
let addr = Addr(self.get_reg(r)?);
self.mem_cursor_mut(addr).write_u64(value)
}
Dest::RegAddr32(r) => {
let addr = Addr(self.get_reg(r)?);
self.mem_cursor_mut(addr)
.write_u32((value & 0xffff_ffff) as u32)
}
Dest::RegAddr16(r) => {
let addr = Addr(self.get_reg(r)?);
self.mem_cursor_mut(addr).write_u16((value & 0xffff) as u16)
}
Dest::RegAddr8(r) => {
let addr = Addr(self.get_reg(r)?);
self.mem_cursor_mut(addr).write_u8((value & 0xff) as u8)
}
Dest::Reg(reg) => self.set_reg(reg, value),
}
}
@@ -254,6 +274,10 @@ impl State {
Source::Addr32(a) => self.mem_cursor(a).next_u32()? as u64,
Source::Addr16(a) => self.mem_cursor(a).next_u16()? as u64,
Source::Addr8(a) => self.mem_cursor(a).next_u8()? as u64,
Source::RegAddr64(r) => self.mem_cursor(Addr(self.get_reg(r)?)).next_u64()?,
Source::RegAddr32(r) => self.mem_cursor(Addr(self.get_reg(r)?)).next_u32()? as u64,
Source::RegAddr16(r) => self.mem_cursor(Addr(self.get_reg(r)?)).next_u16()? as u64,
Source::RegAddr8(r) => self.mem_cursor(Addr(self.get_reg(r)?)).next_u8()? as u64,
Source::Reg(reg) => self.get_reg(reg)?,
Source::Imm(u) => u,
};
@@ -266,6 +290,10 @@ impl State {
Dest::Addr32(a) => self.mem_cursor(a).next_u32()? as u64,
Dest::Addr16(a) => self.mem_cursor(a).next_u16()? as u64,
Dest::Addr8(a) => self.mem_cursor(a).next_u8()? as u64,
Dest::RegAddr64(r) => self.mem_cursor(Addr(self.get_reg(r)?)).next_u64()?,
Dest::RegAddr32(r) => self.mem_cursor(Addr(self.get_reg(r)?)).next_u32()? as u64,
Dest::RegAddr16(r) => self.mem_cursor(Addr(self.get_reg(r)?)).next_u16()? as u64,
Dest::RegAddr8(r) => self.mem_cursor(Addr(self.get_reg(r)?)).next_u8()? as u64,
Dest::Reg(reg) => self.get_reg(reg)?,
};
Ok(value)

21
vm.md
View File

@@ -228,7 +228,6 @@ the section contents.
* 8 bits - Section kind
* 0x00 - Data
* 0x10 - Code
* 0xFF - Meta
* 64 bits - Length of the section
@@ -239,15 +238,6 @@ The data section contains static data that is initialized to some known value.
* 64 bits - section load start - where in memory the content of this section begins
* 64 bits - section length - how long the memory content is
### Code section
The code section contains executable code.
* 64 bits - section load start - where in memory the content of this section begins
* 64 bits - section load end - where in memory the content of this section ends
The remaining length of the section is the code itself.
### Meta section
The meta section holds a table of metadata about the binary in a key-value format of strings mapping
@@ -273,3 +263,14 @@ A VM must provide support for the following meta-values:
* Interrupts
* MMIO regions
* Paging?
* Decide how the sizes of address (dereference) operands are determined
* source size <= dest size - zero extend source and copy
* mov %r0, (label)u32
* source size > dest size - truncate to dest size
* mov (label)u32, %r0
* source size with unknown dest size - use dest size == source size
* mov (label), %r0
* unknown source size with dest size - use source size == dest size
* mov %r0, (label)
* unknown source size with unknown dest size - 64 bits
* mov (label), (%r0)