diff --git a/Cargo.lock b/Cargo.lock index 755861a..fe965e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "anstream" version = "0.6.15" @@ -109,6 +115,18 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3f6d59c71e7dc3af60f0af9db32364d96a16e9310f3f5db2b55ed642162dd35" +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "foldhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" + [[package]] name = "gc" version = "0.5.0" @@ -130,6 +148,17 @@ dependencies = [ "synstructure", ] +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "heck" version = "0.5.0" @@ -150,6 +179,7 @@ dependencies = [ "clap", "common_macros", "gc", + "hashbrown", "thiserror", ] diff --git a/Cargo.toml b/Cargo.toml index 631438c..dfe4bb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,3 +9,4 @@ clap = { version = "4.5.8", features = ["derive"] } common_macros = "0.1.1" thiserror = "1.0.63" gc = { version = "0.5", features = ["derive"] } +hashbrown = "0.15.0" diff --git a/src/obj.rs b/src/obj.rs index 3f1e3b5..2b34aca 100644 --- a/src/obj.rs +++ b/src/obj.rs @@ -7,6 +7,7 @@ pub mod float; pub mod function; pub mod int; pub mod list; +pub mod map; pub mod module; pub mod str; pub mod ty; @@ -15,12 +16,12 @@ use std::any::Any; use std::cell::RefCell; use std::collections::HashMap; use std::fmt::{self, Debug, Display}; +use std::hash::Hash; use std::rc::Rc; use gc::{Finalize, Gc, GcCell, Trace}; -use crate::obj::macros::*; -use crate::obj::prelude::*; +use crate::obj::{macros::*, map::Hasher, prelude::*}; use crate::vm::{Argc, Vm}; pub type Ptr = Gc>; @@ -43,7 +44,7 @@ pub mod prelude { pub use crate::obj::function::{BuiltinFunction, Method, UserFunction}; pub use crate::obj::{ - bool::Bool, float::Float, int::Int, list::List, module::Module, str::Str, ty::Ty, + bool::Bool, float::Float, int::Int, list::List, map::Map, module::Module, str::Str, ty::Ty, }; pub use crate::obj::{Nil, Obj}; @@ -435,6 +436,31 @@ impl Obj { // no-op Nil::create() } + + pub(crate) fn hash(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let hasher = vm.frame_stack()[1].clone(); + + if let (Some(this), Some(hasher)) = ( + this.borrow().as_any().downcast_ref::(), + hasher.borrow_mut().as_any_mut().downcast_mut::(), + ) { + // Hash type name and then our address. + // We do not hash the `this` pointer value alone - this is so we don't unintentionally + // collide with integers of the same value. So we include something else to sort of + // "mark" this as an object value. While this object is still alive, it will always + // occupy the same memory address. Modifying this object should not change its hash + // value with this logic. + // + // Right now we are using a constant that I have generated using a 2^64-1 sided die. + // This value ultimately doesn't matter, it just needs to add another hash value. + const OBJ_SEED: u64 = 0x_be2b_b00f_33f2_7269; + OBJ_SEED.hash(hasher.hasher()); + (this as *const _ as usize).hash(hasher.hasher()); + } + + Nil::create() + } } //////////////////////////////////////////////////////////////////////////////// @@ -479,7 +505,7 @@ impl Object for Nil { } // -// Nil function implementations +// Nil methods // impl Nil { @@ -490,6 +516,16 @@ impl Nil { pub(crate) fn init(_vm: &mut Vm) -> ObjP { Nil::create() } + + pub(crate) fn hash(vm: &mut Vm) -> ObjP { + let hasher = vm.frame_stack()[1].clone(); + + if let Some(hasher) = hasher.borrow_mut().as_any_mut().downcast_mut::() { + ().hash(hasher.hasher()); + } + + Nil::create() + } } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/obj/bool.rs b/src/obj/bool.rs index 71aa8be..df5d094 100644 --- a/src/obj/bool.rs +++ b/src/obj/bool.rs @@ -1,9 +1,9 @@ use std::fmt::{self, Debug, Display}; +use std::hash::Hash; use gc::{Finalize, Trace}; -use crate::obj::macros::*; -use crate::obj::prelude::*; +use crate::obj::{macros::*, map::Hasher, prelude::*}; use crate::vm::Vm; #[derive(Trace, Finalize)] @@ -73,4 +73,22 @@ impl Bool { let bool_value = with_obj_downcast(vm.frame_stack()[0].clone(), Bool::bool_value); Float::create(bool_value as i64 as f64) } + + pub(crate) fn hash(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let hasher = vm.frame_stack()[1].clone(); + + { + let this_borrowed = this.borrow(); + let this = this_borrowed.as_any().downcast_ref::().unwrap(); + let mut hasher_borrowed = hasher.borrow_mut(); + let hasher = hasher_borrowed + .as_any_mut() + .downcast_mut::() + .unwrap(); + this.bool_value().hash(hasher.hasher()); + } + + Nil::create() + } } diff --git a/src/obj/float.rs b/src/obj/float.rs index 1a9db32..2f81164 100644 --- a/src/obj/float.rs +++ b/src/obj/float.rs @@ -1,9 +1,9 @@ use std::fmt::{self, Debug, Display}; +use std::hash::Hash; use gc::{Finalize, Trace}; -use crate::obj::macros::*; -use crate::obj::prelude::*; +use crate::obj::{macros::*, map::Hasher, prelude::*}; use crate::vm::Vm; #[derive(Trace, Finalize)] @@ -25,6 +25,21 @@ impl Float { pub fn float_value(&self) -> f64 { self.float_value } + + /// Decode a float into its mantissa, exponent, and sign values. + fn integer_decode(&self) -> (u64, i16, i8) { + let bits: u64 = unsafe { std::mem::transmute(self.float_value) }; + let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 }; + let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16; + let mantissa = if exponent == 0 { + (bits & 0xfffffffffffff) << 1 + } else { + (bits & 0xfffffffffffff) | 0x10000000000000 + }; + + exponent -= 1023 + 52; + (mantissa, exponent, sign) + } } impl Debug for Float { @@ -158,4 +173,48 @@ impl Float { let value = with_obj_downcast(lhs, Float::float_value); Float::create(-value) } + + /// Hash a Float value. + /// + /// A lot of this implementation is taken from answers here: + /// https://stackoverflow.com/questions/39638363/ + /// + /// Overall, there are a lot of valuable answers on that page. *In general*, you probably + /// shouldn't be hashing floating point numbers, but I think it's still a useful thing to + /// enable. + /// + /// There are a few corner cases that we need to keep track of: + /// * NaN values should all be the same. We check if the value is NaN and then use the NaN + /// constant if that is the case, rather than the value of the float itself. + /// * Infinities have their values hashed bit-for-bit, since they have unique representations. + /// * All other values, including subnormals, are broken down into their mantissa, exponent, + /// and sign values. Each of those values are hashed using the hash state. + pub(crate) fn hash(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let hasher = vm.frame_stack()[1].clone(); + + { + let this_borrowed = this.borrow(); + let this = this_borrowed.as_any().downcast_ref::().unwrap(); + let mut hasher_borrowed = hasher.borrow_mut(); + let hasher = hasher_borrowed + .as_any_mut() + .downcast_mut::() + .unwrap(); + if this.float_value().is_nan() { + let bits: u64 = unsafe { std::mem::transmute(f64::NAN) }; + bits.hash(hasher.hasher()); + } else if this.float_value().is_infinite() { + let bits: u64 = unsafe { std::mem::transmute(this.float_value()) }; + bits.hash(hasher.hasher()); + } else { + let (mantissa, exponent, sign) = this.integer_decode(); + sign.hash(hasher.hasher()); + exponent.hash(hasher.hasher()); + mantissa.hash(hasher.hasher()); + } + } + + Nil::create() + } } diff --git a/src/obj/int.rs b/src/obj/int.rs index f2db175..ceea092 100644 --- a/src/obj/int.rs +++ b/src/obj/int.rs @@ -1,9 +1,9 @@ use std::fmt::{self, Debug, Display}; +use std::hash::Hash; use gc::{Finalize, Trace}; -use crate::obj::macros::*; -use crate::obj::prelude::*; +use crate::obj::{macros::*, map::Hasher, prelude::*}; use crate::vm::Vm; #[derive(Trace, Finalize)] @@ -182,4 +182,22 @@ impl Int { let value = with_obj_downcast(lhs, Int::int_value); Int::create(-value) } + + pub(crate) fn hash(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let hasher = vm.frame_stack()[1].clone(); + + { + let this_borrowed = this.borrow(); + let this = this_borrowed.as_any().downcast_ref::().unwrap(); + let mut hasher_borrowed = hasher.borrow_mut(); + let hasher = hasher_borrowed + .as_any_mut() + .downcast_mut::() + .unwrap(); + this.int_value().hash(hasher.hasher()); + } + + Nil::create() + } } diff --git a/src/obj/map.rs b/src/obj/map.rs new file mode 100644 index 0000000..2add1c0 --- /dev/null +++ b/src/obj/map.rs @@ -0,0 +1,343 @@ +use std::fmt::{self, Debug, Display}; +use std::hash::BuildHasher; + +use gc::{custom_trace, Finalize, Trace}; +use hashbrown::{hash_table::Entry, DefaultHashBuilder, HashTable}; + +use crate::obj::{macros::*, prelude::*}; +use crate::vm::Vm; + +//////////////////////////////////////////////////////////////////////////////// +// Misc +//////////////////////////////////////////////////////////////////////////////// + +/// Calculate the hash of an index on a map. +/// +/// This requires the map's pointer because it uses the map's `hash_builder` member to make a new +/// `DefaultHasher`. +/// +/// This function also needs a better name. `calculate_map_hash` maybe? +fn map_hash_index(vm: &mut Vm, this: &ObjP, index: &ObjP) -> u64 { + with_obj_downcast(this.clone(), |map: &Map| { + let hasher = Hasher::create(map.make_hasher()); + let method = index + .borrow() + .get_vtable_attr(index.clone(), "hash") + .expect("no hash"); + vm.call(method, &[hasher.clone()]); + + with_obj_downcast_mut(hasher, |hasher: &mut Hasher| { + // NOTE: this `use` statement is becuase we use the `Hasher` name already + use std::hash::Hasher; + hasher.hasher.finish() + }) + }) +} + +//////////////////////////////////////////////////////////////////////////////// +// Map +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Finalize, Default)] +pub struct Map { + base: Obj, + hash_builder: DefaultHashBuilder, + table: HashTable<(ObjP, ObjP)>, +} + +impl Map { + pub fn new() -> Self { + Default::default() + } + + pub fn table(&self) -> &HashTable<(ObjP, ObjP)> { + &self.table + } + + pub fn table_mut(&mut self) -> &mut HashTable<(ObjP, ObjP)> { + &mut self.table + } + + pub fn make_hasher(&self) -> DefaultHasher { + self.hash_builder.build_hasher() + } + + impl_create!(); +} + +unsafe impl Trace for Map { + custom_trace! { + this, + { + for (k, v) in this.table.iter() { + mark(k); + mark(v); + } + } + } +} + +impl Display for Map { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(self, fmt) + } +} + +impl Debug for Map { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let mut debug_map = fmt.debug_map(); + for (k, v) in self.table.iter() { + debug_map.entry(&k.borrow(), &v.borrow()); + } + debug_map.finish() + } +} + +impl Object for Map { + impl_base_obj!(Map); +} + +// +// Map methods +// +impl Map { + pub(crate) fn do_call(_vm: &mut Vm) -> ObjP { + Map::create() + } + + pub(crate) fn init(_vm: &mut Vm) -> ObjP { + // don't do any initialization in __init__ + Nil::create() + } + + pub(crate) fn to_repr(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let this_borrowed = this.borrow(); + let map_obj = this_borrowed.as_any().downcast_ref::().unwrap(); + if map_obj.table().len() == 0 { + return Str::create("[]"); + } + + let mut repr = "[".to_string(); + let mut iter = map_obj.table().iter(); + + // first item + { + let (key, value) = iter.next().unwrap(); + let key_method = key + .borrow() + .get_vtable_attr(key.clone(), "to_repr") + .expect("no to_repr"); + let key_str = vm.call(key_method, &[]); + let value_method = value + .borrow() + .get_vtable_attr(value.clone(), "to_repr") + .expect("no to_repr"); + let value_str = vm.call(value_method, &[]); + repr += &format!("{}: {}", key_str.borrow(), value_str.borrow()); + } + + for (key, value) in iter { + let key_method = key + .borrow() + .get_vtable_attr(key.clone(), "to_repr") + .expect("no to_repr"); + let key_str = vm.call(key_method, &[]); + let value_method = value + .borrow() + .get_vtable_attr(value.clone(), "to_repr") + .expect("no to_repr"); + let value_str = vm.call(value_method, &[]); + repr += &format!(", {}: {}", key_str.borrow(), value_str.borrow()); + } + + repr += "]"; + + Str::create(repr) + } + + pub(crate) fn to_list(vm: &mut Vm) -> ObjP { + // to_list returns the keys of this value + let this = vm.frame_stack()[0].clone(); + let list: Vec<_> = with_obj_downcast(this, |map: &Map| { + map.table().iter().map(|(k, _)| k.clone()).collect() + }); + List::create(list) + } + + pub(crate) fn index(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let index = vm.frame_stack()[1].clone(); + + let hash = map_hash_index(vm, &this, &index); + + let result = with_obj_downcast(this.clone(), |map: &Map| { + map.table() + .find(hash, |(key, _)| { + let method = index + .borrow() + .get_vtable_attr(index.clone(), "__eq__") + .unwrap(); + let result = vm.call(method, &[key.clone()]); + let is_truthy = result.borrow().is_truthy(); + is_truthy + }) + .cloned() + }); + + if let Some((_key, value)) = result { + value + } else { + // TODO Map::index - throw an exception when no value is found in the index + // BLOCKED-ON: exceptions + todo!("Map::index - throw an exception when no value is found"); + } + } + + pub(crate) fn len(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let len = with_obj_downcast(this.clone(), |map: &Map| map.table().len()); + Int::create(len as i64) + } + + pub(crate) fn insert(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let index = vm.frame_stack()[1].clone(); + let value = vm.frame_stack()[2].clone(); + + let hash = map_hash_index(vm, &this, &index); + + let old_value = with_obj_downcast_mut(this.clone(), |map: &mut Map| { + // NOTE: we have to borrow `vm` mutably twice in both of these functions. + // This is safe because these closures are not called simultaneously. + let vm = vm as *mut Vm; + // get the entry + let entry = map.table_mut().entry( + hash, + // eq + |(key, _)| { + let method = index + .borrow() + .get_vtable_attr(index.clone(), "__eq__") + .expect("no __eq__") + .clone(); + // this should be safe, see note above + let result = unsafe { (*vm).call(method, &[key.clone()]) }; + let is_truthy = result.borrow().is_truthy(); + is_truthy + }, + // hasher + // this should be safe, see note above + |(key, _)| unsafe { map_hash_index(&mut *vm, &this, &key) }, + ); + + // get whether there was already a value in the hashtable + if let Entry::Occupied(occupied_entry) = &entry { + let (key, old_value) = occupied_entry.get().clone(); + // if there *was* an old value, use the old key instead of the one that we + // provided. + entry.insert((key, value)); + // also return the old value. + Some(old_value) + } else { + // if there *was not* an old value, insert as normal. + entry.insert((index, value)); + None + } + }); + + if let Some(value) = old_value { + value + } else { + Nil::create() + } + } + + pub(crate) fn remove(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let index = vm.frame_stack()[1].clone(); + + let hash = map_hash_index(vm, &this, &index); + + let removed = with_obj_downcast_mut(this.clone(), |map: &mut Map| { + let result = map.table_mut().find_entry(hash, |(key, _)| { + let method = index + .borrow() + .get_vtable_attr(index.clone(), "__eq__") + .expect("no __eq__") + .clone(); + // this should be safe, see note above + let result = vm.call(method, &[key.clone()]); + let is_truthy = result.borrow().is_truthy(); + is_truthy + }); + + if let Ok(entry) = result { + let ((_key, value), _vacancy) = entry.remove(); + Some(value) + } else { + None + } + }); + + if let Some(removed) = removed { + removed + } else { + Nil::create() + } + } + + /* + pub(crate) fn merge(_vm: &mut Vm) -> ObjP { + todo!() + } + */ +} + +//////////////////////////////////////////////////////////////////////////////// +// Hasher +//////////////////////////////////////////////////////////////////////////////// + +pub type DefaultHasher = ::Hasher; + +#[derive(Trace, Finalize)] +pub struct Hasher { + base: Obj, + #[unsafe_ignore_trace] + hasher: DefaultHasher, +} + +impl Hasher { + pub fn new(hasher: DefaultHasher) -> Self { + Self { + base: Default::default(), + hasher, + } + } + + pub fn hasher(&mut self) -> &mut DefaultHasher { + &mut self.hasher + } + + impl_create!(hasher: DefaultHasher); +} + +impl Display for Hasher { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(self, fmt) + } +} + +impl Debug for Hasher { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "", self as *const _ as usize) + } +} + +impl Object for Hasher { + impl_base_obj!(Hasher); +} + +// TODO obj::map::Hasher - maybe allow users to create new instances of Hasher. This would require +// a HashBuilder instance (I'd call it HashState) that creates new hashers, similar to what we do +// with the Map implementation. diff --git a/src/obj/str.rs b/src/obj/str.rs index 4aafa8b..6d9c324 100644 --- a/src/obj/str.rs +++ b/src/obj/str.rs @@ -1,10 +1,10 @@ use std::fmt::{self, Debug, Display}; +use std::hash::Hash; use std::rc::Rc; use gc::{Finalize, Trace}; -use crate::obj::macros::*; -use crate::obj::prelude::*; +use crate::obj::{macros::*, map::Hasher, prelude::*}; use crate::vm::Vm; #[derive(Trace, Finalize)] @@ -182,4 +182,22 @@ impl Str { Str::create(c).into() } + + pub(crate) fn hash(vm: &mut Vm) -> ObjP { + let this = vm.frame_stack()[0].clone(); + let hasher = vm.frame_stack()[1].clone(); + + { + let this_borrowed = this.borrow(); + let this = this_borrowed.as_any().downcast_ref::().unwrap(); + let mut hasher_borrowed = hasher.borrow_mut(); + let hasher = hasher_borrowed + .as_any_mut() + .downcast_mut::() + .unwrap(); + this.str_value().hash(hasher.hasher()); + } + + Nil::create() + } } diff --git a/src/obj/ty.rs b/src/obj/ty.rs index 2838b69..3d70ef7 100644 --- a/src/obj/ty.rs +++ b/src/obj/ty.rs @@ -153,7 +153,6 @@ pub fn init_types() { // type definitions Ty { // Constructor - // TODO Ty::do_call, Ty::init - implement these methods __call__ => BuiltinFunction::create("__call__", Obj::not_implemented_un, 1), __init__ => BuiltinFunction::create("__init__", Obj::not_implemented_un, 1), @@ -184,7 +183,7 @@ pub fn init_types() { // Methods len => BuiltinFunction::create("len", Obj::not_implemented_un, 1), - hash => BuiltinFunction::create("hash", Obj::not_implemented_bin, 2), + hash => BuiltinFunction::create("hash", Obj::hash, 2), }, Obj { // Constructor @@ -212,6 +211,25 @@ pub fn init_types() { pop => BuiltinFunction::create("pop", List::pop, 1), extend => BuiltinFunction::create("extend", List::extend, 2), }, + Map { + // Constructor + __call__ => BuiltinFunction::create("__call__", Map::do_call, 1), + __init__ => BuiltinFunction::create("__init__", Map::init, 1), + + // Conversion methods + to_repr => BuiltinFunction::create("to_repr", Map::to_repr, 1), + to_list => BuiltinFunction::create("to_list", Map::to_list, 1), + + // Operators + __index__ => BuiltinFunction::create("__index__", Map::index, 2), + + // Methods + len => BuiltinFunction::create("len", Map::len, 1), + insert => BuiltinFunction::create("insert", Map::insert, 3), + remove => BuiltinFunction::create("remove", Map::remove, 2), + //merge => BuiltinFunction::create("merge", Map::merge, 2), + }, + Hasher { }, Str { // Constructor __call__ => BuiltinFunction::create("__call__", Str::do_call, 2), @@ -230,6 +248,7 @@ pub fn init_types() { __index__ => BuiltinFunction::create("__index__", Str::index, 2), // Methods + hash => BuiltinFunction::create("hash", Str::hash, 2), len => BuiltinFunction::create("len", Str::len, 1), // TODO Str methods - .lower, .upper, .slice, etc }, @@ -254,7 +273,9 @@ pub fn init_types() { __le__ => BuiltinFunction::create("__le__", Int::le, 2), __pos__ => BuiltinFunction::create("__pos__", Int::pos, 1), __neg__ => BuiltinFunction::create("__neg__", Int::neg, 1), + // Methods + hash => BuiltinFunction::create("hash", Int::hash, 2), }, Float { // Constructor @@ -276,7 +297,9 @@ pub fn init_types() { __le__ => BuiltinFunction::create("__le__", Float::le, 2), __pos__ => BuiltinFunction::create("__pos__", Float::pos, 1), __neg__ => BuiltinFunction::create("__neg__", Float::neg, 1), + // Methods + hash => BuiltinFunction::create("hash", Float::hash, 2), }, Bool { // Constructor @@ -289,6 +312,7 @@ pub fn init_types() { // Operators // Methods + hash => BuiltinFunction::create("hash", Bool::hash, 2), }, Nil { // Constructor @@ -298,7 +322,9 @@ pub fn init_types() { // Conversion methods // Operators + // Methods + hash => BuiltinFunction::create("hash", Nil::hash, 2), }, BuiltinFunction { // Constructor