diff --git a/Cargo.lock b/Cargo.lock index 98567f858e9f1..9dea5a9be2683 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1709,6 +1709,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "horde" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d7c629771c2e116e71d8e7bbc6e6e0450a8817766a7230bb0c98f81311df34" +dependencies = [ + "parking_lot", +] + [[package]] name = "html-checker" version = "0.1.0" @@ -3803,6 +3812,7 @@ dependencies = [ "elsa", "ena", "hashbrown 0.16.1", + "horde", "indexmap", "jobserver", "libc", diff --git a/compiler/rustc_data_structures/Cargo.toml b/compiler/rustc_data_structures/Cargo.toml index 0332ff6810828..0743fc4cfd06a 100644 --- a/compiler/rustc_data_structures/Cargo.toml +++ b/compiler/rustc_data_structures/Cargo.toml @@ -10,6 +10,7 @@ bitflags = "2.4.1" either = "1.0" elsa = "1.11.0" ena = "0.14.4" +horde = { version = "0.1.2", features = ["nightly"] } indexmap = "2.12.1" jobserver_crate = { version = "0.1.28", package = "jobserver" } measureme = "12.0.1" diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs index e10ccccad5bb4..e255dcdc34c61 100644 --- a/compiler/rustc_data_structures/src/sharded.rs +++ b/compiler/rustc_data_structures/src/sharded.rs @@ -201,60 +201,6 @@ impl ShardedHashMap { } } -impl ShardedHashMap { - #[inline] - pub fn intern_ref(&self, value: &Q, make: impl FnOnce() -> K) -> K - where - K: Borrow, - Q: Hash + Eq, - { - let hash = make_hash(value); - let mut shard = self.lock_shard_by_hash(hash); - - match table_entry(&mut shard, hash, value) { - Entry::Occupied(e) => e.get().0, - Entry::Vacant(e) => { - let v = make(); - e.insert((v, ())); - v - } - } - } - - #[inline] - pub fn intern(&self, value: Q, make: impl FnOnce(Q) -> K) -> K - where - K: Borrow, - Q: Hash + Eq, - { - let hash = make_hash(&value); - let mut shard = self.lock_shard_by_hash(hash); - - match table_entry(&mut shard, hash, &value) { - Entry::Occupied(e) => e.get().0, - Entry::Vacant(e) => { - let v = make(value); - e.insert((v, ())); - v - } - } - } -} - -pub trait IntoPointer { - /// Returns a pointer which outlives `self`. - fn into_pointer(&self) -> *const (); -} - -impl ShardedHashMap { - pub fn contains_pointer_to(&self, value: &T) -> bool { - let hash = make_hash(&value); - let shard = self.lock_shard_by_hash(hash); - let value = value.into_pointer(); - shard.find(hash, |(k, ())| k.into_pointer() == value).is_some() - } -} - #[inline] pub fn make_hash(val: &K) -> u64 { let mut state = FxHasher::default(); diff --git a/compiler/rustc_data_structures/src/sync.rs b/compiler/rustc_data_structures/src/sync.rs index 327c28fd13890..f543fb6a9e4c3 100644 --- a/compiler/rustc_data_structures/src/sync.rs +++ b/compiler/rustc_data_structures/src/sync.rs @@ -25,6 +25,7 @@ use std::collections::HashMap; use std::hash::{BuildHasher, Hash}; +pub use horde::collect; pub use parking_lot::{ MappedRwLockReadGuard as MappedReadGuard, MappedRwLockWriteGuard as MappedWriteGuard, RwLockReadGuard as ReadGuard, RwLockWriteGuard as WriteGuard, @@ -39,6 +40,7 @@ pub use self::parallel::{ broadcast, par_fns, par_for_each_in, par_join, par_map, parallel_guard, spawn, try_par_for_each_in, }; +pub use self::sync_table::{IntoPointer, LockedWrite, Read, SyncTable}; pub use self::vec::{AppendOnlyIndexVec, AppendOnlyVec}; pub use self::worker_local::{Registry, WorkerLocal}; pub use crate::marker::*; @@ -46,6 +48,7 @@ pub use crate::marker::*; mod freeze; mod lock; mod parallel; +mod sync_table; mod vec; mod worker_local; diff --git a/compiler/rustc_data_structures/src/sync/lock.rs b/compiler/rustc_data_structures/src/sync/lock.rs index f183af0c0dabd..5b6810a88eaa7 100644 --- a/compiler/rustc_data_structures/src/sync/lock.rs +++ b/compiler/rustc_data_structures/src/sync/lock.rs @@ -112,6 +112,11 @@ impl Lock { self.data.get_mut() } + #[inline(always)] + pub fn mode(&self) -> Mode { + self.mode + } + #[inline(always)] pub fn try_lock(&self) -> Option> { let mode = self.mode; diff --git a/compiler/rustc_data_structures/src/sync/sync_table.rs b/compiler/rustc_data_structures/src/sync/sync_table.rs new file mode 100644 index 0000000000000..f2aa86bb7f2a8 --- /dev/null +++ b/compiler/rustc_data_structures/src/sync/sync_table.rs @@ -0,0 +1,215 @@ +use std::borrow::Borrow; +use std::hash::{BuildHasher, Hash, Hasher}; +use std::hint::cold_path; +use std::ops::{Deref, DerefMut}; + +use horde::collect::{Pin, pin}; +pub use horde::sync_table::Read; +use horde::sync_table::Write; +use rustc_hash::FxBuildHasher; + +use crate::sync::{DynSync, Lock, LockGuard, Mode}; + +pub struct SyncTable { + // We use this lock to protect `table` instead of the internal mutex in `horde::SyncTable` + // as it's faster when synchronization is disabled. + lock: Lock<()>, + + table: horde::SyncTable, +} + +// Memory reclamation can move elements to other threads for dropping, +// so we require `Sync` instead of `DynSync` here +unsafe impl DynSync for SyncTable where FxBuildHasher: Sync {} + +impl Default for SyncTable { + fn default() -> Self { + Self { lock: Lock::default(), table: horde::SyncTable::default() } + } +} + +impl SyncTable { + /// Creates a [Read] handle from a pinned region. + /// + /// Use [horde::collect::pin] to get a `Pin` instance. + #[inline] + pub fn read<'a>(&'a self, pin: Pin<'a>) -> Read<'a, K, V, FxBuildHasher> { + self.table.read(pin) + } + + /// Creates a [LockedWrite] handle by taking the underlying mutex that protects writes. + #[inline] + pub fn lock(&self) -> LockedWrite<'_, K, V> { + LockedWrite { + _guard: self.lock.lock(), + table: { + // SAFETY: We ensure there's only 1 writer at a time using our own lock + unsafe { self.table.unsafe_write() } + }, + } + } + + /// Hashes a key with the table's hasher. + #[inline] + pub fn hash_key(&self, key: &Q) -> u64 + where + K: Borrow, + Q: ?Sized + Hash, + { + self.table.hash_key::(key) + } + + pub fn len(&self) -> usize { + pin(|pin| self.read(pin).len()) + } + + pub fn with_capacity(cap: usize) -> Self { + Self { lock: Lock::new(()), table: horde::SyncTable::new_with(FxBuildHasher, cap) } + } +} + +/// A handle to a [SyncTable] with write access protected by a lock. +pub struct LockedWrite<'a, K, V> { + table: Write<'a, K, V, FxBuildHasher>, + _guard: LockGuard<'a, ()>, +} + +impl<'a, K, V> Deref for LockedWrite<'a, K, V> { + type Target = Write<'a, K, V, FxBuildHasher>; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.table + } +} + +impl<'a, K, V> DerefMut for LockedWrite<'a, K, V> { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.table + } +} + +pub trait IntoPointer { + /// Returns a pointer which outlives `self`. + fn into_pointer(&self) -> *const (); +} + +impl SyncTable { + pub fn contains_pointer_to(&self, value: &T) -> bool + where + K: IntoPointer, + { + pin(|pin| { + let mut state = FxBuildHasher.build_hasher(); + value.hash(&mut state); + let hash = state.finish(); + let value = value.into_pointer(); + self.read(pin).get_from_hash(hash, |entry| entry.into_pointer() == value).is_some() + }) + } + + #[inline] + pub fn intern_ref(&self, value: &Q, make: impl FnOnce() -> K) -> K + where + K: Borrow, + Q: Hash + Eq, + { + if self.lock.mode() == Mode::Sync { + pin(|pin| { + let hash = self.hash_key(value); + + let potential = match self.read(pin).get_potential(&value, Some(hash)) { + Ok(entry) => return *entry.0, + Err(potential) => { + cold_path(); + potential + } + }; + + let mut write = self.lock(); + + let potential = match potential.refresh(self.read(pin), &value, Some(hash)) { + Ok(entry) => { + cold_path(); + return *entry.0; + } + Err(potential) => potential, + }; + + let result = make(); + + potential.insert_new(&mut write, result, (), Some(hash)); + + result + }) + } else { + let mut write = self.lock(); + + let hash = self.hash_key(&value); + + let entry = write.read().get(&value, Some(hash)); + if let Some(entry) = entry { + return *entry.0; + } + + let result = make(); + + write.insert_new(result, (), Some(hash)); + + result + } + } + + #[inline] + pub fn intern(&self, value: Q, make: impl FnOnce(Q) -> K) -> K + where + K: Borrow, + Q: Hash + Eq, + { + if self.lock.mode() == Mode::Sync { + pin(|pin| { + let hash = self.hash_key(&value); + + let potential = match self.read(pin).get_potential(&value, Some(hash)) { + Ok(entry) => return *entry.0, + Err(potential) => { + cold_path(); + potential + } + }; + + let mut write = self.lock(); + + let potential = match potential.refresh(self.read(pin), &value, Some(hash)) { + Ok(entry) => { + cold_path(); + return *entry.0; + } + Err(potential) => potential, + }; + + let result = make(value); + + potential.insert_new(&mut write, result, (), Some(hash)); + + result + }) + } else { + let mut write = self.lock(); + + let hash = self.hash_key(&value); + + let entry = write.read().get(&value, Some(hash)); + if let Some(entry) = entry { + return *entry.0; + } + + let result = make(value); + + write.insert_new(result, (), Some(hash)); + + result + } + } +} diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs index c5344ee66cd03..63529769fb083 100644 --- a/compiler/rustc_interface/src/util.rs +++ b/compiler/rustc_interface/src/util.rs @@ -14,7 +14,7 @@ use rustc_codegen_ssa::traits::CodegenBackend; use rustc_codegen_ssa::{CompiledModules, CrateInfo, TargetConfig}; use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::jobserver::Proxy; -use rustc_data_structures::sync; +use rustc_data_structures::sync::{self, collect}; use rustc_metadata::{DylibError, EncodedMetadata, load_symbol_from_dylib}; use rustc_middle::dep_graph::{WorkProduct, WorkProductId}; use rustc_middle::ty::{CurrentGcx, TyCtxt}; @@ -216,7 +216,10 @@ pub(crate) fn run_in_thread_pool_with_globals< let builder = rustc_thread_pool::ThreadPoolBuilder::new() .thread_name(|_| "rustc".to_string()) .acquire_thread_handler(move || proxy_.acquire_thread()) - .release_thread_handler(move || proxy__.release_thread()) + .release_thread_handler(move || { + collect::release(); + proxy__.release_thread() + }) .num_threads(threads) .deadlock_handler(move || { // On deadlock, creates a new thread and forwards information in thread diff --git a/compiler/rustc_middle/src/query/caches.rs b/compiler/rustc_middle/src/query/caches.rs index 0c71a98b7fb29..8189b4233798d 100644 --- a/compiler/rustc_middle/src/query/caches.rs +++ b/compiler/rustc_middle/src/query/caches.rs @@ -1,6 +1,7 @@ use std::sync::OnceLock; -use rustc_data_structures::sharded::ShardedHashMap; +use rustc_data_structures::sync::SyncTable; +use rustc_data_structures::sync::collect::pin; pub use rustc_data_structures::vec_cache::VecCache; use rustc_hir::def_id::LOCAL_CRATE; use rustc_index::Idx; @@ -41,7 +42,7 @@ pub trait QueryCache: Sized { /// In-memory cache for queries whose keys aren't suitable for any of the /// more specialized kinds of cache. Backed by a sharded hashmap. pub struct DefaultCache { - cache: ShardedHashMap, + cache: SyncTable, } impl Default for DefaultCache { @@ -53,33 +54,34 @@ impl Default for DefaultCache { impl QueryCache for DefaultCache where K: QueryKey, - V: Copy, + V: Copy + Send, { type Key = K; type Value = V; #[inline(always)] fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> { - self.cache.get(key) + pin(|pin| { + let result = self.cache.read(pin).get(key, None); + if let Some((_, value)) = result { Some(*value) } else { None } + }) } #[inline] fn complete(&self, key: K, value: V, index: DepNodeIndex) { - // We may be overwriting another value. This is all right, since the dep-graph - // will check that the value fingerprint matches. - self.cache.insert(key, (value, index)); + self.cache.lock().insert_new(key, (value, index), None); } fn for_each(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) { - for shard in self.cache.lock_shards() { - for (k, v) in shard.iter() { + pin(|pin| { + for (k, v) in self.cache.read(pin).iter() { f(k, &v.0, v.1); } - } + }) } fn len(&self) -> usize { - self.cache.len() + pin(|pin| self.cache.read(pin).len()) } } @@ -142,7 +144,7 @@ impl Default for DefIdCache { impl QueryCache for DefIdCache where - V: Copy, + V: Copy + Send, { type Key = DefId; type Value = V; diff --git a/compiler/rustc_middle/src/query/job.rs b/compiler/rustc_middle/src/query/job.rs index 3bf37a782ee8a..669553b349aad 100644 --- a/compiler/rustc_middle/src/query/job.rs +++ b/compiler/rustc_middle/src/query/job.rs @@ -4,6 +4,7 @@ use std::num::NonZero; use std::sync::Arc; use parking_lot::{Condvar, Mutex}; +use rustc_data_structures::sync::collect; use rustc_span::Span; use crate::query::CycleError; @@ -103,6 +104,7 @@ impl<'tcx> QueryLatch<'tcx> { // we have to be in the `wait` call. This is ensured by the deadlock handler // getting the self.info lock. rustc_thread_pool::mark_blocked(); + collect::release(); tcx.jobserver_proxy.release_thread(); waiter.condvar.wait(&mut waiters_guard); // Release the lock before we potentially block in `acquire_thread` diff --git a/compiler/rustc_middle/src/query/keys.rs b/compiler/rustc_middle/src/query/keys.rs index e5e56b8e28b27..387cd9e0e5683 100644 --- a/compiler/rustc_middle/src/query/keys.rs +++ b/compiler/rustc_middle/src/query/keys.rs @@ -24,7 +24,8 @@ use crate::{mir, traits}; #[derive(Copy, Clone, Debug)] pub struct LocalCrate; -pub trait QueryKeyBounds = Copy + Debug + Eq + Hash + for<'a> HashStable>; +pub trait QueryKeyBounds = + Copy + Debug + Eq + Hash + Send + for<'a> HashStable>; /// Controls what types can legally be used as the key for a query. pub trait QueryKey: Sized + QueryKeyBounds { diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 56428780d22da..a2b8f93d7855b 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -23,11 +23,10 @@ use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::intern::Interned; use rustc_data_structures::jobserver::Proxy; use rustc_data_structures::profiling::SelfProfilerRef; -use rustc_data_structures::sharded::{IntoPointer, ShardedHashMap}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::steal::Steal; use rustc_data_structures::sync::{ - self, DynSend, DynSync, FreezeReadGuard, Lock, RwLock, WorkerLocal, + self, DynSend, DynSync, FreezeReadGuard, IntoPointer, Lock, RwLock, SyncTable, WorkerLocal, }; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, Diagnostic, MultiSpan}; use rustc_hir::def::DefKind; @@ -131,7 +130,7 @@ impl<'tcx> rustc_type_ir::inherent::Span> for Span { } } -type InternedSet<'tcx, T> = ShardedHashMap, ()>; +type InternedSet<'tcx, T> = SyncTable, ()>; pub struct CtxtInterners<'tcx> { /// The arena that types, regions, etc. are allocated from. @@ -1802,6 +1801,7 @@ macro_rules! sty_debug_print { mod inner { use crate::ty::{self, TyCtxt}; use crate::ty::context::InternedInSet; + use rustc_data_structures::sync::collect::pin; #[derive(Copy, Clone)] struct DebugStat { @@ -1822,11 +1822,11 @@ macro_rules! sty_debug_print { }; $(let mut $variant = total;)* - for shard in tcx.interners.type_.lock_shards() { + pin(|pin| { // It seems that ordering doesn't affect anything here. #[allow(rustc::potential_query_instability)] - let types = shard.iter(); - for &(InternedInSet(t), ()) in types { + let types = tcx.interners.type_.read(pin); + for (&InternedInSet(t), _) in types.iter() { let variant = match t.internee { ty::Bool | ty::Char | ty::Int(..) | ty::Uint(..) | ty::Float(..) | ty::Str | ty::Never => continue, @@ -1844,7 +1844,7 @@ macro_rules! sty_debug_print { if ct { total.ct_infer += 1; variant.ct_infer += 1 } if lt && ty && ct { total.all_infer += 1; variant.all_infer += 1 } } - } + }); writeln!(fmt, "Ty interner total ty lt ct all")?; $(writeln!(fmt, " {:18}: {uses:6} {usespc:4.1}%, \ {ty:4.1}% {lt:5.1}% {ct:4.1}% {all:4.1}%", diff --git a/compiler/rustc_query_impl/src/execution.rs b/compiler/rustc_query_impl/src/execution.rs index aea0bb4d35342..aac497d378dad 100644 --- a/compiler/rustc_query_impl/src/execution.rs +++ b/compiler/rustc_query_impl/src/execution.rs @@ -3,7 +3,7 @@ use std::mem::ManuallyDrop; use rustc_data_structures::hash_table::{Entry, HashTable}; use rustc_data_structures::stack::ensure_sufficient_stack; -use rustc_data_structures::sync::{DynSend, DynSync}; +use rustc_data_structures::sync::{DynSend, DynSync, collect}; use rustc_data_structures::{outline, sharded, sync}; use rustc_errors::FatalError; use rustc_middle::dep_graph::{DepGraphData, DepNodeKey, SerializedDepNodeIndex}; @@ -315,6 +315,10 @@ fn try_execute_query<'tcx, C: QueryCache, const INCR: bool>( // `query.state`. job_guard.complete(&query.cache, value, dep_node_index); + // Periodic memory reclamation trigger. + // We do this after `complete` since the default caches may have memory to free due to table expansion. + collect::collect(); + (value, Some(dep_node_index)) } Entry::Occupied(mut entry) => { diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 9e9d463acdb3b..8fe35eb93b9ef 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -336,6 +336,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "gimli", "gsgdt", "hashbrown", + "horde", "icu_collections", "icu_list", "icu_locale",