diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index ab3045b77d7..6a9f10f9418 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -28,7 +28,7 @@ pub mod ray_tracing; pub mod resource; #[cfg(any(feature = "trace", feature = "replay"))] pub mod trace; -pub(crate) use resource::{FenceReadGuard, FenceWriteGuard}; +pub(crate) use resource::FenceReadGuard; pub use {life::WaitIdleError, resource::Device}; pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES; diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 8d4da748887..dd26abe66c2 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -24,7 +24,7 @@ use crate::{ CommandAllocator, CommandBuffer, CommandEncoder, CommandEncoderError, CopySide, TransferError, }, - device::{DeviceError, FenceReadGuard, FenceWriteGuard, WaitIdleError}, + device::{DeviceError, FenceReadGuard, WaitIdleError}, get_lowest_common_denom, global::Global, hal_label, @@ -568,7 +568,7 @@ impl WebGpuError for QueueSubmitError { pub(crate) struct PendingSubmission<'a> { queue: &'a Queue, snatch_guard: SnatchGuard<'a>, - fence: FenceWriteGuard<'a>, + fence: FenceReadGuard<'a>, command_index_guard: RwLockWriteGuard<'a, CommandIndices>, // Command buffers to be executed, along with trackers for the resources they use. pub executions: Vec, @@ -1563,7 +1563,7 @@ impl Queue { ) -> Result, (SubmissionIndex, DeviceError)> { // Lock ordering requires that the fence lock be acquired after the snatch lock and // before the command index lock. - let fence = self.device.fence.write(); + let fence = self.device.fence.read(); let mut command_index_guard = self.device.command_indices.write(); command_index_guard.active_submission_index += 1; @@ -1610,7 +1610,7 @@ impl Queue { let PendingSubmission { queue: _, snatch_guard, - mut fence, + fence, command_index_guard, mut executions, mut surface_textures, @@ -1681,7 +1681,7 @@ impl Queue { self.raw().submit( &hal_command_buffers, &submit_surface_textures, - (fence.as_mut(), submit_index), + (fence.as_ref(), submit_index), ) } .map_err(|e| self.device.handle_hal_error(e))?; @@ -1702,7 +1702,7 @@ impl Queue { self.lock_life().track_submission(submit_index, executions); Ok(SubmissionResult { - fence: RwLockWriteGuard::downgrade(fence), + fence, snatch_guard, }) } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 67f03bf9135..d9d781e9c43 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -41,7 +41,7 @@ use crate::{ TextureInitTrackerAction, }, instance::{Adapter, RequestDeviceError}, - lock::{rank, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}, + lock::{rank, Mutex, RwLock, RwLockReadGuard}, pipeline::{self, ColorStateError}, pool::ResourcePool, present, @@ -289,7 +289,6 @@ pub struct Device { } pub(crate) type FenceReadGuard<'a> = RwLockReadGuard<'a, ManuallyDrop>>; -pub(crate) type FenceWriteGuard<'a> = RwLockWriteGuard<'a, ManuallyDrop>>; pub(crate) enum DeferredDestroy { TextureViews(WeakVec), diff --git a/wgpu-core/src/lock/observing.rs b/wgpu-core/src/lock/observing.rs index 3c61d613c45..25c25185df4 100644 --- a/wgpu-core/src/lock/observing.rs +++ b/wgpu-core/src/lock/observing.rs @@ -185,15 +185,6 @@ impl<'a, T> RwLockReadGuard<'a, T> { } } -impl<'a, T> RwLockWriteGuard<'a, T> { - pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> { - RwLockReadGuard { - inner: parking_lot::RwLockWriteGuard::downgrade(this.inner), - _state: this._state, - } - } -} - impl core::fmt::Debug for RwLock { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.inner.fmt(f) @@ -226,11 +217,11 @@ impl<'a, T> core::ops::DerefMut for RwLockWriteGuard<'a, T> { /// /// This type serves two purposes: /// -/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to -/// destructure lock guards and reassemble their pieces into new guards, but -/// if the guard type itself implements `Drop`, we can't destructure it -/// without unsafe code or pointless `Option`s whose state is almost always -/// statically known. +/// - Operations would like to be able to destructure lock guards and +/// reassemble their pieces into new guards, but if the guard type +/// itself implements `Drop`, we can't destructure it without unsafe +/// code or pointless `Option`s whose state is almost always statically +/// known. /// /// - We can just implement `Drop` for this type once, and then use it in lock /// guards, rather than implementing `Drop` separately for each guard type. diff --git a/wgpu-core/src/lock/ranked.rs b/wgpu-core/src/lock/ranked.rs index 2e68b04dd8e..745300ceae5 100644 --- a/wgpu-core/src/lock/ranked.rs +++ b/wgpu-core/src/lock/ranked.rs @@ -110,11 +110,11 @@ impl LockState { /// /// This type serves two purposes: /// -/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to -/// destructure lock guards and reassemble their pieces into new guards, but -/// if the guard type itself implements `Drop`, we can't destructure it -/// without unsafe code or pointless `Option`s whose state is almost always -/// statically known. +/// - Operations would like to be able to destructure lock guards and +/// reassemble their pieces into new guards, but if the guard type +/// itself implements `Drop`, we can't destructure it without unsafe +/// code or pointless `Option`s whose state is almost always statically +/// known. /// /// - We can just implement `Drop` for this type once, and then use it in lock /// guards, rather than implementing `Drop` separately for each guard type. @@ -299,15 +299,6 @@ impl<'a, T> RwLockReadGuard<'a, T> { } } -impl<'a, T> RwLockWriteGuard<'a, T> { - pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> { - RwLockReadGuard { - inner: parking_lot::RwLockWriteGuard::downgrade(this.inner), - saved: this.saved, - } - } -} - impl fmt::Debug for RwLock { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.inner.fmt(f) diff --git a/wgpu-core/src/lock/vanilla.rs b/wgpu-core/src/lock/vanilla.rs index 4ed38c38011..96999696282 100644 --- a/wgpu-core/src/lock/vanilla.rs +++ b/wgpu-core/src/lock/vanilla.rs @@ -116,12 +116,6 @@ impl<'a, T> RwLockReadGuard<'a, T> { } } -impl<'a, T> RwLockWriteGuard<'a, T> { - pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> { - RwLockReadGuard(parking_lot::RwLockWriteGuard::downgrade(this.0)) - } -} - impl fmt::Debug for RwLock { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.0.fmt(f) diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 34f4f3a9749..a1bd44d2e27 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -1670,7 +1670,7 @@ impl crate::Queue for Queue { &self, command_buffers: &[&CommandBuffer], _surface_textures: &[&Texture], - (signal_fence, signal_value): (&mut Fence, crate::FenceValue), + (signal_fence, signal_value): (&Fence, crate::FenceValue), ) -> Result<(), crate::DeviceError> { let mut temp_lists = self.temp_lists.lock(); temp_lists.clear(); diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs index 85d8ca00450..f450513637e 100644 --- a/wgpu-hal/src/dynamic/mod.rs +++ b/wgpu-hal/src/dynamic/mod.rs @@ -57,10 +57,6 @@ trait DynResourceExt { /// /// - Panics if `self` is not downcastable to `T`. fn expect_downcast_ref(&self) -> &T; - /// # Panics - /// - /// - Panics if `self` is not downcastable to `T`. - fn expect_downcast_mut(&mut self) -> &mut T; /// Unboxes a `Box` to a concrete type. /// @@ -77,12 +73,6 @@ impl DynResourceExt for R { .expect("Resource doesn't have the expected backend type.") } - fn expect_downcast_mut<'a, T: DynResource>(&'a mut self) -> &'a mut T { - self.as_any_mut() - .downcast_mut() - .expect("Resource doesn't have the expected backend type.") - } - unsafe fn unbox(self: Box) -> T { debug_assert!( ::type_id(self.as_ref()) == TypeId::of::(), diff --git a/wgpu-hal/src/dynamic/queue.rs b/wgpu-hal/src/dynamic/queue.rs index 04744a9b4fa..5425b2d1297 100644 --- a/wgpu-hal/src/dynamic/queue.rs +++ b/wgpu-hal/src/dynamic/queue.rs @@ -12,7 +12,7 @@ pub trait DynQueue: DynResource { &self, command_buffers: &[&dyn DynCommandBuffer], surface_textures: &[&dyn DynSurfaceTexture], - signal_fence: (&mut dyn DynFence, FenceValue), + signal_fence: (&dyn DynFence, FenceValue), ) -> Result<(), DeviceError>; unsafe fn present( &self, @@ -28,7 +28,7 @@ impl DynQueue for Q { &self, command_buffers: &[&dyn DynCommandBuffer], surface_textures: &[&dyn DynSurfaceTexture], - signal_fence: (&mut dyn DynFence, FenceValue), + signal_fence: (&dyn DynFence, FenceValue), ) -> Result<(), DeviceError> { let command_buffers = command_buffers .iter() @@ -38,7 +38,7 @@ impl DynQueue for Q { .iter() .map(|surface| (*surface).expect_downcast_ref()) .collect::>(); - let signal_fence = (signal_fence.0.expect_downcast_mut(), signal_fence.1); + let signal_fence = (signal_fence.0.expect_downcast_ref(), signal_fence.1); unsafe { Q::submit(self, &command_buffers, &surface_textures, signal_fence) } } diff --git a/wgpu-hal/src/gles/fence.rs b/wgpu-hal/src/gles/fence.rs index 201f78fcb43..0470e5ba151 100644 --- a/wgpu-hal/src/gles/fence.rs +++ b/wgpu-hal/src/gles/fence.rs @@ -1,20 +1,29 @@ -use alloc::vec::Vec; +use alloc::{sync::Arc, vec::Vec}; use core::sync::atomic::Ordering; +use parking_lot::RwLock; use glow::HasContext; use crate::AtomicFenceValue; -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] struct GLFence { - sync: glow::Fence, + // Since a fence can be `Copy`ed, there can exist some + // cases where a fence could be destroyed while something + // else is still using it. Therefore, while a function is + // using this fence (and doesn't keep pending read locked), + // it should clone the `Arc` to show it needs this to + // stay alive. + // + // The arc should not be kept after a function has finished + sync: Arc, value: crate::FenceValue, } #[derive(Debug)] pub struct Fence { last_completed: AtomicFenceValue, - pending: Vec, + pending: RwLock>, fence_behavior: wgt::GlFenceBehavior, } @@ -29,24 +38,27 @@ impl Fence { pub fn new(options: &wgt::GlBackendOptions) -> Self { Self { last_completed: AtomicFenceValue::new(0), - pending: Vec::new(), + pending: RwLock::new(Vec::new()), fence_behavior: options.fence_behavior, } } pub fn signal( - &mut self, + &self, gl: &glow::Context, value: crate::FenceValue, ) -> Result<(), crate::DeviceError> { if self.fence_behavior.is_auto_finish() { - *self.last_completed.get_mut() = value; + self.last_completed.store(value, Ordering::Release); return Ok(()); } let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) } .map_err(|_| crate::DeviceError::OutOfMemory)?; - self.pending.push(GLFence { sync, value }); + self.pending.write().push(GLFence { + sync: Arc::new(sync), + value, + }); Ok(()) } @@ -62,12 +74,15 @@ impl Fence { return max_value; } - for gl_fence in self.pending.iter() { + let pending = self.pending.read(); + + for gl_fence in pending.iter() { if gl_fence.value <= max_value { // We already know this was good, no need to check again continue; } - let status = unsafe { gl.get_sync_status(gl_fence.sync) }; + // We have pending `read` locked, so we shouldn't have to clone it. + let status = unsafe { gl.get_sync_status(*gl_fence.sync) }; if status == glow::SIGNALED { max_value = gl_fence.value; } else { @@ -82,20 +97,29 @@ impl Fence { max_value } - pub fn maintain(&mut self, gl: &glow::Context) { + pub fn maintain(&self, gl: &glow::Context) { if self.fence_behavior.is_auto_finish() { return; } let latest = self.get_latest(gl); - for &gl_fence in self.pending.iter() { - if gl_fence.value <= latest { + let mut pending = self.pending.write(); + pending.retain_mut(|gl_fence| { + if gl_fence.value > latest { + true + } else if let Some(fence) = Arc::get_mut(&mut gl_fence.sync) { unsafe { - gl.delete_sync(gl_fence.sync); + gl.delete_sync(*fence); } + false + } else { + // Another function is currently using this value. In general, these should finish + // very quickly (for wait because the fence should already be signaled, an all + // others are just fast), but submit should be very fast, so we shouldn't block on + // this. + true } - } - self.pending.retain(|&gl_fence| gl_fence.value > latest); + }); } pub fn wait( @@ -115,25 +139,32 @@ impl Fence { return Ok(true); } + let pending = self.pending.read(); + // Find a matching fence - let gl_fence = self - .pending - .iter() - .find(|gl_fence| gl_fence.value >= wait_value); + let gl_fence = pending.iter().find(|gl_fence| gl_fence.value >= wait_value); let Some(gl_fence) = gl_fence else { log::warn!("Tried to wait for {wait_value} but that value has not been signalled yet"); return Ok(false); }; + // clone to show we're using the fence + let sync = gl_fence.sync.clone(); + let fence_value = gl_fence.value; + + drop(pending); + let status = unsafe { gl.client_wait_sync( - gl_fence.sync, + *sync, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns.min(i32::MAX as u32) as i32, ) }; + drop(sync); + let signalled = match status { glow::ALREADY_SIGNALED | glow::CONDITION_SATISFIED => true, glow::TIMEOUT_EXPIRED | glow::WAIT_FAILED => false, @@ -144,8 +175,7 @@ impl Fence { }; if signalled { - self.last_completed - .fetch_max(gl_fence.value, Ordering::AcqRel); + self.last_completed.fetch_max(fence_value, Ordering::AcqRel); } Ok(signalled) @@ -156,9 +186,12 @@ impl Fence { return; } - for gl_fence in self.pending { + for gl_fence in self.pending.into_inner() { unsafe { - gl.delete_sync(gl_fence.sync); + gl.delete_sync( + Arc::into_inner(gl_fence.sync) + .expect("A function has failed to drop all its references to this"), + ); } } } diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 09ce9da9cac..967fc3b856e 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1901,7 +1901,7 @@ impl crate::Queue for super::Queue { &self, command_buffers: &[&super::CommandBuffer], _surface_textures: &[&super::Texture], - (signal_fence, signal_value): (&mut super::Fence, crate::FenceValue), + (signal_fence, signal_value): (&super::Fence, crate::FenceValue), ) -> Result<(), crate::DeviceError> { let shared = Arc::clone(&self.shared); let gl = &shared.context.lock(); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 2183bd66971..a92933fd980 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1247,7 +1247,7 @@ pub trait Queue: WasmNotSendSync { &self, command_buffers: &[&::CommandBuffer], surface_textures: &[&::SurfaceTexture], - signal_fence: (&mut ::Fence, FenceValue), + signal_fence: (&::Fence, FenceValue), ) -> Result<(), DeviceError>; /// Present a surface texture to the screen. /// diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 508da11cc52..70bf2a7ee49 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1,5 +1,6 @@ use alloc::{borrow::ToOwned as _, sync::Arc, vec::Vec}; use core::{ptr::NonNull, sync::atomic}; +use parking_lot::RwLock; use std::{thread, time}; use bytemuck::TransparentWrapper; @@ -1886,7 +1887,7 @@ impl crate::Device for super::Device { }; Ok(super::Fence { completed_value: Arc::new(atomic::AtomicU64::new(0)), - pending_command_buffers: Vec::new(), + pending_command_buffers: RwLock::new(Vec::new()), shared_event, }) } @@ -1897,7 +1898,8 @@ impl crate::Device for super::Device { unsafe fn get_fence_value(&self, fence: &super::Fence) -> DeviceResult { let mut max_value = fence.completed_value.load(atomic::Ordering::Acquire); - for &(value, ref cmd_buf) in fence.pending_command_buffers.iter() { + let pending_command_buffers = fence.pending_command_buffers.read(); + for &(value, ref cmd_buf) in pending_command_buffers.iter() { if cmd_buf.status() == MTLCommandBufferStatus::Completed { max_value = value; } @@ -1914,18 +1916,22 @@ impl crate::Device for super::Device { return Ok(true); } - let cmd_buf = match fence - .pending_command_buffers + let pending_command_buffers = fence.pending_command_buffers.read(); + + let cmd_buf = match pending_command_buffers .iter() .find(|&&(value, _)| value >= wait_value) { - Some((_, cmd_buf)) => cmd_buf, + Some((_, cmd_buf)) => cmd_buf.clone(), None => { log::error!("No active command buffers for fence value {wait_value}"); return Err(crate::DeviceError::Lost); } }; + // Make sure that nothing is blocked during the actual wait. + drop(pending_command_buffers); + let start = time::Instant::now(); loop { if let MTLCommandBufferStatus::Completed = cmd_buf.status() { diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index d7edd7ce6e3..9f7cb1601bd 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -531,7 +531,7 @@ impl crate::Queue for Queue { &self, command_buffers: &[&CommandBuffer], _surface_textures: &[&SurfaceTexture], - (signal_fence, signal_value): (&mut Fence, crate::FenceValue), + (signal_fence, signal_value): (&Fence, crate::FenceValue), ) -> Result<(), crate::DeviceError> { autoreleasepool(|_| { let extra_command_buffer = { @@ -557,6 +557,7 @@ impl crate::Queue for Queue { signal_fence.maintain(); signal_fence .pending_command_buffers + .write() .push((signal_value, raw.clone())); if let Some(shared_event) = &signal_fence.shared_event { @@ -1022,13 +1023,15 @@ unsafe impl Sync for QuerySet {} pub struct Fence { completed_value: Arc, /// The pending fence values have to be ascending. - pending_command_buffers: Vec<( - crate::FenceValue, - Retained>, - )>, + pending_command_buffers: RwLock>, shared_event: Option>>, } +type PendingCommandBuffer = ( + crate::FenceValue, + Retained>, +); + impl crate::DynFence for Fence {} unsafe impl Send for Fence {} @@ -1037,7 +1040,8 @@ unsafe impl Sync for Fence {} impl Fence { fn get_latest(&self) -> crate::FenceValue { let mut max_value = self.completed_value.load(atomic::Ordering::Acquire); - for &(value, ref cmd_buf) in self.pending_command_buffers.iter() { + let pending_command_buffers = self.pending_command_buffers.read(); + for &(value, ref cmd_buf) in pending_command_buffers.iter() { if cmd_buf.status() == MTLCommandBufferStatus::Completed { max_value = value; } @@ -1045,9 +1049,10 @@ impl Fence { max_value } - fn maintain(&mut self) { + fn maintain(&self) { let latest = self.get_latest(); self.pending_command_buffers + .write() .retain(|&(value, _)| value > latest); } diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs index 563b1b27de9..c8491ded4ef 100644 --- a/wgpu-hal/src/noop/mod.rs +++ b/wgpu-hal/src/noop/mod.rs @@ -262,7 +262,7 @@ impl crate::Queue for Context { &self, command_buffers: &[&CommandBuffer], surface_textures: &[&Resource], - (fence, fence_value): (&mut Fence, crate::FenceValue), + (fence, fence_value): (&Fence, crate::FenceValue), ) -> DeviceResult<()> { // All commands are executed synchronously. for cb in command_buffers { diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index f5f76de4eac..c7c8129ad46 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -10,7 +10,7 @@ use core::{ use arrayvec::ArrayVec; use ash::{ext, vk}; use hashbrown::hash_map::Entry; -use parking_lot::Mutex; +use parking_lot::{Mutex, RwLock}; use super::{conv, descriptor::DescriptorCounts, RawTlasInstance}; use crate::TlasInstance; @@ -2279,11 +2279,11 @@ impl crate::Device for super::Device { super::Fence::TimelineSemaphore(raw) } else { - super::Fence::FencePool { + super::Fence::FencePool(RwLock::new(super::FencePool { last_completed: 0, active: Vec::new(), free: Vec::new(), - } + })) }) } unsafe fn destroy_fence(&self, fence: super::Fence) { @@ -2291,13 +2291,17 @@ impl crate::Device for super::Device { super::Fence::TimelineSemaphore(raw) => { unsafe { self.shared.raw.destroy_semaphore(raw, None) }; } - super::Fence::FencePool { - active, - free, - last_completed: _, - } => { + super::Fence::FencePool(pool) => { + let super::FencePool { + active, + free, + last_completed: _, + } = pool.into_inner(); + for (_, raw) in active { - unsafe { self.shared.raw.destroy_fence(raw, None) }; + unsafe { + self.shared.raw.destroy_fence(Arc::into_inner(raw).expect("Fence should have its reference count be one by the end of each function"), None) + }; } for raw in free { unsafe { self.shared.raw.destroy_fence(raw, None) }; @@ -2829,17 +2833,28 @@ impl super::DeviceShared { Err(other) => Err(super::map_host_device_oom_and_lost_err(other)), } } - super::Fence::FencePool { - last_completed, - ref active, - free: _, - } => { + super::Fence::FencePool(ref pool) => { + let pool = pool.read(); + let super::FencePool { + last_completed, + ref active, + free: _, + } = *pool; if wait_value <= last_completed { Ok(true) } else { match active.iter().find(|&&(value, _)| value >= wait_value) { - Some(&(_, raw)) => { - match unsafe { self.raw.wait_for_fences(&[raw], true, timeout_ns) } { + Some((_, fence)) => { + // clone to show we are using this fence while the pool is unlocked. + let fence = fence.clone(); + drop(pool); + match unsafe { + self.raw.wait_for_fences( + core::slice::from_ref(&fence), + true, + timeout_ns, + ) + } { Ok(()) => Ok(true), Err(vk::Result::TIMEOUT) => Ok(false), Err(other) => Err(super::map_host_device_oom_and_lost_err(other)), diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 124df07c8aa..7d18b6bd48d 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -1162,14 +1162,29 @@ pub enum Fence { /// for each queue submission we might want to wait for, and remember which /// [`FenceValue`] each one represents. /// + /// One should keep the fence pool read while there are any references to the + /// fences inside of them. This ensures there are no race conditions when + /// resetting the fences + /// /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences /// [`FenceValue`]: crate::FenceValue - FencePool { - last_completed: crate::FenceValue, - /// The pending fence values have to be ascending. - active: Vec<(crate::FenceValue, vk::Fence)>, - free: Vec, - }, + FencePool(RwLock), +} + +/// A shared fence type. The arc is expect to have a ref-count of one once a function has finished being called +/// +/// A fence should have access synchronised as fence resetting might happen at any point. Resetting checks the ref-count +/// of the fence, so instead of copying the fence, it should have its `Arc` container cloned which shows not to reset +/// this fence as it is being used. +pub(super) type SynchronizedFence = Arc; + +#[derive(Debug)] +pub struct FencePool { + last_completed: crate::FenceValue, + /// The pending fence values have to be ascending. + active: Vec<(crate::FenceValue, SynchronizedFence)>, + // Don't need extra synchronisation around the fences here, if they are used they should be put into active. + free: Vec, } impl crate::DynFence for Fence {} @@ -1186,13 +1201,15 @@ impl Fence { fn check_active( device: &ash::Device, mut last_completed: crate::FenceValue, - active: &[(crate::FenceValue, vk::Fence)], + active: &[(crate::FenceValue, SynchronizedFence)], ) -> Result { - for &(value, raw) in active.iter() { + for &(value, ref raw) in active.iter() { unsafe { if value > last_completed && device - .get_fence_status(raw) + // Don't need to clone as active should be from a read or + // write lock which means this is already synchronised. + .get_fence_status(**raw) .map_err(map_host_device_oom_and_lost_err)? { last_completed = value; @@ -1221,11 +1238,14 @@ impl Fence { .map_err(map_host_device_oom_and_lost_err)?, }) }, - Self::FencePool { - last_completed, - ref active, - free: _, - } => Self::check_active(device, last_completed, active), + Self::FencePool(ref pool) => { + let FencePool { + last_completed, + ref active, + free: _, + } = *pool.read(); + Self::check_active(device, last_completed, active) + } } } @@ -1241,23 +1261,35 @@ impl Fence { /// /// [`FencePool`]: Fence::FencePool /// [`Queue::submit`]: crate::Queue::submit - fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> { + fn maintain(&self, device: &ash::Device) -> Result<(), crate::DeviceError> { match *self { Self::TimelineSemaphore(_) => {} - Self::FencePool { - ref mut last_completed, - ref mut active, - ref mut free, - } => { - let latest = Self::check_active(device, *last_completed, active)?; + Self::FencePool(ref pool) => { + let FencePool { + ref mut last_completed, + ref mut active, + ref mut free, + } = *pool.write(); + let base_free = free.len(); - for &(value, raw) in active.iter() { - if value <= latest { - free.push(raw); + let latest = Self::check_active(device, *last_completed, active)?; + + active.retain_mut(|&mut (value, ref mut fence)| { + if value > latest { + true + } else if let Some(fence) = Arc::get_mut(fence) { + // No other references to these, so we have exclusive access. Add them to free and reset them later, + // but drop them from active immediately + free.push(*fence); + false + } else { + // some other function is using it. Although this shouldn't be to long, + // maintain shouldn't block, and it should be cleared up by the next time it happens + true } - } + }); + if free.len() != base_free { - active.retain(|&(value, _)| value > latest); unsafe { device.reset_fences(&free[base_free..]) } .map_err(map_device_oom_err)? } @@ -1275,7 +1307,7 @@ impl crate::Queue for Queue { &self, command_buffers: &[&CommandBuffer], surface_textures: &[&SurfaceTexture], - (signal_fence, signal_value): (&mut Fence, crate::FenceValue), + (signal_fence, signal_value): (&Fence, crate::FenceValue), ) -> Result<(), crate::DeviceError> { let mut fence_raw = vk::Fence::null(); @@ -1343,25 +1375,33 @@ impl crate::Queue for Queue { // We need to signal our wgpu::Fence if we have one, this adds it to the signal list. signal_fence.maintain(&self.device.raw)?; + // Keeping the Arc around is probably unneeded - the fence should never be signaled as it was reset, + // and newer submits should not happen until this submit is done. Therefore, it should be too high + // to be reset. + let shared_fence; match *signal_fence { Fence::TimelineSemaphore(raw) => { signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value)); } - Fence::FencePool { - ref mut active, - ref mut free, - .. - } => { - fence_raw = match free.pop() { - Some(raw) => raw, + Fence::FencePool(ref pool) => { + let FencePool { + ref mut active, + ref mut free, + .. + } = *pool.write(); + shared_fence = match free.pop() { + Some(raw) => Arc::new(raw), None => unsafe { - self.device + let fence = self + .device .raw .create_fence(&vk::FenceCreateInfo::default(), None) - .map_err(map_host_device_oom_err)? + .map_err(map_host_device_oom_err)?; + Arc::new(fence) }, }; - active.push((signal_value, fence_raw)); + fence_raw = *shared_fence; + active.push((signal_value, shared_fence.clone())); } }