diff --git a/CHANGELOG.md b/CHANGELOG.md index 088a818edda..0ae654af548 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -160,6 +160,7 @@ By @andyleiserson in [#9321](https://github.com/gfx-rs/wgpu/pull/9321). - Fix `SYNC-HAZARD-WRITE-AFTER-PRESENT` on Vulkan when a surface texture is presented without being rendered to. By @inner-daemons and @atlv24 in [#9361](https://github.com/gfx-rs/wgpu/pull/9361). - Fix incorrect checks for dynamic binding bounds when calling an encoder's `set_bind_group` in passes and bundles. By @ErichDonGubler in [#9308](https://github.com/gfx-rs/wgpu/pull/9308). +- Writes from `Queue::write_buffer` are now flushed by calls to `Buffer::map_async` for that same buffer, to prevent reading stale data. `on_submitted_work_done` also now flushes pending writes. By @andyleiserson in [#9307](https://github.com/gfx-rs/wgpu/pull/9307). #### naga diff --git a/cts_runner/test.lst b/cts_runner/test.lst index 8c3f0e5eff8..9104b25305c 100644 --- a/cts_runner/test.lst +++ b/cts_runner/test.lst @@ -12,6 +12,8 @@ unittests:* webgpu:api,operation,adapter,requestAdapter:* webgpu:api,operation,buffers,createBindGroup:buffer_binding_resource:* +webgpu:api,operation,buffers,map:mapAsync,read,* +webgpu:api,operation,buffers,map:mapAsync,write,* webgpu:api,operation,command_buffer,basic:* webgpu:api,operation,command_buffer,copyBufferToBuffer:* fails-if(vulkan) webgpu:api,operation,command_buffer,copyTextureToTexture:copy_depth_stencil:format="depth16unorm" diff --git a/tests/tests/wgpu-gpu/buffer.rs b/tests/tests/wgpu-gpu/buffer.rs index 598c4326025..c1c592d0251 100644 --- a/tests/tests/wgpu-gpu/buffer.rs +++ b/tests/tests/wgpu-gpu/buffer.rs @@ -6,6 +6,7 @@ pub fn all_tests(vec: &mut Vec) { vec.extend([ EMPTY_BUFFER, MAP_OFFSET, + MAP_WITHOUT_SUBMIT, MINIMUM_BUFFER_BINDING_SIZE_LAYOUT, MINIMUM_BUFFER_BINDING_SIZE_DISPATCH, CLEAR_OFFSET_OUTSIDE_RESOURCE_BOUNDS, @@ -184,6 +185,48 @@ static MAP_OFFSET: GpuTestConfiguration = GpuTestConfiguration::new() } }); +/// Mapping a buffer should see data previously written to the buffer, even if there was no +/// intervening submit. +/// +/// Regression test for [#5173](https://github.com/gfx-rs/wgpu/issues/5173). +#[gpu_test] +static MAP_WITHOUT_SUBMIT: GpuTestConfiguration = + GpuTestConfiguration::new().run_async(|ctx| async move { + let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 12, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: true, + }); + + { + let data = (0..12).map(|i| (i % 255) as u8).collect::>(); + let mut mapped = buffer.slice(0..12).get_mapped_range_mut().unwrap(); + assert!(mapped.len() == 12); + mapped.copy_from_slice(&data); + } + + buffer.unmap(); + + buffer + .slice(0..12) + .map_async(wgpu::MapMode::Read, Result::unwrap); + + ctx.async_poll(wgpu::PollType::wait_indefinitely()) + .await + .unwrap(); + + { + let mapped = buffer.slice(0..12).get_mapped_range().unwrap(); + assert!(mapped.len() == 12); + for (i, elt) in mapped.iter().enumerate() { + assert_eq!(*elt, (i % 255) as u8); + } + } + + buffer.unmap(); + }); + /// The WebGPU algorithm [validating shader binding][vsb] requires /// implementations to check that buffer bindings are large enough to /// hold the WGSL `storage` or `uniform` variables they're bound to. diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 864803aa107..ab3045b77d7 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -28,6 +28,7 @@ pub mod ray_tracing; pub mod resource; #[cfg(any(feature = "trace", feature = "replay"))] pub mod trace; +pub(crate) use resource::{FenceReadGuard, FenceWriteGuard}; pub use {life::WaitIdleError, resource::Device}; pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES; diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 4d41081e832..602a1b107a0 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -24,7 +24,7 @@ use crate::{ CommandAllocator, CommandBuffer, CommandEncoder, CommandEncoderError, CopySide, TransferError, }, - device::{DeviceError, WaitIdleError}, + device::{DeviceError, FenceReadGuard, FenceWriteGuard, WaitIdleError}, get_lowest_common_denom, global::Global, hal_label, @@ -125,8 +125,9 @@ impl Queue { &self, texture: &Arc, ) -> Result<(), DeviceError> { + let snatch_guard = self.device.snatchable_lock.read(); + let (mut submission, _index) = self.allocate_submission(snatch_guard); let device = &self.device; - let snatch_guard = device.snatchable_lock.read(); // If the texture is uninitialized it needs to be cleared before presenting let needs_clear = { @@ -137,8 +138,6 @@ impl Queue { .is_some_and(|mip| mip.check(0..1).is_some()) }; - // Note required lock order: snatch -> fence -> pending writes - let mut fence = device.fence.write(); let mut pending_writes = self.pending_writes.lock(); if needs_clear { @@ -154,7 +153,7 @@ impl Queue { &mut trackers.textures, &device.alignments, device.zero_buffer.as_ref(), - &snatch_guard, + &submission.snatch_guard, device.instance_flags, ) .map_err(|e| match e { @@ -189,7 +188,7 @@ impl Queue { // Emit the transition barriers to PRESENT. { let raw_texture = texture - .try_raw(&snatch_guard) + .try_raw(&submission.snatch_guard) .map_err(|_| DeviceError::Lost)?; let barriers: Vec> = pending .into_iter() @@ -210,23 +209,13 @@ impl Queue { pending_writes.insert_texture(texture); // Flush pending writes through the standard submission path. - let mut surface_textures = FastHashMap::default(); - surface_textures.insert(Arc::as_ptr(texture), texture.clone()); + submission + .surface_textures + .insert(Arc::as_ptr(texture), texture.clone()); - let submit_index = { - let mut indices = device.command_indices.write(); - indices.active_submission_index += 1; - indices.active_submission_index - }; + submission.submit(pending_writes)?; - self.submit_with_pending_writes( - pending_writes, - Vec::new(), - surface_textures, - fence.as_mut(), - submit_index, - &snatch_guard, - ) + Ok(()) } pub(crate) fn maintain( @@ -359,9 +348,13 @@ pub(crate) struct EncoderInFlight { /// /// Instead, `Device::pending_writes` owns one of these values, which /// has its own hal command encoder and resource lists. The commands -/// accumulated here are automatically submitted to the queue the next -/// time the user submits a wgpu command buffer, ahead of the user's -/// commands. +/// accumulated here are automatically submitted to the queue at the +/// sooner of: +/// +/// 1. The user's next wgpu command buffer submission. (Pending writes +/// are inserted ahead of the user's commands.) +/// 2. The next `mapAsync` request for a buffer that has pending +/// writes. /// /// Important: /// When locking pending_writes be sure that tracker is not locked @@ -541,8 +534,6 @@ pub enum QueueSubmitError { Queue(#[from] DeviceError), #[error(transparent)] DestroyedResource(#[from] DestroyedResourceError), - #[error(transparent)] - Unmap(#[from] BufferAccessError), #[error("{0} is still mapped")] BufferStillMapped(ResourceErrorIdent), #[error(transparent)] @@ -557,7 +548,6 @@ impl WebGpuError for QueueSubmitError { fn webgpu_error_type(&self) -> ErrorType { match self { Self::Queue(e) => e.webgpu_error_type(), - Self::Unmap(e) => e.webgpu_error_type(), Self::CommandEncoder(e) => e.webgpu_error_type(), Self::ValidateAsActionsError(e) => e.webgpu_error_type(), Self::InvalidResource(e) => e.webgpu_error_type(), @@ -566,6 +556,41 @@ impl WebGpuError for QueueSubmitError { } } +/// A partially-assembled submission. +/// +/// Returned from [`Queue::allocate_submission`] and consumed by [`submit`]. +/// These are internal APIs used in `Queue::submit` and other places within +/// `wgpu-core` that need to submit work. +/// +/// [`submit`]: `PendingSubmission::submit` +pub(crate) struct PendingSubmission<'a> { + queue: &'a Queue, + snatch_guard: SnatchGuard<'a>, + fence: FenceWriteGuard<'a>, + command_index_guard: RwLockWriteGuard<'a, CommandIndices>, + // Command buffers to be executed, along with trackers for the resources they use. + pub executions: Vec, + // Surface textures referenced by command buffers in this submission. These need to be + // passed to the HAL `submit` call. Deduplicated using a hashmap to avoid vulkan + // deadlocking from the same surface texture being submitted multiple times. + pub surface_textures: FastHashMap<*const Texture, Arc>, + pub index: SubmissionIndex, +} + +pub(crate) struct SubmissionResult<'a> { + pub fence: FenceReadGuard<'a>, + pub snatch_guard: SnatchGuard<'a>, +} + +impl<'a> PendingSubmission<'a> { + fn submit( + self, + pending_writes: MutexGuard<'a, PendingWrites>, + ) -> Result, DeviceError> { + self.queue.submit_pending_submission(pending_writes, self) + } +} + //TODO: move out common parts of write_xxx. impl Queue { @@ -1222,6 +1247,36 @@ impl Queue { Ok(()) } + /// Flush `PendingWrites` if it contains a write to `buffer`. + pub fn flush_writes_for_buffer( + &self, + buffer: &Arc, + snatch_guard: SnatchGuard, + ) -> Result<(), BufferAccessError> { + let (submission, _index) = self.allocate_submission(snatch_guard); + + let pending_writes = self.pending_writes.lock(); + if !pending_writes.contains_buffer(buffer) { + return Ok(()); + } + + submission.submit(pending_writes)?; + + Ok(()) + } + + fn flush_pending_writes(&self) -> Result, DeviceError> { + let snatch_guard = self.device.snatchable_lock.read(); + let (submission, submit_index) = self.allocate_submission(snatch_guard); + let pending_writes = self.pending_writes.lock(); + if pending_writes.is_recording { + submission.submit(pending_writes)?; + Ok(Some(submit_index)) + } else { + Ok(None) + } + } + #[cfg(feature = "trace")] fn trace_submission( &self, @@ -1256,30 +1311,16 @@ impl Queue { profiling::scope!("Queue::submit"); api_log!("Queue::submit"); - let submit_index; + let snatch_guard = self.device.snatchable_lock.read(); + let (mut submission, submit_index) = self.allocate_submission(snatch_guard); let res = 'error: { - let snatch_guard = self.device.snatchable_lock.read(); - - // Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks. - let mut fence = self.device.fence.write(); - - let mut command_index_guard = self.device.command_indices.write(); - command_index_guard.active_submission_index += 1; - submit_index = command_index_guard.active_submission_index; - if let Err(e) = self.device.check_is_valid() { break 'error Err(e.into()); } - let mut active_executions = Vec::new(); - let mut used_surface_textures = track::TextureUsageScope::default(); - // Use a hashmap here to deduplicate the surface textures that are used in the command buffers. - // This avoids vulkan deadlocking from the same surface texture being submitted multiple times. - let mut submit_surface_textures_owned = FastHashMap::default(); - { if !command_buffers.is_empty() { profiling::scope!("prepare"); @@ -1319,10 +1360,10 @@ impl Queue { command_buffer, self, &cmd_buf_data, - &snatch_guard, - &mut submit_surface_textures_owned, + &submission.snatch_guard, + &mut submission.surface_textures, &mut used_surface_textures, - &mut command_index_guard, + &mut submission.command_index_guard, ); if let Err(err) = res { #[cfg(feature = "trace")] @@ -1340,7 +1381,9 @@ impl Queue { self.trace_submission(submit_index, commands); } - cmd_buf_data.set_acceleration_structure_dependencies(&snatch_guard); + cmd_buf_data.set_acceleration_structure_dependencies( + &submission.snatch_guard, + ); cmd_buf_data.into_baked_commands() } Err(err) => { @@ -1365,14 +1408,15 @@ impl Queue { //Note: locking the trackers has to be done after the storages let mut trackers = self.device.trackers.lock(); - if let Err(e) = baked.initialize_buffer_memory(&mut trackers, &snatch_guard) + if let Err(e) = + baked.initialize_buffer_memory(&mut trackers, &submission.snatch_guard) { break 'error Err(e.into()); } if let Err(e) = baked.initialize_texture_memory( &mut trackers, &self.device, - &snatch_guard, + &submission.snatch_guard, ) { break 'error Err(e.into()); } @@ -1383,7 +1427,7 @@ impl Queue { baked.encoder.raw.as_mut(), &mut trackers, &baked.trackers, - &snatch_guard, + &submission.snatch_guard, ); if let Err(e) = baked.encoder.close_and_push_front() { @@ -1404,7 +1448,7 @@ impl Queue { .textures .set_from_usage_scope_and_drain_transitions( &used_surface_textures, - &snatch_guard, + &submission.snatch_guard, ) .collect::>(); unsafe { @@ -1417,7 +1461,7 @@ impl Queue { } // done - active_executions.push(EncoderInFlight { + submission.executions.push(EncoderInFlight { inner: baked.encoder, trackers: baked.trackers, temp_resources: baked.temp_resources, @@ -1437,27 +1481,22 @@ impl Queue { let pending_writes = self.pending_writes.lock(); - if let Err(e) = self.submit_with_pending_writes( - pending_writes, - active_executions, - submit_surface_textures_owned, - fence.as_mut(), - submit_index, - &snatch_guard, - ) { - break 'error Err(e.into()); - } - - drop(command_index_guard); + let SubmissionResult { + fence, + snatch_guard, + } = match submission.submit(pending_writes) { + Ok(result) => result, + Err(e) => break 'error Err(e.into()), + }; profiling::scope!("cleanup"); // This will schedule destruction of all resources that are no longer needed // by the user but used in the command stream, among other things. - let fence_guard = RwLockWriteGuard::downgrade(fence); - let (closures, result) = - self.device - .maintain(fence_guard, wgt::PollType::Poll, snatch_guard); + // `device.maintain` consumes and will release the snatch guard. + let (closures, result) = self + .device + .maintain(fence, wgt::PollType::Poll, snatch_guard); match result { Ok(status) => { debug_assert!(matches!( @@ -1490,22 +1529,88 @@ impl Queue { Ok(submit_index) } - /// Flush pending writes and any additional command encoders as a HAL submission. + /// Allocate a submission index and prepare for a submission. + /// + /// This is an internal API used in [`Queue::submit`] and other places within + /// `wgpu-core` that need to submit work. + /// + /// Returns the index and a [`PendingSubmission`]. + /// + /// The caller passes in the already-acquired [`SnatchGuard`]. This function acquires + /// the fence lock and the command index lock. + /// + /// The caller should update the [`PendingSubmission`] members `executions` and + /// `surface_textures` with details of the submission. + /// + /// To finalize and submit the submission, call [`PendingSubmission::submit`] (which is + /// a convenience wrapper around [`Queue::submit_pending_submission`]). + /// + /// After calling this function and before submitting, the caller must acquire the + /// pending writes lock, and pass it to `submit`. + /// + /// It is also acceptable to drop the `PendingSubmission` without submitting. This may + /// be necessary when locks are required to access the state that determines whether a + /// submission is needed. + fn allocate_submission<'a>( + &'a self, + snatch_guard: SnatchGuard<'a>, + ) -> (PendingSubmission<'a>, SubmissionIndex) { + // Lock ordering requires that the fence lock be acquired after the snatch lock and + // before the command index lock. + let fence = self.device.fence.write(); + + let mut command_index_guard = self.device.command_indices.write(); + command_index_guard.active_submission_index += 1; + let index = command_index_guard.active_submission_index; + + let submission = PendingSubmission { + queue: self, + snatch_guard, + fence, + command_index_guard, + executions: Vec::new(), + surface_textures: FastHashMap::default(), + index, + }; + + (submission, index) + } + + /// Finalize and submit a [`PendingSubmission`] that was returned by + /// [`Queue::allocate_submission`]. + /// + /// This is an internal API used in `Queue::submit` and other places within + /// `wgpu-core` that need to submit work. See [`Queue::allocate_submission`] + /// for more details. + /// + /// This function: /// - /// Advances `last_successful_submission_index` and registers the submission with the lifetime tracker. - fn submit_with_pending_writes( + /// - Performs a HAL submission of the pending writes command + /// encoder and any other command encoders that were added to the + /// [`PendingSubmission`]. + /// - Advances `last_successful_submission_index` and registers the + /// submission with the lifetime tracker. + /// - Returns a [`SubmissionResult`], which contains the snatch guard + /// and a downgraded [`FenceReadGuard`]. + fn submit_pending_submission<'a>( &self, mut pending_writes: MutexGuard<'_, PendingWrites>, - mut active_executions: Vec, - mut surface_textures: FastHashMap<*const Texture, Arc>, - fence: &mut dyn hal::DynFence, - submit_index: SubmissionIndex, - snatch_guard: &SnatchGuard, - ) -> Result<(), DeviceError> { + prepared: PendingSubmission<'a>, + ) -> Result, DeviceError> { + let PendingSubmission { + queue: _, + snatch_guard, + mut fence, + command_index_guard, + mut executions, + mut surface_textures, + index: submit_index, + } = prepared; + let mut used_surface_textures = track::TextureUsageScope::default(); used_surface_textures.set_size(self.device.tracker_indices.textures.size()); for texture in pending_writes.dst_textures.values() { - match texture.try_inner(snatch_guard) { + match texture.try_inner(&snatch_guard) { Ok(TextureInner::Native { .. }) => {} Ok(TextureInner::Surface { .. }) => { // Compare the Arcs by pointer as Textures don't implement Eq @@ -1530,7 +1635,7 @@ impl Queue { let texture_barriers = trackers .textures - .set_from_usage_scope_and_drain_transitions(&used_surface_textures, snatch_guard) + .set_from_usage_scope_and_drain_transitions(&used_surface_textures, &snatch_guard) .collect::>(); unsafe { pending_writes @@ -1541,12 +1646,12 @@ impl Queue { match pending_writes.pre_submit(&self.device.command_allocator, &self.device, self) { Ok(Some(pending_execution)) => { - active_executions.insert(0, pending_execution); + executions.insert(0, pending_execution); } Ok(None) => {} Err(e) => return Err(e), } - let hal_command_buffers = active_executions + let hal_command_buffers = executions .iter() .flat_map(|e| e.inner.list.iter().map(|b| b.as_ref())) .collect::>(); @@ -1555,7 +1660,7 @@ impl Queue { let mut submit_surface_textures = SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity(surface_textures.len()); for texture in surface_textures.values() { - let raw = match texture.inner.get(snatch_guard) { + let raw = match texture.inner.get(&snatch_guard) { Some(TextureInner::Surface { raw, .. }) => raw.as_ref(), _ => unreachable!(), }; @@ -1566,24 +1671,30 @@ impl Queue { self.raw().submit( &hal_command_buffers, &submit_surface_textures, - (fence, submit_index), + (fence.as_mut(), submit_index), ) } .map_err(|e| self.device.handle_hal_error(e))?; + // Submissions must have strictly increasing indices, so we must hold the + // command index guard until we have submitted, to prevent another submission + // from claiming the next index and reaching `submit` before we do. + drop(pending_writes); + drop(command_index_guard); + // Advance the successful submission index. self.device .last_successful_submission_index .fetch_max(submit_index, Ordering::SeqCst); } - drop(pending_writes); - // this will register the new submission to the life time tracker - self.lock_life() - .track_submission(submit_index, active_executions); + self.lock_life().track_submission(submit_index, executions); - Ok(()) + Ok(SubmissionResult { + fence: RwLockWriteGuard::downgrade(fence), + snatch_guard, + }) } pub fn get_timestamp_period(&self) -> f32 { @@ -1596,7 +1707,12 @@ impl Queue { closure: SubmittedWorkDoneClosure, ) -> Option { api_log!("Queue::on_submitted_work_done"); - //TODO: flush pending writes + + // A `DeviceError` means we're losing the device anyways, so we can ignore it here + // (mostly to avoid a breaking change to the `on_submitted_work_done` signature + // for an error case that it is unlikely the caller will be able to handle). + let _: Result<_, DeviceError> = self.flush_pending_writes(); + self.lock_life().add_work_done_closure(closure) } @@ -1834,7 +1950,6 @@ impl Global { ) -> SubmissionIndex { api_log!("Queue::on_submitted_work_done {queue_id:?}"); - //TODO: flush pending writes let queue = self.hub.queues.get(queue_id); let result = queue.on_submitted_work_done(closure); result.unwrap_or(0) // '0' means no wait is necessary @@ -1895,7 +2010,7 @@ fn validate_command_buffer( queue: &Queue, cmd_buf_data: &crate::command::CommandBufferMutable, snatch_guard: &SnatchGuard, - submit_surface_textures_owned: &mut FastHashMap<*const Texture, Arc>, + surface_textures: &mut FastHashMap<*const Texture, Arc>, used_surface_textures: &mut track::TextureUsageScope, command_index_guard: &mut RwLockWriteGuard, ) -> Result<(), QueueSubmitError> { @@ -1922,7 +2037,7 @@ fn validate_command_buffer( TextureInner::Native { .. } => false, TextureInner::Surface { .. } => { // Compare the Arcs by pointer as Textures don't implement Eq. - submit_surface_textures_owned.insert(Arc::as_ptr(texture), texture.clone()); + surface_textures.insert(Arc::as_ptr(texture), texture.clone()); true } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index b8809a41e7b..67f03bf9135 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -41,7 +41,7 @@ use crate::{ TextureInitTrackerAction, }, instance::{Adapter, RequestDeviceError}, - lock::{rank, Mutex, RwLock}, + lock::{rank, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}, pipeline::{self, ColorStateError}, pool::ResourcePool, present, @@ -288,6 +288,9 @@ pub struct Device { pub(crate) trace: Mutex>>, } +pub(crate) type FenceReadGuard<'a> = RwLockReadGuard<'a, ManuallyDrop>>; +pub(crate) type FenceWriteGuard<'a> = RwLockWriteGuard<'a, ManuallyDrop>>; + pub(crate) enum DeferredDestroy { TextureViews(WeakVec), BindGroups(WeakVec), @@ -826,7 +829,7 @@ impl Device { /// if there was a timeout or a validation error. pub(crate) fn maintain<'this>( &'this self, - fence: crate::lock::RwLockReadGuard>>, + fence: FenceReadGuard<'_>, poll_type: wgt::PollType, snatch_guard: SnatchGuard, ) -> (UserClosures, Result) { diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 682c59b54a8..aa672dc2661 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -629,9 +629,8 @@ impl Buffer { /// with the error, and the caller is responsible for calling the /// callback. /// - /// A return value of `Ok(0)` roughly means that no wait is necessary, - /// but it does not necessarily mean that the buffer has already been - /// mapped. + /// A return value of `Ok(0)` means that mapping does not need to wait on the queue, but + /// it does not mean that the buffer has already been mapped. fn try_map_async( self: &Arc, offset: wgt::BufferAddress, @@ -693,50 +692,79 @@ impl Buffer { return Err((op, e.into())); } - { + let submit_index = { let snatch_guard = device.snatchable_lock.read(); if let Err(e) = self.check_destroyed(&snatch_guard) { return Err((op, e.into())); } - } - { - let map_state = &mut *self.map_state.lock(); - *map_state = match *map_state { - BufferMapState::Init { .. } | BufferMapState::Active { .. } => { - return Err((op, BufferAccessError::AlreadyMapped)); - } - BufferMapState::Waiting(_) => { - return Err((op, BufferAccessError::MapAlreadyPending)); + { + let map_state = &mut *self.map_state.lock(); + *map_state = match *map_state { + BufferMapState::Init { .. } | BufferMapState::Active { .. } => { + return Err((op, BufferAccessError::AlreadyMapped)); + } + BufferMapState::Waiting(_) => { + return Err((op, BufferAccessError::MapAlreadyPending)); + } + BufferMapState::Idle => BufferMapState::Waiting(BufferPendingMapping { + range: offset..end_offset, + op, + _parent_buffer: self.clone(), + }), + }; + } + + if let Some(queue) = device.get_queue().as_ref() { + match queue.flush_writes_for_buffer(self, snatch_guard) { + Err(err) => { + let state = mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle); + let BufferMapState::Waiting(BufferPendingMapping { op, .. }) = state else { + unreachable!(); + }; + return Err((op, err)); + } + Ok(()) => { + // Schedule the buffer map in the lifetime tracker. + // + // This call searches for use of the buffer by pending submissions. + // If we just flushed pending writes, that search is redundant; we + // already know that mapping needs to wait for the latest submission + // and could implement a special case to directly attach it to that + // submission. However, the queue is searched in reverse, so finding + // that the buffer is used by the latest submission will be fast. + Some(queue.lock_life().map(self).unwrap_or(0)) + } } - BufferMapState::Idle => BufferMapState::Waiting(BufferPendingMapping { - range: offset..end_offset, - op, - _parent_buffer: self.clone(), - }), - }; - } + } else { + None + } + }; - // TODO: we are ignoring the transition here, I think we need to add a barrier - // at the end of the submission + // At this point, `submit_index` is: + // - `Some(index)`, if there is a submission the mapping operation must wait for. + // - `Some(0)`, if we have a queue and there is no submission to wait for. + // - `None`, if we don't have a queue. + // + // TODO(https://github.com/gfx-rs/wgpu/issues/9306): we are ignoring the transition + // here, I think we need to add a barrier at the end of the submission device .trackers .lock() .buffers .set_single(self, internal_use); - let submit_index = if let Some(queue) = device.get_queue() { - queue.lock_life().map(self).unwrap_or(0) // '0' means no wait is necessary + if let Some(index) = submit_index { + Ok(index) } else { + // We don't have a queue, so go ahead and map the buffer. // We can safely unwrap below since we just set the `map_state` to `BufferMapState::Waiting`. let (mut operation, status) = self.map(&device.snatchable_lock.read()).unwrap(); if let Some(callback) = operation.callback.take() { callback(status); } - 0 - }; - - Ok(submit_index) + Ok(0) + } } pub fn get_mapped_range( @@ -820,6 +848,7 @@ impl Buffer { } } /// This function returns [`None`] only if [`Self::map_state`] is not [`BufferMapState::Waiting`]. + /// Other errors are returned within `BufferMapPendingClosure`. #[must_use] pub(crate) fn map(&self, snatch_guard: &SnatchGuard) -> Option { // This _cannot_ be inlined into the match. If it is, the lock will be held