diff --git a/CHANGELOG.md b/CHANGELOG.md index 27e60624b32..76ae2c424ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,7 @@ Bottom level categories: #### General +- `SurfaceTexture::present()` has been replaced by `Queue::present(surface_texture)`. By @inner-daemons and @atlv24 in [#9361](https://github.com/gfx-rs/wgpu/pull/9361). - `Features::CLIP_DISTANCE`, `naga::Capabilities::CLIP_DISTANCE`, and `naga::BuiltIn::ClipDistance` have been renamed to `CLIP_DISTANCES` and `ClipDistances` (viz., pluralized) as appropriate, to match the WebGPU spec. By @ErichDonGubler in [#9267](https://github.com/gfx-rs/wgpu/pull/9267). - Added more granular limits for mesh shaders. By @inner-daemons in [#8739](https://github.com/gfx-rs/wgpu/pull/8739). - Added new `InvalidWorkgroupSizeError`, which is now used by `DrawError::InvalidGroupSize` and `StageError::InvalidWorkgroupSize`. By @andyleiserson in [#9357](https://github.com/gfx-rs/wgpu/pull/9357). @@ -99,6 +100,7 @@ Bottom level categories: #### General +- Fix `SYNC-HAZARD-WRITE-AFTER-PRESENT` on Vulkan when a surface texture is presented without being rendered to. By @inner-daemons and @atlv24 in [#9361](https://github.com/gfx-rs/wgpu/pull/9361). - Fix incorrect checks for dynamic binding bounds when calling an encoder's `set_bind_group` in passes and bundles. By @ErichDonGubler in [#9308](https://github.com/gfx-rs/wgpu/pull/9308). #### naga diff --git a/Cargo.lock b/Cargo.lock index c91689ee892..c3d03ce5ee6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4693,6 +4693,16 @@ dependencies = [ "winit", ] +[[package]] +name = "wgpu-bug-repro-02-present-bugs" +version = "0.0.0" +dependencies = [ + "env_logger", + "pollster", + "wgpu", + "winit", +] + [[package]] name = "wgpu-core" version = "29.0.0" diff --git a/examples/bug-repro/01_texture_atomic_bug/src/main.rs b/examples/bug-repro/01_texture_atomic_bug/src/main.rs index 3d93df4eea3..1f9263b4d0c 100644 --- a/examples/bug-repro/01_texture_atomic_bug/src/main.rs +++ b/examples/bug-repro/01_texture_atomic_bug/src/main.rs @@ -340,6 +340,6 @@ impl State { } self.queue.submit([enc.finish()]); - frame.present(); + self.queue.present(frame); } } diff --git a/examples/bug-repro/02_present_bugs/Cargo.toml b/examples/bug-repro/02_present_bugs/Cargo.toml new file mode 100644 index 00000000000..72c9c50259c --- /dev/null +++ b/examples/bug-repro/02_present_bugs/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "wgpu-bug-repro-02-present-bugs" +edition = "2021" +rust-version = "1.87" +publish = false + +[dependencies] +env_logger = "0.11" +pollster = "0.4" +wgpu.workspace = true +winit = "0.30.8" diff --git a/examples/bug-repro/02_present_bugs/src/main.rs b/examples/bug-repro/02_present_bugs/src/main.rs new file mode 100644 index 00000000000..191ce86ce5d --- /dev/null +++ b/examples/bug-repro/02_present_bugs/src/main.rs @@ -0,0 +1,145 @@ +//! Repro for various issues with queue presentation +//! +//! The 2 current bugs being tested are presentation after no usage of surface texture +//! and queue destruction immediately after present + +use std::sync::Arc; + +use winit::application::ApplicationHandler; +use winit::event::WindowEvent; +use winit::event_loop::{ActiveEventLoop, EventLoop}; +use winit::window::{Window, WindowId}; + +fn main() { + env_logger::init(); + let event_loop = EventLoop::new().unwrap(); + event_loop.set_control_flow(winit::event_loop::ControlFlow::Poll); + let mut app = App::default(); + event_loop.run_app(&mut app).unwrap(); +} + +#[derive(Default)] +struct App { + state: Option, +} + +struct State { + window: Arc, + instance: wgpu::Instance, + device: wgpu::Device, + queue: Option, + surface: wgpu::Surface<'static>, + surface_config: wgpu::SurfaceConfiguration, +} + +impl ApplicationHandler for App { + fn resumed(&mut self, event_loop: &ActiveEventLoop) { + if self.state.is_some() { + return; + } + let window = Arc::new( + event_loop + .create_window(Window::default_attributes().with_title("Presentation bugs")) + .unwrap(), + ); + self.state = Some(State::new(window)); + } + + fn window_event( + &mut self, + event_loop: &ActiveEventLoop, + _window_id: WindowId, + event: WindowEvent, + ) { + let Some(state) = &mut self.state else { return }; + match event { + WindowEvent::CloseRequested => event_loop.exit(), + WindowEvent::Resized(size) if size.width > 0 && size.height > 0 => { + state.surface_config.width = size.width; + state.surface_config.height = size.height; + state + .surface + .configure(&state.device, &state.surface_config); + } + WindowEvent::RedrawRequested => { + let frame = match state.surface.get_current_texture() { + wgpu::CurrentSurfaceTexture::Success(f) => f, + wgpu::CurrentSurfaceTexture::Suboptimal(_) + | wgpu::CurrentSurfaceTexture::Outdated => { + state + .surface + .configure(&state.device, &state.surface_config); + return; + } + wgpu::CurrentSurfaceTexture::Lost => { + state.surface = + state.instance.create_surface(state.window.clone()).unwrap(); + state + .surface + .configure(&state.device, &state.surface_config); + return; + } + _ => return, + }; + let Some(queue) = state.queue.take() else { + return; + }; + // Immediately present the surface texture (with nothing on it) and then drop the queue, which should cause a full wait. + queue.present(frame); + event_loop.exit(); + } + _ => {} + } + } + + fn about_to_wait(&mut self, _event_loop: &ActiveEventLoop) { + if let Some(state) = &self.state { + state.window.request_redraw(); + } + } +} + +impl State { + fn new(window: Arc) -> Self { + let size = window.inner_size(); + let width = size.width.max(1); + let height = size.height.max(1); + + let mut instance_desc = wgpu::InstanceDescriptor::new_without_display_handle_from_env(); + instance_desc.flags |= wgpu::InstanceFlags::advanced_debugging(); + let instance = wgpu::Instance::new(instance_desc); + let surface = instance.create_surface(window.clone()).unwrap(); + let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { + compatible_surface: Some(&surface), + ..Default::default() + })) + .expect("No adapter"); + + println!("Adapter: {:?}", adapter.get_info().name); + + let (device, queue) = + pollster::block_on(adapter.request_device(&Default::default())).unwrap(); + + let surface_format = surface.get_capabilities(&adapter).formats[0]; + let surface_config = wgpu::SurfaceConfiguration { + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + format: surface_format, + width, + height, + present_mode: wgpu::PresentMode::AutoVsync, + alpha_mode: wgpu::CompositeAlphaMode::Auto, + view_formats: vec![], + desired_maximum_frame_latency: 2, + }; + surface.configure(&device, &surface_config); + + State { + window, + instance, + device, + queue: Some(queue), + surface, + surface_config, + } + } +} diff --git a/examples/features/src/framework.rs b/examples/features/src/framework.rs index a3c662e7516..27b9a0118b4 100644 --- a/examples/features/src/framework.rs +++ b/examples/features/src/framework.rs @@ -557,7 +557,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + context.queue.present(frame); } if let Some(window) = &self.window { diff --git a/examples/features/src/hello_triangle/mod.rs b/examples/features/src/hello_triangle/mod.rs index 71cf19e6aeb..6d6c5bb85d8 100644 --- a/examples/features/src/hello_triangle/mod.rs +++ b/examples/features/src/hello_triangle/mod.rs @@ -300,7 +300,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + wgpu_state.queue.present(frame); } WindowEvent::Occluded(is_occluded) => { if !is_occluded { diff --git a/examples/features/src/hello_windows/mod.rs b/examples/features/src/hello_windows/mod.rs index 825b3e3dd90..9d8c5f4cf6b 100644 --- a/examples/features/src/hello_windows/mod.rs +++ b/examples/features/src/hello_windows/mod.rs @@ -255,7 +255,7 @@ impl ApplicationHandler for App { queue.submit(Some(encoder.finish())); viewport.desc.window.pre_present_notify(); - frame.present(); + queue.present(frame); } } WindowEvent::Occluded(is_occluded) => { diff --git a/examples/features/src/uniform_values/mod.rs b/examples/features/src/uniform_values/mod.rs index 672468deb1f..d9d5def4d0f 100644 --- a/examples/features/src/uniform_values/mod.rs +++ b/examples/features/src/uniform_values/mod.rs @@ -481,7 +481,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + wgpu_ctx.queue.present(frame); } WindowEvent::Occluded(is_occluded) => { if !is_occluded { diff --git a/examples/standalone/02_hello_window/src/main.rs b/examples/standalone/02_hello_window/src/main.rs index 1b5f6db43ca..a0326a4314b 100644 --- a/examples/standalone/02_hello_window/src/main.rs +++ b/examples/standalone/02_hello_window/src/main.rs @@ -141,7 +141,7 @@ impl State { // Submit the command in the queue to execute self.queue.submit([encoder.finish()]); self.window.pre_present_notify(); - surface_texture.present(); + self.queue.present(surface_texture); } } diff --git a/examples/standalone/custom_backend/src/custom.rs b/examples/standalone/custom_backend/src/custom.rs index 676158e83fb..a643ee8bd4b 100644 --- a/examples/standalone/custom_backend/src/custom.rs +++ b/examples/standalone/custom_backend/src/custom.rs @@ -380,6 +380,10 @@ impl QueueInterface for CustomQueue { fn compact_blas(&self, _blas: &DispatchBlas) -> (Option, DispatchBlas) { unimplemented!() } + + fn present(&self, _detail: &wgpu::custom::DispatchSurfaceOutputDetail) { + unimplemented!() + } } #[derive(Debug)] diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 9aed35e9704..a66ff71bea7 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -117,6 +117,119 @@ impl Queue { self.life_tracker.lock() } + /// Ensure the surface texture is in the PRESENT state, clearing it if it was never rendered to. + /// Submits any necessary work to the GPU before the HAL present call. + /// + /// See + pub(crate) fn prepare_surface_texture_for_present( + &self, + texture: &Arc, + ) -> Result<(), DeviceError> { + let device = &self.device; + let snatch_guard = device.snatchable_lock.read(); + + // If the texture is uninitialized it needs to be cleared before presenting + let needs_clear = { + let status = texture.initialization_status.read(); + status + .mips + .first() + .is_some_and(|mip| mip.check(0..1).is_some()) + }; + + // Fence lock must be acquired after the snatch lock and before + // pending_writes to match the lock ordering in Queue::submit. + let mut fence = device.fence.write(); + let mut pending_writes = self.pending_writes.lock(); + + if needs_clear { + let encoder = pending_writes.activate(); + let mut trackers = device.trackers.lock(); + crate::command::clear_texture( + texture, + TextureInitRange { + mip_range: 0..1, + layer_range: 0..1, + }, + encoder, + &mut trackers.textures, + &device.alignments, + device.zero_buffer.as_ref(), + &snatch_guard, + device.instance_flags, + ) + .map_err(|e| match e { + ClearError::Device(e) => e, + _ => DeviceError::Lost, + })?; + texture.initialization_status.write().mips[0].drain(0..1); + } + + // Transition the texture to PRESENT in the device tracker. + // If it's already in PRESENT, this produces no barriers and we can skip the submission. + // + // This has to be after any clear_texture call because clear_texture modifies the tracker state internally. + // Computing transitions afterward ensures they reflect the actual current state. + let pending = { + let mut trackers = device.trackers.lock(); + let pending: Vec> = trackers + .textures + .set_single( + texture, + texture.full_range.clone(), + wgt::TextureUses::PRESENT, + ) + .collect(); + pending + }; + + if pending.is_empty() { + return Ok(()); + } + + // Emit the transition barriers to PRESENT. + { + let raw_texture = texture + .try_raw(&snatch_guard) + .map_err(|_| DeviceError::Lost)?; + let barriers: Vec> = pending + .into_iter() + .map(|pt| pt.into_hal(raw_texture)) + .collect(); + + let encoder = pending_writes.activate(); + // SAFETY: + // - The encoder is in the recording state after `activate()` + // - The texture is kept alive by the Arc from `acquired_texture` + unsafe { + encoder.transition_textures(&barriers); + } + } + + // Keep the texture alive in the submission so its clear_view isn't + // destroyed before the GPU finishes the submitted commands. + pending_writes.insert_texture(texture); + + // Flush pending writes through the standard submission path. + let mut surface_textures = FastHashMap::default(); + surface_textures.insert(Arc::as_ptr(texture), texture.clone()); + + let submit_index = { + let mut indices = device.command_indices.write(); + indices.active_submission_index += 1; + indices.active_submission_index + }; + + self.submit_with_pending_writes( + &mut pending_writes, + Vec::new(), + surface_textures, + fence.as_mut(), + submit_index, + &snatch_guard, + ) + } + pub(crate) fn maintain( &self, submission_index: u64, @@ -158,81 +271,23 @@ impl Drop for Queue { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); + // On Vulkan, pending presents are not tracked by fences. + // wait_for_idle covers both fence-tracked submissions and pending presents. + match unsafe { self.raw.wait_for_idle() } { + Ok(()) => {} + Err(hal::DeviceError::Lost) => { + self.device.handle_hal_error(hal::DeviceError::Lost); + } + Err(e) => { + panic!("Unexpected error while waiting for queue idle on drop: {e:?}"); + } + } + let last_successful_submission_index = self .device .last_successful_submission_index .load(Ordering::Acquire); - let fence = self.device.fence.read(); - - // Try waiting on the last submission using the following sequence of timeouts - let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200]; - - for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() { - let is_last_iter = i == timeouts_in_ms.len() - 1; - - api_log!( - "Waiting on last submission. try: {}/{}. timeout: {}ms", - i + 1, - timeouts_in_ms.len(), - timeout_ms - ); - - let wait_res = unsafe { - self.device.raw().wait( - fence.as_ref(), - last_successful_submission_index, - #[cfg(not(target_arch = "wasm32"))] - Some(core::time::Duration::from_millis(timeout_ms)), - #[cfg(target_arch = "wasm32")] - Some(core::time::Duration::ZERO), // WebKit and Chromium don't support a non-0 timeout - ) - }; - // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU). - match wait_res { - Ok(true) => break, - Ok(false) => { - // It's fine that we timed out on WebGL; GL objects can be deleted early as they - // will be kept around by the driver if GPU work hasn't finished. - // Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other - // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands. - // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and - // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538. - #[cfg(target_arch = "wasm32")] - { - break; - } - #[cfg(not(target_arch = "wasm32"))] - { - if is_last_iter { - panic!( - "We timed out while waiting on the last successful submission to complete!" - ); - } - } - } - Err(e) => match e { - hal::DeviceError::OutOfMemory => { - if is_last_iter { - panic!( - "We ran into an OOM error while waiting on the last successful submission to complete!" - ); - } - } - hal::DeviceError::Lost => { - self.device.handle_hal_error(e); // will lose the device - break; - } - hal::DeviceError::Unexpected => { - panic!( - "We ran into an unexpected error while waiting on the last successful submission to complete!" - ); - } - }, - } - } - drop(fence); - let snatch_guard = self.device.snatchable_lock.read(); let (submission_closures, mapping_closures, blas_compact_ready_closures, queue_empty) = self.maintain(last_successful_submission_index, &snatch_guard); @@ -1383,101 +1438,23 @@ impl Queue { let mut pending_writes = self.pending_writes.lock(); - { - used_surface_textures.set_size(self.device.tracker_indices.textures.size()); - for texture in pending_writes.dst_textures.values() { - match texture.try_inner(&snatch_guard) { - Ok(TextureInner::Native { .. }) => {} - Ok(TextureInner::Surface { .. }) => { - // Compare the Arcs by pointer as Textures don't implement Eq - submit_surface_textures_owned - .insert(Arc::as_ptr(texture), texture.clone()); - - unsafe { - used_surface_textures - .merge_single(texture, None, wgt::TextureUses::PRESENT) - .unwrap() - }; - } - // The texture must not have been destroyed when its usage here was - // encoded. If it was destroyed after that, then it was transferred - // to `pending_writes.temp_resources` at the time of destruction, so - // we are still okay to use it. - Err(DestroyedResourceError(_)) => {} - } - } - - if !used_surface_textures.is_empty() { - let mut trackers = self.device.trackers.lock(); - - let texture_barriers = trackers - .textures - .set_from_usage_scope_and_drain_transitions( - &used_surface_textures, - &snatch_guard, - ) - .collect::>(); - unsafe { - pending_writes - .command_encoder - .transition_textures(&texture_barriers); - }; - } - } - - match pending_writes.pre_submit(&self.device.command_allocator, &self.device, self) { - Ok(Some(pending_execution)) => { - active_executions.insert(0, pending_execution); - } - Ok(None) => {} - Err(e) => break 'error Err(e.into()), + if let Err(e) = self.submit_with_pending_writes( + &mut pending_writes, + active_executions, + submit_surface_textures_owned, + fence.as_mut(), + submit_index, + &snatch_guard, + ) { + break 'error Err(e.into()); } - let hal_command_buffers = active_executions - .iter() - .flat_map(|e| e.inner.list.iter().map(|b| b.as_ref())) - .collect::>(); - { - let mut submit_surface_textures = - SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity( - submit_surface_textures_owned.len(), - ); - - for texture in submit_surface_textures_owned.values() { - let raw = match texture.inner.get(&snatch_guard) { - Some(TextureInner::Surface { raw, .. }) => raw.as_ref(), - _ => unreachable!(), - }; - submit_surface_textures.push(raw); - } + drop(command_index_guard); - if let Err(e) = unsafe { - self.raw().submit( - &hal_command_buffers, - &submit_surface_textures, - (fence.as_mut(), submit_index), - ) - } - .map_err(|e| self.device.handle_hal_error(e)) - { - break 'error Err(e.into()); - } - - drop(command_index_guard); - - // Advance the successful submission index. - self.device - .last_successful_submission_index - .fetch_max(submit_index, Ordering::SeqCst); - } + drop(pending_writes); profiling::scope!("cleanup"); - // this will register the new submission to the life time tracker - self.lock_life() - .track_submission(submit_index, active_executions); - drop(pending_writes); - // This will schedule destruction of all resources that are no longer needed // by the user but used in the command stream, among other things. let fence_guard = RwLockWriteGuard::downgrade(fence); @@ -1516,6 +1493,99 @@ impl Queue { Ok(submit_index) } + /// Flush pending writes and any additional command encoders as a HAL submission. + /// + /// Advances `last_successful_submission_index` and registers the submission with the lifetime tracker. + fn submit_with_pending_writes( + &self, + pending_writes: &mut PendingWrites, + mut active_executions: Vec, + mut surface_textures: FastHashMap<*const Texture, Arc>, + fence: &mut dyn hal::DynFence, + submit_index: SubmissionIndex, + snatch_guard: &SnatchGuard, + ) -> Result<(), DeviceError> { + let mut used_surface_textures = track::TextureUsageScope::default(); + used_surface_textures.set_size(self.device.tracker_indices.textures.size()); + for texture in pending_writes.dst_textures.values() { + match texture.try_inner(snatch_guard) { + Ok(TextureInner::Native { .. }) => {} + Ok(TextureInner::Surface { .. }) => { + // Compare the Arcs by pointer as Textures don't implement Eq + surface_textures.insert(Arc::as_ptr(texture), texture.clone()); + + unsafe { + used_surface_textures + .merge_single(texture, None, wgt::TextureUses::PRESENT) + .unwrap() + }; + } + // The texture must not have been destroyed when its usage here was + // encoded. If it was destroyed after that, then it was transferred + // to `pending_writes.temp_resources` at the time of destruction, so + // we are still okay to use it. + Err(DestroyedResourceError(_)) => {} + } + } + + if !used_surface_textures.is_empty() { + let mut trackers = self.device.trackers.lock(); + + let texture_barriers = trackers + .textures + .set_from_usage_scope_and_drain_transitions(&used_surface_textures, snatch_guard) + .collect::>(); + unsafe { + pending_writes + .command_encoder + .transition_textures(&texture_barriers); + }; + } + + match pending_writes.pre_submit(&self.device.command_allocator, &self.device, self) { + Ok(Some(pending_execution)) => { + active_executions.insert(0, pending_execution); + } + Ok(None) => {} + Err(e) => return Err(e), + } + let hal_command_buffers = active_executions + .iter() + .flat_map(|e| e.inner.list.iter().map(|b| b.as_ref())) + .collect::>(); + + { + let mut submit_surface_textures = + SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity(surface_textures.len()); + for texture in surface_textures.values() { + let raw = match texture.inner.get(snatch_guard) { + Some(TextureInner::Surface { raw, .. }) => raw.as_ref(), + _ => unreachable!(), + }; + submit_surface_textures.push(raw); + } + + unsafe { + self.raw().submit( + &hal_command_buffers, + &submit_surface_textures, + (fence, submit_index), + ) + } + .map_err(|e| self.device.handle_hal_error(e))?; + + // Advance the successful submission index. + self.device + .last_successful_submission_index + .fetch_max(submit_index, Ordering::SeqCst); + } + // this will register the new submission to the life time tracker + self.lock_life() + .track_submission(submit_index, active_executions); + + Ok(()) + } + pub fn get_timestamp_period(&self) -> f32 { unsafe { self.raw().get_timestamp_period() } } diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 86e7f453f18..e010e687fad 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -9,18 +9,18 @@ When this texture is presented, we remove it from the device tracker as well as extract it from the hub. !*/ -use alloc::{sync::Arc, vec::Vec}; +use alloc::{boxed::Box, sync::Arc, vec::Vec}; use core::mem::ManuallyDrop; #[cfg(feature = "trace")] use crate::device::trace::{Action, IntoTrace}; use crate::{ conv, - device::{Device, DeviceError, MissingDownlevelFlags, WaitIdleError}, + device::{queue::Queue, Device, DeviceError, MissingDownlevelFlags, WaitIdleError}, global::Global, hal_label, id, instance::Surface, - resource, + resource::{self, Labeled}, }; use thiserror::Error; @@ -49,6 +49,8 @@ pub enum SurfaceError { Device(#[from] DeviceError), #[error("Surface image is already acquired")] AlreadyAcquired, + #[error("No surface image is currently acquired to present")] + NothingToPresent, #[error("Texture has been destroyed")] TextureDestroyed, } @@ -60,6 +62,7 @@ impl WebGpuError for SurfaceError { Self::Invalid | Self::NotConfigured | Self::AlreadyAcquired + | Self::NothingToPresent | Self::TextureDestroyed => ErrorType::Validation, } } @@ -276,21 +279,60 @@ impl Surface { pub fn present(&self) -> Result { profiling::scope!("Surface::present"); - let mut presentation = self.presentation.lock(); - let present = match presentation.as_mut() { + let presentation = self.presentation.lock(); + let present = match presentation.as_ref() { Some(present) => present, None => return Err(SurfaceError::NotConfigured), }; - let device = &present.device; + present.device.check_is_valid()?; + let queue = present + .device + .get_queue() + .ok_or(SurfaceError::Device(DeviceError::Lost))?; + drop(presentation); - device.check_is_valid()?; - let queue = device.get_queue().unwrap(); + queue.present(self) + } +} - let texture = present - .acquired_texture - .take() - .ok_or(SurfaceError::AlreadyAcquired)?; +impl Queue { + pub fn present(&self, surface: &Surface) -> Result { + profiling::scope!("Queue::present"); + + let texture = { + let mut presentation = surface.presentation.lock(); + let present = match presentation.as_mut() { + Some(present) => present, + None => return Err(SurfaceError::NotConfigured), + }; + + let device = &self.device; + + // Check the surface is configured for this device. + if !Arc::ptr_eq(&present.device, device) { + return Err(SurfaceError::Device(DeviceError::DeviceMismatch(Box::new( + crate::device::DeviceMismatch { + res: self.error_ident(), + res_device: device.error_ident(), + target: None, + target_device: present.device.error_ident(), + }, + )))); + } + + present + .acquired_texture + .take() + .ok_or(SurfaceError::NothingToPresent)? + }; + + // If the texture was never rendered to, clear it and transition to + // PRESENT state before presenting. + // Fixes + self.prepare_surface_texture_for_present(&texture)?; + + let device = &self.device; let mut exclusive_snatch_guard = device.snatchable_lock.write(); let inner = texture.inner.snatch(&mut exclusive_snatch_guard); @@ -299,8 +341,8 @@ impl Surface { let result = match inner { None => return Err(SurfaceError::TextureDestroyed), Some(resource::TextureInner::Surface { raw }) => { - let raw_surface = self.raw(device.backend()).unwrap(); - let raw_queue = queue.raw(); + let raw_surface = surface.raw(device.backend()).unwrap(); + let raw_queue = self.raw(); let _fence_lock = device.fence.write(); unsafe { raw_queue.present(raw_surface, raw) } } @@ -324,7 +366,9 @@ impl Surface { }, } } +} +impl Surface { pub fn discard(&self) -> Result<(), SurfaceError> { profiling::scope!("Surface::discard"); @@ -341,7 +385,7 @@ impl Surface { let texture = present .acquired_texture .take() - .ok_or(SurfaceError::AlreadyAcquired)?; + .ok_or(SurfaceError::NothingToPresent)?; let mut exclusive_snatch_guard = device.snatchable_lock.write(); let inner = texture.inner.snatch(&mut exclusive_snatch_guard); diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 0117b7c2d96..e21e303c1aa 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -1,5 +1,5 @@ use alloc::{string::String, sync::Arc, vec::Vec}; -use core::ptr; +use core::{ptr, sync::atomic::AtomicU64}; use std::thread; use parking_lot::Mutex; @@ -1073,11 +1073,21 @@ impl crate::Adapter for super::Adapter { self.compiler_container.clone(), self.options.clone(), )?; + let idle_fence: Direct3D12::ID3D12Fence = unsafe { + self.device + .CreateFence(0, Direct3D12::D3D12_FENCE_FLAG_NONE) + } + .into_device_result("Queue idle fence creation")?; + let idle_event = super::Event::create(false, false)?; + Ok(crate::OpenDevice { device, queue: super::Queue { raw: queue, temp_lists: Mutex::new(Vec::new()), + idle_fence, + idle_event, + idle_fence_value: AtomicU64::new(0), }, }) } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 2afefbe4a6f..b2ae383bf77 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -88,7 +88,7 @@ mod types; mod view; use alloc::{borrow::ToOwned as _, string::String, sync::Arc, vec::Vec}; -use core::{ffi, fmt, mem, ops::Deref}; +use core::{ffi, fmt, mem, ops::Deref, sync::atomic::AtomicU64}; use arrayvec::ArrayVec; use hashbrown::HashMap; @@ -788,6 +788,9 @@ unsafe impl Sync for Device {} pub struct Queue { raw: Direct3D12::ID3D12CommandQueue, temp_lists: Mutex>>, + idle_fence: Direct3D12::ID3D12Fence, + idle_event: Event, + idle_fence_value: AtomicU64, } impl Queue { @@ -1660,6 +1663,22 @@ impl crate::Queue for Queue { let frequency = unsafe { self.raw.GetTimestampFrequency() }.expect("GetTimestampFrequency"); (1_000_000_000.0 / frequency as f64) as f32 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + let value = self + .idle_fence_value + .fetch_add(1, core::sync::atomic::Ordering::Relaxed) + + 1; + unsafe { self.raw.Signal(&self.idle_fence, value) } + .into_device_result("Signal idle fence")?; + unsafe { + self.idle_fence + .SetEventOnCompletion(value, self.idle_event.0) + } + .into_device_result("SetEventOnCompletion")?; + unsafe { Threading::WaitForSingleObject(self.idle_event.0, Threading::INFINITE) }; + Ok(()) + } } #[derive(Debug)] pub struct DxilPassthroughShader { diff --git a/wgpu-hal/src/dynamic/queue.rs b/wgpu-hal/src/dynamic/queue.rs index 2af6d1f9c77..04744a9b4fa 100644 --- a/wgpu-hal/src/dynamic/queue.rs +++ b/wgpu-hal/src/dynamic/queue.rs @@ -20,6 +20,7 @@ pub trait DynQueue: DynResource { texture: Box, ) -> Result<(), SurfaceError>; unsafe fn get_timestamp_period(&self) -> f32; + unsafe fn wait_for_idle(&self) -> Result<(), DeviceError>; } impl DynQueue for Q { @@ -53,4 +54,8 @@ impl DynQueue for Q { unsafe fn get_timestamp_period(&self) -> f32 { unsafe { Q::get_timestamp_period(self) } } + + unsafe fn wait_for_idle(&self) -> Result<(), DeviceError> { + unsafe { Q::wait_for_idle(self) } + } } diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index a67af60044c..09ce9da9cac 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1963,6 +1963,12 @@ impl crate::Queue for super::Queue { unsafe fn get_timestamp_period(&self) -> f32 { 1.0 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + let gl = &self.shared.context.lock(); + unsafe { gl.finish() }; + Ok(()) + } } #[cfg(send_sync)] diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 33991f8bd5d..d8dc3a7045c 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1245,11 +1245,45 @@ pub trait Queue: WasmNotSendSync { surface_textures: &[&::SurfaceTexture], signal_fence: (&mut ::Fence, FenceValue), ) -> Result<(), DeviceError>; + /// Present a surface texture to the screen. + /// + /// This consumes the surface texture, returning it to the swapchain. + /// + /// # Safety + /// + /// - `texture` must have been acquired from `surface` via + /// [`Surface::acquire_texture`] and not yet presented or discarded. + /// - `surface` must be configured for use with the [`Device`][d] associated + /// with this [`Queue`]. + /// - `texture` must be in the "present" state. Either: + /// - It was passed in [`submit`][s]'s `surface_textures` argument + /// (which transitions it to the present state), or + /// - The caller has otherwise transitioned it (e.g. via a clear + + /// barrier to `PRESENT` for textures that were never rendered to). + /// - Any command buffers that write to `texture` must have been submitted + /// via [`submit`][s] before this call. The submissions do not need to + /// have completed on the GPU; platform-level synchronization handles the + /// ordering between rendering and display. + /// - Must be externally synchronized with all other queue operations + /// ([`submit`][s], [`present`][Queue::present], + /// [`wait_for_idle`][Queue::wait_for_idle]) on the same queue. + /// + /// [d]: Api::Device + /// [s]: Queue::submit unsafe fn present( &self, surface: &::Surface, texture: ::SurfaceTexture, ) -> Result<(), SurfaceError>; + /// Block until all previously submitted work on this queue has completed, + /// including any pending presentations. + /// + /// # Safety + /// + /// - Must be externally synchronized with all other queue operations + /// ([`submit`][Queue::submit], [`present`][Queue::present], + /// [`wait_for_idle`][Queue::wait_for_idle]) on the same queue. + unsafe fn wait_for_idle(&self) -> Result<(), DeviceError>; unsafe fn get_timestamp_period(&self) -> f32; } diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 13974241180..c482b9a2a48 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -614,6 +614,16 @@ impl crate::Queue for Queue { unsafe fn get_timestamp_period(&self) -> f32 { self.timestamp_period } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + autoreleasepool(|_| { + let command_buffer = self.shared.raw.commandBuffer().unwrap(); + command_buffer.setLabel(Some(ns_string!("(wgpu internal) wait_for_idle"))); + command_buffer.commit(); + command_buffer.waitUntilCompleted(); + }); + Ok(()) + } } #[derive(Debug)] diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs index 703e0b58d9c..563b1b27de9 100644 --- a/wgpu-hal/src/noop/mod.rs +++ b/wgpu-hal/src/noop/mod.rs @@ -286,6 +286,10 @@ impl crate::Queue for Context { unsafe fn get_timestamp_period(&self) -> f32 { 1.0 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + Ok(()) + } } impl crate::Device for Context { diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 43915a7a0de..f90ad45d4b4 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -1365,6 +1365,11 @@ impl crate::Queue for Queue { unsafe fn get_timestamp_period(&self) -> f32 { self.device.timestamp_period } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + unsafe { self.device.raw.queue_wait_idle(self.raw) } + .map_err(map_host_device_oom_and_lost_err) + } } impl Queue { diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index aa5a0f24eda..2d0fbd70ce9 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -343,6 +343,21 @@ impl Queue { unsafe { queue.context.queue_as_hal::(queue) } } + /// Schedule a surface texture to be presented on the owning surface. + /// + /// Should be called after any work on the texture is submitted via [`Queue::submit`]. + /// If no work was submitted, the texture will be cleared automatically before presenting. + /// + /// # Platform dependent behavior + /// + /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface + /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter + /// or synchronize other double buffered state, then these operations should be done before the call to `present`. + pub fn present(&self, mut surface_texture: SurfaceTexture) { + surface_texture.presented = true; + self.inner.present(&surface_texture.detail); + } + /// Compact a BLAS, it must have had [`Blas::prepare_compaction_async`] called on it and had the /// callback provided called. /// diff --git a/wgpu/src/api/surface.rs b/wgpu/src/api/surface.rs index 45ffd91e5d9..11f6f64564e 100644 --- a/wgpu/src/api/surface.rs +++ b/wgpu/src/api/surface.rs @@ -108,9 +108,8 @@ impl Surface<'_> { /// Returns the next texture to be presented by the surface for drawing. /// - /// In order to present the [`SurfaceTexture`] returned by this method, - /// first a [`Queue::submit`] needs to be done with some work rendering to this texture. - /// Then [`SurfaceTexture::present`] needs to be called. + /// After rendering to the returned [`SurfaceTexture`], submit work via [`Queue::submit`] + /// and then call [`Queue::present`] to display it. /// /// If a [`SurfaceTexture`] referencing this surface is alive when [`Surface::configure()`] /// is called, the configure call will panic. diff --git a/wgpu/src/api/surface_texture.rs b/wgpu/src/api/surface_texture.rs index f08c462127c..5126c10bb85 100644 --- a/wgpu/src/api/surface_texture.rs +++ b/wgpu/src/api/surface_texture.rs @@ -19,20 +19,6 @@ static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync); crate::cmp::impl_eq_ord_hash_proxy!(SurfaceTexture => .texture.inner); impl SurfaceTexture { - /// Schedule this texture to be presented on the owning surface. - /// - /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`]. - /// - /// # Platform dependent behavior - /// - /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface - /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter - /// or synchronize other double buffered state, then these operations should be done before the call to `present`. - pub fn present(mut self) { - self.presented = true; - self.detail.present(); - } - #[cfg(custom)] /// Returns custom implementation of SurfaceTexture (if custom backend and is internally T) pub fn as_custom(&self) -> Option<&T> { diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 2bb0913eb72..8512966c67c 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -2780,6 +2780,10 @@ impl dispatch::QueueInterface for WebQueue { ) -> (Option, dispatch::DispatchBlas) { unimplemented!("Raytracing not implemented for web") } + + fn present(&self, _detail: &dispatch::DispatchSurfaceOutputDetail) { + // Swapchain is presented automatically on the web. + } } impl Drop for WebQueue { fn drop(&mut self) { @@ -4006,10 +4010,6 @@ impl Drop for WebSurface { } impl dispatch::SurfaceOutputDetailInterface for WebSurfaceOutputDetail { - fn present(&self) { - // Swapchain is presented automatically on the web. - } - fn texture_discard(&self) { // Can't really discard the texture on the web. } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 5cdad618278..04f116805e4 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -2142,6 +2142,17 @@ impl dispatch::QueueInterface for CoreQueue { .into(), ) } + + fn present(&self, detail: &dispatch::DispatchSurfaceOutputDetail) { + let detail = detail.as_core(); + match self.context.0.surface_present(detail.surface_id) { + Ok(_status) => (), + Err(err) => { + self.context + .handle_error_nolabel(&self.error_sink, err, "Queue::present"); + } + } + } } impl Drop for CoreQueue { @@ -3991,24 +4002,15 @@ impl Drop for CoreSurface { } impl dispatch::SurfaceOutputDetailInterface for CoreSurfaceOutputDetail { - fn present(&self) { - match self.context.0.surface_present(self.surface_id) { + fn texture_discard(&self) { + match self.context.0.surface_texture_discard(self.surface_id) { Ok(_status) => (), Err(err) => { self.context - .handle_error_nolabel(&self.error_sink, err, "Surface::present"); + .handle_error_nolabel(&self.error_sink, err, "Surface::discard_texture") } } } - - fn texture_discard(&self) { - match self.context.0.surface_texture_discard(self.surface_id) { - Ok(_status) => (), - Err(err) => self - .context - .handle_error_fatal(err, "Surface::discard_texture"), - } - } } impl Drop for CoreSurfaceOutputDetail { fn drop(&mut self) { diff --git a/wgpu/src/dispatch.rs b/wgpu/src/dispatch.rs index 7df5b8c9882..099ad81b5a1 100644 --- a/wgpu/src/dispatch.rs +++ b/wgpu/src/dispatch.rs @@ -261,6 +261,8 @@ pub trait QueueInterface: CommonTraits { fn on_submitted_work_done(&self, callback: BoxSubmittedWorkDoneCallback); fn compact_blas(&self, blas: &DispatchBlas) -> (Option, DispatchBlas); + + fn present(&self, detail: &DispatchSurfaceOutputDetail); } pub trait ShaderModuleInterface: CommonTraits { @@ -588,7 +590,6 @@ pub trait SurfaceInterface: CommonTraits { } pub trait SurfaceOutputDetailInterface: CommonTraits { - fn present(&self); fn texture_discard(&self); }