diff --git a/CHANGELOG.md b/CHANGELOG.md index 86481abe968..0133f63ff73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ Bottom level categories: #### General - `Features::CLIP_DISTANCE`, `naga::Capabilities::CLIP_DISTANCE`, and `naga::BuiltIn::ClipDistance` have been renamed to `CLIP_DISTANCES` and `ClipDistances` (viz., pluralized) as appropriate, to match the WebGPU spec. By @ErichDonGubler in [#9267](https://github.com/gfx-rs/wgpu/pull/9267). +- `Surface::present` has been moved to `Queue::present`. By @inner-daemons in [#9222](https://github.com/gfx-rs/wgpu/pull/9222). #### Validation @@ -67,6 +68,7 @@ Bottom level categories: #### General - Fix limit comparison logic for `max_inter_stage_shader_variables` By @ErichDonGubler in [9264](https://github.com/gfx-rs/wgpu/pull/9264). +- Fix several bugs related to surface presentation, including presenting without rendering and dropping queue after presenting. By @inner-daemons in [#9222](https://github.com/gfx-rs/wgpu/pull/9222). 
#### naga diff --git a/Cargo.lock b/Cargo.lock index 69422820a7b..f5532a9ba1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4625,6 +4625,16 @@ dependencies = [ "winit", ] +[[package]] +name = "wgpu-bug-repro-02-present-bugs" +version = "0.0.0" +dependencies = [ + "env_logger", + "pollster", + "wgpu", + "winit", +] + [[package]] name = "wgpu-core" version = "29.0.0" diff --git a/examples/bug-repro/01_texture_atomic_bug/Cargo.toml b/examples/bug-repro/01_texture_atomic_bug/Cargo.toml index d06c80a5848..13d847a6e3d 100644 --- a/examples/bug-repro/01_texture_atomic_bug/Cargo.toml +++ b/examples/bug-repro/01_texture_atomic_bug/Cargo.toml @@ -7,5 +7,5 @@ publish = false [dependencies] env_logger = "0.11" pollster = "0.4" -wgpu = "29.0.0" +wgpu.workspace = true winit = "0.30.8" diff --git a/examples/bug-repro/01_texture_atomic_bug/src/main.rs b/examples/bug-repro/01_texture_atomic_bug/src/main.rs index 86ebfc7cf21..1888e2eb015 100644 --- a/examples/bug-repro/01_texture_atomic_bug/src/main.rs +++ b/examples/bug-repro/01_texture_atomic_bug/src/main.rs @@ -335,6 +335,6 @@ impl State { } self.queue.submit([enc.finish()]); - frame.present(); + self.queue.present(frame); } } diff --git a/examples/bug-repro/02_present_bugs/Cargo.toml b/examples/bug-repro/02_present_bugs/Cargo.toml new file mode 100644 index 00000000000..72c9c50259c --- /dev/null +++ b/examples/bug-repro/02_present_bugs/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "wgpu-bug-repro-02-present-bugs" +edition = "2021" +rust-version = "1.87" +publish = false + +[dependencies] +env_logger = "0.11" +pollster = "0.4" +wgpu.workspace = true +winit = "0.30.8" diff --git a/examples/bug-repro/02_present_bugs/src/main.rs b/examples/bug-repro/02_present_bugs/src/main.rs new file mode 100644 index 00000000000..191ce86ce5d --- /dev/null +++ b/examples/bug-repro/02_present_bugs/src/main.rs @@ -0,0 +1,145 @@ +//! Repro for various issues with queue presentation +//! +//! 
The 2 current bugs being tested are presentation after no usage of surface texture +//! and queue destruction immediately after present + +use std::sync::Arc; + +use winit::application::ApplicationHandler; +use winit::event::WindowEvent; +use winit::event_loop::{ActiveEventLoop, EventLoop}; +use winit::window::{Window, WindowId}; + +fn main() { + env_logger::init(); + let event_loop = EventLoop::new().unwrap(); + event_loop.set_control_flow(winit::event_loop::ControlFlow::Poll); + let mut app = App::default(); + event_loop.run_app(&mut app).unwrap(); +} + +#[derive(Default)] +struct App { + state: Option, +} + +struct State { + window: Arc, + instance: wgpu::Instance, + device: wgpu::Device, + queue: Option, + surface: wgpu::Surface<'static>, + surface_config: wgpu::SurfaceConfiguration, +} + +impl ApplicationHandler for App { + fn resumed(&mut self, event_loop: &ActiveEventLoop) { + if self.state.is_some() { + return; + } + let window = Arc::new( + event_loop + .create_window(Window::default_attributes().with_title("Presentation bugs")) + .unwrap(), + ); + self.state = Some(State::new(window)); + } + + fn window_event( + &mut self, + event_loop: &ActiveEventLoop, + _window_id: WindowId, + event: WindowEvent, + ) { + let Some(state) = &mut self.state else { return }; + match event { + WindowEvent::CloseRequested => event_loop.exit(), + WindowEvent::Resized(size) if size.width > 0 && size.height > 0 => { + state.surface_config.width = size.width; + state.surface_config.height = size.height; + state + .surface + .configure(&state.device, &state.surface_config); + } + WindowEvent::RedrawRequested => { + let frame = match state.surface.get_current_texture() { + wgpu::CurrentSurfaceTexture::Success(f) => f, + wgpu::CurrentSurfaceTexture::Suboptimal(_) + | wgpu::CurrentSurfaceTexture::Outdated => { + state + .surface + .configure(&state.device, &state.surface_config); + return; + } + wgpu::CurrentSurfaceTexture::Lost => { + state.surface = + 
state.instance.create_surface(state.window.clone()).unwrap(); + state + .surface + .configure(&state.device, &state.surface_config); + return; + } + _ => return, + }; + let Some(queue) = state.queue.take() else { + return; + }; + // Immediately present the surface texture (with nothing on it) and then drop the queue, which should cause a full wait. + queue.present(frame); + event_loop.exit(); + } + _ => {} + } + } + + fn about_to_wait(&mut self, _event_loop: &ActiveEventLoop) { + if let Some(state) = &self.state { + state.window.request_redraw(); + } + } +} + +impl State { + fn new(window: Arc) -> Self { + let size = window.inner_size(); + let width = size.width.max(1); + let height = size.height.max(1); + + let mut instance_desc = wgpu::InstanceDescriptor::new_without_display_handle_from_env(); + instance_desc.flags |= wgpu::InstanceFlags::advanced_debugging(); + let instance = wgpu::Instance::new(instance_desc); + let surface = instance.create_surface(window.clone()).unwrap(); + let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { + compatible_surface: Some(&surface), + ..Default::default() + })) + .expect("No adapter"); + + println!("Adapter: {:?}", adapter.get_info().name); + + let (device, queue) = + pollster::block_on(adapter.request_device(&Default::default())).unwrap(); + + let surface_format = surface.get_capabilities(&adapter).formats[0]; + let surface_config = wgpu::SurfaceConfiguration { + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + format: surface_format, + width, + height, + present_mode: wgpu::PresentMode::AutoVsync, + alpha_mode: wgpu::CompositeAlphaMode::Auto, + view_formats: vec![], + desired_maximum_frame_latency: 2, + }; + surface.configure(&device, &surface_config); + + State { + window, + instance, + device, + queue: Some(queue), + surface, + surface_config, + } + } +} diff --git a/examples/features/src/framework.rs b/examples/features/src/framework.rs index 4c57be910e9..4a229f6ba58 100644 --- 
a/examples/features/src/framework.rs +++ b/examples/features/src/framework.rs @@ -548,7 +548,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + context.queue.present(frame); } if let Some(window) = &self.window { diff --git a/examples/features/src/hello_triangle/mod.rs b/examples/features/src/hello_triangle/mod.rs index 962da3ff994..1907e6ed198 100644 --- a/examples/features/src/hello_triangle/mod.rs +++ b/examples/features/src/hello_triangle/mod.rs @@ -289,7 +289,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + wgpu_state.queue.present(frame); } WindowEvent::Occluded(is_occluded) => { if !is_occluded { diff --git a/examples/features/src/hello_windows/mod.rs b/examples/features/src/hello_windows/mod.rs index e1810bc154d..6ac054e7d64 100644 --- a/examples/features/src/hello_windows/mod.rs +++ b/examples/features/src/hello_windows/mod.rs @@ -249,7 +249,7 @@ impl ApplicationHandler for App { queue.submit(Some(encoder.finish())); viewport.desc.window.pre_present_notify(); - frame.present(); + queue.present(frame); } } WindowEvent::Occluded(is_occluded) => { diff --git a/examples/features/src/uniform_values/mod.rs b/examples/features/src/uniform_values/mod.rs index c1392f1e64c..d155f0ad65f 100644 --- a/examples/features/src/uniform_values/mod.rs +++ b/examples/features/src/uniform_values/mod.rs @@ -470,7 +470,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + wgpu_ctx.queue.present(frame); } WindowEvent::Occluded(is_occluded) => { if !is_occluded { diff --git a/examples/standalone/02_hello_window/src/main.rs b/examples/standalone/02_hello_window/src/main.rs index 7da68e6e2f9..9f102e82aa6 100644 --- a/examples/standalone/02_hello_window/src/main.rs +++ b/examples/standalone/02_hello_window/src/main.rs @@ -136,7 +136,7 @@ impl State { // 
Submit the command in the queue to execute self.queue.submit([encoder.finish()]); self.window.pre_present_notify(); - surface_texture.present(); + self.queue.present(surface_texture); } } diff --git a/examples/standalone/custom_backend/src/custom.rs b/examples/standalone/custom_backend/src/custom.rs index 676158e83fb..a119933d451 100644 --- a/examples/standalone/custom_backend/src/custom.rs +++ b/examples/standalone/custom_backend/src/custom.rs @@ -380,6 +380,10 @@ impl QueueInterface for CustomQueue { fn compact_blas(&self, _blas: &DispatchBlas) -> (Option, DispatchBlas) { unimplemented!() } + + fn present(&self, _detail: &wgpu::custom::DispatchSurfaceOutputDetail) -> u64 { + unimplemented!() + } } #[derive(Debug)] diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs index ed7a4ffc861..0892844d522 100644 --- a/player/src/bin/play.rs +++ b/player/src/bin/play.rs @@ -201,10 +201,10 @@ fn main() { ); self.player.get_surface_texture(id, surface); } - Some(trace::Action::Present(_id)) => { + Some(trace::Action::Present(_id, _surface_id)) => { self.frame_count += 1; log::debug!("Presenting frame {}", self.frame_count); - surface.present().unwrap(); + self.queue.present(surface).unwrap(); break; } Some(trace::Action::DiscardSurfaceTexture(_id)) => { diff --git a/player/src/lib.rs b/player/src/lib.rs index 7e2100aa6c4..f73e976e7e7 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -125,7 +125,7 @@ impl Player { panic!("Unexpected Action::Init: has to be the first action only") } Action::ConfigureSurface { .. } - | Action::Present(_) + | Action::Present(..) 
| Action::DiscardSurfaceTexture(_) => { panic!("Unexpected Surface action: winit feature is not enabled") } diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index aef58019f8a..9ab45f5c5de 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -191,6 +191,20 @@ pub(crate) struct LifetimeTracker { /// must happen _after_ all mapped buffer callbacks are mapped, so we defer them /// here until the next time the device is maintained. work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>, + + /// Surface textures waiting for a presentation to complete. + /// + /// Each entry holds the [`SubmissionIndex`] that was active when + /// [`present`][LifetimeTracker::track_present] was called, together with + /// the texture whose lifetime must extend until the presentation is done. + /// A presentation is considered complete once a real queue submission with + /// index **strictly greater** than the recorded index has finished (i.e. + /// something was submitted *after* the present and that work completed). + /// + /// In the absence of any subsequent submission the queue drop path calls + /// [`drain_pending_presents`][LifetimeTracker::drain_pending_presents] + /// after a full [`wait_for_idle`][hal::Queue::wait_for_idle]. + pending_presents: Vec<(SubmissionIndex, Arc)>, } impl LifetimeTracker { @@ -200,12 +214,45 @@ impl LifetimeTracker { ready_to_map: Vec::new(), ready_to_compact: Vec::new(), work_done_closures: SmallVec::new(), + pending_presents: Vec::new(), } } - /// Return true if there are no queue submissions still in flight. + /// Return true if there are no queue submissions still in flight and no + /// presentations are waiting to be confirmed complete. pub fn queue_empty(&self) -> bool { - self.active.is_empty() + self.active.is_empty() && self.pending_presents.is_empty() + } + + /// Return the submission index of the oldest pending presentation, if any. 
+ /// + /// Used by [`Queue::wait_for_present`] to decide whether to wait on a + /// fence or call [`wait_for_idle`][hal::Queue::wait_for_idle]. + /// + /// [`Queue::wait_for_present`]: super::queue::Queue::wait_for_present + pub fn oldest_pending_present_index(&self) -> Option { + self.pending_presents.first().map(|(idx, _)| *idx) + } + + /// Keep `texture` alive until a submission after `index` completes. + /// + /// Call this immediately after a successful HAL present, passing the + /// `active_submission_index` that was current at the time of the present. + /// The texture is released by [`triage_submissions`][Self::triage_submissions] + /// once `last_done > index`, or explicitly via + /// [`drain_pending_presents`][Self::drain_pending_presents]. + pub fn track_present(&mut self, index: SubmissionIndex, texture: Arc) { + self.pending_presents.push((index, texture)); + } + + /// Release all pending present textures unconditionally. + /// + /// This must only be called after the queue is known to be fully idle + /// (e.g. after [`wait_for_idle`][hal::Queue::wait_for_idle]), since there + /// may not have been a subsequent submission to trigger normal release via + /// [`triage_submissions`][Self::triage_submissions]. + pub fn drain_pending_presents(&mut self) { + self.pending_presents.clear(); } /// Start tracking resources associated with a new queue submission. @@ -321,6 +368,13 @@ impl LifetimeTracker { } work_done_closures.extend(a.work_done_closures); } + + // Release surface textures whose presentations are now complete. + // A presentation is complete once a submission with index strictly + // greater than the recorded present index has finished. 
+ self.pending_presents + .retain(|(present_index, _)| last_done <= *present_index); + work_done_closures } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 30008354fe2..1ddca513123 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -19,10 +19,10 @@ use crate::device::trace::{Action, IntoTrace}; use crate::{ api_log, command::{ - extract_texture_selector, validate_linear_texture_data, validate_texture_buffer_copy, - validate_texture_copy_dst_format, validate_texture_copy_range, ClearError, - CommandAllocator, CommandBuffer, CommandEncoder, CommandEncoderError, CopySide, - TransferError, + clear_texture, extract_texture_selector, validate_linear_texture_data, + validate_texture_buffer_copy, validate_texture_copy_dst_format, + validate_texture_copy_range, ClearError, CommandAllocator, CommandBuffer, CommandEncoder, + CommandEncoderError, CopySide, TransferError, }, device::{DeviceError, WaitIdleError}, get_lowest_common_denom, @@ -142,6 +142,40 @@ impl Queue { queue_empty, ) } + + /// Wait for a presentation with the given `present_index` to complete. + /// + /// A presentation is complete once a real queue submission with an index + /// strictly greater than `present_index` has finished executing on the GPU. + /// If such a submission has already been issued, this waits for its fence. + /// Otherwise — no subsequent submission has been made — the whole queue is + /// drained via [`wait_for_idle`][hal::Queue::wait_for_idle]. + pub(crate) fn wait_for_present( + &self, + present_index: SubmissionIndex, + ) -> Result<(), DeviceError> { + let last_successful = self + .device + .last_successful_submission_index + .load(Ordering::Acquire); + + if last_successful > present_index { + // A submission made after the present has completed (or is in + // flight); wait for the fence to reach that index. 
+ let fence = self.device.fence.read(); + unsafe { + self.device + .raw() + .wait(fence.as_ref(), last_successful, None) + .map(|_| ()) + .map_err(|e| self.device.handle_hal_error(e)) + } + } else { + // No submission has been made after the present yet; drain the + // entire queue so we know the present is complete. + unsafe { self.raw().wait_for_idle() }.map_err(|e| self.device.handle_hal_error(e)) + } + } } crate::impl_resource_type!(Queue); @@ -158,81 +192,28 @@ impl Drop for Queue { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); + // Wait for all GPU work — submissions *and* any pending presentations — + // to complete before releasing resources. + match unsafe { self.raw.wait_for_idle() } { + Ok(()) => {} + Err(hal::DeviceError::Lost) => { + self.device.handle_hal_error(hal::DeviceError::Lost); + } + Err(e) => { + panic!("Unexpected error while waiting for queue idle on drop: {e:?}"); + } + } + + // Drain pending present textures: the GPU is now idle so all + // presentations have completed regardless of whether a subsequent + // submission was ever made. + self.lock_life().drain_pending_presents(); + let last_successful_submission_index = self .device .last_successful_submission_index .load(Ordering::Acquire); - let fence = self.device.fence.read(); - - // Try waiting on the last submission using the following sequence of timeouts - let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200]; - - for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() { - let is_last_iter = i == timeouts_in_ms.len() - 1; - - api_log!( - "Waiting on last submission. try: {}/{}. 
timeout: {}ms", - i + 1, - timeouts_in_ms.len(), - timeout_ms - ); - - let wait_res = unsafe { - self.device.raw().wait( - fence.as_ref(), - last_successful_submission_index, - #[cfg(not(target_arch = "wasm32"))] - Some(core::time::Duration::from_millis(timeout_ms)), - #[cfg(target_arch = "wasm32")] - Some(core::time::Duration::ZERO), // WebKit and Chromium don't support a non-0 timeout - ) - }; - // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU). - match wait_res { - Ok(true) => break, - Ok(false) => { - // It's fine that we timed out on WebGL; GL objects can be deleted early as they - // will be kept around by the driver if GPU work hasn't finished. - // Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other - // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands. - // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and - // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538. - #[cfg(target_arch = "wasm32")] - { - break; - } - #[cfg(not(target_arch = "wasm32"))] - { - if is_last_iter { - panic!( - "We timed out while waiting on the last successful submission to complete!" - ); - } - } - } - Err(e) => match e { - hal::DeviceError::OutOfMemory => { - if is_last_iter { - panic!( - "We ran into an OOM error while waiting on the last successful submission to complete!" - ); - } - } - hal::DeviceError::Lost => { - self.device.handle_hal_error(e); // will lose the device - break; - } - hal::DeviceError::Unexpected => { - panic!( - "We ran into an unexpected error while waiting on the last successful submission to complete!" 
- ); - } - }, - } - } - drop(fence); - let snatch_guard = self.device.snatchable_lock.read(); let (submission_closures, mapping_closures, blas_compact_ready_closures, queue_empty) = self.maintain(last_successful_submission_index, &snatch_guard); @@ -829,7 +810,7 @@ impl Queue { .collect::>>() { let mut trackers = self.device.trackers.lock(); - crate::command::clear_texture( + clear_texture( &dst, TextureInitRange { mip_range: destination.mip_level..(destination.mip_level + 1), @@ -1086,7 +1067,7 @@ impl Queue { .collect::>>() { let mut trackers = self.device.trackers.lock(); - crate::command::clear_texture( + clear_texture( &dst, TextureInitRange { mip_range: destination.mip_level..(destination.mip_level + 1), @@ -1186,6 +1167,145 @@ impl Queue { } } + /// If a surface texture was acquired but never submitted in a command buffer, + /// clear it and transition it to the `PRESENT` state so the HAL present call + /// succeeds. Both operations are skipped when the texture has already been + /// properly handled by a prior [`Queue::submit`] call. + pub(crate) fn prepare_surface_texture_for_present( + &self, + texture: &Arc, + ) -> Result<(), DeviceError> { + // Fast path: if every mip/layer has been initialized the texture was + // submitted (render passes initialized and transitioned it) — skip. + { + let status = texture.initialization_status.read(); + let is_uninitialized = status + .mips + .first() + .is_some_and(|mip| mip.check(0..1).is_some()); + if !is_uninitialized { + return Ok(()); + } + } + + let device = &self.device; + + // Hold a read snatch guard so we can safely access the texture's raw + // throughout this function. + let snatch_guard = device.snatchable_lock.read(); + + { + let mut pending_writes = self.pending_writes.lock(); + + { + // Step 1: Clear via render pass. This also records the + // UNINITIALIZED → COLOR_TARGET barrier in `encoder` and + // updates the device tracker to COLOR_TARGET. 
+ let encoder = pending_writes.activate(); + let mut trackers = device.trackers.lock(); + clear_texture( + texture, + TextureInitRange { + mip_range: 0..1, + layer_range: 0..1, + }, + encoder, + &mut trackers.textures, + &device.alignments, + device.zero_buffer.as_ref(), + &snatch_guard, + device.instance_flags, + ) + .map_err(|e| match e { + ClearError::Device(e) => e, + _ => DeviceError::Lost, + })?; + // `encoder` borrow ends here; `trackers` dropped below. + + // Step 2: Transition COLOR_TARGET → PRESENT. Collect the + // pending transitions as plain values (no lifetime on the + // tracker) so we can drop `trackers` before recording them. + let pending_present: Vec> = trackers + .textures + .set_single( + texture, + texture.full_range.clone(), + wgt::TextureUses::PRESENT, + ) + .collect(); + + drop(trackers); + + let raw_texture = texture + .try_raw(&snatch_guard) + .map_err(|_| DeviceError::Lost)?; + let present_barriers: Vec> = + pending_present + .into_iter() + .map(|pt| pt.into_hal(raw_texture)) + .collect(); + + unsafe { + pending_writes + .command_encoder + .as_mut() + .transition_textures(&present_barriers); + } + // `present_barriers` dropped here, releasing the reference to + // `raw_texture` (and through it to `snatch_guard`). + } + + // Step 3: Mark the texture's content as initialized (it was just cleared). + texture.initialization_status.write().mips[0].drain(0..1); + + // Step 4: Finalize the pending-writes encoder. + let encoder_in_flight = + pending_writes.pre_submit(&device.command_allocator, device, self)?; + + // Step 5: Assign a fresh submission index for the mini-submit. + let mini_index = { + let mut indices = device.command_indices.write(); + indices.active_submission_index += 1; + indices.active_submission_index + }; + + // Step 6: Submit. Fence lock must be acquired after snatch lock. 
+ let hal_cmd_bufs = encoder_in_flight + .as_ref() + .map(|e| e.inner.list.iter().map(|b| b.as_ref()).collect::>()) + .unwrap_or_default(); + + let raw_surface_tex = match texture.inner.get(&snatch_guard) { + Some(TextureInner::Surface { raw }) => raw.as_ref(), + _ => return Err(DeviceError::Lost), + }; + + { + let mut fence = device.fence.write(); + unsafe { + self.raw().submit( + &hal_cmd_bufs, + &[raw_surface_tex], + (fence.as_mut(), mini_index), + ) + } + .map_err(|e| device.handle_hal_error(e))?; + } + + device + .last_successful_submission_index + .store(mini_index, Ordering::Release); + + // Step 7: Track the submission so the command buffer is kept alive + // until the GPU finishes with it. + if let Some(encoder) = encoder_in_flight { + self.lock_life().track_submission(mini_index, vec![encoder]); + } + } + + Ok(()) + } + pub fn submit( &self, command_buffers: &[Arc], diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 73043f2e458..5aba6352f60 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -928,6 +928,28 @@ impl Device { // - When `queue` goes out of scope here, `Queue::drop` runs and tries to acquire the // snatch guard — but Thread A (this thread) still holds it, causing a deadlock. drop(snatch_guard); + + // If this was a wait request, resolve any pending presentations + // whose index is at or before the wait target. Only those + // presents are part of the operation being waited on; presents + // with a higher index (made *after* the target) must not cause + // an unintended stall. + // + // `wait_for_present` either waits for the next real fence signal + // (if a submission after the present already exists) or falls + // back to a full queue idle when no subsequent submission has + // been made. 
+ if let Some(wait_target) = wait_submission_index { + if let Some(present_index) = queue.lock_life().oldest_pending_present_index() { + if present_index <= wait_target { + if let Err(e) = queue.wait_for_present(present_index) { + let hal_error: WaitIdleError = e.into(); + return (user_closures, Err(hal_error)); + } + queue_empty = queue.lock_life().queue_empty(); + } + } + } } else { drop(snatch_guard); }; diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 9a64fd216d2..c0f302d9522 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -152,7 +152,7 @@ pub enum Action<'a, R: ReferenceType> { id: R::Texture, parent: R::Surface, }, - Present(R::Surface), + Present(crate::SubmissionIndex, R::Surface), DiscardSurfaceTexture(R::Surface), CreateBindGroupLayout( PointerId, diff --git a/wgpu-core/src/device/trace/record.rs b/wgpu-core/src/device/trace/record.rs index 6e8e686fea7..f72b99ad865 100644 --- a/wgpu-core/src/device/trace/record.rs +++ b/wgpu-core/src/device/trace/record.rs @@ -866,7 +866,7 @@ fn action_to_owned(action: Action<'_, PointerReferences>) -> Action<'static, Poi A::DestroyExternalTexture(external_texture) => A::DestroyExternalTexture(external_texture), A::DestroySampler(sampler) => A::DestroySampler(sampler), A::GetSurfaceTexture { id, parent } => A::GetSurfaceTexture { id, parent }, - A::Present(surface) => A::Present(surface), + A::Present(a, b) => A::Present(a, b), A::DiscardSurfaceTexture(surface) => A::DiscardSurfaceTexture(surface), A::DestroyBindGroupLayout(layout) => A::DestroyBindGroupLayout(layout), A::GetRenderPipelineBindGroupLayout { diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 524cc1f6dec..640882ec546 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -9,18 +9,21 @@ When this texture is presented, we remove it from the device tracker as well as extract it from the hub. 
!*/ -use alloc::{sync::Arc, vec::Vec}; -use core::mem::ManuallyDrop; +use alloc::{boxed::Box, sync::Arc, vec::Vec}; +use core::{mem::ManuallyDrop, sync::atomic::Ordering}; #[cfg(feature = "trace")] use crate::device::trace::{Action, IntoTrace}; use crate::{ conv, - device::{Device, DeviceError, MissingDownlevelFlags, WaitIdleError}, + device::{ + queue::Queue, Device, DeviceError, DeviceMismatch, MissingDownlevelFlags, WaitIdleError, + }, global::Global, hal_label, id, instance::Surface, - resource, + resource::{self, Labeled}, + SubmissionIndex, }; use thiserror::Error; @@ -151,6 +154,135 @@ pub struct SurfaceOutput { pub texture: Option, } +impl Queue { + /// Present a surface texture and return the submission index that was + /// active at the time of the present. + /// + /// The returned [`SubmissionIndex`] can be passed to + /// [`Queue::wait_for_present`] to wait for the presentation to complete. + /// The surface texture is kept alive internally until a subsequent queue + /// submission (with index strictly greater than the returned value) + /// finishes, or until the queue is dropped. 
+ pub fn present(&self, surface: &Surface) -> Result<(Status, SubmissionIndex), SurfaceError> { + profiling::scope!("Queue::present"); + + self.device.check_is_valid()?; + + let mut presentation_lock = surface.presentation.lock(); + let presentation = presentation_lock.as_mut(); + + if let Some(presentation) = presentation.as_ref() { + let same_device = Arc::ptr_eq(&presentation.device, &self.device); + + if !same_device { + return Err(SurfaceError::Device(DeviceError::DeviceMismatch(Box::new( + DeviceMismatch { + res: self.error_ident(), + res_device: self.device.error_ident(), + target: None, + target_device: presentation.device.error_ident(), + }, + )))); + } + } + + let present = match presentation { + Some(present) => present, + None => return Err(SurfaceError::NotConfigured), + }; + + let texture = present + .acquired_texture + .take() + .ok_or(SurfaceError::AlreadyAcquired)?; + + let device = &self.device; + + // If the texture was acquired but never rendered to / submitted, clear + // it and transition it to the PRESENT state before presenting. + self.prepare_surface_texture_for_present(&texture)?; + + let mut exclusive_snatch_guard = device.snatchable_lock.write(); + let inner = texture.inner.snatch(&mut exclusive_snatch_guard); + drop(exclusive_snatch_guard); + + let result = match inner { + None => return Err(SurfaceError::TextureDestroyed), + Some(resource::TextureInner::Surface { raw }) => { + let raw_surface = surface.raw(device.backend()).unwrap(); + let _fence_lock = device.fence.write(); + unsafe { self.raw().present(raw_surface, raw) } + } + _ => unreachable!(), + }; + + // Assign a fresh submission index to this present so it is distinct + // from any prior real queue submission. This means: + // + // - Waiting for the *submission* index (N) will not accidentally + // trigger present resolution — the present is at N+1, which is + // strictly after N. 
+ // - Waiting for the *present* index (N+1) will correctly block + // until the GPU is done with the presentation. + // + // We also update `last_successful_submission_index` so that the normal + // `Device::maintain` wait-validity check does not reject a poll for + // this index. + let present_index = { + let mut indices = device.command_indices.write(); + indices.active_submission_index += 1; + indices.active_submission_index + }; + + match result { + Ok(()) => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Good, present_index)) + } + Err(err) => match err { + hal::SurfaceError::Timeout => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Timeout, present_index)) + } + hal::SurfaceError::Occluded => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Occluded, present_index)) + } + hal::SurfaceError::Lost => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Lost, present_index)) + } + hal::SurfaceError::Device(err) => { + Err(SurfaceError::from(device.handle_hal_error(err))) + } + hal::SurfaceError::Outdated => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Outdated, present_index)) + } + hal::SurfaceError::Other(msg) => { + log::error!("present error: {msg}"); + Err(SurfaceError::Invalid) + } + }, + } + } +} + impl Surface { pub fn get_current_texture(&self) -> Result { profiling::scope!("Surface::get_current_texture"); @@ -273,58 +405,6 @@ impl Surface { Ok(ResolvedSurfaceOutput { status, texture }) } 
- pub fn present(&self) -> Result { - profiling::scope!("Surface::present"); - - let mut presentation = self.presentation.lock(); - let present = match presentation.as_mut() { - Some(present) => present, - None => return Err(SurfaceError::NotConfigured), - }; - - let device = &present.device; - - device.check_is_valid()?; - let queue = device.get_queue().unwrap(); - - let texture = present - .acquired_texture - .take() - .ok_or(SurfaceError::AlreadyAcquired)?; - - let mut exclusive_snatch_guard = device.snatchable_lock.write(); - let inner = texture.inner.snatch(&mut exclusive_snatch_guard); - drop(exclusive_snatch_guard); - - let result = match inner { - None => return Err(SurfaceError::TextureDestroyed), - Some(resource::TextureInner::Surface { raw }) => { - let raw_surface = self.raw(device.backend()).unwrap(); - let raw_queue = queue.raw(); - let _fence_lock = device.fence.write(); - unsafe { raw_queue.present(raw_surface, raw) } - } - _ => unreachable!(), - }; - - match result { - Ok(()) => Ok(Status::Good), - Err(err) => match err { - hal::SurfaceError::Timeout => Ok(Status::Timeout), - hal::SurfaceError::Occluded => Ok(Status::Occluded), - hal::SurfaceError::Lost => Ok(Status::Lost), - hal::SurfaceError::Device(err) => { - Err(SurfaceError::from(device.handle_hal_error(err))) - } - hal::SurfaceError::Outdated => Ok(Status::Outdated), - hal::SurfaceError::Other(msg) => { - log::error!("present error: {msg}"); - Err(SurfaceError::Invalid) - } - }, - } - } - pub fn discard(&self) -> Result<(), SurfaceError> { profiling::scope!("Surface::discard"); @@ -395,17 +475,50 @@ impl Global { }) } + pub fn queue_present( + &self, + queue_id: id::QueueId, + surface_id: id::SurfaceId, + ) -> Result<(Status, SubmissionIndex), SurfaceError> { + let queue = self.hub.queues.get(queue_id); + let surface = self.surfaces.get(surface_id); + + let result = queue.present(&surface); + + #[cfg(feature = "trace")] + if let Ok((_, present_index)) = &result { + if let Some(presentation) 
= surface.presentation.lock().as_ref() { + if let Some(ref mut trace) = *presentation.device.trace.lock() { + trace.add(Action::Present(*present_index, surface.to_trace())); + } + } + } + + result + } + + /// TODO: is this needed by deno? pub fn surface_present(&self, surface_id: id::SurfaceId) -> Result { let surface = self.surfaces.get(surface_id); + let queue = { + let lock = surface.presentation.lock(); + let present = lock.as_ref().ok_or(SurfaceError::NotConfigured)?; + present.device.get_queue().unwrap() + }; + + let result = queue.present(&surface); + #[cfg(feature = "trace")] - if let Some(present) = surface.presentation.lock().as_ref() { - if let Some(ref mut trace) = *present.device.trace.lock() { - trace.add(Action::Present(surface.to_trace())); + if let Ok((_, present_index)) = &result { + if let Some(present) = surface.presentation.lock().as_ref() { + if let Some(ref mut trace) = *present.device.trace.lock() { + trace.add(Action::Present(*present_index, surface.to_trace())); + } } } - surface.present() + result.map(|(status, _)| status) } pub fn surface_texture_discard(&self, surface_id: id::SurfaceId) -> Result<(), SurfaceError> { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index ac520b270ff..f3bbc1bd58a 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -1653,6 +1653,26 @@ impl crate::Queue for Queue { let frequency = unsafe { self.raw.GetTimestampFrequency() }.expect("GetTimestampFrequency"); (1_000_000_000.0 / frequency as f64) as f32 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + let mut device = Default::default(); + unsafe { + self.raw + .GetDevice::(&mut device) + .into_device_result("Queue::wait_for_idle GetDevice")?; + } + let device = device.unwrap(); + + let fence: Direct3D12::ID3D12Fence = + unsafe { device.CreateFence(0, Direct3D12::D3D12_FENCE_FLAG_NONE) } + .into_device_result("Queue::wait_for_idle CreateFence")?; + unsafe { self.raw.Signal(&fence, 1) 
}.into_device_result("Queue::wait_for_idle Signal")?; + let event = Event::create(false, false)?; + unsafe { fence.SetEventOnCompletion(1, event.0) } + .into_device_result("Queue::wait_for_idle SetEventOnCompletion")?; + unsafe { Threading::WaitForSingleObject(event.0, Threading::INFINITE) }; + Ok(()) + } } #[derive(Debug)] pub struct DxilPassthroughShader { diff --git a/wgpu-hal/src/dynamic/queue.rs b/wgpu-hal/src/dynamic/queue.rs index 2af6d1f9c77..04744a9b4fa 100644 --- a/wgpu-hal/src/dynamic/queue.rs +++ b/wgpu-hal/src/dynamic/queue.rs @@ -20,6 +20,7 @@ pub trait DynQueue: DynResource { texture: Box, ) -> Result<(), SurfaceError>; unsafe fn get_timestamp_period(&self) -> f32; + unsafe fn wait_for_idle(&self) -> Result<(), DeviceError>; } impl DynQueue for Q { @@ -53,4 +54,8 @@ unsafe fn get_timestamp_period(&self) -> f32 { unsafe { Q::get_timestamp_period(self) } } + + unsafe fn wait_for_idle(&self) -> Result<(), DeviceError> { + unsafe { Q::wait_for_idle(self) } + } } diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index a1f09a5c038..62b31b58c81 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1955,6 +1955,12 @@ impl crate::Queue for super::Queue { unsafe fn get_timestamp_period(&self) -> f32 { 1.0 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + let gl = &self.shared.context.lock(); + unsafe { gl.finish() }; + Ok(()) + } } #[cfg(send_sync)] diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 3f36e675d40..5c2942c220c 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1251,6 +1251,10 @@ pub trait Queue: WasmNotSendSync { texture: ::SurfaceTexture, ) -> Result<(), SurfaceError>; unsafe fn get_timestamp_period(&self) -> f32; + /// Block until all queue operations have completed. + /// + /// This is useful for waiting on presentations, which in Vulkan don't have fences. 
+ unsafe fn wait_for_idle(&self) -> Result<(), DeviceError>; } /// Encoder and allocation pool for `CommandBuffer`s. diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 3945a995a83..94da77d46db 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -604,6 +604,16 @@ impl crate::Queue for Queue { unsafe fn get_timestamp_period(&self) -> f32 { self.timestamp_period } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + autoreleasepool(|_| { + let cmd_buf = self.shared.raw.commandBuffer().unwrap(); + cmd_buf.setLabel(Some(ns_string!("(wgpu internal) wait_for_idle"))); + cmd_buf.commit(); + cmd_buf.waitUntilCompleted(); + }); + Ok(()) + } } #[derive(Debug)] diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs index c1dc6ffcbde..edc946cb71b 100644 --- a/wgpu-hal/src/noop/mod.rs +++ b/wgpu-hal/src/noop/mod.rs @@ -349,6 +349,10 @@ impl crate::Queue for Context { unsafe fn get_timestamp_period(&self) -> f32 { 1.0 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + Ok(()) + } } impl crate::Device for Context { diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 2fbb629894f..9ceb1e35889 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -1363,6 +1363,15 @@ impl crate::Queue for Queue { unsafe fn get_timestamp_period(&self) -> f32 { self.device.timestamp_period } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + unsafe { + self.device + .raw + .queue_wait_idle(self.raw) + .map_err(map_host_device_oom_and_lost_err) + } + } } impl Queue { diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index aa5a0f24eda..3abec7f35a1 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -1,7 +1,7 @@ use alloc::boxed::Box; use core::ops::{Deref, RangeBounds}; -use crate::{api::DeferredCommandBufferActions, *}; +use crate::{api::DeferredCommandBufferActions, api::SurfaceTexture, *}; /// Handle to a command queue 
on a device. /// @@ -343,6 +343,21 @@ impl Queue { unsafe { queue.context.queue_as_hal::(queue) } } + /// Schedule a surface texture to be presented on the owning surface. + /// + /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`]. + /// + /// # Platform dependent behavior + /// + /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface + /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter + /// or synchronize other double buffered state, then these operations should be done before the call to `present`. + pub fn present(&self, mut surface_texture: SurfaceTexture) -> SubmissionIndex { + surface_texture.presented = true; + let index = self.inner.present(&surface_texture.detail); + SubmissionIndex { index } + } + /// Compact a BLAS, it must have had [`Blas::prepare_compaction_async`] called on it and had the /// callback provided called. /// diff --git a/wgpu/src/api/surface.rs b/wgpu/src/api/surface.rs index 82e819051bf..30f10a59712 100644 --- a/wgpu/src/api/surface.rs +++ b/wgpu/src/api/surface.rs @@ -110,7 +110,7 @@ impl Surface<'_> { /// /// In order to present the [`SurfaceTexture`] returned by this method, /// first a [`Queue::submit`] needs to be done with some work rendering to this texture. - /// Then [`SurfaceTexture::present`] needs to be called. + /// Then [`Queue::present`] needs to be called. /// /// If a [`SurfaceTexture`] referencing this surface is alive when [`Surface::configure()`] /// is called, the configure call will panic. 
diff --git a/wgpu/src/api/surface_texture.rs b/wgpu/src/api/surface_texture.rs index f08c462127c..5126c10bb85 100644 --- a/wgpu/src/api/surface_texture.rs +++ b/wgpu/src/api/surface_texture.rs @@ -19,20 +19,6 @@ static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync); crate::cmp::impl_eq_ord_hash_proxy!(SurfaceTexture => .texture.inner); impl SurfaceTexture { - /// Schedule this texture to be presented on the owning surface. - /// - /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`]. - /// - /// # Platform dependent behavior - /// - /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface - /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter - /// or synchronize other double buffered state, then these operations should be done before the call to `present`. - pub fn present(mut self) { - self.presented = true; - self.detail.present(); - } - #[cfg(custom)] /// Returns custom implementation of SurfaceTexture (if custom backend and is internally T) pub fn as_custom(&self) -> Option<&T> { diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 65f6ba9da12..bdae3071bf0 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -2738,6 +2738,11 @@ impl dispatch::QueueInterface for WebQueue { ) -> (Option, dispatch::DispatchBlas) { unimplemented!("Raytracing not implemented for web") } + + fn present(&self, _detail: &dispatch::DispatchSurfaceOutputDetail) -> u64 { + // Swapchain is presented automatically on the web. + 0 + } } impl Drop for WebQueue { fn drop(&mut self) { @@ -3945,10 +3950,6 @@ impl Drop for WebSurface { } impl dispatch::SurfaceOutputDetailInterface for WebSurfaceOutputDetail { - fn present(&self) { - // Swapchain is presented automatically on the web. - } - fn texture_discard(&self) { // Can't really discard the texture on the web. 
} diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index ce7d1f48eae..a01d6698bfa 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -614,7 +614,6 @@ pub struct CoreTlas { pub struct CoreSurfaceOutputDetail { context: ContextWgpuCore, surface_id: wgc::id::SurfaceId, - error_sink: ErrorSink, } type ErrorSink = Arc>; @@ -2139,6 +2138,18 @@ impl dispatch::QueueInterface for CoreQueue { .into(), ) } + + fn present(&self, detail: &dispatch::DispatchSurfaceOutputDetail) -> u64 { + let detail = detail.as_core(); + match self.context.0.queue_present(self.id, detail.surface_id) { + Ok((_status, index)) => index, + Err(err) => { + self.context + .handle_error_nolabel(&self.error_sink, err, "Queue::present"); + 0 + } + } + } } impl Drop for CoreQueue { @@ -3901,7 +3912,6 @@ impl dispatch::SurfaceInterface for CoreSurface { let output_detail = CoreSurfaceOutputDetail { context: self.context.clone(), surface_id: self.id, - error_sink: error_sink.clone(), } .into(); @@ -3947,16 +3957,6 @@ impl Drop for CoreSurface { } impl dispatch::SurfaceOutputDetailInterface for CoreSurfaceOutputDetail { - fn present(&self) { - match self.context.0.surface_present(self.surface_id) { - Ok(_status) => (), - Err(err) => { - self.context - .handle_error_nolabel(&self.error_sink, err, "Surface::present"); - } - } - } - fn texture_discard(&self) { match self.context.0.surface_texture_discard(self.surface_id) { Ok(_status) => (), diff --git a/wgpu/src/dispatch.rs b/wgpu/src/dispatch.rs index d2b2366ed09..e84d164541c 100644 --- a/wgpu/src/dispatch.rs +++ b/wgpu/src/dispatch.rs @@ -261,6 +261,8 @@ pub trait QueueInterface: CommonTraits { fn on_submitted_work_done(&self, callback: BoxSubmittedWorkDoneCallback); fn compact_blas(&self, blas: &DispatchBlas) -> (Option, DispatchBlas); + + fn present(&self, detail: &DispatchSurfaceOutputDetail) -> u64; } pub trait ShaderModuleInterface: CommonTraits { @@ -578,7 +580,6 @@ pub trait SurfaceInterface: 
CommonTraits { } pub trait SurfaceOutputDetailInterface: CommonTraits { - fn present(&self); fn texture_discard(&self); }