diff --git a/CHANGELOG.md b/CHANGELOG.md index 86481abe968..0133f63ff73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ Bottom level categories: #### General - `Features::CLIP_DISTANCE`, `naga::Capabilities::CLIP_DISTANCE`, and `naga::BuiltIn::ClipDistance` have been renamed to `CLIP_DISTANCES` and `ClipDistances` (viz., pluralized) as appropriate, to match the WebGPU spec. By @ErichDonGubler in [#9267](https://github.com/gfx-rs/wgpu/pull/9267). +- `Surface::present` has been moved to `Queue::present`. By @inner-daemons in [#9222](https://github.com/gfx-rs/wgpu/pull/9222). #### Validation @@ -67,6 +68,7 @@ Bottom level categories: #### General - Fix limit comparison logic for `max_inter_stage_shader_variables` By @ErichDonGubler in [9264](https://github.com/gfx-rs/wgpu/pull/9264). +- Fix several bugs related to surface presentation, including presenting without rendering and dropping queue after presenting. By @inner-daemons in [#9222](https://github.com/gfx-rs/wgpu/pull/9222). 
#### naga diff --git a/Cargo.lock b/Cargo.lock index 69422820a7b..f5532a9ba1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4625,6 +4625,16 @@ dependencies = [ "winit", ] +[[package]] +name = "wgpu-bug-repro-02-present-bugs" +version = "0.0.0" +dependencies = [ + "env_logger", + "pollster", + "wgpu", + "winit", +] + [[package]] name = "wgpu-core" version = "29.0.0" diff --git a/examples/bug-repro/01_texture_atomic_bug/Cargo.toml b/examples/bug-repro/01_texture_atomic_bug/Cargo.toml index d06c80a5848..13d847a6e3d 100644 --- a/examples/bug-repro/01_texture_atomic_bug/Cargo.toml +++ b/examples/bug-repro/01_texture_atomic_bug/Cargo.toml @@ -7,5 +7,5 @@ publish = false [dependencies] env_logger = "0.11" pollster = "0.4" -wgpu = "29.0.0" +wgpu.workspace = true winit = "0.30.8" diff --git a/examples/bug-repro/01_texture_atomic_bug/src/main.rs b/examples/bug-repro/01_texture_atomic_bug/src/main.rs index 86ebfc7cf21..1888e2eb015 100644 --- a/examples/bug-repro/01_texture_atomic_bug/src/main.rs +++ b/examples/bug-repro/01_texture_atomic_bug/src/main.rs @@ -335,6 +335,6 @@ impl State { } self.queue.submit([enc.finish()]); - frame.present(); + self.queue.present(frame); } } diff --git a/examples/bug-repro/02_present_bugs/Cargo.toml b/examples/bug-repro/02_present_bugs/Cargo.toml new file mode 100644 index 00000000000..72c9c50259c --- /dev/null +++ b/examples/bug-repro/02_present_bugs/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "wgpu-bug-repro-02-present-bugs" +edition = "2021" +rust-version = "1.87" +publish = false + +[dependencies] +env_logger = "0.11" +pollster = "0.4" +wgpu.workspace = true +winit = "0.30.8" diff --git a/examples/bug-repro/02_present_bugs/src/main.rs b/examples/bug-repro/02_present_bugs/src/main.rs new file mode 100644 index 00000000000..191ce86ce5d --- /dev/null +++ b/examples/bug-repro/02_present_bugs/src/main.rs @@ -0,0 +1,145 @@ +//! Repro for various issues with queue presentation +//! +//! 
The 2 current bugs being tested are presentation after no usage of surface texture +//! and queue destruction immediately after present + +use std::sync::Arc; + +use winit::application::ApplicationHandler; +use winit::event::WindowEvent; +use winit::event_loop::{ActiveEventLoop, EventLoop}; +use winit::window::{Window, WindowId}; + +fn main() { + env_logger::init(); + let event_loop = EventLoop::new().unwrap(); + event_loop.set_control_flow(winit::event_loop::ControlFlow::Poll); + let mut app = App::default(); + event_loop.run_app(&mut app).unwrap(); +} + +#[derive(Default)] +struct App { + state: Option, +} + +struct State { + window: Arc, + instance: wgpu::Instance, + device: wgpu::Device, + queue: Option, + surface: wgpu::Surface<'static>, + surface_config: wgpu::SurfaceConfiguration, +} + +impl ApplicationHandler for App { + fn resumed(&mut self, event_loop: &ActiveEventLoop) { + if self.state.is_some() { + return; + } + let window = Arc::new( + event_loop + .create_window(Window::default_attributes().with_title("Presentation bugs")) + .unwrap(), + ); + self.state = Some(State::new(window)); + } + + fn window_event( + &mut self, + event_loop: &ActiveEventLoop, + _window_id: WindowId, + event: WindowEvent, + ) { + let Some(state) = &mut self.state else { return }; + match event { + WindowEvent::CloseRequested => event_loop.exit(), + WindowEvent::Resized(size) if size.width > 0 && size.height > 0 => { + state.surface_config.width = size.width; + state.surface_config.height = size.height; + state + .surface + .configure(&state.device, &state.surface_config); + } + WindowEvent::RedrawRequested => { + let frame = match state.surface.get_current_texture() { + wgpu::CurrentSurfaceTexture::Success(f) => f, + wgpu::CurrentSurfaceTexture::Suboptimal(_) + | wgpu::CurrentSurfaceTexture::Outdated => { + state + .surface + .configure(&state.device, &state.surface_config); + return; + } + wgpu::CurrentSurfaceTexture::Lost => { + state.surface = + 
state.instance.create_surface(state.window.clone()).unwrap(); + state + .surface + .configure(&state.device, &state.surface_config); + return; + } + _ => return, + }; + let Some(queue) = state.queue.take() else { + return; + }; + // Immediately present the surface texture (with nothing on it) and then drop the queue, which should cause a full wait. + queue.present(frame); + event_loop.exit(); + } + _ => {} + } + } + + fn about_to_wait(&mut self, _event_loop: &ActiveEventLoop) { + if let Some(state) = &self.state { + state.window.request_redraw(); + } + } +} + +impl State { + fn new(window: Arc) -> Self { + let size = window.inner_size(); + let width = size.width.max(1); + let height = size.height.max(1); + + let mut instance_desc = wgpu::InstanceDescriptor::new_without_display_handle_from_env(); + instance_desc.flags |= wgpu::InstanceFlags::advanced_debugging(); + let instance = wgpu::Instance::new(instance_desc); + let surface = instance.create_surface(window.clone()).unwrap(); + let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { + compatible_surface: Some(&surface), + ..Default::default() + })) + .expect("No adapter"); + + println!("Adapter: {:?}", adapter.get_info().name); + + let (device, queue) = + pollster::block_on(adapter.request_device(&Default::default())).unwrap(); + + let surface_format = surface.get_capabilities(&adapter).formats[0]; + let surface_config = wgpu::SurfaceConfiguration { + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + format: surface_format, + width, + height, + present_mode: wgpu::PresentMode::AutoVsync, + alpha_mode: wgpu::CompositeAlphaMode::Auto, + view_formats: vec![], + desired_maximum_frame_latency: 2, + }; + surface.configure(&device, &surface_config); + + State { + window, + instance, + device, + queue: Some(queue), + surface, + surface_config, + } + } +} diff --git a/examples/features/src/framework.rs b/examples/features/src/framework.rs index 4c57be910e9..4a229f6ba58 100644 --- 
a/examples/features/src/framework.rs +++ b/examples/features/src/framework.rs @@ -548,7 +548,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + context.queue.present(frame); } if let Some(window) = &self.window { diff --git a/examples/features/src/hello_triangle/mod.rs b/examples/features/src/hello_triangle/mod.rs index 962da3ff994..1907e6ed198 100644 --- a/examples/features/src/hello_triangle/mod.rs +++ b/examples/features/src/hello_triangle/mod.rs @@ -289,7 +289,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + wgpu_state.queue.present(frame); } WindowEvent::Occluded(is_occluded) => { if !is_occluded { diff --git a/examples/features/src/hello_windows/mod.rs b/examples/features/src/hello_windows/mod.rs index e1810bc154d..6ac054e7d64 100644 --- a/examples/features/src/hello_windows/mod.rs +++ b/examples/features/src/hello_windows/mod.rs @@ -249,7 +249,7 @@ impl ApplicationHandler for App { queue.submit(Some(encoder.finish())); viewport.desc.window.pre_present_notify(); - frame.present(); + queue.present(frame); } } WindowEvent::Occluded(is_occluded) => { diff --git a/examples/features/src/uniform_values/mod.rs b/examples/features/src/uniform_values/mod.rs index c1392f1e64c..d155f0ad65f 100644 --- a/examples/features/src/uniform_values/mod.rs +++ b/examples/features/src/uniform_values/mod.rs @@ -470,7 +470,7 @@ impl ApplicationHandler for App { if let Some(window) = &self.window { window.pre_present_notify(); } - frame.present(); + wgpu_ctx.queue.present(frame); } WindowEvent::Occluded(is_occluded) => { if !is_occluded { diff --git a/examples/standalone/02_hello_window/src/main.rs b/examples/standalone/02_hello_window/src/main.rs index 7da68e6e2f9..9f102e82aa6 100644 --- a/examples/standalone/02_hello_window/src/main.rs +++ b/examples/standalone/02_hello_window/src/main.rs @@ -136,7 +136,7 @@ impl State { // 
Submit the command in the queue to execute self.queue.submit([encoder.finish()]); self.window.pre_present_notify(); - surface_texture.present(); + self.queue.present(surface_texture); } } diff --git a/examples/standalone/custom_backend/src/custom.rs b/examples/standalone/custom_backend/src/custom.rs index 676158e83fb..a119933d451 100644 --- a/examples/standalone/custom_backend/src/custom.rs +++ b/examples/standalone/custom_backend/src/custom.rs @@ -380,6 +380,10 @@ impl QueueInterface for CustomQueue { fn compact_blas(&self, _blas: &DispatchBlas) -> (Option, DispatchBlas) { unimplemented!() } + + fn present(&self, _detail: &wgpu::custom::DispatchSurfaceOutputDetail) -> u64 { + unimplemented!() + } } #[derive(Debug)] diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs index ed7a4ffc861..0892844d522 100644 --- a/player/src/bin/play.rs +++ b/player/src/bin/play.rs @@ -201,10 +201,10 @@ fn main() { ); self.player.get_surface_texture(id, surface); } - Some(trace::Action::Present(_id)) => { + Some(trace::Action::Present(_id, _surface_id)) => { self.frame_count += 1; log::debug!("Presenting frame {}", self.frame_count); - surface.present().unwrap(); + self.queue.present(surface).unwrap(); break; } Some(trace::Action::DiscardSurfaceTexture(_id)) => { diff --git a/player/src/lib.rs b/player/src/lib.rs index 7e2100aa6c4..f73e976e7e7 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -125,7 +125,7 @@ impl Player { panic!("Unexpected Action::Init: has to be the first action only") } Action::ConfigureSurface { .. } - | Action::Present(_) + | Action::Present(..) 
| Action::DiscardSurfaceTexture(_) => { panic!("Unexpected Surface action: winit feature is not enabled") } diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index aef58019f8a..9ab45f5c5de 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -191,6 +191,20 @@ pub(crate) struct LifetimeTracker { /// must happen _after_ all mapped buffer callbacks are mapped, so we defer them /// here until the next time the device is maintained. work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>, + + /// Surface textures waiting for a presentation to complete. + /// + /// Each entry holds the [`SubmissionIndex`] that was active when + /// [`present`][LifetimeTracker::track_present] was called, together with + /// the texture whose lifetime must extend until the presentation is done. + /// A presentation is considered complete once a real queue submission with + /// index **strictly greater** than the recorded index has finished (i.e. + /// something was submitted *after* the present and that work completed). + /// + /// In the absence of any subsequent submission the queue drop path calls + /// [`drain_pending_presents`][LifetimeTracker::drain_pending_presents] + /// after a full [`wait_for_idle`][hal::Queue::wait_for_idle]. + pending_presents: Vec<(SubmissionIndex, Arc)>, } impl LifetimeTracker { @@ -200,12 +214,45 @@ impl LifetimeTracker { ready_to_map: Vec::new(), ready_to_compact: Vec::new(), work_done_closures: SmallVec::new(), + pending_presents: Vec::new(), } } - /// Return true if there are no queue submissions still in flight. + /// Return true if there are no queue submissions still in flight and no + /// presentations are waiting to be confirmed complete. pub fn queue_empty(&self) -> bool { - self.active.is_empty() + self.active.is_empty() && self.pending_presents.is_empty() + } + + /// Return the submission index of the oldest pending presentation, if any. 
+ /// + /// Used by [`Queue::wait_for_present`] to decide whether to wait on a + /// fence or call [`wait_for_idle`][hal::Queue::wait_for_idle]. + /// + /// [`Queue::wait_for_present`]: super::queue::Queue::wait_for_present + pub fn oldest_pending_present_index(&self) -> Option { + self.pending_presents.first().map(|(idx, _)| *idx) + } + + /// Keep `texture` alive until a submission after `index` completes. + /// + /// Call this immediately after a successful HAL present, passing the + /// `active_submission_index` that was current at the time of the present. + /// The texture is released by [`triage_submissions`][Self::triage_submissions] + /// once `last_done > index`, or explicitly via + /// [`drain_pending_presents`][Self::drain_pending_presents]. + pub fn track_present(&mut self, index: SubmissionIndex, texture: Arc) { + self.pending_presents.push((index, texture)); + } + + /// Release all pending present textures unconditionally. + /// + /// This must only be called after the queue is known to be fully idle + /// (e.g. after [`wait_for_idle`][hal::Queue::wait_for_idle]), since there + /// may not have been a subsequent submission to trigger normal release via + /// [`triage_submissions`][Self::triage_submissions]. + pub fn drain_pending_presents(&mut self) { + self.pending_presents.clear(); } /// Start tracking resources associated with a new queue submission. @@ -321,6 +368,13 @@ impl LifetimeTracker { } work_done_closures.extend(a.work_done_closures); } + + // Release surface textures whose presentations are now complete. + // A presentation is complete once a submission with index strictly + // greater than the recorded present index has finished. 
+ self.pending_presents + .retain(|(present_index, _)| last_done <= *present_index); + work_done_closures } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 30008354fe2..1ddca513123 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -19,10 +19,10 @@ use crate::device::trace::{Action, IntoTrace}; use crate::{ api_log, command::{ - extract_texture_selector, validate_linear_texture_data, validate_texture_buffer_copy, - validate_texture_copy_dst_format, validate_texture_copy_range, ClearError, - CommandAllocator, CommandBuffer, CommandEncoder, CommandEncoderError, CopySide, - TransferError, + clear_texture, extract_texture_selector, validate_linear_texture_data, + validate_texture_buffer_copy, validate_texture_copy_dst_format, + validate_texture_copy_range, ClearError, CommandAllocator, CommandBuffer, CommandEncoder, + CommandEncoderError, CopySide, TransferError, }, device::{DeviceError, WaitIdleError}, get_lowest_common_denom, @@ -142,6 +142,40 @@ impl Queue { queue_empty, ) } + + /// Wait for a presentation with the given `present_index` to complete. + /// + /// A presentation is complete once a real queue submission with an index + /// strictly greater than `present_index` has finished executing on the GPU. + /// If such a submission has already been issued, this waits for its fence. + /// Otherwise — no subsequent submission has been made — the whole queue is + /// drained via [`wait_for_idle`][hal::Queue::wait_for_idle]. + pub(crate) fn wait_for_present( + &self, + present_index: SubmissionIndex, + ) -> Result<(), DeviceError> { + let last_successful = self + .device + .last_successful_submission_index + .load(Ordering::Acquire); + + if last_successful > present_index { + // A submission made after the present has completed (or is in + // flight); wait for the fence to reach that index. 
+ let fence = self.device.fence.read(); + unsafe { + self.device + .raw() + .wait(fence.as_ref(), last_successful, None) + .map(|_| ()) + .map_err(|e| self.device.handle_hal_error(e)) + } + } else { + // No submission has been made after the present yet; drain the + // entire queue so we know the present is complete. + unsafe { self.raw().wait_for_idle() }.map_err(|e| self.device.handle_hal_error(e)) + } + } } crate::impl_resource_type!(Queue); @@ -158,81 +192,28 @@ impl Drop for Queue { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); + // Wait for all GPU work — submissions *and* any pending presentations — + // to complete before releasing resources. + match unsafe { self.raw.wait_for_idle() } { + Ok(()) => {} + Err(hal::DeviceError::Lost) => { + self.device.handle_hal_error(hal::DeviceError::Lost); + } + Err(e) => { + panic!("Unexpected error while waiting for queue idle on drop: {e:?}"); + } + } + + // Drain pending present textures: the GPU is now idle so all + // presentations have completed regardless of whether a subsequent + // submission was ever made. + self.lock_life().drain_pending_presents(); + let last_successful_submission_index = self .device .last_successful_submission_index .load(Ordering::Acquire); - let fence = self.device.fence.read(); - - // Try waiting on the last submission using the following sequence of timeouts - let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200]; - - for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() { - let is_last_iter = i == timeouts_in_ms.len() - 1; - - api_log!( - "Waiting on last submission. try: {}/{}. 
timeout: {}ms", - i + 1, - timeouts_in_ms.len(), - timeout_ms - ); - - let wait_res = unsafe { - self.device.raw().wait( - fence.as_ref(), - last_successful_submission_index, - #[cfg(not(target_arch = "wasm32"))] - Some(core::time::Duration::from_millis(timeout_ms)), - #[cfg(target_arch = "wasm32")] - Some(core::time::Duration::ZERO), // WebKit and Chromium don't support a non-0 timeout - ) - }; - // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU). - match wait_res { - Ok(true) => break, - Ok(false) => { - // It's fine that we timed out on WebGL; GL objects can be deleted early as they - // will be kept around by the driver if GPU work hasn't finished. - // Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other - // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands. - // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and - // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538. - #[cfg(target_arch = "wasm32")] - { - break; - } - #[cfg(not(target_arch = "wasm32"))] - { - if is_last_iter { - panic!( - "We timed out while waiting on the last successful submission to complete!" - ); - } - } - } - Err(e) => match e { - hal::DeviceError::OutOfMemory => { - if is_last_iter { - panic!( - "We ran into an OOM error while waiting on the last successful submission to complete!" - ); - } - } - hal::DeviceError::Lost => { - self.device.handle_hal_error(e); // will lose the device - break; - } - hal::DeviceError::Unexpected => { - panic!( - "We ran into an unexpected error while waiting on the last successful submission to complete!" 
- ); - } - }, - } - } - drop(fence); - let snatch_guard = self.device.snatchable_lock.read(); let (submission_closures, mapping_closures, blas_compact_ready_closures, queue_empty) = self.maintain(last_successful_submission_index, &snatch_guard); @@ -829,7 +810,7 @@ impl Queue { .collect::>>() { let mut trackers = self.device.trackers.lock(); - crate::command::clear_texture( + clear_texture( &dst, TextureInitRange { mip_range: destination.mip_level..(destination.mip_level + 1), @@ -1086,7 +1067,7 @@ impl Queue { .collect::>>() { let mut trackers = self.device.trackers.lock(); - crate::command::clear_texture( + clear_texture( &dst, TextureInitRange { mip_range: destination.mip_level..(destination.mip_level + 1), @@ -1186,6 +1167,145 @@ impl Queue { } } + /// If a surface texture was acquired but never submitted in a command buffer, + /// clear it and transition it to the `PRESENT` state so the HAL present call + /// succeeds. Both operations are skipped when the texture has already been + /// properly handled by a prior [`Queue::submit`] call. + pub(crate) fn prepare_surface_texture_for_present( + &self, + texture: &Arc, + ) -> Result<(), DeviceError> { + // Fast path: if every mip/layer has been initialized the texture was + // submitted (render passes initialized and transitioned it) — skip. + { + let status = texture.initialization_status.read(); + let is_uninitialized = status + .mips + .first() + .is_some_and(|mip| mip.check(0..1).is_some()); + if !is_uninitialized { + return Ok(()); + } + } + + let device = &self.device; + + // Hold a read snatch guard so we can safely access the texture's raw + // throughout this function. + let snatch_guard = device.snatchable_lock.read(); + + { + let mut pending_writes = self.pending_writes.lock(); + + { + // Step 1: Clear via render pass. This also records the + // UNINITIALIZED → COLOR_TARGET barrier in `encoder` and + // updates the device tracker to COLOR_TARGET. 
+ let encoder = pending_writes.activate(); + let mut trackers = device.trackers.lock(); + clear_texture( + texture, + TextureInitRange { + mip_range: 0..1, + layer_range: 0..1, + }, + encoder, + &mut trackers.textures, + &device.alignments, + device.zero_buffer.as_ref(), + &snatch_guard, + device.instance_flags, + ) + .map_err(|e| match e { + ClearError::Device(e) => e, + _ => DeviceError::Lost, + })?; + // `encoder` borrow ends here; `trackers` dropped below. + + // Step 2: Transition COLOR_TARGET → PRESENT. Collect the + // pending transitions as plain values (no lifetime on the + // tracker) so we can drop `trackers` before recording them. + let pending_present: Vec> = trackers + .textures + .set_single( + texture, + texture.full_range.clone(), + wgt::TextureUses::PRESENT, + ) + .collect(); + + drop(trackers); + + let raw_texture = texture + .try_raw(&snatch_guard) + .map_err(|_| DeviceError::Lost)?; + let present_barriers: Vec> = + pending_present + .into_iter() + .map(|pt| pt.into_hal(raw_texture)) + .collect(); + + unsafe { + pending_writes + .command_encoder + .as_mut() + .transition_textures(&present_barriers); + } + // `present_barriers` dropped here, releasing the reference to + // `raw_texture` (and through it to `snatch_guard`). + } + + // Step 3: Mark the texture's content as initialized (it was just cleared). + texture.initialization_status.write().mips[0].drain(0..1); + + // Step 4: Finalize the pending-writes encoder. + let encoder_in_flight = + pending_writes.pre_submit(&device.command_allocator, device, self)?; + + // Step 5: Assign a fresh submission index for the mini-submit. + let mini_index = { + let mut indices = device.command_indices.write(); + indices.active_submission_index += 1; + indices.active_submission_index + }; + + // Step 6: Submit. Fence lock must be acquired after snatch lock. 
+ let hal_cmd_bufs = encoder_in_flight + .as_ref() + .map(|e| e.inner.list.iter().map(|b| b.as_ref()).collect::>()) + .unwrap_or_default(); + + let raw_surface_tex = match texture.inner.get(&snatch_guard) { + Some(TextureInner::Surface { raw }) => raw.as_ref(), + _ => return Err(DeviceError::Lost), + }; + + { + let mut fence = device.fence.write(); + unsafe { + self.raw().submit( + &hal_cmd_bufs, + &[raw_surface_tex], + (fence.as_mut(), mini_index), + ) + } + .map_err(|e| device.handle_hal_error(e))?; + } + + device + .last_successful_submission_index + .store(mini_index, Ordering::Release); + + // Step 7: Track the submission so the command buffer is kept alive + // until the GPU finishes with it. + if let Some(encoder) = encoder_in_flight { + self.lock_life().track_submission(mini_index, vec![encoder]); + } + } + + Ok(()) + } + pub fn submit( &self, command_buffers: &[Arc], diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 73043f2e458..5aba6352f60 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -928,6 +928,28 @@ impl Device { // - When `queue` goes out of scope here, `Queue::drop` runs and tries to acquire the // snatch guard — but Thread A (this thread) still holds it, causing a deadlock. drop(snatch_guard); + + // If this was a wait request, resolve any pending presentations + // whose index is at or before the wait target. Only those + // presents are part of the operation being waited on; presents + // with a higher index (made *after* the target) must not cause + // an unintended stall. + // + // `wait_for_present` either waits for the next real fence signal + // (if a submission after the present already exists) or falls + // back to a full queue idle when no subsequent submission has + // been made. 
+ if let Some(wait_target) = wait_submission_index { + if let Some(present_index) = queue.lock_life().oldest_pending_present_index() { + if present_index <= wait_target { + if let Err(e) = queue.wait_for_present(present_index) { + let hal_error: WaitIdleError = e.into(); + return (user_closures, Err(hal_error)); + } + queue_empty = queue.lock_life().queue_empty(); + } + } + } } else { drop(snatch_guard); }; diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 9a64fd216d2..c0f302d9522 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -152,7 +152,7 @@ pub enum Action<'a, R: ReferenceType> { id: R::Texture, parent: R::Surface, }, - Present(R::Surface), + Present(crate::SubmissionIndex, R::Surface), DiscardSurfaceTexture(R::Surface), CreateBindGroupLayout( PointerId, diff --git a/wgpu-core/src/device/trace/record.rs b/wgpu-core/src/device/trace/record.rs index 6e8e686fea7..f72b99ad865 100644 --- a/wgpu-core/src/device/trace/record.rs +++ b/wgpu-core/src/device/trace/record.rs @@ -866,7 +866,7 @@ fn action_to_owned(action: Action<'_, PointerReferences>) -> Action<'static, Poi A::DestroyExternalTexture(external_texture) => A::DestroyExternalTexture(external_texture), A::DestroySampler(sampler) => A::DestroySampler(sampler), A::GetSurfaceTexture { id, parent } => A::GetSurfaceTexture { id, parent }, - A::Present(surface) => A::Present(surface), + A::Present(a, b) => A::Present(a, b), A::DiscardSurfaceTexture(surface) => A::DiscardSurfaceTexture(surface), A::DestroyBindGroupLayout(layout) => A::DestroyBindGroupLayout(layout), A::GetRenderPipelineBindGroupLayout { diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 524cc1f6dec..640882ec546 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -9,18 +9,21 @@ When this texture is presented, we remove it from the device tracker as well as extract it from the hub. 
!*/ -use alloc::{sync::Arc, vec::Vec}; -use core::mem::ManuallyDrop; +use alloc::{boxed::Box, sync::Arc, vec::Vec}; +use core::{mem::ManuallyDrop, sync::atomic::Ordering}; #[cfg(feature = "trace")] use crate::device::trace::{Action, IntoTrace}; use crate::{ conv, - device::{Device, DeviceError, MissingDownlevelFlags, WaitIdleError}, + device::{ + queue::Queue, Device, DeviceError, DeviceMismatch, MissingDownlevelFlags, WaitIdleError, + }, global::Global, hal_label, id, instance::Surface, - resource, + resource::{self, Labeled}, + SubmissionIndex, }; use thiserror::Error; @@ -151,6 +154,135 @@ pub struct SurfaceOutput { pub texture: Option, } +impl Queue { + /// Present a surface texture and return the submission index that was + /// active at the time of the present. + /// + /// The returned [`SubmissionIndex`] can be passed to + /// [`Queue::wait_for_present`] to wait for the presentation to complete. + /// The surface texture is kept alive internally until a subsequent queue + /// submission (with index strictly greater than the returned value) + /// finishes, or until the queue is dropped. 
+ pub fn present(&self, surface: &Surface) -> Result<(Status, SubmissionIndex), SurfaceError> { + profiling::scope!("Queue::present"); + + self.device.check_is_valid()?; + + let mut presentation_lock = surface.presentation.lock(); + let presentation = presentation_lock.as_mut(); + + if let Some(presentation) = presentation.as_ref() { + let same_device = Arc::ptr_eq(&presentation.device, &self.device); + + if !same_device { + return Err(SurfaceError::Device(DeviceError::DeviceMismatch(Box::new( + DeviceMismatch { + res: self.error_ident(), + res_device: self.device.error_ident(), + target: None, + target_device: presentation.device.error_ident(), + }, + )))); + } + } + + let present = match presentation { + Some(present) => present, + None => return Err(SurfaceError::NotConfigured), + }; + + let texture = present + .acquired_texture + .take() + .ok_or(SurfaceError::AlreadyAcquired)?; + + let device = &self.device; + + // If the texture was acquired but never rendered to / submitted, clear + // it and transition it to the PRESENT state before presenting. + self.prepare_surface_texture_for_present(&texture)?; + + let mut exclusive_snatch_guard = device.snatchable_lock.write(); + let inner = texture.inner.snatch(&mut exclusive_snatch_guard); + drop(exclusive_snatch_guard); + + let result = match inner { + None => return Err(SurfaceError::TextureDestroyed), + Some(resource::TextureInner::Surface { raw }) => { + let raw_surface = surface.raw(device.backend()).unwrap(); + let _fence_lock = device.fence.write(); + unsafe { self.raw().present(raw_surface, raw) } + } + _ => unreachable!(), + }; + + // Assign a fresh submission index to this present so it is distinct + // from any prior real queue submission. This means: + // + // - Waiting for the *submission* index (N) will not accidentally + // trigger present resolution — the present is at N+1, which is + // strictly after N. 
+ // - Waiting for the *present* index (N+1) will correctly block + // until the GPU is done with the presentation. + // + // We also update `last_successful_submission_index` so that the normal + // `Device::maintain` wait-validity check does not reject a poll for + // this index. + let present_index = { + let mut indices = device.command_indices.write(); + indices.active_submission_index += 1; + indices.active_submission_index + }; + + match result { + Ok(()) => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Good, present_index)) + } + Err(err) => match err { + hal::SurfaceError::Timeout => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Timeout, present_index)) + } + hal::SurfaceError::Occluded => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Occluded, present_index)) + } + hal::SurfaceError::Lost => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Lost, present_index)) + } + hal::SurfaceError::Device(err) => { + Err(SurfaceError::from(device.handle_hal_error(err))) + } + hal::SurfaceError::Outdated => { + device + .last_successful_submission_index + .store(present_index, Ordering::Release); + self.lock_life().track_present(present_index, texture); + Ok((Status::Outdated, present_index)) + } + hal::SurfaceError::Other(msg) => { + log::error!("present error: {msg}"); + Err(SurfaceError::Invalid) + } + }, + } + } +} + impl Surface { pub fn get_current_texture(&self) -> Result { profiling::scope!("Surface::get_current_texture"); @@ -273,58 +405,6 @@ impl Surface { Ok(ResolvedSurfaceOutput { status, texture }) } 
- pub fn present(&self) -> Result { - profiling::scope!("Surface::present"); - - let mut presentation = self.presentation.lock(); - let present = match presentation.as_mut() { - Some(present) => present, - None => return Err(SurfaceError::NotConfigured), - }; - - let device = &present.device; - - device.check_is_valid()?; - let queue = device.get_queue().unwrap(); - - let texture = present - .acquired_texture - .take() - .ok_or(SurfaceError::AlreadyAcquired)?; - - let mut exclusive_snatch_guard = device.snatchable_lock.write(); - let inner = texture.inner.snatch(&mut exclusive_snatch_guard); - drop(exclusive_snatch_guard); - - let result = match inner { - None => return Err(SurfaceError::TextureDestroyed), - Some(resource::TextureInner::Surface { raw }) => { - let raw_surface = self.raw(device.backend()).unwrap(); - let raw_queue = queue.raw(); - let _fence_lock = device.fence.write(); - unsafe { raw_queue.present(raw_surface, raw) } - } - _ => unreachable!(), - }; - - match result { - Ok(()) => Ok(Status::Good), - Err(err) => match err { - hal::SurfaceError::Timeout => Ok(Status::Timeout), - hal::SurfaceError::Occluded => Ok(Status::Occluded), - hal::SurfaceError::Lost => Ok(Status::Lost), - hal::SurfaceError::Device(err) => { - Err(SurfaceError::from(device.handle_hal_error(err))) - } - hal::SurfaceError::Outdated => Ok(Status::Outdated), - hal::SurfaceError::Other(msg) => { - log::error!("present error: {msg}"); - Err(SurfaceError::Invalid) - } - }, - } - } - pub fn discard(&self) -> Result<(), SurfaceError> { profiling::scope!("Surface::discard"); @@ -395,17 +475,50 @@ impl Global { }) } + pub fn queue_present( + &self, + queue_id: id::QueueId, + surface_id: id::SurfaceId, + ) -> Result<(Status, SubmissionIndex), SurfaceError> { + let queue = self.hub.queues.get(queue_id); + let surface = self.surfaces.get(surface_id); + + let result = queue.present(&surface); + + #[cfg(feature = "trace")] + if let Ok((_, present_index)) = &result { + if let Some(presentation) 
= surface.presentation.lock().as_ref() { + if let Some(ref mut trace) = *presentation.device.trace.lock() { + trace.add(Action::Present(*present_index, surface.to_trace())); + } + } + } + + result + } + + /// TODO: is this needed by deno? pub fn surface_present(&self, surface_id: id::SurfaceId) -> Result { let surface = self.surfaces.get(surface_id); + let queue = { + let lock = surface.presentation.lock(); + let present = lock.as_ref().ok_or(SurfaceError::NotConfigured)?; + present.device.get_queue().unwrap() + }; + + let result = queue.present(&surface); + #[cfg(feature = "trace")] - if let Some(present) = surface.presentation.lock().as_ref() { - if let Some(ref mut trace) = *present.device.trace.lock() { - trace.add(Action::Present(surface.to_trace())); + if let Ok((_, present_index)) = &result { + if let Some(present) = surface.presentation.lock().as_ref() { + if let Some(ref mut trace) = *present.device.trace.lock() { + trace.add(Action::Present(*present_index, surface.to_trace())); + } } } - surface.present() + result.map(|(status, _)| status) } pub fn surface_texture_discard(&self, surface_id: id::SurfaceId) -> Result<(), SurfaceError> { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index ac520b270ff..f3bbc1bd58a 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -1653,6 +1653,26 @@ impl crate::Queue for Queue { let frequency = unsafe { self.raw.GetTimestampFrequency() }.expect("GetTimestampFrequency"); (1_000_000_000.0 / frequency as f64) as f32 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + let mut device = Default::default(); + unsafe { + self.raw + .GetDevice::(&mut device) + .into_device_result("Queue::wait_for_idle GetDevice")?; + } + let device = device.unwrap(); + + let fence: Direct3D12::ID3D12Fence = + unsafe { device.CreateFence(0, Direct3D12::D3D12_FENCE_FLAG_NONE) } + .into_device_result("Queue::wait_for_idle CreateFence")?; + unsafe { self.raw.Signal(&fence, 1) 
}.into_device_result("Queue::wait_for_idle Signal")?; + let event = Event::create(false, false)?; + unsafe { fence.SetEventOnCompletion(1, event.0) } + .into_device_result("Queue::wait_for_idle SetEventOnCompletion")?; + unsafe { Threading::WaitForSingleObject(event.0, Threading::INFINITE) }; + Ok(()) + } } #[derive(Debug)] pub struct DxilPassthroughShader { diff --git a/wgpu-hal/src/dynamic/queue.rs b/wgpu-hal/src/dynamic/queue.rs index 2af6d1f9c77..04744a9b4fa 100644 --- a/wgpu-hal/src/dynamic/queue.rs +++ b/wgpu-hal/src/dynamic/queue.rs @@ -20,6 +20,7 @@ pub trait DynQueue: DynResource { texture: Box, ) -> Result<(), SurfaceError>; unsafe fn get_timestamp_period(&self) -> f32; + unsafe fn wait_for_idle(&self) -> Result<(), DeviceError>; } impl DynQueue for Q { @@ -53,4 +54,8 @@ unsafe fn get_timestamp_period(&self) -> f32 { unsafe { Q::get_timestamp_period(self) } } + + unsafe fn wait_for_idle(&self) -> Result<(), DeviceError> { + unsafe { Q::wait_for_idle(self) } + } } diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index a1f09a5c038..62b31b58c81 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1955,6 +1955,12 @@ impl crate::Queue for super::Queue { unsafe fn get_timestamp_period(&self) -> f32 { 1.0 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + let gl = &self.shared.context.lock(); + unsafe { gl.finish() }; + Ok(()) + } } #[cfg(send_sync)] diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 3f36e675d40..5c2942c220c 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1251,6 +1251,10 @@ pub trait Queue: WasmNotSendSync { texture: ::SurfaceTexture, ) -> Result<(), SurfaceError>; unsafe fn get_timestamp_period(&self) -> f32; + /// Block until all queue operations have completed. + /// + /// This is useful for waiting on presentations, which in Vulkan don't have fences. 
+ unsafe fn wait_for_idle(&self) -> Result<(), DeviceError>; } /// Encoder and allocation pool for `CommandBuffer`s. diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 3945a995a83..94da77d46db 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -604,6 +604,16 @@ impl crate::Queue for Queue { unsafe fn get_timestamp_period(&self) -> f32 { self.timestamp_period } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + autoreleasepool(|_| { + let cmd_buf = self.shared.raw.commandBuffer().unwrap(); + cmd_buf.setLabel(Some(ns_string!("(wgpu internal) wait_for_idle"))); + cmd_buf.commit(); + cmd_buf.waitUntilCompleted(); + }); + Ok(()) + } } #[derive(Debug)] diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs index c1dc6ffcbde..edc946cb71b 100644 --- a/wgpu-hal/src/noop/mod.rs +++ b/wgpu-hal/src/noop/mod.rs @@ -349,6 +349,10 @@ impl crate::Queue for Context { unsafe fn get_timestamp_period(&self) -> f32 { 1.0 } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + Ok(()) + } } impl crate::Device for Context { diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 2fbb629894f..9ceb1e35889 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -1363,6 +1363,15 @@ impl crate::Queue for Queue { unsafe fn get_timestamp_period(&self) -> f32 { self.device.timestamp_period } + + unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> { + unsafe { + self.device + .raw + .queue_wait_idle(self.raw) + .map_err(map_host_device_oom_and_lost_err) + } + } } impl Queue { diff --git a/wgpu/src/api/queue.rs b/wgpu/src/api/queue.rs index aa5a0f24eda..3abec7f35a1 100644 --- a/wgpu/src/api/queue.rs +++ b/wgpu/src/api/queue.rs @@ -1,7 +1,7 @@ use alloc::boxed::Box; use core::ops::{Deref, RangeBounds}; -use crate::{api::DeferredCommandBufferActions, *}; +use crate::{api::DeferredCommandBufferActions, api::SurfaceTexture, *}; /// Handle to a command queue 
on a device. /// @@ -343,6 +343,21 @@ impl Queue { unsafe { queue.context.queue_as_hal::(queue) } } + /// Schedule a surface texture to be presented on the owning surface. + /// + /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`]. + /// + /// # Platform dependent behavior + /// + /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface + /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter + /// or synchronize other double buffered state, then these operations should be done before the call to `present`. + pub fn present(&self, mut surface_texture: SurfaceTexture) -> SubmissionIndex { + surface_texture.presented = true; + let index = self.inner.present(&surface_texture.detail); + SubmissionIndex { index } + } + /// Compact a BLAS, it must have had [`Blas::prepare_compaction_async`] called on it and had the /// callback provided called. /// diff --git a/wgpu/src/api/surface.rs b/wgpu/src/api/surface.rs index 82e819051bf..30f10a59712 100644 --- a/wgpu/src/api/surface.rs +++ b/wgpu/src/api/surface.rs @@ -110,7 +110,7 @@ impl Surface<'_> { /// /// In order to present the [`SurfaceTexture`] returned by this method, /// first a [`Queue::submit`] needs to be done with some work rendering to this texture. - /// Then [`SurfaceTexture::present`] needs to be called. + /// Then [`Queue::present`] needs to be called. /// /// If a [`SurfaceTexture`] referencing this surface is alive when [`Surface::configure()`] /// is called, the configure call will panic. 
diff --git a/wgpu/src/api/surface_texture.rs b/wgpu/src/api/surface_texture.rs index f08c462127c..5126c10bb85 100644 --- a/wgpu/src/api/surface_texture.rs +++ b/wgpu/src/api/surface_texture.rs @@ -19,20 +19,6 @@ static_assertions::assert_impl_all!(SurfaceTexture: Send, Sync); crate::cmp::impl_eq_ord_hash_proxy!(SurfaceTexture => .texture.inner); impl SurfaceTexture { - /// Schedule this texture to be presented on the owning surface. - /// - /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`]. - /// - /// # Platform dependent behavior - /// - /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface - /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter - /// or synchronize other double buffered state, then these operations should be done before the call to `present`. - pub fn present(mut self) { - self.presented = true; - self.detail.present(); - } - #[cfg(custom)] /// Returns custom implementation of SurfaceTexture (if custom backend and is internally T) pub fn as_custom(&self) -> Option<&T> { diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 65f6ba9da12..bdae3071bf0 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -2738,6 +2738,11 @@ impl dispatch::QueueInterface for WebQueue { ) -> (Option, dispatch::DispatchBlas) { unimplemented!("Raytracing not implemented for web") } + + fn present(&self, _detail: &dispatch::DispatchSurfaceOutputDetail) -> u64 { + // Swapchain is presented automatically on the web. + 0 + } } impl Drop for WebQueue { fn drop(&mut self) { @@ -3945,10 +3950,6 @@ impl Drop for WebSurface { } impl dispatch::SurfaceOutputDetailInterface for WebSurfaceOutputDetail { - fn present(&self) { - // Swapchain is presented automatically on the web. - } - fn texture_discard(&self) { // Can't really discard the texture on the web. 
} diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index ce7d1f48eae..a01d6698bfa 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -614,7 +614,6 @@ pub struct CoreTlas { pub struct CoreSurfaceOutputDetail { context: ContextWgpuCore, surface_id: wgc::id::SurfaceId, - error_sink: ErrorSink, } type ErrorSink = Arc>; @@ -2139,6 +2138,18 @@ impl dispatch::QueueInterface for CoreQueue { .into(), ) } + + fn present(&self, detail: &dispatch::DispatchSurfaceOutputDetail) -> u64 { + let detail = detail.as_core(); + match self.context.0.queue_present(self.id, detail.surface_id) { + Ok((_status, index)) => index, + Err(err) => { + self.context + .handle_error_nolabel(&self.error_sink, err, "Queue::present"); + 0 + } + } + } } impl Drop for CoreQueue { @@ -3901,7 +3912,6 @@ impl dispatch::SurfaceInterface for CoreSurface { let output_detail = CoreSurfaceOutputDetail { context: self.context.clone(), surface_id: self.id, - error_sink: error_sink.clone(), } .into(); @@ -3947,16 +3957,6 @@ impl Drop for CoreSurface { } impl dispatch::SurfaceOutputDetailInterface for CoreSurfaceOutputDetail { - fn present(&self) { - match self.context.0.surface_present(self.surface_id) { - Ok(_status) => (), - Err(err) => { - self.context - .handle_error_nolabel(&self.error_sink, err, "Surface::present"); - } - } - } - fn texture_discard(&self) { match self.context.0.surface_texture_discard(self.surface_id) { Ok(_status) => (), diff --git a/wgpu/src/dispatch.rs b/wgpu/src/dispatch.rs index d2b2366ed09..e84d164541c 100644 --- a/wgpu/src/dispatch.rs +++ b/wgpu/src/dispatch.rs @@ -261,6 +261,8 @@ pub trait QueueInterface: CommonTraits { fn on_submitted_work_done(&self, callback: BoxSubmittedWorkDoneCallback); fn compact_blas(&self, blas: &DispatchBlas) -> (Option, DispatchBlas); + + fn present(&self, detail: &DispatchSurfaceOutputDetail) -> u64; } pub trait ShaderModuleInterface: CommonTraits { @@ -578,7 +580,6 @@ pub trait SurfaceInterface: 
CommonTraits { } pub trait SurfaceOutputDetailInterface: CommonTraits { - fn present(&self); fn texture_discard(&self); }