diff --git a/CHANGELOG.md b/CHANGELOG.md index d9cd1cf8902..b7ed417c54e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -116,6 +116,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505). #### General +- Add `StagingBelt::finish_and_recall_on_submit`, a convenience that combines `finish` and `recall` by deferring the buffer re-map via `CommandEncoder::map_buffer_on_submit`, so no explicit `recall()` call is needed after submission. By @ruihe774. - Implement `i16`/`u16` 16-bit integer support in WGSL shaders, gated behind `Features::SHADER_I16` and `enable wgpu_int16;`. Supported on Vulkan, Metal, and DX12 (SM 6.2+). By @JMS55 in [#9412](https://github.com/gfx-rs/wgpu/pull/9412). - BLAS support for procedural AABB geometry (`BlasGeometrySizeDescriptors::AABBs`, `BlasAabbGeometry`, and related descriptors). By @dylanblokhuis in [#9290](https://github.com/gfx-rs/wgpu/pull/9290) - Added "limit bucketing" functionality which can adjust adapter limits and features to match one of several pre-defined buckets. This is controlled by the new `apply_limit_buckets` member in `RequestAdapterOptions`, which is `false` by default. By @andyleiserson in [#9119](https://github.com/gfx-rs/wgpu/pull/9119). diff --git a/tests/tests/wgpu-validation/util.rs b/tests/tests/wgpu-validation/util.rs index 80a87117c14..aa72f4a1054 100644 --- a/tests/tests/wgpu-validation/util.rs +++ b/tests/tests/wgpu-validation/util.rs @@ -3,8 +3,7 @@ use nanorand::Rng; /// Generate (deterministic) random staging belt operations to exercise its logic. 
-#[test] -fn staging_belt_random_test() { +fn staging_belt_random_test(use_recall_on_submit: bool) { let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default()); let mut rng = nanorand::WyRand::new_seed(0xDEAD_BEEF); let buffer_size = 1024; @@ -35,12 +34,28 @@ fn staging_belt_random_test() { slice.slice(..1).copy_from_slice(&[1]); } - belt.finish(); - queue.submit([encoder.finish()]); - belt.recall(); + if use_recall_on_submit { + belt.finish_and_recall_on_submit(&encoder); + queue.submit([encoder.finish()]); + // No explicit recall() needed. + } else { + belt.finish(); + queue.submit([encoder.finish()]); + belt.recall(); + } } } +#[test] +fn staging_belt_manual_recall() { + staging_belt_random_test(false); +} + +#[test] +fn staging_belt_finish_and_recall_on_submit() { + staging_belt_random_test(true); +} + #[test] fn staging_belt_panics_with_invalid_buffer_usages() { #[track_caller] diff --git a/wgpu/src/util/belt.rs b/wgpu/src/util/belt.rs index f8b03a23f20..718fe12c76f 100644 --- a/wgpu/src/util/belt.rs +++ b/wgpu/src/util/belt.rs @@ -23,6 +23,10 @@ use crate::COPY_BUFFER_ALIGNMENT; /// 3. Submit all command encoders that were used in step 1. /// 4. Call [`StagingBelt::recall()`]. /// +/// Alternatively, steps 2 and 4 can be combined into a single call to +/// [`StagingBelt::finish_and_recall_on_submit()`], which schedules the re-map +/// automatically when the encoder's command buffer is submitted, so no explicit `recall()` is needed. +/// /// [`Queue::write_buffer_with()`]: crate::Queue::write_buffer_with pub struct StagingBelt { device: Device, @@ -268,6 +272,39 @@ impl StagingBelt { } } + /// Convenience for [`StagingBelt::finish()`] followed by a deferred + /// [`StagingBelt::recall()`] that runs automatically when `encoder`'s command + /// buffer is submitted. 
+ /// + /// After calling this method, the staging belt's internal buffers will be + /// re-mapped for write once the submission completes, without requiring an + /// explicit call to [`StagingBelt::recall()`]. + /// + /// Like [`StagingBelt::recall()`], this method does not block. + /// + /// # Important + /// + /// `encoder` must be finished (via [`CommandEncoder::finish()`]) and the + /// resulting [`CommandBuffer`] must be submitted to the [`Queue`] **before** + /// the next call that needs free staging-belt chunks. If the encoder is + /// never submitted, the belt's closed chunks will not be returned and the + /// belt will allocate new buffers indefinitely. + /// + /// [`CommandBuffer`]: crate::CommandBuffer + /// [`Queue`]: crate::Queue + pub fn finish_and_recall_on_submit(&mut self, encoder: &CommandEncoder) { + self.finish(); + self.receive_chunks(); + + for chunk in self.closed_chunks.drain(..) { + let sender = self.sender.get_mut().clone(); + let buffer = chunk.buffer.clone(); + encoder.map_buffer_on_submit(&buffer, MapMode::Write, .., move |_| { + let _ = sender.send(chunk); + }); + } + } + /// Move all chunks that the GPU is done with (and are now mapped again) /// from `self.receiver` to `self.free_chunks`. fn receive_chunks(&mut self) {