diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index ab3045b77d7..6a9f10f9418 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -28,7 +28,7 @@ pub mod ray_tracing;
 pub mod resource;
 #[cfg(any(feature = "trace", feature = "replay"))]
 pub mod trace;
-pub(crate) use resource::{FenceReadGuard, FenceWriteGuard};
+pub(crate) use resource::FenceReadGuard;
 pub use {life::WaitIdleError, resource::Device};
 
 pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 8d4da748887..dd26abe66c2 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -24,7 +24,7 @@ use crate::{
         CommandAllocator, CommandBuffer, CommandEncoder, CommandEncoderError, CopySide,
         TransferError,
     },
-    device::{DeviceError, FenceReadGuard, FenceWriteGuard, WaitIdleError},
+    device::{DeviceError, FenceReadGuard, WaitIdleError},
     get_lowest_common_denom,
     global::Global,
     hal_label,
@@ -568,7 +568,7 @@ impl WebGpuError for QueueSubmitError {
 pub(crate) struct PendingSubmission<'a> {
     queue: &'a Queue,
     snatch_guard: SnatchGuard<'a>,
-    fence: FenceWriteGuard<'a>,
+    fence: FenceReadGuard<'a>,
     command_index_guard: RwLockWriteGuard<'a, CommandIndices>,
     // Command buffers to be executed, along with trackers for the resources they use.
     pub executions: Vec<EncoderInFlight>,
@@ -1563,7 +1563,7 @@ impl Queue {
     ) -> Result<PendingSubmission<'a>, (SubmissionIndex, DeviceError)> {
         // Lock ordering requires that the fence lock be acquired after the snatch lock and
         // before the command index lock.
-        let fence = self.device.fence.write();
+        let fence = self.device.fence.read();
 
         let mut command_index_guard = self.device.command_indices.write();
         command_index_guard.active_submission_index += 1;
@@ -1610,7 +1610,7 @@ impl Queue {
         let PendingSubmission {
             queue: _,
             snatch_guard,
-            mut fence,
+            fence,
             command_index_guard,
             mut executions,
             mut surface_textures,
@@ -1681,7 +1681,7 @@ impl Queue {
                 self.raw().submit(
                     &hal_command_buffers,
                     &submit_surface_textures,
-                    (fence.as_mut(), submit_index),
+                    (fence.as_ref(), submit_index),
                 )
             }
             .map_err(|e| self.device.handle_hal_error(e))?;
@@ -1702,7 +1702,7 @@ impl Queue {
         self.lock_life().track_submission(submit_index, executions);
 
         Ok(SubmissionResult {
-            fence: RwLockWriteGuard::downgrade(fence),
+            fence,
             snatch_guard,
         })
     }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 67f03bf9135..d9d781e9c43 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -41,7 +41,7 @@ use crate::{
         TextureInitTrackerAction,
     },
     instance::{Adapter, RequestDeviceError},
-    lock::{rank, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard},
+    lock::{rank, Mutex, RwLock, RwLockReadGuard},
     pipeline::{self, ColorStateError},
     pool::ResourcePool,
     present,
@@ -289,7 +289,6 @@ pub struct Device {
 }
 
 pub(crate) type FenceReadGuard<'a> = RwLockReadGuard<'a, ManuallyDrop<Box<dyn hal::DynFence>>>;
-pub(crate) type FenceWriteGuard<'a> = RwLockWriteGuard<'a, ManuallyDrop<Box<dyn hal::DynFence>>>;
 
 pub(crate) enum DeferredDestroy {
     TextureViews(WeakVec<TextureView>),
diff --git a/wgpu-core/src/lock/observing.rs b/wgpu-core/src/lock/observing.rs
index 3c61d613c45..25c25185df4 100644
--- a/wgpu-core/src/lock/observing.rs
+++ b/wgpu-core/src/lock/observing.rs
@@ -185,15 +185,6 @@ impl<'a, T> RwLockReadGuard<'a, T> {
     }
 }
 
-impl<'a, T> RwLockWriteGuard<'a, T> {
-    pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
-        RwLockReadGuard {
-            inner: parking_lot::RwLockWriteGuard::downgrade(this.inner),
-            _state: this._state,
-        }
-    }
-}
-
 impl<T: core::fmt::Debug> core::fmt::Debug for RwLock<T> {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         self.inner.fmt(f)
@@ -226,11 +217,11 @@ impl<'a, T> core::ops::DerefMut for RwLockWriteGuard<'a, T> {
 ///
 /// This type serves two purposes:
 ///
-/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to
-///   destructure lock guards and reassemble their pieces into new guards, but
-///   if the guard type itself implements `Drop`, we can't destructure it
-///   without unsafe code or pointless `Option`s whose state is almost always
-///   statically known.
+/// - Operations would like to be able to destructure lock guards and
+///   reassemble their pieces into new guards, but if the guard type
+///   itself implements `Drop`, we can't destructure it without unsafe
+///   code or pointless `Option`s whose state is almost always statically
+///   known.
 ///
 /// - We can just implement `Drop` for this type once, and then use it in lock
 ///   guards, rather than implementing `Drop` separately for each guard type.
diff --git a/wgpu-core/src/lock/ranked.rs b/wgpu-core/src/lock/ranked.rs
index 2e68b04dd8e..745300ceae5 100644
--- a/wgpu-core/src/lock/ranked.rs
+++ b/wgpu-core/src/lock/ranked.rs
@@ -110,11 +110,11 @@ impl LockState {
 ///
 /// This type serves two purposes:
 ///
-/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to
-///   destructure lock guards and reassemble their pieces into new guards, but
-///   if the guard type itself implements `Drop`, we can't destructure it
-///   without unsafe code or pointless `Option`s whose state is almost always
-///   statically known.
+/// - Operations would like to be able to destructure lock guards and
+///   reassemble their pieces into new guards, but if the guard type
+///   itself implements `Drop`, we can't destructure it without unsafe
+///   code or pointless `Option`s whose state is almost always statically
+///   known.
 ///
 /// - We can just implement `Drop` for this type once, and then use it in lock
 ///   guards, rather than implementing `Drop` separately for each guard type.
@@ -299,15 +299,6 @@ impl<'a, T> RwLockReadGuard<'a, T> {
     }
 }
 
-impl<'a, T> RwLockWriteGuard<'a, T> {
-    pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
-        RwLockReadGuard {
-            inner: parking_lot::RwLockWriteGuard::downgrade(this.inner),
-            saved: this.saved,
-        }
-    }
-}
-
 impl<T: fmt::Debug> fmt::Debug for RwLock<T> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         self.inner.fmt(f)
diff --git a/wgpu-core/src/lock/vanilla.rs b/wgpu-core/src/lock/vanilla.rs
index 4ed38c38011..96999696282 100644
--- a/wgpu-core/src/lock/vanilla.rs
+++ b/wgpu-core/src/lock/vanilla.rs
@@ -116,12 +116,6 @@ impl<'a, T> RwLockReadGuard<'a, T> {
     }
 }
 
-impl<'a, T> RwLockWriteGuard<'a, T> {
-    pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
-        RwLockReadGuard(parking_lot::RwLockWriteGuard::downgrade(this.0))
-    }
-}
-
 impl<T: fmt::Debug> fmt::Debug for RwLock<T> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         self.0.fmt(f)
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 34f4f3a9749..a1bd44d2e27 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -1670,7 +1670,7 @@ impl crate::Queue for Queue {
         &self,
         command_buffers: &[&CommandBuffer],
         _surface_textures: &[&Texture],
-        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
+        (signal_fence, signal_value): (&Fence, crate::FenceValue),
     ) -> Result<(), crate::DeviceError> {
         let mut temp_lists = self.temp_lists.lock();
         temp_lists.clear();
diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs
index 85d8ca00450..f450513637e 100644
--- a/wgpu-hal/src/dynamic/mod.rs
+++ b/wgpu-hal/src/dynamic/mod.rs
@@ -57,10 +57,6 @@ trait DynResourceExt {
     ///
     /// - Panics if `self` is not downcastable to `T`.
     fn expect_downcast_ref<T: DynResource>(&self) -> &T;
-    /// # Panics
-    ///
-    /// - Panics if `self` is not downcastable to `T`.
-    fn expect_downcast_mut<T: DynResource>(&mut self) -> &mut T;
 
     /// Unboxes a `Box<dyn DynResource>` to a concrete type.
     ///
@@ -77,12 +73,6 @@ impl<R: DynResource + ?Sized> DynResourceExt for R {
             .expect("Resource doesn't have the expected backend type.")
     }
 
-    fn expect_downcast_mut<'a, T: DynResource>(&'a mut self) -> &'a mut T {
-        self.as_any_mut()
-            .downcast_mut()
-            .expect("Resource doesn't have the expected backend type.")
-    }
-
     unsafe fn unbox<T: DynResource + 'static>(self: Box<Self>) -> T {
         debug_assert!(
             <Self as Any>::type_id(self.as_ref()) == TypeId::of::<T>(),
diff --git a/wgpu-hal/src/dynamic/queue.rs b/wgpu-hal/src/dynamic/queue.rs
index 04744a9b4fa..5425b2d1297 100644
--- a/wgpu-hal/src/dynamic/queue.rs
+++ b/wgpu-hal/src/dynamic/queue.rs
@@ -12,7 +12,7 @@ pub trait DynQueue: DynResource {
         &self,
         command_buffers: &[&dyn DynCommandBuffer],
         surface_textures: &[&dyn DynSurfaceTexture],
-        signal_fence: (&mut dyn DynFence, FenceValue),
+        signal_fence: (&dyn DynFence, FenceValue),
     ) -> Result<(), DeviceError>;
     unsafe fn present(
         &self,
@@ -28,7 +28,7 @@ impl<Q: Queue + DynResource> DynQueue for Q {
         &self,
         command_buffers: &[&dyn DynCommandBuffer],
         surface_textures: &[&dyn DynSurfaceTexture],
-        signal_fence: (&mut dyn DynFence, FenceValue),
+        signal_fence: (&dyn DynFence, FenceValue),
     ) -> Result<(), DeviceError> {
         let command_buffers = command_buffers
             .iter()
@@ -38,7 +38,7 @@ impl<Q: Queue + DynResource> DynQueue for Q {
             .iter()
             .map(|surface| (*surface).expect_downcast_ref())
             .collect::<Vec<_>>();
-        let signal_fence = (signal_fence.0.expect_downcast_mut(), signal_fence.1);
+        let signal_fence = (signal_fence.0.expect_downcast_ref(), signal_fence.1);
         unsafe { Q::submit(self, &command_buffers, &surface_textures, signal_fence) }
     }
 
diff --git a/wgpu-hal/src/gles/fence.rs b/wgpu-hal/src/gles/fence.rs
index 201f78fcb43..0470e5ba151 100644
--- a/wgpu-hal/src/gles/fence.rs
+++ b/wgpu-hal/src/gles/fence.rs
@@ -1,20 +1,29 @@
-use alloc::vec::Vec;
+use alloc::{sync::Arc, vec::Vec};
 use core::sync::atomic::Ordering;
+use parking_lot::RwLock;
 
 use glow::HasContext;
 
 use crate::AtomicFenceValue;
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug)]
 struct GLFence {
-    sync: glow::Fence,
+    // Since a fence can be `Copy`ed, there can exist some
+    // cases where a fence could be destroyed while something
+    // else is still using it. Therefore, while a function is
+    // using this fence (and doesn't keep pending read locked),
+    // it should clone the `Arc` to show it needs this to
+    // stay alive.
+    //
+    // The arc should not be kept after a function has finished
+    sync: Arc<glow::Fence>,
     value: crate::FenceValue,
 }
 
 #[derive(Debug)]
 pub struct Fence {
     last_completed: AtomicFenceValue,
-    pending: Vec<GLFence>,
+    pending: RwLock<Vec<GLFence>>,
     fence_behavior: wgt::GlFenceBehavior,
 }
 
@@ -29,24 +38,27 @@ impl Fence {
     pub fn new(options: &wgt::GlBackendOptions) -> Self {
         Self {
             last_completed: AtomicFenceValue::new(0),
-            pending: Vec::new(),
+            pending: RwLock::new(Vec::new()),
             fence_behavior: options.fence_behavior,
         }
     }
 
     pub fn signal(
-        &mut self,
+        &self,
         gl: &glow::Context,
         value: crate::FenceValue,
     ) -> Result<(), crate::DeviceError> {
         if self.fence_behavior.is_auto_finish() {
-            *self.last_completed.get_mut() = value;
+            self.last_completed.store(value, Ordering::Release);
             return Ok(());
         }
 
         let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) }
             .map_err(|_| crate::DeviceError::OutOfMemory)?;
-        self.pending.push(GLFence { sync, value });
+        self.pending.write().push(GLFence {
+            sync: Arc::new(sync),
+            value,
+        });
 
         Ok(())
     }
@@ -62,12 +74,15 @@ impl Fence {
             return max_value;
         }
 
-        for gl_fence in self.pending.iter() {
+        let pending = self.pending.read();
+
+        for gl_fence in pending.iter() {
             if gl_fence.value <= max_value {
                 // We already know this was good, no need to check again
                 continue;
             }
-            let status = unsafe { gl.get_sync_status(gl_fence.sync) };
+            // We have pending `read` locked, so we shouldn't have to clone it.
+            let status = unsafe { gl.get_sync_status(*gl_fence.sync) };
             if status == glow::SIGNALED {
                 max_value = gl_fence.value;
             } else {
@@ -82,20 +97,29 @@ impl Fence {
         max_value
     }
 
-    pub fn maintain(&mut self, gl: &glow::Context) {
+    pub fn maintain(&self, gl: &glow::Context) {
         if self.fence_behavior.is_auto_finish() {
             return;
         }
 
         let latest = self.get_latest(gl);
-        for &gl_fence in self.pending.iter() {
-            if gl_fence.value <= latest {
+        let mut pending = self.pending.write();
+        pending.retain_mut(|gl_fence| {
+            if gl_fence.value > latest {
+                true
+            } else if let Some(fence) = Arc::get_mut(&mut gl_fence.sync) {
                 unsafe {
-                    gl.delete_sync(gl_fence.sync);
+                    gl.delete_sync(*fence);
                 }
+                false
+            } else {
+                // Another function is currently using this value. In general, these should finish
+                // very quickly (for wait because the fence should already be signaled, an all
+                // others are just fast), but submit should be very fast, so we shouldn't block on
+                // this.
+                true
             }
-        }
-        self.pending.retain(|&gl_fence| gl_fence.value > latest);
+        });
     }
 
     pub fn wait(
@@ -115,25 +139,32 @@ impl Fence {
             return Ok(true);
         }
 
+        let pending = self.pending.read();
+
         // Find a matching fence
-        let gl_fence = self
-            .pending
-            .iter()
-            .find(|gl_fence| gl_fence.value >= wait_value);
+        let gl_fence = pending.iter().find(|gl_fence| gl_fence.value >= wait_value);
 
         let Some(gl_fence) = gl_fence else {
             log::warn!("Tried to wait for {wait_value} but that value has not been signalled yet");
             return Ok(false);
         };
 
+        // clone to show we're using the fence
+        let sync = gl_fence.sync.clone();
+        let fence_value = gl_fence.value;
+
+        drop(pending);
+
         let status = unsafe {
             gl.client_wait_sync(
-                gl_fence.sync,
+                *sync,
                 glow::SYNC_FLUSH_COMMANDS_BIT,
                 timeout_ns.min(i32::MAX as u32) as i32,
             )
         };
 
+        drop(sync);
+
         let signalled = match status {
             glow::ALREADY_SIGNALED | glow::CONDITION_SATISFIED => true,
             glow::TIMEOUT_EXPIRED | glow::WAIT_FAILED => false,
@@ -144,8 +175,7 @@ impl Fence {
         };
 
         if signalled {
-            self.last_completed
-                .fetch_max(gl_fence.value, Ordering::AcqRel);
+            self.last_completed.fetch_max(fence_value, Ordering::AcqRel);
         }
 
         Ok(signalled)
@@ -156,9 +186,12 @@ impl Fence {
             return;
         }
 
-        for gl_fence in self.pending {
+        for gl_fence in self.pending.into_inner() {
             unsafe {
-                gl.delete_sync(gl_fence.sync);
+                gl.delete_sync(
+                    Arc::into_inner(gl_fence.sync)
+                        .expect("A function has failed to drop all its references to this"),
+                );
             }
         }
     }
diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs
index 09ce9da9cac..967fc3b856e 100644
--- a/wgpu-hal/src/gles/queue.rs
+++ b/wgpu-hal/src/gles/queue.rs
@@ -1901,7 +1901,7 @@ impl crate::Queue for super::Queue {
         &self,
         command_buffers: &[&super::CommandBuffer],
         _surface_textures: &[&super::Texture],
-        (signal_fence, signal_value): (&mut super::Fence, crate::FenceValue),
+        (signal_fence, signal_value): (&super::Fence, crate::FenceValue),
     ) -> Result<(), crate::DeviceError> {
         let shared = Arc::clone(&self.shared);
         let gl = &shared.context.lock();
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 2183bd66971..a92933fd980 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1247,7 +1247,7 @@ pub trait Queue: WasmNotSendSync {
         &self,
         command_buffers: &[&<Self::A as Api>::CommandBuffer],
         surface_textures: &[&<Self::A as Api>::SurfaceTexture],
-        signal_fence: (&mut <Self::A as Api>::Fence, FenceValue),
+        signal_fence: (&<Self::A as Api>::Fence, FenceValue),
     ) -> Result<(), DeviceError>;
     /// Present a surface texture to the screen.
     ///
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index 508da11cc52..70bf2a7ee49 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -1,5 +1,6 @@
 use alloc::{borrow::ToOwned as _, sync::Arc, vec::Vec};
 use core::{ptr::NonNull, sync::atomic};
+use parking_lot::RwLock;
 use std::{thread, time};
 
 use bytemuck::TransparentWrapper;
@@ -1886,7 +1887,7 @@ impl crate::Device for super::Device {
         };
         Ok(super::Fence {
             completed_value: Arc::new(atomic::AtomicU64::new(0)),
-            pending_command_buffers: Vec::new(),
+            pending_command_buffers: RwLock::new(Vec::new()),
             shared_event,
         })
     }
@@ -1897,7 +1898,8 @@ impl crate::Device for super::Device {
 
     unsafe fn get_fence_value(&self, fence: &super::Fence) -> DeviceResult<crate::FenceValue> {
         let mut max_value = fence.completed_value.load(atomic::Ordering::Acquire);
-        for &(value, ref cmd_buf) in fence.pending_command_buffers.iter() {
+        let pending_command_buffers = fence.pending_command_buffers.read();
+        for &(value, ref cmd_buf) in pending_command_buffers.iter() {
             if cmd_buf.status() == MTLCommandBufferStatus::Completed {
                 max_value = value;
             }
@@ -1914,18 +1916,22 @@ impl crate::Device for super::Device {
             return Ok(true);
         }
 
-        let cmd_buf = match fence
-            .pending_command_buffers
+        let pending_command_buffers = fence.pending_command_buffers.read();
+
+        let cmd_buf = match pending_command_buffers
             .iter()
             .find(|&&(value, _)| value >= wait_value)
         {
-            Some((_, cmd_buf)) => cmd_buf,
+            Some((_, cmd_buf)) => cmd_buf.clone(),
             None => {
                 log::error!("No active command buffers for fence value {wait_value}");
                 return Err(crate::DeviceError::Lost);
             }
         };
 
+        // Make sure that nothing is blocked during the actual wait.
+        drop(pending_command_buffers);
+
         let start = time::Instant::now();
         loop {
             if let MTLCommandBufferStatus::Completed = cmd_buf.status() {
diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
index d7edd7ce6e3..9f7cb1601bd 100644
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@@ -531,7 +531,7 @@ impl crate::Queue for Queue {
         &self,
         command_buffers: &[&CommandBuffer],
         _surface_textures: &[&SurfaceTexture],
-        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
+        (signal_fence, signal_value): (&Fence, crate::FenceValue),
     ) -> Result<(), crate::DeviceError> {
         autoreleasepool(|_| {
             let extra_command_buffer = {
@@ -557,6 +557,7 @@ impl crate::Queue for Queue {
                 signal_fence.maintain();
                 signal_fence
                     .pending_command_buffers
+                    .write()
                     .push((signal_value, raw.clone()));
 
                 if let Some(shared_event) = &signal_fence.shared_event {
@@ -1022,13 +1023,15 @@ unsafe impl Sync for QuerySet {}
 pub struct Fence {
     completed_value: Arc<atomic::AtomicU64>,
     /// The pending fence values have to be ascending.
-    pending_command_buffers: Vec<(
-        crate::FenceValue,
-        Retained<ProtocolObject<dyn MTLCommandBuffer>>,
-    )>,
+    pending_command_buffers: RwLock<Vec<PendingCommandBuffer>>,
     shared_event: Option<Retained<ProtocolObject<dyn MTLSharedEvent>>>,
 }
 
+type PendingCommandBuffer = (
+    crate::FenceValue,
+    Retained<ProtocolObject<dyn MTLCommandBuffer>>,
+);
+
 impl crate::DynFence for Fence {}
 
 unsafe impl Send for Fence {}
@@ -1037,7 +1040,8 @@ unsafe impl Sync for Fence {}
 impl Fence {
     fn get_latest(&self) -> crate::FenceValue {
         let mut max_value = self.completed_value.load(atomic::Ordering::Acquire);
-        for &(value, ref cmd_buf) in self.pending_command_buffers.iter() {
+        let pending_command_buffers = self.pending_command_buffers.read();
+        for &(value, ref cmd_buf) in pending_command_buffers.iter() {
             if cmd_buf.status() == MTLCommandBufferStatus::Completed {
                 max_value = value;
             }
@@ -1045,9 +1049,10 @@ impl Fence {
         max_value
     }
 
-    fn maintain(&mut self) {
+    fn maintain(&self) {
         let latest = self.get_latest();
         self.pending_command_buffers
+            .write()
             .retain(|&(value, _)| value > latest);
     }
 
diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs
index 563b1b27de9..c8491ded4ef 100644
--- a/wgpu-hal/src/noop/mod.rs
+++ b/wgpu-hal/src/noop/mod.rs
@@ -262,7 +262,7 @@ impl crate::Queue for Context {
         &self,
         command_buffers: &[&CommandBuffer],
         surface_textures: &[&Resource],
-        (fence, fence_value): (&mut Fence, crate::FenceValue),
+        (fence, fence_value): (&Fence, crate::FenceValue),
     ) -> DeviceResult<()> {
         // All commands are executed synchronously.
         for cb in command_buffers {
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index f5f76de4eac..c7c8129ad46 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -10,7 +10,7 @@ use core::{
 use arrayvec::ArrayVec;
 use ash::{ext, vk};
 use hashbrown::hash_map::Entry;
-use parking_lot::Mutex;
+use parking_lot::{Mutex, RwLock};
 
 use super::{conv, descriptor::DescriptorCounts, RawTlasInstance};
 use crate::TlasInstance;
@@ -2279,11 +2279,11 @@ impl crate::Device for super::Device {
 
             super::Fence::TimelineSemaphore(raw)
         } else {
-            super::Fence::FencePool {
+            super::Fence::FencePool(RwLock::new(super::FencePool {
                 last_completed: 0,
                 active: Vec::new(),
                 free: Vec::new(),
-            }
+            }))
         })
     }
     unsafe fn destroy_fence(&self, fence: super::Fence) {
@@ -2291,13 +2291,17 @@ impl crate::Device for super::Device {
             super::Fence::TimelineSemaphore(raw) => {
                 unsafe { self.shared.raw.destroy_semaphore(raw, None) };
             }
-            super::Fence::FencePool {
-                active,
-                free,
-                last_completed: _,
-            } => {
+            super::Fence::FencePool(pool) => {
+                let super::FencePool {
+                    active,
+                    free,
+                    last_completed: _,
+                } = pool.into_inner();
+
                 for (_, raw) in active {
-                    unsafe { self.shared.raw.destroy_fence(raw, None) };
+                    unsafe {
+                        self.shared.raw.destroy_fence(Arc::into_inner(raw).expect("Fence should have its reference count be one by the end of each function"), None)
+                    };
                 }
                 for raw in free {
                     unsafe { self.shared.raw.destroy_fence(raw, None) };
@@ -2829,17 +2833,28 @@ impl super::DeviceShared {
                     Err(other) => Err(super::map_host_device_oom_and_lost_err(other)),
                 }
             }
-            super::Fence::FencePool {
-                last_completed,
-                ref active,
-                free: _,
-            } => {
+            super::Fence::FencePool(ref pool) => {
+                let pool = pool.read();
+                let super::FencePool {
+                    last_completed,
+                    ref active,
+                    free: _,
+                } = *pool;
                 if wait_value <= last_completed {
                     Ok(true)
                 } else {
                     match active.iter().find(|&&(value, _)| value >= wait_value) {
-                        Some(&(_, raw)) => {
-                            match unsafe { self.raw.wait_for_fences(&[raw], true, timeout_ns) } {
+                        Some((_, fence)) => {
+                            // clone to show we are using this fence while the pool is unlocked.
+                            let fence = fence.clone();
+                            drop(pool);
+                            match unsafe {
+                                self.raw.wait_for_fences(
+                                    core::slice::from_ref(&fence),
+                                    true,
+                                    timeout_ns,
+                                )
+                            } {
                                 Ok(()) => Ok(true),
                                 Err(vk::Result::TIMEOUT) => Ok(false),
                                 Err(other) => Err(super::map_host_device_oom_and_lost_err(other)),
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 124df07c8aa..7d18b6bd48d 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -1162,14 +1162,29 @@ pub enum Fence {
     /// for each queue submission we might want to wait for, and remember which
     /// [`FenceValue`] each one represents.
     ///
+    /// One should keep the fence pool read while there are any references to the
+    /// fences inside of them. This ensures there are no race conditions when
+    /// resetting the fences
+    ///
     /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
     /// [`FenceValue`]: crate::FenceValue
-    FencePool {
-        last_completed: crate::FenceValue,
-        /// The pending fence values have to be ascending.
-        active: Vec<(crate::FenceValue, vk::Fence)>,
-        free: Vec<vk::Fence>,
-    },
+    FencePool(RwLock<FencePool>),
+}
+
+/// A shared fence type. The arc is expect to have a ref-count of one once a function has finished being called
+///
+/// A fence should have access synchronised as fence resetting might happen at any point. Resetting checks the ref-count
+/// of the fence, so instead of copying the fence, it should have its `Arc` container cloned which shows not to reset
+/// this fence as it is being used.
+pub(super) type SynchronizedFence = Arc<vk::Fence>;
+
+#[derive(Debug)]
+pub struct FencePool {
+    last_completed: crate::FenceValue,
+    /// The pending fence values have to be ascending.
+    active: Vec<(crate::FenceValue, SynchronizedFence)>,
+    // Don't need extra synchronisation around the fences here, if they are used they should be put into active.
+    free: Vec<vk::Fence>,
 }
 
 impl crate::DynFence for Fence {}
@@ -1186,13 +1201,15 @@ impl Fence {
     fn check_active(
         device: &ash::Device,
         mut last_completed: crate::FenceValue,
-        active: &[(crate::FenceValue, vk::Fence)],
+        active: &[(crate::FenceValue, SynchronizedFence)],
     ) -> Result<crate::FenceValue, crate::DeviceError> {
-        for &(value, raw) in active.iter() {
+        for &(value, ref raw) in active.iter() {
             unsafe {
                 if value > last_completed
                     && device
-                        .get_fence_status(raw)
+                        // Don't need to clone as active should be from a read or
+                        // write lock which means this is already synchronised.
+                        .get_fence_status(**raw)
                         .map_err(map_host_device_oom_and_lost_err)?
                 {
                     last_completed = value;
@@ -1221,11 +1238,14 @@ impl Fence {
                         .map_err(map_host_device_oom_and_lost_err)?,
                 })
             },
-            Self::FencePool {
-                last_completed,
-                ref active,
-                free: _,
-            } => Self::check_active(device, last_completed, active),
+            Self::FencePool(ref pool) => {
+                let FencePool {
+                    last_completed,
+                    ref active,
+                    free: _,
+                } = *pool.read();
+                Self::check_active(device, last_completed, active)
+            }
         }
     }
 
@@ -1241,23 +1261,35 @@ impl Fence {
     ///
     /// [`FencePool`]: Fence::FencePool
     /// [`Queue::submit`]: crate::Queue::submit
-    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
+    fn maintain(&self, device: &ash::Device) -> Result<(), crate::DeviceError> {
         match *self {
             Self::TimelineSemaphore(_) => {}
-            Self::FencePool {
-                ref mut last_completed,
-                ref mut active,
-                ref mut free,
-            } => {
-                let latest = Self::check_active(device, *last_completed, active)?;
+            Self::FencePool(ref pool) => {
+                let FencePool {
+                    ref mut last_completed,
+                    ref mut active,
+                    ref mut free,
+                } = *pool.write();
+
                 let base_free = free.len();
-                for &(value, raw) in active.iter() {
-                    if value <= latest {
-                        free.push(raw);
+                let latest = Self::check_active(device, *last_completed, active)?;
+
+                active.retain_mut(|&mut (value, ref mut fence)| {
+                    if value > latest {
+                        true
+                    } else if let Some(fence) = Arc::get_mut(fence) {
+                        // No other references to these, so we have exclusive access. Add them to free and reset them later,
+                        // but drop them from active immediately
+                        free.push(*fence);
+                        false
+                    } else {
+                        // some other function is using it. Although this shouldn't be to long,
+                        // maintain shouldn't block, and it should be cleared up by the next time it happens
+                        true
                     }
-                }
+                });
+
                 if free.len() != base_free {
-                    active.retain(|&(value, _)| value > latest);
                     unsafe { device.reset_fences(&free[base_free..]) }
                         .map_err(map_device_oom_err)?
                 }
@@ -1275,7 +1307,7 @@ impl crate::Queue for Queue {
         &self,
         command_buffers: &[&CommandBuffer],
         surface_textures: &[&SurfaceTexture],
-        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
+        (signal_fence, signal_value): (&Fence, crate::FenceValue),
     ) -> Result<(), crate::DeviceError> {
         let mut fence_raw = vk::Fence::null();
 
@@ -1343,25 +1375,33 @@ impl crate::Queue for Queue {
 
         // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
         signal_fence.maintain(&self.device.raw)?;
+        // Keeping the Arc around is probably unneeded - the fence should never be signaled as it was reset,
+        // and newer submits should not happen until this submit is done. Therefore, it should be too high
+        // to be reset.
+        let shared_fence;
         match *signal_fence {
             Fence::TimelineSemaphore(raw) => {
                 signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
             }
-            Fence::FencePool {
-                ref mut active,
-                ref mut free,
-                ..
-            } => {
-                fence_raw = match free.pop() {
-                    Some(raw) => raw,
+            Fence::FencePool(ref pool) => {
+                let FencePool {
+                    ref mut active,
+                    ref mut free,
+                    ..
+                } = *pool.write();
+                shared_fence = match free.pop() {
+                    Some(raw) => Arc::new(raw),
                     None => unsafe {
-                        self.device
+                        let fence = self
+                            .device
                             .raw
                             .create_fence(&vk::FenceCreateInfo::default(), None)
-                            .map_err(map_host_device_oom_err)?
+                            .map_err(map_host_device_oom_err)?;
+                        Arc::new(fence)
                     },
                 };
-                active.push((signal_value, fence_raw));
+                fence_raw = *shared_fence;
+                active.push((signal_value, shared_fence.clone()));
             }
         }