diff --git a/modules/python/clients/aks_client.py b/modules/python/clients/aks_client.py
index df20965955..4e571b2840 100644
--- a/modules/python/clients/aks_client.py
+++ b/modules/python/clients/aks_client.py
@@ -268,11 +268,12 @@ def _begin_update_with_retry(
         retries: int = 10,
         retry_wait: int = 30,
         poll_interval: int = 30,
-        timeout: int = 1200,
+        timeout: int = 1800,
     ) -> None:
         """
         Call begin_create_or_update with retry on OperationNotAllowed/EtagMismatch,
         polling every poll_interval seconds and raising TimeoutError after timeout seconds.
+        timeout defaults to 1800s (30 min) for slow GPU node provisioning (A100 MIG).
         """
         for attempt in range(retries):
             try:
@@ -363,6 +364,70 @@ def add_managed_gpu_node_pool(
                 )
         logger.info(f"az aks nodepool add succeeded for '{node_pool_name}'")
 
+    @staticmethod
+    def _gpu_mode_metadata(
+        gpu_node_pool: bool,
+        enable_managed_gpu: bool,
+        gpu_instance_profile: Optional[str] = None,
+        gpu_mig_strategy: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Build normalized GPU-mode metadata from the operation's input flags.
+
+        Derived from the operation INPUT flags rather than the AKS read-back: the
+        stable SDK does not model gpuProfile.nvidia.managementMode, so a
+        fully-managed pool's mode is dropped from nodepool_info. Normalization:
+        enable_managed_gpu only counts on a GPU pool, MIG fields are kept only for
+        fully-managed pools, and an empty profile/strategy is reported as None.
+
+        Returns a dict with gpu_mode ("none"|"managed"|"fully_managed"),
+        enable_managed_gpu, mig_enabled, gpu_instance_profile, gpu_mig_strategy.
+        Raises ValueError if gpu_mig_strategy is not None / "single" / "mixed"
+        (case-insensitive), even when the strategy would be dropped afterwards.
+        """
+        strategy = str(gpu_mig_strategy).lower() if gpu_mig_strategy else None
+        if strategy not in (None, "single", "mixed"):
+            raise ValueError(
+                f"invalid gpu_mig_strategy {gpu_mig_strategy!r} (want single/mixed/None)"
+            )
+
+        is_gpu = bool(gpu_node_pool)
+        fully_managed = is_gpu and bool(enable_managed_gpu)
+        if not is_gpu:
+            gpu_mode = "none"
+        elif fully_managed:
+            gpu_mode = "fully_managed"
+        else:
+            gpu_mode = "managed"
+
+        # MIG only applies to fully-managed pools; drop it otherwise. An empty
+        # profile collapses to None, mirroring how an empty strategy is treated.
+        profile = (gpu_instance_profile or None) if fully_managed else None
+        strategy = strategy if fully_managed else None
+
+        return {
+            "gpu_mode": gpu_mode,
+            "enable_managed_gpu": fully_managed,
+            "mig_enabled": bool(profile or strategy),
+            "gpu_instance_profile": profile,
+            "gpu_mig_strategy": strategy,
+        }
+
+    @staticmethod
+    def _log_gpu_mode(metadata: Dict[str, Any]) -> None:
+        """Log the GPU-mode fields at INFO; no-op when gpu_mode is missing or "none"."""
+        mode = metadata.get("gpu_mode")
+        if mode in (None, "none"):
+            return
+        # Remaining fields are logged in a fixed order; absent keys render as None.
+        fields = ("enable_managed_gpu", "mig_enabled", "gpu_instance_profile", "gpu_mig_strategy")
+        logger.info(
+            "GPU pool metadata: gpu_mode=%s enable_managed_gpu=%s mig_enabled=%s "
+            "gpu_instance_profile=%s gpu_mig_strategy=%s",
+            mode,
+            *(metadata.get(field) for field in fields),
+        )
+
     def create_node_pool(
         self,
         node_pool_name: str,
@@ -409,8 +474,14 @@ def create_node_pool(
             "vm_size": vm_size,
             "node_count": node_count,
             "gpu_node_pool": gpu_node_pool,
-            "enable_managed_gpu": enable_managed_gpu,
+            **self._gpu_mode_metadata(
+                gpu_node_pool,
+                enable_managed_gpu,
+                gpu_instance_profile,
+                gpu_mig_strategy,
+            ),
         }
+        self._log_gpu_mode(metadata)
 
         # Create operation context to track the operation
         with self._get_operation_context()(
@@ -522,6 +593,7 @@ def scale_node_pool(
         progressive: bool = False,
         scale_step_size: int = 1,
         gpu_instance_profile: Optional[str] = None,
+        gpu_mig_strategy: Optional[str] = None,
     ) -> Any:
         """
         Scale a node pool to the specified node count.
@@ -555,7 +627,14 @@ def scale_node_pool(
             "gpu_node_pool": gpu_node_pool,
             "progressive_scaling": progressive,
             "scale_step_size": scale_step_size,
+            **self._gpu_mode_metadata(
+                gpu_node_pool,
+                enable_managed_gpu,
+                gpu_instance_profile,
+                gpu_mig_strategy,
+            ),
         }
+        self._log_gpu_mode(metadata)
         node_pool = self.get_node_pool(node_pool_name, cluster_name)
 
         current_count = node_pool.count
@@ -583,6 +662,7 @@ def scale_node_pool(
                 enable_managed_gpu=enable_managed_gpu,
                 node_pool=node_pool,
                 gpu_instance_profile=gpu_instance_profile,
+                gpu_mig_strategy=gpu_mig_strategy,
             )
 
         # Create operation context to track the operation
@@ -751,6 +831,7 @@ def _progressive_scale(
         enable_managed_gpu: bool = False,
         node_pool: Optional[Any] = None,
         gpu_instance_profile: Optional[str] = None,
+        gpu_mig_strategy: Optional[str] = None,
     ) -> Any:
         """
         Scale a node pool progressively with specified step size
@@ -816,7 +897,14 @@ def _progressive_scale(
                 "scale_step_size": scale_step_size,
                 "cluster_name": cluster_name or self.get_cluster_name(),
                 "gpu_node_pool": gpu_node_pool,
+                **self._gpu_mode_metadata(
+                    gpu_node_pool,
+                    enable_managed_gpu,
+                    gpu_instance_profile,
+                    gpu_mig_strategy,
+                ),
             }
+            self._log_gpu_mode(step_metadata)
 
             # Create operation context for this specific step
             with self._get_operation_context()(
diff --git a/modules/python/clients/kubernetes_client.py b/modules/python/clients/kubernetes_client.py
index 75caa81bb0..9d7b171353 100644
--- a/modules/python/clients/kubernetes_client.py
+++ b/modules/python/clients/kubernetes_client.py
@@ -683,18 +683,29 @@ def verify_nvidia_smi_on_node(self, nodes, namespace="default"):
                 logger.info(f"Verifying NVIDIA drivers on node {node_name}")
                 node = self.describe_node(node_name)
 
-                # Check if the node has GPUs allocated values (whole GPU or MIG slices)
+                # Wait for the node to advertise a POSITIVE GPU/MIG count. The device
+                # plugin can register nvidia.com/gpu with value "0" before MIG instances
+                # are published, so a MIG-single node briefly looks GPU-less. Breaking
+                # out on key presence alone would end the wait during that window and
+                # skip the node, so the loop waits for a positive count instead.
                 start_time = time.time()
+                gpu_count = 0
                 while time.time() < start_time + 600:
                     allocatable = node.status.allocatable or {}
-                    if "nvidia.com/gpu" in allocatable or any(k.startswith("nvidia.com/mig-") for k in allocatable):
+                    gpu_count = int(allocatable.get("nvidia.com/gpu", "0"))
+                    mig_count = sum(
+                        int(v) for k, v in allocatable.items()
+                        if k.startswith("nvidia.com/mig-")
+                    )
+                    if gpu_count > 0 or mig_count > 0:
                         break
-                    node = self.describe_node(node_name)
-                    logger.info(f"Node allocatable resources: {node.status.allocatable}")
-                    logger.info(f"Waiting for GPUs to be allocated on node {node_name}...")
+                    logger.info(
+                        f"Waiting for GPUs to be allocated on node {node_name}... "
+                        f"(allocatable: {allocatable})"
+                    )
                     time.sleep(1)
-                gpu_count = int(node.status.allocatable.get("nvidia.com/gpu", "0"))
-                has_mig = any(k.startswith("nvidia.com/mig-") for k in node.status.allocatable)
+                    node = self.describe_node(node_name)
+                has_mig = any(k.startswith("nvidia.com/mig-") for k in (node.status.allocatable or {}))
 
                 logger.info(f"Node {node_name} has {gpu_count} GPUs, requesting all for validation")
 
diff --git a/modules/python/crud/azure/node_pool_crud.py b/modules/python/crud/azure/node_pool_crud.py
index 6f52e69cfd..998299f0cb 100644
--- a/modules/python/crud/azure/node_pool_crud.py
+++ b/modules/python/crud/azure/node_pool_crud.py
@@ -118,6 +118,7 @@ def scale_node_pool(
         gpu_node_pool=False,
         enable_managed_gpu=False,
         gpu_instance_profile=None,
+        gpu_mig_strategy=None,
     ):
         """
         Scale a node pool to specified count
@@ -145,6 +146,7 @@ def scale_node_pool(
                 progressive=progressive,
                 scale_step_size=scale_step_size,
                 gpu_instance_profile=gpu_instance_profile,
+                gpu_mig_strategy=gpu_mig_strategy,
             )
 
             if result is not None:
@@ -253,6 +255,7 @@ def all(
                 gpu_node_pool=gpu_node_pool,
                 enable_managed_gpu=enable_managed_gpu,
                 gpu_instance_profile=gpu_instance_profile,
+                gpu_mig_strategy=gpu_mig_strategy,
             )
             results["scale_up"] = scale_up_result
 
@@ -276,6 +279,8 @@ def all(
                 scale_step_size=scale_step_size,
                 gpu_node_pool=gpu_node_pool,
                 enable_managed_gpu=enable_managed_gpu,
+                gpu_instance_profile=gpu_instance_profile,
+                gpu_mig_strategy=gpu_mig_strategy,
             )
             results["scale_down"] = scale_down_result
 
diff --git a/modules/python/crud/main.py b/modules/python/crud/main.py
index 60b605c1d7..1fad21a6a3 100644
--- a/modules/python/crud/main.py
+++ b/modules/python/crud/main.py
@@ -107,6 +107,16 @@ def handle_node_pool_operation(node_pool_crud, args):
     command = args.command
     result = None
 
+    # gpu_instance_profile / gpu_mig_strategy are Azure-only MIG inputs. The AWS
+    # CRUD does not accept these kwargs (and has no **kwargs), so passing them for
+    # --cloud aws would raise TypeError. Only forward them on Azure.
+    azure_gpu_kwargs = {}
+    if args.cloud == "azure":
+        azure_gpu_kwargs = {
+            "gpu_instance_profile": args.gpu_instance_profile,
+            "gpu_mig_strategy": args.gpu_mig_strategy,
+        }
+
     try:
         if command == "create":
             # Prepare create arguments
@@ -116,8 +126,7 @@ def handle_node_pool_operation(node_pool_crud, args):
                 "node_count": args.node_count,
                 "gpu_node_pool": args.gpu_node_pool,
                 "enable_managed_gpu": args.enable_managed_gpu,
-                "gpu_instance_profile": args.gpu_instance_profile,
-                "gpu_mig_strategy": args.gpu_mig_strategy,
+                **azure_gpu_kwargs,
             }
 
             result = node_pool_crud.create_node_pool(**create_kwargs)
@@ -131,7 +140,7 @@ def handle_node_pool_operation(node_pool_crud, args):
                 "scale_step_size": args.scale_step_size,
                 "gpu_node_pool": args.gpu_node_pool,
                 "enable_managed_gpu": args.enable_managed_gpu,
-                "gpu_instance_profile": args.gpu_instance_profile,
+                **azure_gpu_kwargs,
             }
 
             result = node_pool_crud.scale_node_pool(**scale_kwargs)
@@ -151,6 +160,7 @@ def handle_node_pool_operation(node_pool_crud, args):
                 "gpu_node_pool": args.gpu_node_pool,
                 "enable_managed_gpu": args.enable_managed_gpu,
                 "step_wait_time": args.step_wait_time,
+                **azure_gpu_kwargs,
             }
 
             result = node_pool_crud.all(**all_kwargs)
diff --git a/modules/python/tests/clients/test_aks_client.py b/modules/python/tests/clients/test_aks_client.py
index 1070e3fc32..d9f6a6f11f 100644
--- a/modules/python/tests/clients/test_aks_client.py
+++ b/modules/python/tests/clients/test_aks_client.py
@@ -629,6 +629,109 @@ def test_scale_gpu_node_pool_down_no_verification(self, mock_time):
         # Check that NVIDIA verification was NOT performed for scale-down
         self.mock_k8s.verify_nvidia_smi_on_node.assert_not_called()
 
+    def test_gpu_mode_metadata_variants(self):
+        """_gpu_mode_metadata normalizes managed/fully-managed and MIG single/mixed inputs."""
+        gpu_meta = AKSClient._gpu_mode_metadata  # pylint: disable=protected-access
+        # Non-GPU pool: the full dict is pinned, every field at its "off" value
+        self.assertEqual(
+            gpu_meta(False, False),
+            {
+                "gpu_mode": "none",
+                "enable_managed_gpu": False,
+                "mig_enabled": False,
+                "gpu_instance_profile": None,
+                "gpu_mig_strategy": None,
+            },
+        )
+        # Managed: GPU pool without enable_managed_gpu
+        managed = gpu_meta(True, False)
+        self.assertEqual(managed["gpu_mode"], "managed")
+        self.assertFalse(managed["enable_managed_gpu"])
+        self.assertFalse(managed["mig_enabled"])
+        # Fully managed: GPU pool with enable_managed_gpu, no MIG inputs
+        fully = gpu_meta(True, True)
+        self.assertEqual(fully["gpu_mode"], "fully_managed")
+        self.assertTrue(fully["enable_managed_gpu"])
+        # Fully managed + MIG mixed: profile and strategy are passed through
+        mixed = gpu_meta(True, True, "MIG1g", "mixed")
+        self.assertEqual(mixed["gpu_mode"], "fully_managed")
+        self.assertTrue(mixed["mig_enabled"])
+        self.assertEqual(mixed["gpu_instance_profile"], "MIG1g")
+        self.assertEqual(mixed["gpu_mig_strategy"], "mixed")
+        # Fully managed + MIG single
+        single = gpu_meta(True, True, "MIG1g", "single")
+        self.assertEqual(single["gpu_mig_strategy"], "single")
+        self.assertTrue(single["mig_enabled"])
+        # Normalization: MIG inputs are dropped for non-fully-managed pools
+        managed_with_mig = gpu_meta(True, False, "MIG1g", "single")
+        self.assertEqual(managed_with_mig["gpu_mode"], "managed")
+        self.assertFalse(managed_with_mig["mig_enabled"])
+        self.assertIsNone(managed_with_mig["gpu_instance_profile"])
+        self.assertIsNone(managed_with_mig["gpu_mig_strategy"])
+        # Normalization: enable_managed_gpu is ignored when the pool is not a GPU pool
+        not_gpu = gpu_meta(False, True)
+        self.assertEqual(not_gpu["gpu_mode"], "none")
+        self.assertFalse(not_gpu["enable_managed_gpu"])
+        # A strategy other than single/mixed is rejected with ValueError
+        with self.assertRaises(ValueError):
+            gpu_meta(True, True, "MIG1g", "bogus")
+
+    def test_log_gpu_mode_console_echo(self):
+        """_log_gpu_mode logs GPU metadata at INFO and stays silent when gpu_mode is "none"."""
+        log_gpu_mode = AKSClient._log_gpu_mode  # pylint: disable=protected-access
+        with self.assertLogs("clients.aks_client", level="INFO") as cm:
+            log_gpu_mode(
+                {
+                    "gpu_mode": "fully_managed",
+                    "enable_managed_gpu": True,
+                    "mig_enabled": True,
+                    "gpu_instance_profile": "MIG1g",
+                    "gpu_mig_strategy": "mixed",
+                }
+            )
+        self.assertTrue(any("gpu_mode=fully_managed" in m for m in cm.output))
+        self.assertTrue(any("gpu_mig_strategy=mixed" in m for m in cm.output))
+        # gpu_mode "none" must log nothing. NOTE(review): assertNoLogs needs Python 3.10+ — confirm CI.
+        with self.assertNoLogs("clients.aks_client", level="INFO"):
+            log_gpu_mode({"gpu_mode": "none"})
+
+    @mock.patch("clients.aks_client.time")
+    def test_scale_node_pool_records_gpu_mode_metadata(self, mock_time):
+        """scale_node_pool passes gpu_mode and the MIG inputs into the OperationContext metadata."""
+        node_pool_name = "h100fullmgd"
+        node_count = 3
+
+        mock_time.time.side_effect = [100, 150]  # exactly two clock readings are available
+
+        mock_node_pool = mock.MagicMock()
+        mock_node_pool.count = 1  # current size is below node_count, so this is a scale-up
+        mock_node_pool.vm_size = "Standard_NC40ads_H100_v5"
+        mock_node_pool.as_dict.return_value = {"count": 1}
+        self.mock_agent_pools.get.return_value = mock_node_pool
+        self.aks_client.get_node_pool = mock.MagicMock(return_value=mock_node_pool)
+        self.mock_k8s.wait_for_nodes_ready.return_value = [mock.MagicMock()] * node_count
+        self.mock_k8s.verify_managed_gpu_systemd_services = mock.MagicMock(return_value={})
+        self.mock_k8s.verify_nvidia_smi_on_node = mock.MagicMock()
+        self.mock_k8s.verify_mig_allocatable = mock.MagicMock(return_value={})
+
+        result = self.aks_client.scale_node_pool(
+            node_pool_name=node_pool_name,
+            node_count=node_count,
+            gpu_node_pool=True,
+            enable_managed_gpu=True,
+            gpu_instance_profile="MIG1g",
+            gpu_mig_strategy="mixed",
+        )
+
+        self.assertTrue(result)
+        # The metadata dict is the 3rd positional arg of the most recent OperationContext(...) call
+        metadata = self.mock_operation_context.call_args[0][2]
+        self.assertEqual(metadata["gpu_mode"], "fully_managed")
+        self.assertTrue(metadata["enable_managed_gpu"])
+        self.assertTrue(metadata["mig_enabled"])
+        self.assertEqual(metadata["gpu_instance_profile"], "MIG1g")
+        self.assertEqual(metadata["gpu_mig_strategy"], "mixed")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/modules/python/tests/clients/test_kubernetes_client.py b/modules/python/tests/clients/test_kubernetes_client.py
index a990fa06e1..0ab752312e 100644
--- a/modules/python/tests/clients/test_kubernetes_client.py
+++ b/modules/python/tests/clients/test_kubernetes_client.py
@@ -2,6 +2,7 @@
 """
 Unit tests for KubernetesClient class
 """
+import itertools
 import unittest
 from unittest import mock
 from unittest.mock import patch, mock_open, MagicMock
@@ -2113,19 +2114,64 @@ def test_verify_nvidia_smi_general_exception(self, mock_describe_node, _mock_cre
         result = self.client.verify_nvidia_smi_on_node([node])
         self.assertFalse(result)
 
+    @patch("time.time", side_effect=itertools.count(0, 1000))  # mocked clock: +1000s per call
+    @patch("time.sleep", return_value=None)
     @patch("clients.kubernetes_client.KubernetesClient.describe_node")
-    def test_verify_nvidia_smi_no_gpu_nodes(self, mock_describe_node):
-        """Test nvidia-smi verification skips nodes with no GPUs."""
+    def test_verify_nvidia_smi_no_gpu_nodes(self, mock_describe_node, _mock_sleep, _mock_time):
+        """Nodes that never advertise a positive GPU/MIG count are skipped (after the wait)."""
         node = MagicMock()
         node.metadata.name = "cpu-only-node"
         node.status.allocatable = {"nvidia.com/gpu": "0"}
 
-        # Mock describe_node to return the node with no GPUs
+        # describe_node keeps reporting 0 GPUs; the mocked clock expires the wait, so the node is skipped.
         mock_describe_node.return_value = node
 
         result = self.client.verify_nvidia_smi_on_node([node])
         self.assertEqual(result, {})  # Should return empty dict as no nodes processed
 
+    @patch("time.time", side_effect=itertools.count(0, 1))  # mocked clock: +1s per call
+    @patch("time.sleep", return_value=None)
+    @patch("clients.kubernetes_client.KubernetesClient.get_pod_logs")
+    @patch("kubernetes.client.CoreV1Api.delete_namespaced_pod")
+    @patch("kubernetes.client.CoreV1Api.read_namespaced_pod")
+    @patch("kubernetes.client.CoreV1Api.create_namespaced_pod")
+    @patch("clients.kubernetes_client.KubernetesClient.describe_node")
+    def test_verify_nvidia_smi_waits_for_mig_registration(
+        self,
+        mock_describe_node,
+        mock_create_pod,
+        mock_read_pod,
+        _mock_delete_pod,
+        mock_get_logs,
+        _mock_sleep,
+        _mock_time,
+    ):
+        """A node reporting 0 GPUs mid-registration is NOT skipped — wait for a positive count."""
+        node_name = "mig-single-node"
+        zero_node = MagicMock()
+        zero_node.metadata.name = node_name
+        zero_node.status.allocatable = {"nvidia.com/gpu": "0"}  # device plugin registered, not yet populated
+        ready_node = MagicMock()
+        ready_node.metadata.name = node_name
+        ready_node.status.allocatable = {"nvidia.com/gpu": "56"}  # MIG-single instances published
+
+        # First describe (pre-loop) sees 0; after one wait iteration it sees 56.
+        mock_describe_node.side_effect = [zero_node, ready_node]  # a third call would raise StopIteration
+        mock_read_pod.side_effect = [
+            MagicMock(status=MagicMock(phase="Pending")),
+            MagicMock(status=MagicMock(phase="Succeeded")),
+        ]
+        mock_get_logs.return_value = "NVIDIA-SMI GPU driver info"
+
+        result = self.client.verify_nvidia_smi_on_node([zero_node])
+
+        # The node was verified, not skipped, and the pod limit asked for "1" nvidia.com/gpu.
+        self.assertIn(node_name, result)
+        self.assertTrue(result[node_name]["device_status"])
+        self.assertGreaterEqual(mock_describe_node.call_count, 2)
+        pod_spec = mock_create_pod.call_args[1]["body"]
+        self.assertEqual(pod_spec.spec.containers[0].resources.limits["nvidia.com/gpu"], "1")
+
     @patch("kubernetes.client.AppsV1Api.create_namespaced_daemon_set")
     @patch("requests.get")
     def test_install_gpu_device_plugin_success(self, mock_requests_get, mock_create_ds):
diff --git a/modules/python/tests/crud/test_azure_node_pool_crud.py b/modules/python/tests/crud/test_azure_node_pool_crud.py
index 4fd41a0310..0889877c79 100644
--- a/modules/python/tests/crud/test_azure_node_pool_crud.py
+++ b/modules/python/tests/crud/test_azure_node_pool_crud.py
@@ -114,6 +114,7 @@ def test_scale_node_pool_up(self):
             progressive=False,
             scale_step_size=1,
             gpu_instance_profile=None,
+            gpu_mig_strategy=None,
         )
 
     def test_scale_node_pool_down(self):
@@ -143,6 +144,7 @@ def test_scale_node_pool_down(self):
             progressive=False,
             scale_step_size=1,
             gpu_instance_profile=None,
+            gpu_mig_strategy=None,
         )
 
     def test_delete_node_pool(self):
diff --git a/modules/python/tests/crud/test_main.py b/modules/python/tests/crud/test_main.py
index f215e21f62..ccbb189ba4 100644
--- a/modules/python/tests/crud/test_main.py
+++ b/modules/python/tests/crud/test_main.py
@@ -68,6 +68,7 @@ def test_handle_node_pool_operation_create(self, mock_azure_crud):
         # Setup
         mock_args = mock.MagicMock()
         mock_args.command = "create"
+        mock_args.cloud = "azure"
         mock_args.node_pool_name = "test-np"
         mock_args.vm_size = "Standard_D2s_v3"
         mock_args.node_count = 3
@@ -98,6 +99,7 @@ def test_handle_node_pool_operation_scale(self, mock_azure_crud):
         # Setup
         mock_args = mock.MagicMock()
         mock_args.command = "scale"
+        mock_args.cloud = "azure"
         mock_args.node_pool_name = "test-np"
         mock_args.target_count = 5
         mock_args.scale_step_size = (
@@ -122,6 +124,7 @@ def test_handle_node_pool_operation_scale(self, mock_azure_crud):
             gpu_node_pool=False,
             enable_managed_gpu=False,
             gpu_instance_profile=mock_args.gpu_instance_profile,
+            gpu_mig_strategy=mock_args.gpu_mig_strategy,
         )
 
     @mock.patch("crud.main.AzureNodePoolCRUD")
@@ -130,6 +133,7 @@ def test_handle_node_pool_operation_scale_non_progressive(self, mock_azure_crud)
         # Setup - when scale_step_size equals target_count, progressive should be False
         mock_args = mock.MagicMock()
         mock_args.command = "scale"
+        mock_args.cloud = "azure"
         mock_args.node_pool_name = "test-np"
         mock_args.target_count = 3
         mock_args.scale_step_size = (
@@ -154,6 +158,7 @@ def test_handle_node_pool_operation_scale_non_progressive(self, mock_azure_crud)
             gpu_node_pool=False,
             enable_managed_gpu=False,
             gpu_instance_profile=mock_args.gpu_instance_profile,
+            gpu_mig_strategy=mock_args.gpu_mig_strategy,
         )
 
     @mock.patch("crud.main.logger")
@@ -170,6 +175,7 @@ def test_handle_node_pool_operation_scale_fails_returns_error(
         # Setup - progressive scaling where operation fails
         mock_args = mock.MagicMock()
         mock_args.command = "scale"
+        mock_args.cloud = "azure"
         mock_args.node_pool_name = "test-np"
         mock_args.target_count = 10
         mock_args.scale_step_size = 2  # Progressive scaling
@@ -192,6 +198,7 @@ def test_handle_node_pool_operation_scale_fails_returns_error(
             gpu_node_pool=False,
             enable_managed_gpu=False,
             gpu_instance_profile=mock_args.gpu_instance_profile,
+            gpu_mig_strategy=mock_args.gpu_mig_strategy,
         )
         mock_logger.error.assert_called_with("Operation 'scale' failed")
 
@@ -257,6 +264,7 @@ def test_handle_node_pool_operation_all(self, mock_azure_crud):
         # Setup
         mock_args = mock.MagicMock()
         mock_args.command = "all"
+        mock_args.cloud = "azure"
         mock_args.node_pool_name = "test-np"
         mock_args.vm_size = "Standard_D2s_v3"
         mock_args.node_count = 1
@@ -284,14 +292,38 @@ def test_handle_node_pool_operation_all(self, mock_azure_crud):
             gpu_node_pool=True,
             enable_managed_gpu=False,
             step_wait_time=30,
+            gpu_instance_profile=mock_args.gpu_instance_profile,
+            gpu_mig_strategy=mock_args.gpu_mig_strategy,
         )
 
+    def test_handle_node_pool_operation_scale_aws_omits_mig_kwargs(self):
+        """AWS scale must not receive Azure-only MIG kwargs (the AWS CRUD rejects them)."""
+        mock_args = mock.MagicMock()  # gpu_instance_profile / gpu_mig_strategy exist as auto-attributes
+        mock_args.command = "scale"
+        mock_args.cloud = "aws"  # not "azure", so the MIG kwargs must be left out
+        mock_args.node_pool_name = "test-np"
+        mock_args.target_count = 5
+        mock_args.scale_step_size = 1
+        mock_args.gpu_node_pool = False
+        mock_args.enable_managed_gpu = False
+
+        mock_crud = mock.MagicMock()
+        mock_crud.scale_node_pool.return_value = True
+
+        result = handle_node_pool_operation(mock_crud, mock_args)
+
+        self.assertEqual(result, 0)
+        call_kwargs = mock_crud.scale_node_pool.call_args.kwargs  # keyword args of the last call
+        self.assertNotIn("gpu_instance_profile", call_kwargs)
+        self.assertNotIn("gpu_mig_strategy", call_kwargs)
+
     @mock.patch("crud.main.AzureNodePoolCRUD")
     def test_handle_node_pool_operation_failure(self, mock_azure_crud):
         """Test handle_node_pool_operation when operation fails"""
         # Setup
         mock_args = mock.MagicMock()
         mock_args.command = "create"
+        mock_args.cloud = "azure"
         mock_args.node_pool_name = "test-np"
         mock_args.vm_size = "Standard_D2s_v3"
         mock_args.node_count = 1
@@ -353,6 +385,7 @@ def test_handle_node_pool_operation_exception(self, mock_azure_crud, mock_logger
         # Setup
         mock_args = mock.MagicMock()
         mock_args.command = "create"
+        mock_args.cloud = "azure"
         mock_args.node_pool_name = "test-np"
         mock_args.vm_size = "Standard_D2s_v3"
         mock_args.node_count = 1
diff --git a/steps/engine/crud/k8s/execute.yml b/steps/engine/crud/k8s/execute.yml
index fa666a0023..5ee588cf0c 100644
--- a/steps/engine/crud/k8s/execute.yml
+++ b/steps/engine/crud/k8s/execute.yml
@@ -16,8 +16,8 @@ steps:
       --step-timeout "$STEP_TIME_OUT" \
       ${GPU_NODE_POOL:+--gpu-node-pool} \
       $([[ "${ENABLE_MANAGED_GPU,,}" == "true" ]] && echo "--enable-managed-gpu" || true) \
-      $([[ "${GPU_INSTANCE_PROFILE}" =~ ^MIG ]] && echo "--gpu-instance-profile ${GPU_INSTANCE_PROFILE}" || true) \
-      $([[ "${GPU_MIG_STRATEGY}" =~ ^(mixed|single)$ ]] && echo "--gpu-mig-strategy ${GPU_MIG_STRATEGY}" || true) \
+      $([[ "${ENABLE_MANAGED_GPU,,}" == "true" && "${GPU_INSTANCE_PROFILE}" =~ ^MIG ]] && echo "--gpu-instance-profile ${GPU_INSTANCE_PROFILE}" || true) \
+      $([[ "${ENABLE_MANAGED_GPU,,}" == "true" && "${GPU_MIG_STRATEGY}" =~ ^(mixed|single)$ ]] && echo "--gpu-mig-strategy ${GPU_MIG_STRATEGY}" || true) \
       --capacity-type "${CAPACITY_TYPE:-ON_DEMAND}"
 
     # Scale Up Node Pool
@@ -32,7 +32,8 @@ steps:
       --step-timeout "$STEP_TIME_OUT" \
       ${GPU_NODE_POOL:+--gpu-node-pool} \
       $([[ "${ENABLE_MANAGED_GPU,,}" == "true" ]] && echo "--enable-managed-gpu" || true) \
-      $([[ "${GPU_INSTANCE_PROFILE}" =~ ^MIG ]] && echo "--gpu-instance-profile ${GPU_INSTANCE_PROFILE}" || true)
+      $([[ "${ENABLE_MANAGED_GPU,,}" == "true" && "${GPU_INSTANCE_PROFILE}" =~ ^MIG ]] && echo "--gpu-instance-profile ${GPU_INSTANCE_PROFILE}" || true) \
+      $([[ "${ENABLE_MANAGED_GPU,,}" == "true" && "${GPU_MIG_STRATEGY}" =~ ^(mixed|single)$ ]] && echo "--gpu-mig-strategy ${GPU_MIG_STRATEGY}" || true)
 
   displayName: 'Execute K8s Create & Scale Up Operations for ${{ parameters.cloud }}'
   workingDirectory: modules/python
@@ -155,7 +156,10 @@ steps:
       --scale-step-size "$SCALE_STEP_SIZE" \
       --step-wait-time "$STEP_WAIT_TIME" \
       --step-timeout "$STEP_TIME_OUT" \
-      ${GPU_NODE_POOL:+--gpu-node-pool}
+      ${GPU_NODE_POOL:+--gpu-node-pool} \
+      $([[ "${ENABLE_MANAGED_GPU,,}" == "true" ]] && echo "--enable-managed-gpu" || true) \
+      $([[ "${ENABLE_MANAGED_GPU,,}" == "true" && "${GPU_INSTANCE_PROFILE}" =~ ^MIG ]] && echo "--gpu-instance-profile ${GPU_INSTANCE_PROFILE}" || true) \
+      $([[ "${ENABLE_MANAGED_GPU,,}" == "true" && "${GPU_MIG_STRATEGY}" =~ ^(mixed|single)$ ]] && echo "--gpu-mig-strategy ${GPU_MIG_STRATEGY}" || true)
 
     # Delete Node Pool
     PYTHONPATH=$PYTHONPATH:$(pwd) python3 "$PYTHON_SCRIPT_FILE" delete \
@@ -178,6 +182,9 @@ steps:
     STEP_TIME_OUT: $(STEP_TIME_OUT)
     RESULT_DIR: $(System.DefaultWorkingDirectory)/$(RUN_ID)
     GPU_NODE_POOL: $(GPU_NODE_POOL)
+    ENABLE_MANAGED_GPU: $(ENABLE_MANAGED_GPU)
+    GPU_INSTANCE_PROFILE: $(GPU_INSTANCE_PROFILE)
+    GPU_MIG_STRATEGY: $(GPU_MIG_STRATEGY)
     STEP_WAIT_TIME: $(STEP_WAIT_TIME)
     ${{ if eq(parameters.cloud, 'aws') }}:
       CAPACITY_TYPE: $(CAPACITY_TYPE)