diff --git a/.github/workflows/py-cli-e2e-tests-v2.yml b/.github/workflows/py-cli-e2e-tests-v2.yml
new file mode 100644
index 000000000000..59d2c83aaa26
--- /dev/null
+++ b/.github/workflows/py-cli-e2e-tests-v2.yml
@@ -0,0 +1,99 @@
+# Copyright 2026 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# CLI E2E v2 — strangler-fig replacement for py-cli-e2e-tests.yml.
+#
+# Each connector lives under ingestion/tests/cli_e2e_v2/<connector>/ and
+# is a self-contained pytest module (no inheritance). The matrix below
+# grows by one entry per connector migration PR; the connector's
+# corresponding entry is removed from py-cli-e2e-tests.yml in the same PR.
+#
+# Triggers: workflow_dispatch only during the stabilization window for
+# the MySQL pilot. The schedule cron will be added once the pilot is
+# consistently green (see spec §7.1).
+
+name: py-cli-e2e-tests-v2
+on:
+  workflow_dispatch:
+    inputs:
+      connectors:
+        description: "Connectors to run (JSON array)"
+        required: true
+        default: '["mysql"]'
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  py-cli-e2e-tests-v2:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    strategy:
+      fail-fast: false
+      matrix:
+        connector: ${{ fromJSON(inputs.connectors || '["mysql"]') }}
+    environment: test
+
+    steps:
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: false
+          swap-storage: true
+          docker-images: false
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Openmetadata Test Environment
+        uses: ./.github/actions/setup-openmetadata-test-environment
+        with:
+          python-version: '3.10'
+
+      - name: Run CLI E2E v2 tests
+        id: e2e-v2-test
+        env:
+          # MySQL test data lives in a dedicated MySQL container that the
+          # session-scoped `mysql_container` pytest fixture (testcontainers)
+          # boots, bootstraps with the OM-doc minimum grants, and tears
+          # down. Teammates run the same way locally — no env plumbing.
+          # Only OM-server admin creds (used to mint the ingestion-bot
+          # JWT) need to be exported here; they come from the bundled
+          # docker-compose and are not secrets.
+          OM_ADMIN_EMAIL: admin@open-metadata.org
+          OM_ADMIN_PASSWORD: admin
+        run: |
+          source env/bin/activate
+          cd ingestion
+          mkdir -p junit
+          pytest -v \
+            --junitxml=junit/test-results-v2-${{ matrix.connector }}.xml \
+            tests/cli_e2e_v2/${{ matrix.connector }}
+        shell: bash
+
+      - name: Upload tests artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: tests-v2-${{ matrix.connector }}
+          path: ingestion/junit/test-results-v2-*.xml
+
+      - name: Clean Up
+        if: always()
+        run: |
+          cd ./docker/development
+          docker compose down --remove-orphans
+          sudo rm -rf ${PWD}/docker-volume
diff --git a/ingestion/pyproject.toml b/ingestion/pyproject.toml
index 4992240ae06a..519be8a055a6 100644
--- a/ingestion/pyproject.toml
+++ b/ingestion/pyproject.toml
@@ -222,6 +222,12 @@ ignore = [
 # lands in a later stage tests don't immediately error out.
"tests/**/*.py" = ["S101", "PLR2004", "PLC0415"] "ingestion/tests/**/*.py" = ["S101", "PLR2004", "PLC0415"] +# v2 CLI E2E framework uses relative imports by design (connector-centric +# layout — connectors live in subdirs and import from `..core.*` / `.connector`). +# `T201` (print) is allowed in the top-level conftest for the session-start +# posture banner. Path listed twice for the dual-cwd pattern above. +"tests/cli_e2e_v2/**/*.py" = ["S101", "PLR2004", "PLC0415", "TID252", "T201"] +"ingestion/tests/cli_e2e_v2/**/*.py" = ["S101", "PLR2004", "PLC0415", "TID252", "T201"] # Auto-generated from JSON Schema — never edit, never lint. "src/metadata/generated/**" = ["ALL"] "ingestion/src/metadata/generated/**" = ["ALL"] diff --git a/ingestion/src/metadata/cli/app.py b/ingestion/src/metadata/cli/app.py index f101e9c1f956..d4192b1e652f 100644 --- a/ingestion/src/metadata/cli/app.py +++ b/ingestion/src/metadata/cli/app.py @@ -17,6 +17,7 @@ import traceback from pathlib import Path +from metadata.cli.common import execute_workflow from metadata.config.common import load_config_file from metadata.utils.logger import cli_logger from metadata.workflow.application import ApplicationWorkflow @@ -24,13 +25,14 @@ logger = cli_logger() -def run_app(config_path: Path) -> None: +def run_app(config_path: Path, status_file: Path | None = None) -> None: """ Run the application workflow from a config path to a JSON or YAML file :param config_path: Path to load JSON config """ + config_dict = None try: config_dict = load_config_file(config_path) # no logging for config because apps might have custom secrets @@ -40,7 +42,4 @@ def run_app(config_path: Path) -> None: logger.debug(traceback.format_exc()) sys.exit(1) - workflow.execute() - workflow.stop() - workflow.print_status() - workflow.raise_from_status() + execute_workflow(workflow=workflow, config_dict=config_dict, status_file=status_file) diff --git a/ingestion/src/metadata/cli/classify.py b/ingestion/src/metadata/cli/classify.py index 5ae035ea28b4..a98993e5ad12 100644 --- a/ingestion/src/metadata/cli/classify.py +++ b/ingestion/src/metadata/cli/classify.py @@ -28,7 +28,7 @@ logger = cli_logger() -def run_classification(config_path: Path) -> None: +def run_classification(config_path: Path, status_file: Path | None = None) -> None: """ Run the sampler workflow from a config path to a JSON or YAML file @@ -48,4 +48,4 @@ def run_classification(config_path: Path) -> None: WorkflowInitErrorHandler.print_init_error(exc, config_dict, PipelineType.metadata) sys.exit(1) - execute_workflow(workflow=workflow, config_dict=config_dict) + execute_workflow(workflow=workflow, config_dict=config_dict, status_file=status_file) diff --git a/ingestion/src/metadata/cli/common.py b/ingestion/src/metadata/cli/common.py index 38a8a82b840c..522bd97ed9b0 100644 --- a/ingestion/src/metadata/cli/common.py +++ b/ingestion/src/metadata/cli/common.py @@ -13,14 +13,23 @@ Handle workflow execution """ +from pathlib import Path from typing import Any, Dict # noqa: UP035 from metadata.workflow.base import BaseWorkflow -def execute_workflow(workflow: BaseWorkflow, config_dict: Dict[str, Any]) -> None: # noqa: UP006 - """Execute the workflow and raise if needed""" - workflow.execute() - workflow.stop() +def execute_workflow( + workflow: BaseWorkflow, + config_dict: Dict[str, Any], # noqa: UP006 + status_file: Path | None = None, +) -> None: + """Execute the workflow, write status file if requested, raise on failure if configured.""" + try: + workflow.execute() + finally: + workflow.stop() + if 
status_file is not None: + workflow.write_status_file(status_file) if config_dict.get("workflowConfig", {}).get("raiseOnError", True): workflow.raise_from_status() diff --git a/ingestion/src/metadata/cli/dataquality.py b/ingestion/src/metadata/cli/dataquality.py index 65d449d98675..e71781732ab2 100644 --- a/ingestion/src/metadata/cli/dataquality.py +++ b/ingestion/src/metadata/cli/dataquality.py @@ -28,7 +28,7 @@ logger = cli_logger() -def run_test(config_path: Path) -> None: +def run_test(config_path: Path, status_file: Path | None = None) -> None: """ Run the Data Quality Test Suites workflow from a config path to a JSON or YAML file @@ -48,4 +48,4 @@ def run_test(config_path: Path) -> None: WorkflowInitErrorHandler.print_init_error(exc, workflow_config_dict, PipelineType.TestSuite) sys.exit(1) - execute_workflow(workflow=workflow, config_dict=workflow_config_dict) + execute_workflow(workflow=workflow, config_dict=workflow_config_dict, status_file=status_file) diff --git a/ingestion/src/metadata/cli/ingest.py b/ingestion/src/metadata/cli/ingest.py index 4b6223773f23..fa83fe24fe2d 100644 --- a/ingestion/src/metadata/cli/ingest.py +++ b/ingestion/src/metadata/cli/ingest.py @@ -29,7 +29,7 @@ logger = cli_logger() -def run_ingest(config_path: Path) -> None: +def run_ingest(config_path: Path, status_file: Path | None = None) -> None: """ Run the ingestion workflow from a config path to a JSON or YAML file @@ -46,4 +46,4 @@ def run_ingest(config_path: Path) -> None: WorkflowInitErrorHandler.print_init_error(exc, config_dict, PipelineType.metadata) sys.exit(1) - execute_workflow(workflow=workflow, config_dict=config_dict) + execute_workflow(workflow=workflow, config_dict=config_dict, status_file=status_file) diff --git a/ingestion/src/metadata/cli/ingest_dbt.py b/ingestion/src/metadata/cli/ingest_dbt.py index 54a4aaaaa607..1c055e9756d9 100644 --- a/ingestion/src/metadata/cli/ingest_dbt.py +++ b/ingestion/src/metadata/cli/ingest_dbt.py @@ -25,6 +25,7 @@ from dotenv import load_dotenv from pydantic import BaseModel, Field, field_validator +from metadata.cli.common import execute_workflow from metadata.ingestion.ometa.credentials import URL from metadata.utils.logger import cli_logger from metadata.workflow.metadata import MetadataWorkflow @@ -291,7 +292,7 @@ def create_dbt_workflow_config(dbt_project_path: Path, om_config: OpenMetadataDB return config # noqa: RET504 -def run_ingest_dbt(dbt_project_path: Path) -> None: +def run_ingest_dbt(dbt_project_path: Path, status_file: Path | None = None) -> None: """ Run the dbt artifacts ingestion workflow from a dbt project path @@ -321,13 +322,13 @@ def run_ingest_dbt(dbt_project_path: Path) -> None: logger.info("Creating workflow configuration...") workflow_config = create_dbt_workflow_config(dbt_project_path, om_config) - # Create and execute the MetadataWorkflow (reusing existing infrastructure) logger.info("Starting OpenMetadata ingestion workflow...") workflow = MetadataWorkflow.create(workflow_config) - workflow.execute() - workflow.raise_from_status() - workflow.print_status() - workflow.stop() + execute_workflow( + workflow=workflow, + config_dict=workflow_config, + status_file=status_file, + ) logger.info("DBT artifacts ingestion completed successfully") diff --git a/ingestion/src/metadata/cli/lineage.py b/ingestion/src/metadata/cli/lineage.py index 9deaa7fc8cf1..b069dc8567d7 100644 --- a/ingestion/src/metadata/cli/lineage.py +++ b/ingestion/src/metadata/cli/lineage.py @@ -47,7 +47,7 @@ class LineageWorkflow(BaseModel): parserType: 
Optional[QueryParserType] = QueryParserType.Auto # noqa: N815, UP045 -def run_lineage(config_path: Path) -> None: +def run_lineage(config_path: Path, status_file: Path | None = None) -> None: """ Run the ingestion workflow from a config path to a JSON or YAML file diff --git a/ingestion/src/metadata/cli/profile.py b/ingestion/src/metadata/cli/profile.py index 93977318c870..403fd3d20c30 100644 --- a/ingestion/src/metadata/cli/profile.py +++ b/ingestion/src/metadata/cli/profile.py @@ -29,7 +29,7 @@ logger = cli_logger() -def run_profiler(config_path: Path) -> None: +def run_profiler(config_path: Path, status_file: Path | None = None) -> None: """ Run the Profiler workflow from a config path to a JSON or YAML file @@ -46,4 +46,4 @@ def run_profiler(config_path: Path) -> None: WorkflowInitErrorHandler.print_init_error(exc, workflow_config_dict, PipelineType.profiler) sys.exit(1) - execute_workflow(workflow=workflow, config_dict=workflow_config_dict) + execute_workflow(workflow=workflow, config_dict=workflow_config_dict, status_file=status_file) diff --git a/ingestion/src/metadata/cli/usage.py b/ingestion/src/metadata/cli/usage.py index 420a8542a8e3..c23a39bee645 100644 --- a/ingestion/src/metadata/cli/usage.py +++ b/ingestion/src/metadata/cli/usage.py @@ -29,7 +29,7 @@ logger = cli_logger() -def run_usage(config_path: Path) -> None: +def run_usage(config_path: Path, status_file: Path | None = None) -> None: """ Run the usage workflow from a config path to a JSON or YAML file @@ -46,4 +46,4 @@ def run_usage(config_path: Path) -> None: WorkflowInitErrorHandler.print_init_error(exc, config_dict, PipelineType.usage) sys.exit(1) - execute_workflow(workflow=workflow, config_dict=config_dict) + execute_workflow(workflow=workflow, config_dict=config_dict, status_file=status_file) diff --git a/ingestion/src/metadata/cmd.py b/ingestion/src/metadata/cmd.py index bf56329ad6f3..7055bbbece8c 100644 --- a/ingestion/src/metadata/cmd.py +++ b/ingestion/src/metadata/cmd.py @@ -77,6 +77,13 @@ def create_common_config_parser_args(parser: argparse.ArgumentParser): type=Path, required=True, ) + parser.add_argument( + "--status-file", + help="path to write structured JSON status output (optional)", + type=Path, + required=False, + default=None, + ) def create_dbt_parser_args(parser: argparse.ArgumentParser): @@ -220,6 +227,7 @@ def metadata(args: Optional[List[str]] = None): # noqa: UP006, UP045 metadata_workflow = contains_args.get("command") config_file: Optional[Path] = contains_args.get("config") # noqa: UP045 dbt_project_path: Optional[Path] = contains_args.get("dbt_project_path") # noqa: UP045 + status_file: Optional[Path] = contains_args.get("status_file") # noqa: UP045 path = None if config_file: @@ -234,7 +242,7 @@ def metadata(args: Optional[List[str]] = None): # noqa: UP006, UP045 set_loggers_level(log_level) if path and metadata_workflow and metadata_workflow in RUN_PATH_METHODS: - RUN_PATH_METHODS[metadata_workflow](path) + RUN_PATH_METHODS[metadata_workflow](path, status_file) if metadata_workflow == MetadataCommands.SCAFFOLD_CONNECTOR.value: has_name = contains_args.get("name") diff --git a/ingestion/src/metadata/workflow/base.py b/ingestion/src/metadata/workflow/base.py index 2bbf93a0c011..b4cf76ac372c 100644 --- a/ingestion/src/metadata/workflow/base.py +++ b/ingestion/src/metadata/workflow/base.py @@ -12,10 +12,12 @@ Base workflow definition. 
""" +import json import traceback import uuid from abc import ABC, abstractmethod from datetime import datetime +from pathlib import Path from statistics import mean from typing import Any, Dict, List, Optional, TypeVar, Union # noqa: UP035 @@ -212,13 +214,21 @@ def get_failures(self) -> List[StackTraceError]: # noqa: UP006 def workflow_steps(self) -> List[Step]: # noqa: UP006 """Steps to report status from""" + def _step_meets_success_threshold(self, step: Step) -> bool: + """True iff the step has no failures, or its success ratio meets the workflow's threshold. + + Shared by `raise_from_status_internal` (which raises on failure) and + `write_status_file` (which reports the CLI's observable success/failure state). + """ + status = step.get_status() + if not status.failures: + return True + return status.calculate_success() >= self.workflow_config.successThreshold # pyright: ignore[reportOperatorIssue] + def raise_from_status_internal(self, raise_warnings=False) -> None: """Based on the internal workflow status, raise a WorkflowExecutionError""" for step in self.workflow_steps(): - if ( - step.get_status().failures - and step.get_status().calculate_success() < self.workflow_config.successThreshold - ): + if not self._step_meets_success_threshold(step): raise WorkflowExecutionError(f"{step.name} reported errors: {Summary.from_step(step)}") if raise_warnings and step.status.warnings: @@ -400,3 +410,28 @@ def print_status(self): start_time, self._is_debug_enabled(), ) + + def write_status_file(self, path: Path) -> None: + """Serialize per-step status to JSON at the given path. + + The `success` field mirrors the CLI's exit-code semantic: True iff every + step meets its success threshold (the same condition under which + `raise_from_status_internal` does NOT raise). + + Shape: + { + "pipeline_type": str, + "ingestion_pipeline_fqn": str | None, + "success": bool, + "steps": [] + } + """ + ingestion_status = self.build_ingestion_status() + success = all(self._step_meets_success_threshold(step) for step in self.workflow_steps()) + payload = { + "pipeline_type": self.config.source.type, # pyright: ignore[reportAttributeAccessIssue] + "ingestion_pipeline_fqn": self.config.ingestionPipelineFQN, # pyright: ignore[reportAttributeAccessIssue] + "success": success, + "steps": ingestion_status.model_dump(), + } + path.write_text(json.dumps(payload, indent=2, default=str)) diff --git a/ingestion/src/metadata/workflow/workflow_status_mixin.py b/ingestion/src/metadata/workflow/workflow_status_mixin.py index a59e55a80e09..66d19b3dcdd3 100644 --- a/ingestion/src/metadata/workflow/workflow_status_mixin.py +++ b/ingestion/src/metadata/workflow/workflow_status_mixin.py @@ -159,7 +159,7 @@ def result_status(self) -> WorkflowResultStatus: return WorkflowResultStatus.FAILURE return WorkflowResultStatus.SUCCESS - def build_ingestion_status(self) -> Optional[IngestionStatus]: # noqa: UP045 + def build_ingestion_status(self) -> IngestionStatus: """ Get the results from the steps and prep the payload we'll send to the API diff --git a/ingestion/tests/cli_e2e_v2/CONNECTORS.md b/ingestion/tests/cli_e2e_v2/CONNECTORS.md new file mode 100644 index 000000000000..52bf85576969 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/CONNECTORS.md @@ -0,0 +1,34 @@ +# Adding a new connector + +`mysql/` is the reference. Mirror its file layout. 
+
+## Scaffold
+
+```
+<connector>/
+  __init__.py          # empty
+  baseline.py          # SQLAlchemy MetaData + seeds + views + SPs + get_policy()
+  connector.py         # service_name() + build_<connector>_config()
+  enforcer.py          # SqlBaselineEnforcer subclass
+  expected.py          # TYPE_MAP extension + _expected() helper
+  conftest.py          # <connector>_container fixture + thin wiring
+  test_<connector>.py  # tests
+```
+
+## Per file
+
+1. **`baseline.py`** — declare schema with SQLAlchemy Core. Reuse `core/source/common_baseline.py` for portable tables (customers, transactions). Put dialect-specific types on a wide `all_types` table keyed on `BigInteger id`.
+2. **`enforcer.py`** — subclass `SqlBaselineEnforcer`. Usually only override `_stored_procedure_query_sql` (returns `(schema, name)` rows). Other overrides are rare; see `mysql/enforcer.py`.
+3. **`expected.py`** — extend `CORE_TYPE_MAP` with dialect types. Export `_expected(service_name, tables=None)` calling `derive_expected_service(...)`.
+4. **`connector.py`** — `<connector>_service_name(session_uuid, variant="")` and `build_<connector>_config(service_name, server)`. The config emits `${E2E_<CONNECTOR>_*}` refs — never embed raw secrets.
+5. **`conftest.py`** — session-scoped `<connector>_container` boots the source via testcontainers, creates the scoped ingest user with OM-doc-minimum GRANTs, and populates `E2E_<CONNECTOR>_*` env vars (so `Env(key).ref()` in `connector.py` resolves). Then the thin wiring fixtures (`_source_ready`, `_service`, `_cfg`, `_expected_factory`, `_metadata_ingested`). Mirror `mysql/conftest.py`.
+6. **`test_<connector>.py`** — one `test_vanilla_ingest_structural`, one test per pipeline you ship (profiler / lineage / classification), and a parametrized filter matrix using `COMMON_FILTER_SCENARIOS` + a per-connector `_EXPECTED_TABLES_BY_VARIANT` dict. Mirror `mysql/test_mysql.py`.
+
+## Validate
+
+```bash
+docker compose -f docker/development/docker-compose.yml up -d
+pytest tests/cli_e2e_v2/<connector> -v
+```
+
+Failures: see `README.md`.
diff --git a/ingestion/tests/cli_e2e_v2/README.md b/ingestion/tests/cli_e2e_v2/README.md
new file mode 100644
index 000000000000..e784b2fe27cd
--- /dev/null
+++ b/ingestion/tests/cli_e2e_v2/README.md
@@ -0,0 +1,53 @@
+# CLI E2E v2
+
+End-to-end tests for the `metadata` CLI against a real OpenMetadata server and a real data source. Each test brings its source into a declared shape, runs CLI pipelines, asserts on what landed in OM.
+
+## Run
+
+```bash
+docker compose -f docker/development/docker-compose.yml up -d
+source env/bin/activate
+cd ingestion
+pytest tests/cli_e2e_v2/mysql -v
+```
+
+Each connector boots its own source via testcontainers. Docker is the only prerequisite — no DB ports, credentials, or grants to manage.
+
+## Layout
+
+```
+tests/cli_e2e_v2/
+  conftest.py    # session fixtures (uuid, server config)
+  core/          # framework internals
+  mysql/         # reference connector
+  CONNECTORS.md  # how to add a new connector
+```
+
+## Debugging
+
+Every CLI run writes three files to pytest's tmp_path; the runner logs the paths at INFO:
+
+```
+cfg_<pipeline>_<n>.yaml     # rendered config (secrets are ${refs})
+status_<pipeline>_<n>.json  # status report — failures[] lives here
+stdout_<pipeline>_<n>.log   # full stdout
+```
+
+First place to look on failure: the exception body, then the status JSON.
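+
+The status file's top-level shape, per `write_status_file` (values here are illustrative; the per-step entries are elided):
+
+```
+{
+  "pipeline_type": "mysql",
+  "ingestion_pipeline_fqn": null,
+  "success": false,
+  "steps": [ ... ]
+}
+```
+
+`success` mirrors the CLI's exit-code semantic: true iff every step met its success threshold.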
+ +| Error | Fix | +|---|---| +| `CliExecutionError` | Inspect the embedded failures, stderr, status path | +| `StructuralMismatch` | Jump to the first diff; rest cascade | +| `401` / `Invalid token` | `unset OM_JWT_TOKEN` and rerun | +| `permission denied` from CLI | Add the missing GRANT to the connector's `conftest.py` | +| `Eventually timed out` | Raise `.eventually(120)` or set `E2E_POLL_VERBOSE=1` | + +## Env toggles (rarely needed) + +| Var | Default | Effect | +|---|---|---| +| `OM_SERVER_URL` | `http://localhost:8585/api` | OM server URL | +| `OM_JWT_TOKEN` | minted | Pre-minted token; bypasses admin login | +| `OM_ADMIN_EMAIL` / `OM_ADMIN_PASSWORD` | `admin@open-metadata.org` / `admin` | Admin for token minting | +| `E2E_POLL_VERBOSE` | unset | `=1` logs every poll attempt | diff --git a/ingestion/tests/cli_e2e_v2/__init__.py b/ingestion/tests/cli_e2e_v2/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/cli_e2e_v2/conftest.py b/ingestion/tests/cli_e2e_v2/conftest.py new file mode 100644 index 000000000000..ee3285b9a790 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/conftest.py @@ -0,0 +1,205 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Top-level fixtures for the CLI E2E v2 test package. + +Pytest auto-discovers this conftest for all tests under tests/cli_e2e_v2/. +Per-connector conftests (e.g., mysql/conftest.py) compose on top of these +session-level primitives. + +Fixture graph: + + session_uuid ─────────────┐ + ├─→ (consumed by per-connector service names) + om_server_config ────┬────┘ + │ + ├─→ om_http_client ─┬─→ om_client (per-test) + │ └─→ registered_services (cleanup) + │ + └─→ _posture_log (autouse) + + tmp_path (pytest builtin) ─→ cli_runner (per-test) +""" + +from __future__ import annotations + +import logging +import os +import uuid +from typing import TYPE_CHECKING + +import pytest + +from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( + AuthProvider, + OpenMetadataConnection, +) +from metadata.generated.schema.entity.services.databaseService import DatabaseService +from metadata.generated.schema.security.client.openMetadataJWTClientConfig import ( + OpenMetadataJWTClientConfig, +) +from metadata.ingestion.ometa.ometa_api import OpenMetadata + +from .core.config.server import ServerConfig +from .core.expected.differ import StructuralMismatch +from .core.fluent.om_client import OmClient +from .core.runner.cli_runner import CliRunner + +if TYPE_CHECKING: + from collections.abc import Iterator + from pathlib import Path + +logger = logging.getLogger(__name__) + + +# ----------------------------------------------------------------------------- +# pytest hooks +# ----------------------------------------------------------------------------- + + +def pytest_assertrepr_compare(op, left, right): + """Render `StructuralMismatch` in full when it appears in an `assert ==` / + `assert is` comparison instead of pytest's default short repr. + + `StructuralMismatch` is normally raised, in which case pytest displays + its `__str__` directly via the exception path. This hook covers the + less-common but still real case where a test compares a captured + mismatch against a sentinel (e.g. `assert run_diff() == NO_DIFFS`) — + pytest would otherwise truncate the diff body to its short repr and + swallow the path-grouped diagnostics we put in `__str__`. 
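+
+    Illustrative report shape (a hedged sketch; the body lines are
+    whatever `StructuralMismatch.__str__` rendered, so the exact
+    diff-line format comes from the differ, not this hook):
+
+        ["StructuralMismatch (==):",
+         "StructuralMismatch: 1 diff (1 column)",
+         "table[customers].column[email].dataType: ..."]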
+ """ + target = ( + left if isinstance(left, StructuralMismatch) else (right if isinstance(right, StructuralMismatch) else None) + ) + if target is None: + return None + # Each line of the rendered mismatch becomes its own report line so + # pytest's terminal writer wraps cleanly and indentation survives. + return [f"StructuralMismatch ({op}):"] + str(target).splitlines() + + +# ----------------------------------------------------------------------------- +# session identity + server +# ----------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def session_uuid() -> str: + """One 8-char hex UUID per pytest session. + + Used to suffix every OM service name so parallel matrix jobs never collide + and re-runs start from a clean namespace. Short form (8 hex chars) keeps + service names readable. + """ + return uuid.uuid4().hex[:8] + + +@pytest.fixture(scope="session") +def om_server_config() -> ServerConfig: + """Shared OM server URL + JWT, read from env once per session. + + This fixture is also the SINGLE place in the framework that installs + the resolved JWT into `os.environ["OM_JWT_TOKEN"]`. CLI subprocesses + inherit the parent env, and their rendered YAMLs carry + `${OM_JWT_TOKEN}` refs that `os.path.expandvars` resolves at load + time — so the install is necessary, but keeping it here (rather than + in `ServerConfig.from_env()`) leaves the factory pure and makes the + mutation explicit and named. + """ + cfg = ServerConfig.from_env() + # Bridge to subprocesses: the rendered cfg_*.yaml uses ${OM_JWT_TOKEN} + # so the subprocess needs it in its env. A pre-exported OM_JWT_TOKEN + # and a minted one both land at the same key. + os.environ["OM_JWT_TOKEN"] = cfg.jwt_token + return cfg + + +@pytest.fixture(scope="session") +def om_http_client(om_server_config: ServerConfig) -> OpenMetadata: + """Authenticated OpenMetadata HTTP client, session-scoped. + + Built once, reused by all OmClient wrappers and by the cleanup finalizer. + """ + conn = OpenMetadataConnection( + hostPort=om_server_config.server_url, + authProvider=AuthProvider.openmetadata, + securityConfig=OpenMetadataJWTClientConfig( + jwtToken=om_server_config.jwt_token, + ), + ) + return OpenMetadata(conn) + + +# ----------------------------------------------------------------------------- +# per-test fluent + runner +# ----------------------------------------------------------------------------- + + +@pytest.fixture +def om_client(om_http_client: OpenMetadata) -> OmClient: + """Fluent OmClient wrapping the shared HTTP client.""" + return OmClient(om_http_client) + + +@pytest.fixture +def cli_runner(tmp_path: Path) -> CliRunner: + """Per-test CliRunner bound to pytest's tmp_path. + + Each test gets its own tmp_path so cfg_*.yaml and status_*.json artifacts + don't collide across parallel or sequential tests. + """ + return CliRunner(tmp_path) + + +# ----------------------------------------------------------------------------- +# session cleanup +# ----------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def registered_services(om_http_client: OpenMetadata) -> Iterator[list[str]]: + """Session-scoped list of service names for end-of-session cleanup. + + Tests append names here when they create services; the finalizer deletes + each service via the OM API (hard delete, recursive) when the pytest + session ends. Errors during cleanup are logged but don't fail the test + run — cleanup is best-effort. 
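+
+    A hedged sketch of the append side (fixture name and service-name
+    scheme are illustrative, not the mysql conftest's actual wiring):
+
+        @pytest.fixture(scope="session")
+        def _service(registered_services, session_uuid):
+            name = f"mysql_{session_uuid}"
+            registered_services.append(name)  # hard-deleted at session end
+            return name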
+ """ + names: list[str] = [] + yield names + + for name in names: + try: + svc = om_http_client.get_by_name(entity=DatabaseService, fqn=name) + if svc is None: + continue + om_http_client.delete( + entity=DatabaseService, + entity_id=str(svc.id.root), + hard_delete=True, + recursive=True, + ) + logger.info("session teardown: deleted service %s", name) + except Exception as exc: + logger.warning("session teardown: failed to delete %s: %s", name, exc) + + +# ----------------------------------------------------------------------------- +# session posture log +# ----------------------------------------------------------------------------- + + +@pytest.fixture(scope="session", autouse=True) +def _posture_log(session_uuid: str, om_server_config: ServerConfig) -> None: + """Print session UUID + server URL + token provenance at session start. + + The three lines are the minimum needed to answer post-mortem questions + like "did that run actually hit the server I expected?" and "was the + failure a stale env token or a freshly minted one?" — cheap to log + once, invaluable when triaging a flake. + """ + print("\n==== CLI E2E v2 session start ====") + print(f"session uuid: {session_uuid}") + print(f"server url: {om_server_config.server_url}") + print(f"token source: {om_server_config.token_source}") + print("==================================\n") diff --git a/ingestion/tests/cli_e2e_v2/core/__init__.py b/ingestion/tests/cli_e2e_v2/core/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/cli_e2e_v2/core/_om_compat.py b/ingestion/tests/cli_e2e_v2/core/_om_compat.py new file mode 100644 index 000000000000..50c0f09b9a01 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/_om_compat.py @@ -0,0 +1,35 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""OM Pydantic compatibility shims for the v2 framework. + +OM's generated schema sometimes wraps list-typed fields in `RootModel[list[X]]` +(notably `owners`) and sometimes uses plain `list[X] | None` (today: `tags`, +`columns`). The shape can flip between OM minor versions without warning, +which historically forced sweeping changes through every test that walked +the field. + +`unwrap_root_list` centralizes the read so a future RootModel promotion +(or demotion) of any list field touches one helper rather than ~12 +callsites scattered across the differ and the fluent layer. It mirrors +the role `model_str` plays for scalar RootModel fields (tagFQN, name, +description) — the asymmetry of having a scalar shim but no list shim +was the smell that motivated this helper. +""" + +from __future__ import annotations + +from typing import Any + + +def unwrap_root_list(field: Any) -> list: + """Return a plain list whether `field` is None, a list, or a RootModel[list]. + + The defensive branches make the helper safe to drop in at any list + access site without checking the field's current Pydantic shape. 
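+
+    Example (both shapes normalize through the same call):
+
+        tags = unwrap_root_list(table.tags)      # plain list | None today
+        owners = unwrap_root_list(table.owners)  # RootModel[list] today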
+ """ + if field is None: + return [] + if hasattr(field, "root"): + return field.root + return field diff --git a/ingestion/tests/cli_e2e_v2/core/config/__init__.py b/ingestion/tests/cli_e2e_v2/core/config/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/cli_e2e_v2/core/config/builder.py b/ingestion/tests/cli_e2e_v2/core/config/builder.py new file mode 100644 index 000000000000..ad3dd81b1827 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/config/builder.py @@ -0,0 +1,192 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Immutable WorkflowConfig builder rendered to YAML for the metadata CLI. + +Two-step: factory returns a base (connection + service + server); +`.pipeline(options)` picks the pipeline (options is an OM Pydantic model); +`.with_filter(...)` layers filter patterns. Filters persist across later +`.pipeline()` transitions; inline filters on the options take precedence. +Render fails loudly when no pipeline is set. +""" + +from __future__ import annotations + +import copy +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +import yaml + +from .pipelines import ( + AutoClassificationPipeline, + PipelineOptions, + ProfilerPipeline, + cli_subcommand_for, + pipeline_identifier, + source_type_suffix_for, +) + +if TYPE_CHECKING: + from pathlib import Path + + from .server import ServerConfig + +_FILTER_KEYS: tuple[str, ...] = ( + "databaseFilterPattern", + "schemaFilterPattern", + "tableFilterPattern", +) + +# Pipelines that require a `processor` block in the rendered YAML. +# OM's Profiler + AutoClassification workflows instantiate an ORM profiler +# to compute column statistics / PII inference; without a processor entry, +# workflow init crashes with `'NoneType' object has no attribute 'model_dump'`. +_PIPELINES_NEEDING_PROCESSOR: tuple[type, ...] = ( + ProfilerPipeline, + AutoClassificationPipeline, +) + + +class PipelineNotSetError(RuntimeError): + """Raised when a WorkflowConfig is rendered or queried before a pipeline + has been selected via `.pipeline(...)`.""" + + +@dataclass(frozen=True) +class WorkflowConfig: + """Frozen carrier for one workflow's rendered config + active pipeline. + + Two fields: + _doc — the full YAML document as a dict tree + _options — the Pydantic pipeline options model (None on base configs + returned from the factory; set by .pipeline()) + + Instances are frozen — overlays return new instances via copy.deepcopy. + """ + + _doc: dict[str, Any] + _options: PipelineOptions | None = None + + # --- construction --------------------------------------------------- + @classmethod + def build( + cls, + *, + source_type: str, + service_name: str, + service_connection: dict[str, Any], + server: ServerConfig, + ) -> WorkflowConfig: + """Build a base config without any pipeline selected. + + Callers pass `service_connection` as a plain dict (either model_dump'd + from an OM connection class or built manually with env refs). 
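+
+        A hedged, MySQL-shaped sketch (the connection dict's field names
+        belong to the connector's schema, not to this builder):
+
+            base = WorkflowConfig.build(
+                source_type="mysql",
+                service_name=f"mysql_{session_uuid}",
+                service_connection={
+                    "hostPort": "localhost:3306",
+                    "username": "e2e_ingest",
+                    "authType": {"password": Env("E2E_MYSQL_PASSWORD").ref()},
+                },
+                server=om_server_config,
+            )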
+ """ + doc: dict[str, Any] = { + "source": { + "type": source_type, + "serviceName": service_name, + "serviceConnection": {"config": dict(service_connection)}, + "sourceConfig": {"config": {}}, + }, + "sink": server.to_sink_config_dict(), + "workflowConfig": server.to_workflow_config_dict(), + } + return cls(_doc=doc, _options=None) + + # --- pipeline transition -------------------------------------------- + def pipeline(self, options: PipelineOptions) -> WorkflowConfig: + """Transition to a concrete pipeline. + + `options` is one of the OM-generated Pydantic pipeline models + (re-exported with short aliases in `pipelines.py`). The instance's + `.type` field discriminator is carried through into the rendered + YAML as `sourceConfig.config.type`. + + Filter patterns already set on this config (via `.with_filter(...)`) + persist across the transition. Filters set inline on `options` take + precedence over preserved filters. + """ + dumped = options.model_dump(mode="json", exclude_none=True) + + new_doc = copy.deepcopy(self._doc) + prior_cfg = new_doc["source"]["sourceConfig"]["config"] + for key in _FILTER_KEYS: + if key in prior_cfg: + dumped.setdefault(key, prior_cfg[key]) + + new_doc["source"]["sourceConfig"]["config"] = dumped + + # OM's `import_source_class` selects the connector class by + # splitting `source.type` on "-" and dispatching to metadata_source_class, + # lineage_source_class, or usage_source_class. The suffix must match + # the pipeline: e.g. "mysql-lineage" for a DatabaseLineage run. + base_connector = new_doc["source"]["type"].split("-", 1)[0] + new_doc["source"]["type"] = base_connector + source_type_suffix_for(options) + + if isinstance(options, _PIPELINES_NEEDING_PROCESSOR): + new_doc["processor"] = {"type": "orm-profiler", "config": {}} + else: + new_doc.pop("processor", None) + + return WorkflowConfig(_doc=new_doc, _options=options) + + # --- filter overlay ------------------------------------------------- + def with_filter( + self, + *, + databases_include: list[str] | None = None, + databases_exclude: list[str] | None = None, + schemas_include: list[str] | None = None, + schemas_exclude: list[str] | None = None, + tables_include: list[str] | None = None, + tables_exclude: list[str] | None = None, + ) -> WorkflowConfig: + """Append include/exclude patterns at database, schema, or table level. + + Multiple calls MERGE (append), not replace. Include AND exclude at the + same level are allowed — OM's filter semantic applies exclude over + include on overlapping matches. 
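+
+        Example (hypothetical patterns; successive calls merge):
+
+            cfg = (
+                base.pipeline(MetadataPipeline())
+                .with_filter(schemas_include=["e2e_.*"])
+                .with_filter(tables_exclude=["tmp_.*"])
+            )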
+        """
+        new_doc = copy.deepcopy(self._doc)
+        cfg = new_doc["source"]["sourceConfig"]["config"]
+
+        def _merge(key: str, includes: list[str] | None, excludes: list[str] | None) -> None:
+            if not includes and not excludes:
+                return
+            pattern = cfg.setdefault(key, {})
+            if includes:
+                pattern.setdefault("includes", []).extend(includes)
+            if excludes:
+                pattern.setdefault("excludes", []).extend(excludes)
+
+        _merge("databaseFilterPattern", databases_include, databases_exclude)
+        _merge("schemaFilterPattern", schemas_include, schemas_exclude)
+        _merge("tableFilterPattern", tables_include, tables_exclude)
+
+        return WorkflowConfig(_doc=new_doc, _options=self._options)
+
+    # --- accessors ------------------------------------------------------
+    @property
+    def pipeline_identifier(self) -> str:
+        """Short id for artifact filenames and invocation counters."""
+        if self._options is None:
+            raise PipelineNotSetError("pipeline not set — call .pipeline(options) before querying identifier")
+        return pipeline_identifier(self._options)
+
+    @property
+    def cli_subcommand(self) -> str:
+        """The `metadata <cmd>` subcommand CliRunner will invoke."""
+        if self._options is None:
+            raise PipelineNotSetError("pipeline not set — call .pipeline(options) before querying subcommand")
+        return cli_subcommand_for(self._options)
+
+    # --- rendering ------------------------------------------------------
+    def write_tmp(self, tmp_path: Path, invocation: int = 0) -> Path:
+        """Dump to `<tmp_path>/cfg_<identifier>_<invocation>.yaml` and return the path."""
+        if self._options is None:
+            raise PipelineNotSetError("pipeline not set — call .pipeline(options) before rendering")
+        path = tmp_path / f"cfg_{self.pipeline_identifier}_{invocation}.yaml"
+        path.write_text(yaml.safe_dump(self._doc, sort_keys=False))
+        return path
diff --git a/ingestion/tests/cli_e2e_v2/core/config/env.py b/ingestion/tests/cli_e2e_v2/core/config/env.py
new file mode 100644
index 000000000000..c7df7409a8dc
--- /dev/null
+++ b/ingestion/tests/cli_e2e_v2/core/config/env.py
@@ -0,0 +1,103 @@
+# Copyright 2026 Collate
+# Licensed under the Collate Community License, Version 1.0 (the "License");
+# you may not use this file except in compliance with the License.
+"""Env var accessor — class with `Generic[_Req]` + `typing.overload` narrowing.
+
+Construction captures (key, default, required); terminals:
+    .ref() -> "${KEY}"       for YAML embedding
+    .get() -> str            when required=True (default)
+    .get() -> str | None     when required=False
+
+Runtime is a plain class; the Generic machinery is type-only. See
+`memory/project-v2-env-class-design.md` for the shape's rationale.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Generic, Literal, TypeVar, overload
+
+
+class EnvLoadError(RuntimeError):
+    """Raised when a required env var is unset (or empty)."""
+
+
+# Constrained — callers can only parameterize Env with True or False, matching
+# the two concrete `required` states. Anything else is a type error.
+_Req = TypeVar("_Req", Literal[True], Literal[False])
+
+
+class Env(Generic[_Req]):
+    """Capture an env-var access pattern; ref() and get() are the terminals.
+
+    Generic over the `required` flag so `.get()` returns `str` when
+    required=True and `str | None` when required=False. The required value
+    is kwarg-only to keep the `__new__` overloads unambiguous.
+    """
+
+    key: str
+
+    # Two __new__ overloads — one per Literal[required] value — let the type
+    # checker pick the right `Env[Literal[...]]` specialization at the call
+    # site.
The runtime __new__ is just object.__new__; Generic is erased. + @overload + def __new__( + cls, + key: str, + default: str | None = None, + *, + required: Literal[True] = True, + ) -> Env[Literal[True]]: ... + + @overload + def __new__( + cls, + key: str, + default: str | None = None, + *, + required: Literal[False], + ) -> Env[Literal[False]]: ... + + def __new__( + cls, + key: str, + default: str | None = None, + *, + required: bool = True, + ) -> Env: + return object.__new__(cls) + + def __init__( + self, + key: str, + default: str | None = None, + *, + required: bool = True, + ) -> None: + self.key = key + if default is not None: + os.environ.setdefault(key, default) + if required and not os.environ.get(key): + raise EnvLoadError(f"required env var {key} not set. Set it in your shell or GitHub Actions secrets.") + + def ref(self) -> str: + """Return '${KEY}' for embedding in YAML. + + The metadata CLI's load_config_file applies os.path.expandvars to + the raw YAML before parsing, so the subprocess resolves the reference + at load time — the rendered YAML on disk only ever contains the + literal reference, keeping secrets out of tmp_path artifacts. + """ + return f"${{{self.key}}}" + + # Two .get() overloads narrow by the specialization of Env: + # Env[Literal[True]].get() -> str (construction validated) + # Env[Literal[False]].get() -> str | None (caller must handle None) + @overload + def get(self: Env[Literal[True]]) -> str: ... + + @overload + def get(self: Env[Literal[False]]) -> str | None: ... + + def get(self) -> str | None: + return os.environ.get(self.key) diff --git a/ingestion/tests/cli_e2e_v2/core/config/pipelines.py b/ingestion/tests/cli_e2e_v2/core/config/pipelines.py new file mode 100644 index 000000000000..e7608f593bb8 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/config/pipelines.py @@ -0,0 +1,101 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Pipeline options — re-exports of OM's generated Pydantic pipeline models. + +Each pipeline maps to one Pydantic class carrying the full OM schema +(including filter patterns, incremental flags, and pipeline-specific +knobs). Short aliases keep test call sites compact; dispatch for CLI +subcommand + artifact identifier goes through a single `_SPECS` map. 
+ +Usage: + + from ..core.config.pipelines import MetadataPipeline + + cfg = base.pipeline( + MetadataPipeline(includeStoredProcedures=True), + ).with_filter(tables_include=["customers"]) +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from metadata.generated.schema.metadataIngestion.databaseServiceAutoClassificationPipeline import ( + DatabaseServiceAutoClassificationPipeline as AutoClassificationPipeline, +) +from metadata.generated.schema.metadataIngestion.databaseServiceMetadataPipeline import ( + DatabaseServiceMetadataPipeline as MetadataPipeline, +) +from metadata.generated.schema.metadataIngestion.databaseServiceProfilerPipeline import ( + DatabaseServiceProfilerPipeline as ProfilerPipeline, +) +from metadata.generated.schema.metadataIngestion.databaseServiceQueryLineagePipeline import ( + DatabaseServiceQueryLineagePipeline as LineagePipeline, +) +from metadata.generated.schema.metadataIngestion.databaseServiceQueryUsagePipeline import ( + DatabaseServiceQueryUsagePipeline as UsagePipeline, +) +from metadata.generated.schema.metadataIngestion.testSuitePipeline import ( + TestSuitePipeline as TestPipeline, +) + +PipelineOptions = ( + MetadataPipeline | ProfilerPipeline | LineagePipeline | UsagePipeline | TestPipeline | AutoClassificationPipeline +) + + +@dataclass(frozen=True) +class _PipelineSpec: + """Per-pipeline dispatch. + + source_type_suffix: appended to `source.type` in the rendered YAML so + OM's `import_source_class` routes to the right class. For lineage and + usage, OM looks up `-lineage` / `-usage` in the + connector's ServiceSpec; everything else uses the plain connector name. + """ + + cli_subcommand: str + identifier: str + source_type_suffix: str = "" + + +# Single source of truth for per-pipeline dispatch. Adding a pipeline +# touches exactly this dict plus the re-export above. +_SPECS: dict[type, _PipelineSpec] = { + MetadataPipeline: _PipelineSpec("ingest", "metadata", ""), + ProfilerPipeline: _PipelineSpec("profile", "profiler", ""), + LineagePipeline: _PipelineSpec("ingest", "lineage", "-lineage"), + UsagePipeline: _PipelineSpec("usage", "usage", "-usage"), + TestPipeline: _PipelineSpec("test", "test", ""), + AutoClassificationPipeline: _PipelineSpec("classify", "classify", ""), +} + + +def cli_subcommand_for(options: PipelineOptions) -> str: + """Return the `metadata ` subcommand to run for these options.""" + return _SPECS[type(options)].cli_subcommand + + +def pipeline_identifier(options: PipelineOptions) -> str: + """Short identifier for artifact filenames and invocation counters.""" + return _SPECS[type(options)].identifier + + +def source_type_suffix_for(options: PipelineOptions) -> str: + """Suffix to append to `source.type` for this pipeline (e.g. `-lineage`).""" + return _SPECS[type(options)].source_type_suffix + + +__all__ = [ + "AutoClassificationPipeline", + "LineagePipeline", + "MetadataPipeline", + "PipelineOptions", + "ProfilerPipeline", + "TestPipeline", + "UsagePipeline", + "cli_subcommand_for", + "pipeline_identifier", + "source_type_suffix_for", +] diff --git a/ingestion/tests/cli_e2e_v2/core/config/server.py b/ingestion/tests/cli_e2e_v2/core/config/server.py new file mode 100644 index 000000000000..163cf7fa6f3e --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/config/server.py @@ -0,0 +1,169 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. 
+"""Shared OpenMetadata server configuration for ingestion tests. + +Instance fields hold resolved values for the session HTTP client (which +authenticates directly, no YAML indirection). Rendered YAML emits ${OM_*} +references so cfg_*.yaml artifacts never embed raw JWTs — safe to share. + +Token resolution: if OM_JWT_TOKEN is exported, use it. Otherwise mint a +long-lived ingestion-bot token from the running server (admin login → +GET /bots/name/ingestion-bot → GET /users/auth-mechanism/{userId}). The +minted token is signed by THIS server's keystore, so it works against +any OM instance regardless of how it was bootstrapped — no shared dev +keypair assumption. +""" + +from __future__ import annotations + +import base64 +import os +from dataclasses import dataclass +from typing import Any, Literal + +import requests + +from ..runner.errors import E2ESetupError +from .env import Env + +TokenSource = Literal["env", "minted"] + +_DEFAULT_OM_SERVER_URL = "http://localhost:8585/api" +_DEFAULT_ADMIN_EMAIL = "admin@open-metadata.org" +_DEFAULT_ADMIN_PASSWORD = "admin" +_INGESTION_BOT_NAME = "ingestion-bot" +_HTTP_TIMEOUT_SECONDS = 10 + + +class TokenMintError(E2ESetupError): + """Raised when the bot-token mint flow fails (login, lookup, or fetch).""" + + +def _mint_ingestion_bot_token(server_url: str, admin_email: str, admin_password: str) -> str: + """Mint a server-signed, long-lived ingestion-bot JWT. + + Three hops against the live OM server: + 1. POST /v1/users/login → short-lived admin access token. + 2. GET /v1/bots/name/ingestion-bot → bot's linked user id. + 3. GET /v1/users/auth-mechanism/{user_id} → bot's permanent JWT. + + The returned token is signed by THIS server's RSA keypair, so it + validates regardless of which keystore the OM instance was + bootstrapped with. Bot tokens have `JWTTokenExpiry: Unlimited` per + OM's default bot bootstrap, so they survive long test sessions. + + Admin password is base64-encoded in the login payload to match OM's + expectation (the server decodes before bcrypt-comparing). + """ + encoded_password = base64.b64encode(admin_password.encode()).decode() + try: + login = requests.post( + f"{server_url}/v1/users/login", + json={"email": admin_email, "password": encoded_password}, + timeout=_HTTP_TIMEOUT_SECONDS, + ) + login.raise_for_status() + admin_token = login.json()["accessToken"] + + headers = {"Authorization": f"Bearer {admin_token}"} + bot = requests.get( + f"{server_url}/v1/bots/name/{_INGESTION_BOT_NAME}", + headers=headers, + timeout=_HTTP_TIMEOUT_SECONDS, + ) + bot.raise_for_status() + bot_user_id = bot.json()["botUser"]["id"] + + auth = requests.get( + f"{server_url}/v1/users/auth-mechanism/{bot_user_id}", + headers=headers, + timeout=_HTTP_TIMEOUT_SECONDS, + ) + auth.raise_for_status() + return auth.json()["config"]["JWTToken"] + except (requests.RequestException, KeyError, ValueError) as exc: + raise TokenMintError( + f"failed to mint ingestion-bot token from {server_url}: {exc}. " + f"Set OM_JWT_TOKEN to bypass minting, or set OM_ADMIN_EMAIL / " + f"OM_ADMIN_PASSWORD if the OM instance uses non-default admin creds." + ) from exc + + +@dataclass(frozen=True) +class ServerConfig: + """Shared sinkConfig + workflowConfig.openMetadataServerConfig applied to every test. + + `token_source` records how `jwt_token` was obtained — "env" when + OM_JWT_TOKEN was already exported at session start, "minted" when + from_env() had to mint via the bot-token flow. 
Exposed for the + session posture log so a developer can see at a glance which auth + path was taken without having to instrument the fixture. + """ + + server_url: str + jwt_token: str + token_source: TokenSource + + @classmethod + def from_env(cls) -> ServerConfig: + """Resolve server URL + JWT for the session — PURE: no side effects. + + OM_JWT_TOKEN, if exported, wins (escape hatch for hermetic CI or + deliberately scoped tokens). Otherwise mint via the bot-token + flow against OM_SERVER_URL using OM_ADMIN_EMAIL / OM_ADMIN_PASSWORD + (defaults: admin@open-metadata.org / admin — the docker-compose + bootstrap creds). + + This method DOES NOT write OM_JWT_TOKEN back into os.environ. The + `om_server_config` fixture in the top-level conftest is the single + named place that does that install step — keeping the factory + pure and the ambient-env mutation explicit. + """ + server_url = Env("OM_SERVER_URL", default=_DEFAULT_OM_SERVER_URL).get() + + existing = os.environ.get("OM_JWT_TOKEN") + if existing: + return cls( + server_url=server_url, + jwt_token=existing, + token_source="env", + ) + + minted = _mint_ingestion_bot_token( + server_url=server_url, + admin_email=Env("OM_ADMIN_EMAIL", default=_DEFAULT_ADMIN_EMAIL).get(), + admin_password=Env("OM_ADMIN_PASSWORD", default=_DEFAULT_ADMIN_PASSWORD).get(), + ) + return cls( + server_url=server_url, + jwt_token=minted, + token_source="minted", + ) + + def to_workflow_config_dict(self) -> dict[str, Any]: + """Builds the workflowConfig block for a rendered config YAML. + + Emits ${OM_*} refs. metadata CLI expands them at subprocess load time; + the rendered YAML on disk never embeds the raw JWT. + """ + return { + "openMetadataServerConfig": { + "hostPort": Env("OM_SERVER_URL").ref(), + "authProvider": "openmetadata", + "securityConfig": {"jwtToken": Env("OM_JWT_TOKEN").ref()}, + } + } + + def to_sink_config_dict(self) -> dict[str, Any]: + """Builds the sink block for a rendered config YAML. + + `bulk_sink_batch_size: 1` forces the OM sink to flush each entity + synchronously instead of buffering up to 100. Required for the FK + post-process path: `yield_table_constraints` runs BEFORE the final + sink flush, so deferred FK lookups (`metadata.get_by_name(...)` on + the referred table) otherwise miss entities still sitting in the + buffer. Production runs usually cross the buffer threshold and + hide this; small E2E fixtures (<100 entities) don't. + """ + return {"type": "metadata-rest", "config": {"bulk_sink_batch_size": 1}} diff --git a/ingestion/tests/cli_e2e_v2/core/expected/__init__.py b/ingestion/tests/cli_e2e_v2/core/expected/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/cli_e2e_v2/core/expected/derive.py b/ingestion/tests/cli_e2e_v2/core/expected/derive.py new file mode 100644 index 000000000000..4fa9842de50f --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/expected/derive.py @@ -0,0 +1,112 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Derive Expected* trees from a SQLAlchemy MetaData. + +Replaces hand-authored `ExpectedColumn` lists in per-dialect expected +modules. 
For each Table in `metadata`, builds an `ExpectedTable` with: + - data_type resolved via the dialect's `TypeMap` (SQLAlchemy -> OM) + - primary_key / constraint derived from `col.primary_key` / `col.nullable` + - description pulled straight from `col.comment` / `tbl.comment` + +Stored procedures are NOT derivable (not in MetaData) — callers pass their +own hand-authored list into `derive_expected_service`. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from metadata.generated.schema.entity.data.table import Constraint + +from .type_map import TypeMap, resolve_om_type +from .types import ( + ExpectedColumn, + ExpectedDatabase, + ExpectedSchema, + ExpectedService, + ExpectedStoredProcedure, + ExpectedTable, +) + +if TYPE_CHECKING: + from sqlalchemy import MetaData + from sqlalchemy.schema import Column as SqlColumn + + from metadata.generated.schema.entity.services.databaseService import ( + DatabaseServiceType, + ) + + +def derive_expected_tables(metadata: MetaData, type_map: TypeMap) -> list[ExpectedTable]: + """Build one ExpectedTable per Table in `metadata`. + + Columns come straight off the SQLAlchemy Column — name, type (via + type_map), primary_key, constraint (from nullable), comment (as + description). Tables iterated in FK-safe order via `sorted_tables`. + """ + return [ + ExpectedTable( + name=tbl.name, + columns=[_derive_column(col, type_map) for col in tbl.columns], + description=tbl.comment, + ) + for tbl in metadata.sorted_tables + ] + + +def _derive_column(col: SqlColumn, type_map: TypeMap) -> ExpectedColumn: + return ExpectedColumn( + name=col.name, + data_type=resolve_om_type(col.type, type_map), + primary_key=bool(col.primary_key), + constraint=_constraint_for(col), + description=col.comment, + ) + + +def _constraint_for(col: SqlColumn) -> Constraint | None: + if col.primary_key: + return Constraint.PRIMARY_KEY + if not col.nullable: + return Constraint.NOT_NULL + return None + + +def derive_expected_service( + *, + service_name: str, + service_type: DatabaseServiceType, + metadata: MetaData, + type_map: TypeMap, + database: str = "default", + schema: str | None = None, + views: list[ExpectedTable] | None = None, + stored_procedures: list[ExpectedStoredProcedure] | None = None, +) -> ExpectedService: + """Build a full ExpectedService tree (service -> db -> schema -> tables + SPs). + + `schema` defaults to `metadata.schema`. `views` and `stored_procedures` + are hand-authored — neither lives in SQLAlchemy MetaData. Views join + the regular table list (OM models views as Table entities with + tableType=View, so STRICT extras checks see them together). + """ + schema_name = schema or metadata.schema + if schema_name is None: + raise ValueError("metadata has no schema — pass `schema=` explicitly") + return ExpectedService( + name=service_name, + service_type=service_type, + databases=[ + ExpectedDatabase( + name=database, + schemas=[ + ExpectedSchema( + name=schema_name, + tables=derive_expected_tables(metadata, type_map) + (views or []), + stored_procedures=stored_procedures or [], + ), + ], + ), + ], + ) diff --git a/ingestion/tests/cli_e2e_v2/core/expected/differ.py b/ingestion/tests/cli_e2e_v2/core/expected/differ.py new file mode 100644 index 000000000000..c965f27fb94c --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/expected/differ.py @@ -0,0 +1,430 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. 
+"""Structural differ — walks an Expected* tree, fetches actual OM state per-level, +collects path-qualified diffs, raises StructuralMismatch when anything doesn't match. + +Public surface: `assert_service_matches(expected, om, mode=SUPERSET)`. + +Internal shape: every node-level differ has the **uniform signature** +`_diff_(node, parent_path, om, mode, diffs)`. The single `_diff_node` +entry point dispatches on `type(node)` via `_DIFFERS`, and each differ +recurses into children by calling `_diff_node` on them. Adding a new +node type (e.g. ExpectedView) is one registry entry plus one function. +Parent-path threading is uniform: every differ receives the owning FQN +and builds `self_fqn = f"{parent_path}.{node.name}"` the same way. + +Diffs use bracket-path notation (`service[foo].database[bar].table[baz]. +column[qux].dataType`) for readability in pytest failure output. +""" + +from __future__ import annotations + +import re +from collections.abc import Callable +from typing import TYPE_CHECKING + +from metadata.generated.schema.entity.data.database import Database +from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema +from metadata.generated.schema.entity.data.storedProcedure import StoredProcedure +from metadata.generated.schema.entity.data.table import Table +from metadata.generated.schema.entity.services.databaseService import DatabaseService +from metadata.ingestion.ometa.ometa_api import OpenMetadata +from metadata.ingestion.ometa.utils import model_str + +from .._om_compat import unwrap_root_list +from ..source.types import Diff, DiffKind +from .types import ( + ExpectedColumn, + ExpectedDatabase, + ExpectedSchema, + ExpectedService, + ExpectedStoredProcedure, + ExpectedTable, + MatchMode, +) + +if TYPE_CHECKING: + from ..fluent.om_client import OmClient + + +class StructuralMismatch(AssertionError): # noqa: N818 (intentional API surface — public exception name) + """Aggregate assertion error carrying all collected diffs. + + Renders with a summary header (counts by category) and path-sorted body + grouped by owning entity — so a failure with 20 column diffs is + scannable rather than a wall of text. + """ + + def __init__(self, diffs: list[Diff]) -> None: + self.diffs = list(diffs) + super().__init__(self._format(self.diffs)) + + @staticmethod + def _format(diffs: list[Diff]) -> str: + if not diffs: + return "StructuralMismatch: (no diffs)" + + sorted_diffs = sorted(diffs, key=lambda d: d.path) + classified = [(d, *_classify_path(d.path)) for d in sorted_diffs] + + # Header: category counts, most-frequent first, alphabetical on ties. + counts: dict[str, int] = {} + for _, _, category in classified: + counts[category] = counts.get(category, 0) + 1 + summary = ", ".join( + f"{n} {cat}{'' if n == 1 else 's'}" for cat, n in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])) + ) + header = f"StructuralMismatch: {len(sorted_diffs)} diff{'' if len(sorted_diffs) == 1 else 's'} ({summary})" + + # Body: diffs grouped by owning-entity scope. + body_lines: list[str] = [] + last_scope: str | None = None + for d, scope, _ in classified: + if last_scope is not None and scope != last_scope: + body_lines.append("") # blank line between entity scopes + last_scope = scope + body_lines.append(str(d)) + + return header + "\n" + "\n".join(body_lines) + + +# One table driving both category tally and scope clustering. 
+# token: substring searched for in the path string +# category: label used in the summary header +# is_scope: whether this level counts as an owning-entity scope (the +# body groups diffs by the finest scope-level bracket segment). +# Column / seed diffs are category buckets but NOT scope +# levels — they cluster under their owning table. +# Ordered from finest-grained to coarsest; both passes walk top-to-bottom +# so the first hit wins for category and scope alike. +_PATH_LEVELS: tuple[tuple[str, str, bool], ...] = ( + (".column[", "column", False), + (".seed", "seed", False), + ("procedure[", "procedure", True), + ("view[", "view", True), + ("table[", "table", True), + ("schema[", "schema", True), + ("database[", "database", True), + ("service[", "service", True), +) + + +def _classify_path(path: str) -> tuple[str, str]: + """Return (scope, category) for a diff path in one pass. + + `category` = the finest-grained level token present in the path, + used for the summary-line tally. + `scope` = the owning-entity bracket segment (e.g. `table[customers]`), + used to cluster related diffs in the failure body. Columns and seeds + collapse into their owning table's scope rather than introducing a + scope of their own. Falls back to the whole path when no bracket + token matches. + """ + category: str | None = None + scope: str | None = None + for token, label, is_scope in _PATH_LEVELS: + if token not in path: + continue + if category is None: + category = label + if is_scope and scope is None: + m = re.search(rf"{re.escape(token)}[^\]]+\]", path) + if m: + scope = m.group(0) + return scope or path, category or "service" + + +def assert_service_matches( + expected: ExpectedService, + om: OmClient, + *, + mode: MatchMode = MatchMode.SUPERSET, +) -> None: + """Walk `expected`, fetch actual state via `om.raw`, raise StructuralMismatch on diffs. + + SUPERSET (default): extras in actual are tolerated (cloud drift, unrelated tables). + STRICT: actual must equal expected exactly. + """ + diffs: list[Diff] = [] + _diff_node(expected, parent_path="", om=om.raw, mode=mode, diffs=diffs) + if diffs: + raise StructuralMismatch(diffs) + + +# ----------------------------------------------------------------------------- +# Node dispatch +# ----------------------------------------------------------------------------- + + +_NodeDiffer = Callable[[object, str, OpenMetadata, MatchMode, list[Diff]], None] + +_STRICT_LIST_LIMIT = 1000 + + +def _diff_node( + node: object, + parent_path: str, + om: OpenMetadata, + mode: MatchMode, + diffs: list[Diff], +) -> None: + """Dispatch entry — looks up the per-type differ in `_DIFFERS`. + + Unknown node types are a plan bug, not a runtime condition — raising + TypeError surfaces the mismatch at author time. 
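+
+    Example (root call, as issued by `assert_service_matches`):
+
+        _diff_node(expected_service, "", om, MatchMode.SUPERSET, diffs)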
+ """ + differ = _DIFFERS.get(type(node)) + if differ is None: + raise TypeError(f"no differ registered for {type(node).__name__}; add an entry to _DIFFERS in differ.py") + differ(node, parent_path, om, mode, diffs) + + +# ----------------------------------------------------------------------------- +# Per-node differs — all have the same signature +# (node, parent_path, om, mode, diffs) +# ----------------------------------------------------------------------------- + + +def _diff_service( + node: object, + parent_path: str, + om: OpenMetadata, + mode: MatchMode, + diffs: list[Diff], +) -> None: + assert isinstance(node, ExpectedService) + assert parent_path == "", "ExpectedService must be the root node" + self_fqn = node.name + path = f"service[{node.name}]" + + actual = om.get_by_name(entity=DatabaseService, fqn=self_fqn) + if actual is None: + diffs.append(Diff(path=path, kind=DiffKind.MISSING)) + return + if actual.serviceType != node.service_type: + diffs.append(Diff(path=f"{path}.serviceType", expected=node.service_type, actual=actual.serviceType)) + + for child in node.databases: + _diff_node(child, self_fqn, om, mode, diffs) + + if mode == MatchMode.STRICT: + _check_strict_extras( + entity_cls=Database, + expected_names={d.name for d in node.databases}, + list_params={"service": self_fqn}, + path_fmt=f"{path}.database[{{name}}](strict)", + om=om, + diffs=diffs, + ) + + +def _diff_database( + node: object, + parent_path: str, + om: OpenMetadata, + mode: MatchMode, + diffs: list[Diff], +) -> None: + assert isinstance(node, ExpectedDatabase) + self_fqn = f"{parent_path}.{node.name}" + path = f"service[{parent_path}].database[{node.name}]" + + actual = om.get_by_name(entity=Database, fqn=self_fqn) + if actual is None: + diffs.append(Diff(path=path, kind=DiffKind.MISSING)) + return + + for child in node.schemas: + _diff_node(child, self_fqn, om, mode, diffs) + + if mode == MatchMode.STRICT: + _check_strict_extras( + entity_cls=DatabaseSchema, + expected_names={s.name for s in node.schemas}, + list_params={"database": self_fqn}, + path_fmt=f"{self_fqn}.schema[{{name}}](strict)", + om=om, + diffs=diffs, + ) + + +def _diff_schema( + node: object, + parent_path: str, + om: OpenMetadata, + mode: MatchMode, + diffs: list[Diff], +) -> None: + assert isinstance(node, ExpectedSchema) + self_fqn = f"{parent_path}.{node.name}" + path = f"{parent_path}.schema[{node.name}]" + + actual = om.get_by_name(entity=DatabaseSchema, fqn=self_fqn) + if actual is None: + diffs.append(Diff(path=path, kind=DiffKind.MISSING)) + return + + for child in node.tables: + _diff_node(child, self_fqn, om, mode, diffs) + for child in node.stored_procedures: + _diff_node(child, self_fqn, om, mode, diffs) + + if mode == MatchMode.STRICT: + _check_strict_extras( + entity_cls=Table, + expected_names={t.name for t in node.tables}, + list_params={"databaseSchema": self_fqn}, + path_fmt=f"{path}.table[{{name}}](strict)", + om=om, + diffs=diffs, + ) + _check_strict_extras( + entity_cls=StoredProcedure, + expected_names={sp.name for sp in node.stored_procedures}, + list_params={"databaseSchema": self_fqn}, + path_fmt=f"{path}.procedure[{{name}}](strict)", + om=om, + diffs=diffs, + ) + + +def _diff_table( + node: object, + parent_path: str, + om: OpenMetadata, + mode: MatchMode, + diffs: list[Diff], +) -> None: + assert isinstance(node, ExpectedTable) + self_fqn = f"{parent_path}.{node.name}" + path = f"table[{node.name}]" + + actual = om.get_by_name(entity=Table, fqn=self_fqn, fields=["tags", "owners", "columns"]) + if actual is 
None: + diffs.append(Diff(path=path, kind=DiffKind.MISSING)) + return + + # owner (single-owner check — matches when exp.owner appears in any actual owner) + if node.owner is not None: + actual_owners = {o.name for o in unwrap_root_list(actual.owners)} + if node.owner not in actual_owners: + diffs.append(Diff(path=f"{path}.owner", expected=node.owner, actual=sorted(actual_owners))) + + # tags (subset match — all expected tags must be present). + if node.tags: + actual_tags = {model_str(t.tagFQN) for t in unwrap_root_list(actual.tags)} + if node.tags - actual_tags: + diffs.append(Diff(path=f"{path}.tags", expected=sorted(node.tags), actual=sorted(actual_tags))) + + # description (substring match per Decision #16) + if node.description is not None: + actual_desc = model_str(actual.description) if actual.description else "" + if node.description not in actual_desc: + diffs.append( + Diff(path=f"{path}.description", expected=f"contains {node.description!r}", actual=actual_desc) + ) + + # columns — no separate OM fetch; walk the actual.columns set in place. + actual_columns_by_name = {model_str(c.name): c for c in unwrap_root_list(actual.columns)} + for exp_col in node.columns: + _diff_column(exp_col, path, actual_columns_by_name, diffs) + + if mode == MatchMode.STRICT: + expected_names = {c.name for c in node.columns} + extra = set(actual_columns_by_name.keys()) - expected_names + if extra: + diffs.append( + Diff( + path=f"{path}.columns(strict)", + kind=DiffKind.UNEXPECTED, + actual=sorted(extra), + ) + ) + + +def _diff_stored_procedure( + node: object, + parent_path: str, + om: OpenMetadata, + mode: MatchMode, + diffs: list[Diff], +) -> None: + assert isinstance(node, ExpectedStoredProcedure) + self_fqn = f"{parent_path}.{node.name}" + path = f"procedure[{node.name}]" + + actual = om.get_by_name(entity=StoredProcedure, fqn=self_fqn) + if actual is None: + diffs.append(Diff(path=path, kind=DiffKind.MISSING)) + return + + if node.description is not None: + actual_desc = model_str(actual.description) if actual.description else "" + if node.description not in actual_desc: + diffs.append( + Diff(path=f"{path}.description", expected=f"contains {node.description!r}", actual=actual_desc) + ) + + +# Column-level diffs don't fetch from OM and don't recurse, so they're NOT +# registered in _DIFFERS. `_diff_table` calls this helper directly for each +# expected column with the already-fetched `actual.columns` dict. 
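+# Call shape (illustrative): _diff_column(exp_col, "table[customers]",
+# actual_columns_by_name, diffs) appends any mismatches into `diffs` in place.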
+def _diff_column( + exp_col: ExpectedColumn, + table_path: str, + actual_columns_by_name: dict, + diffs: list[Diff], +) -> None: + path = f"{table_path}.column[{exp_col.name}]" + actual = actual_columns_by_name.get(exp_col.name) + if actual is None: + diffs.append(Diff(path=path, kind=DiffKind.MISSING)) + return + if actual.dataType != exp_col.data_type: + diffs.append(Diff(path=f"{path}.dataType", expected=exp_col.data_type, actual=actual.dataType)) + if exp_col.constraint is not None and actual.constraint != exp_col.constraint: + diffs.append(Diff(path=f"{path}.constraint", expected=exp_col.constraint, actual=actual.constraint)) + if exp_col.tags: + actual_tags = {model_str(t.tagFQN) for t in unwrap_root_list(actual.tags)} + if exp_col.tags - actual_tags: + diffs.append(Diff(path=f"{path}.tags", expected=sorted(exp_col.tags), actual=sorted(actual_tags))) + if exp_col.description is not None: + actual_desc = model_str(actual.description) if actual.description else "" + if exp_col.description not in actual_desc: + diffs.append( + Diff(path=f"{path}.description", expected=f"contains {exp_col.description!r}", actual=actual_desc) + ) + + +def _check_strict_extras( + *, + entity_cls: type, + expected_names: set[str], + list_params: dict[str, str], + path_fmt: str, + om: OpenMetadata, + diffs: list[Diff], +) -> None: + """Flag actual entities under a parent that weren't declared as expected. + + `path_fmt` must contain a `{name}` slot filled with each extra entity's + name at emit time. Pagination: capped at _STRICT_LIST_LIMIT — fine for + e2e-sized services. + """ + for actual in om.list_all_entities(entity=entity_cls, params=list_params, limit=_STRICT_LIST_LIMIT): + name = model_str(actual.name) + if name in expected_names: + continue + diffs.append(Diff(path=path_fmt.format(name=name), kind=DiffKind.UNEXPECTED)) + + +# Registry is declared AFTER the per-node differs so it can reference them +# by name. Adding a new node type = one function above + one entry here. +_DIFFERS: dict[type, _NodeDiffer] = { + ExpectedService: _diff_service, + ExpectedDatabase: _diff_database, + ExpectedSchema: _diff_schema, + ExpectedTable: _diff_table, + ExpectedStoredProcedure: _diff_stored_procedure, +} diff --git a/ingestion/tests/cli_e2e_v2/core/expected/type_map.py b/ingestion/tests/cli_e2e_v2/core/expected/type_map.py new file mode 100644 index 000000000000..8d84f1e075ef --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/expected/type_map.py @@ -0,0 +1,82 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""SQLAlchemy -> OM DataType map. + +Used by `derive_expected_tables` to build Expected trees directly from the +baseline's SQLAlchemy MetaData. `CORE_TYPE_MAP` covers the portable types +used in `common_baseline.py`; each dialect's expected module extends it +with dialect-specific classes (e.g. `mysql.MEDIUMINT`, `mysql.ENUM`). + +Resolution walks the SQLAlchemy type's MRO so subclasses inherit parent +entries unless explicitly overridden. 
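+
+Example (hypothetical dialect extension; the exact enum members are
+illustrative):
+
+    from sqlalchemy.dialects import mysql
+
+    MYSQL_TYPE_MAP: TypeMap = {**CORE_TYPE_MAP, mysql.MEDIUMINT: DataType.MEDIUMINT}
+    resolve_om_type(mysql.MEDIUMINT(), MYSQL_TYPE_MAP)  # -> DataType.MEDIUMINT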
+""" + +from __future__ import annotations + +from sqlalchemy import ( + CHAR, + JSON, + TIMESTAMP, + BigInteger, + Boolean, + Date, + DateTime, + Enum, + Float, + Integer, + LargeBinary, + Numeric, + SmallInteger, + String, + Text, + Time, +) + +from metadata.generated.schema.entity.data.table import DataType + +TypeMap = dict[type, DataType] + + +# CORE entries marked with (via MRO) are the ones that let dialect maps +# DROP their equivalent `dialects..FOO` entry: mysql.JSON / mysql.ENUM / +# mysql.BLOB / mysql.TIMESTAMP all inherit from these core classes, so the +# MRO walk in `resolve_om_type` hits the core entry without needing a +# dialect duplicate. Dialect-specific size variants (MEDIUMTEXT, LONGBLOB, +# TINYINT, etc.) still need per-dialect entries — they extend PRIVATE +# bases (`_StringType`, `_Binary`) that MRO skips past the public +# `String` / `LargeBinary`, or they want a more-specific OM DataType than +# the core parent yields. +CORE_TYPE_MAP: TypeMap = { + Integer: DataType.INT, + BigInteger: DataType.BIGINT, + SmallInteger: DataType.SMALLINT, + String: DataType.VARCHAR, + Text: DataType.TEXT, + CHAR: DataType.CHAR, + Date: DataType.DATE, + DateTime: DataType.DATETIME, + Time: DataType.TIME, + TIMESTAMP: DataType.TIMESTAMP, # via MRO: mysql.TIMESTAMP, pg.TIMESTAMP + Numeric: DataType.DECIMAL, + Float: DataType.FLOAT, + Boolean: DataType.BOOLEAN, # dialect overrides (e.g. MySQL: TINYINT) + Enum: DataType.ENUM, # via MRO: mysql.ENUM, pg.ENUM + JSON: DataType.JSON, # via MRO: mysql.JSON, pg.JSON + LargeBinary: DataType.BLOB, # via MRO: mysql.BLOB +} + + +def resolve_om_type(col_type: object, type_map: TypeMap) -> DataType: + """Return the OM DataType for a SQLAlchemy column-type instance. + + Walks the instance's class MRO, returning the first match in `type_map`. + Raises ValueError naming the unmapped class when no ancestor matches — + the dialect map just needs one new line added. + """ + for cls in type(col_type).__mro__: + if cls in type_map: + return type_map[cls] + raise ValueError( + f"no OM DataType mapping for SQLAlchemy type {type(col_type).__name__}. Add an entry to the dialect's type map." + ) diff --git a/ingestion/tests/cli_e2e_v2/core/expected/types.py b/ingestion/tests/cli_e2e_v2/core/expected/types.py new file mode 100644 index 000000000000..5dde7fe6cda1 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/expected/types.py @@ -0,0 +1,101 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Declarative dataclasses describing expected OM-side state post-ingestion. + +Per Decision #4 of the v2 spec: these reuse OM's Pydantic value types +(DataType, Constraint, DatabaseServiceType) for fields that map to schema +enums — automatic drift-safety whenever the generated schema updates. +They deliberately expose ONLY fields tests assert on, not every field on +the underlying OM entity (Table alone has 30+ fields, most noise for a +structural spec). + +Rules enforced: + - Fields that map to OM schema enums MUST use the OM enum type. + - Fields that don't map to enums stay as plain Python types. + - Unset / None means "don't assert this field" — differ skips it. + - For string fields like description, non-None means substring-match + (Decision #16), not exact equality. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from metadata.generated.schema.entity.data.table import Constraint, DataType + from metadata.generated.schema.entity.services.databaseService import ( + DatabaseServiceType, + ) + + +class MatchMode(Enum): + """Controls how strictly the structural differ treats "extra" entities in actual. + + - STRICT: actual must equal expected exactly — any unexpected table or column + flags as a diff. Used for filter tests where we care the filter eliminated + unwanted entities. + - SUPERSET (default): actual ⊇ expected. Extras are tolerated; only missing + or mismatched entities flag. Right for cloud accounts where shared schemas + may accumulate unrelated tables over time. + """ + + STRICT = "strict" + SUPERSET = "superset" + + +@dataclass(frozen=True) +class ExpectedColumn: + """A single column's expected shape in OM.""" + + name: str + data_type: DataType + tags: frozenset[str] = field(default_factory=frozenset) + constraint: Constraint | None = None + description: str | None = None # None = don't assert; str = substring match + primary_key: bool = False + + +@dataclass(frozen=True) +class ExpectedTable: + """A single table's expected shape in OM. + + Column matching is always by-name (dict lookup). Use STRICT match mode + on the differ to fail when actual tables carry unexpected extra columns. + """ + + name: str + columns: list[ExpectedColumn] + owner: str | None = None + tags: frozenset[str] = field(default_factory=frozenset) + description: str | None = None + + +@dataclass(frozen=True) +class ExpectedStoredProcedure: + """A single stored procedure's expected presence in OM.""" + + name: str + description: str | None = None # None = don't assert; str = substring match + + +@dataclass(frozen=True) +class ExpectedSchema: + name: str + tables: list[ExpectedTable] + stored_procedures: list[ExpectedStoredProcedure] = field(default_factory=list) + + +@dataclass(frozen=True) +class ExpectedDatabase: + name: str + schemas: list[ExpectedSchema] + + +@dataclass(frozen=True) +class ExpectedService: + name: str + service_type: DatabaseServiceType + databases: list[ExpectedDatabase] diff --git a/ingestion/tests/cli_e2e_v2/core/filter_scenarios.py b/ingestion/tests/cli_e2e_v2/core/filter_scenarios.py new file mode 100644 index 000000000000..ef96e0931bf3 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/filter_scenarios.py @@ -0,0 +1,111 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Portable filter scenarios for per-connector `test_filter` parametrization. + +Every SQL connector that ships a `customers` / `transactions` baseline +pair (all of them — see `core/source/common_baseline.py`) can run the +same matrix of filter semantics: include-exact, exclude-exact, schema- +only, and regex include+exclude with exclude priority. + +Shape: + - `FilterScenario.filter_kwargs` is PORTABLE — only mentions table / + schema names that exist in the common baseline. + - Expected-tables per variant is NOT portable (baselines add + connector-specific tables like MySQL's `all_types`, Postgres's + future `geom_table`, etc.). Each connector's test module maps + scenario.variant → its own expected-tables list. + +This keeps the filter-semantics matrix declared once: when we add a +fifth scenario (e.g. 
"include + exclude same pattern"), it's one edit +here that all connectors pick up. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class FilterScenario: + """One row in the filter-parametrize matrix. + + variant: short token used as the service-name suffix (keeps + per-variant OM services isolated for STRICT-mode + extras detection). + id: human-readable pytest id for the test report. + filter_kwargs: kwargs to pass straight into `WorkflowConfig.with_filter`. + """ + + variant: str + id: str + filter_kwargs: dict[str, list[str]] = field(default_factory=dict) + + +def expected_tables_for( + scenario: FilterScenario, + mapping: dict[str, list[str] | None], + *, + connector: str, +) -> list[str] | None: + """Safe lookup for per-connector `_EXPECTED_TABLES_BY_VARIANT` dicts. + + When a new `FilterScenario` is added to `COMMON_FILTER_SCENARIOS`, every + connector must supply a corresponding entry in its per-connector + mapping. A missing entry manifests as `KeyError` at test collection + time, which hides the actionable message — this helper turns it into + an AssertionError naming the connector, the missing variant, and the + fix location so a junior can resolve it in one read. + """ + try: + return mapping[scenario.variant] + except KeyError as exc: + raise AssertionError( + f"[{connector}] no expected_tables entry for filter scenario " + f"{scenario.variant!r} (pytest id: {scenario.id}). Add it to " + f"the connector's _EXPECTED_TABLES_BY_VARIANT mapping." + ) from exc + + +# Ordered by increasing complexity so a failing earlier scenario typically +# points at a more-fundamental issue than a failing later one. +COMMON_FILTER_SCENARIOS: tuple[FilterScenario, ...] = ( + FilterScenario( + variant="inc_exact", + id="tables_include_exact", + filter_kwargs={ + "schemas_include": ["e2e"], + "tables_include": ["customers"], + }, + ), + FilterScenario( + variant="exc_exact", + id="tables_exclude_exact", + # `transactions` is guaranteed present in every SQL baseline via + # common_baseline; connectors that add dialect-specific tables + # (e.g. MySQL's `all_types`) should include those in the expected + # list for this variant on their side. + filter_kwargs={ + "schemas_include": ["e2e"], + "tables_exclude": ["transactions"], + }, + ), + FilterScenario( + variant="sch_inc", + id="schemas_include_only_e2e", + filter_kwargs={"schemas_include": ["e2e"]}, + ), + FilterScenario( + variant="regex_prio", + id="regex_exclude_has_priority_over_include", + # include=customer.* matches both `customers` and any view + # starting with `customer_` (e.g. MySQL's `customer_txn_summary`); + # exclude=customer_txn.* trims the latter. With exclude priority, + # only `customers` should survive. + filter_kwargs={ + "schemas_include": ["e2e"], + "tables_include": ["customer.*"], + "tables_exclude": ["customer_txn.*"], + }, + ), +) diff --git a/ingestion/tests/cli_e2e_v2/core/fixtures.py b/ingestion/tests/cli_e2e_v2/core/fixtures.py new file mode 100644 index 000000000000..2167d709b97e --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fixtures.py @@ -0,0 +1,81 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Shared helpers for per-connector pytest fixtures. + +Per-connector conftests (`/conftest.py`) wire their own +`_source_ready` and `_metadata_ingested` fixtures on +top of these helpers instead of copy-pasting the body. 
+ +Design: + - These are PLAIN FUNCTIONS (not pytest fixtures). The per-connector + conftest is still where pytest scoping (`scope="session"` / + `scope="module"`) lives — otherwise pytest couldn't build the + dependency graph. The helpers carry just the body. + - `run_source_baseline` takes a zero-arg policy factory so each + connector's `get_policy` stays lazy (its engine shouldn't be + constructed at module import time). + - `metadata_ingest_once` applies an optional filter overlay so the + vast majority of connectors can pass `schemas_include=[...]` + without writing a one-off pipeline-chain-and-run code block. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .runner.cli_runner import CliRunner +from .source.orchestrator import EnforcementPolicy, ensure_baseline + +if TYPE_CHECKING: + from collections.abc import Callable + + import pytest + + from .config.builder import WorkflowConfig + from .config.pipelines import PipelineOptions + from .source.types import BaselineSpec + + +def run_source_baseline( + policy_factory: Callable[[], EnforcementPolicy], + baseline: BaselineSpec, + *, + connector_name: str, +) -> None: + """Thin wrapper around `ensure_baseline` for per-connector `source_ready` fixtures. + + The factory indirection keeps engine construction lazy — `get_policy` + opens a SQLAlchemy engine, and we don't want that happening at module + import time (pytest collects conftests eagerly). + """ + ensure_baseline(policy_factory(), baseline, connector_name=connector_name) + + +def metadata_ingest_once( + tmp_path_factory: pytest.TempPathFactory, + cfg: WorkflowConfig, + registered_services: list[str], + *, + service_name: str, + pipeline_options: PipelineOptions, + filter_kwargs: dict | None = None, + label: str = "metadata", +) -> None: + """Run one metadata CLI ingest and assert success. + + Registers `service_name` for session-end cleanup so individual tests + don't need to. `label` controls the tmp-path prefix and failure- + message wording — pass the connector name for readable artifacts + (e.g. `mysql_ingest0/`). + """ + if service_name not in registered_services: + registered_services.append(service_name) + + pipeline_cfg = cfg.pipeline(pipeline_options) + if filter_kwargs: + pipeline_cfg = pipeline_cfg.with_filter(**filter_kwargs) + + runner = CliRunner(tmp_path_factory.mktemp(f"{label}_ingest")) + status = runner.run(pipeline_cfg) + assert status.success, f"module-scoped {label} metadata ingest failed: {status.all_failures}" diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/__init__.py b/ingestion/tests/cli_e2e_v2/core/fluent/__init__.py new file mode 100644 index 000000000000..91e839083a15 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fluent/__init__.py @@ -0,0 +1,74 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Fluent assertion API for CLI E2E v2 tests. + +Entry point: `om_client` fixture → `OmClient`. Every fluent chain starts +with one of `.table(fqn)`, `.service(name)`, or `.stored_procedure(fqn)`. + +Sync vs eventually +------------------ +Most terminals are **synchronous** — they assume the data is already in OM +(typical after a completed metadata ingest). 
A few entity domains are +**eventually-consistent** and must be wrapped in `.eventually(timeout=60)` +one-shot arming: + + eventually: profile, lineage, foreign-key constraint, service entity count + sync only: column.has_type, column.has_tag, stored_procedure, structural + +Arming is ONE-SHOT: it applies to the very next terminal in the chain and +resets afterward. Arm again for each eventually-polled assertion. + +Assertion catalog +----------------- + + # Table (om_client.table(fqn) -> TableAssert) + .exists() # sync + .get() -> Table # escape hatch, returns raw entity + .has_description_containing(text) # sync or eventually + .has_tag(tag_fqn) # sync or eventually + .has_owner(name) # sync or eventually + .eventually(60).has_foreign_key_constraint(column=..., referenced_table=..., referenced_column=...) + + # Column (via table.column(name) -> ColumnAssert, sync only) + .has_type(DataType.X) + .has_tag(tag_fqn) + .has_description_containing(text) + + # Profile (via table.profile, MUST arm with .eventually()) + table.profile.eventually(60).row_count().equals(N) + table.profile.eventually(60).row_count().at_least(N) + table.profile.eventually(60).row_count().between(lo, hi) + + # Lineage (via table.lineage, MUST arm with .eventually()) + table.lineage.eventually(60).has_upstream(fqn) + table.lineage.eventually(60).has_downstream(fqn) + table.lineage.eventually(60).has_column_lineage(source_col, target_col) + + # Service (om_client.service(name) -> ServiceAssert) + .exists() + .has_description_containing(text) + .eventually(60).has_entity_count("tables", at_least=N) + .eventually(60).has_entity_count("schemas", at_least=N) + + # Stored procedure (om_client.stored_procedure(fqn) -> StoredProcedureAssert) + .exists() + .has_description_containing(text) + .has_code_containing(text) + +Structural differ (a different entry point, not fluent) +------------------------------------------------------- + from ..core.expected.differ import assert_service_matches, MatchMode + assert_service_matches(expected_tree, om_client) # SUPERSET (default) + assert_service_matches(expected_tree, om_client, mode=MatchMode.STRICT) # filter tests + +Walks the entire Expected* tree at once and raises `StructuralMismatch` +collecting every diff — use this over one-off chains when you're +verifying "the whole catalog looks right." + +Extending +--------- +New entity namespace (e.g. `DqAssert`): inherit `EntityAssert[T]` if the +class IS the entity; compose an `EventuallyRunner` directly if it's a +namespace hanging off a parent (see LineageAssert / ProfileAssert). +""" diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/entity_assert.py b/ingestion/tests/cli_e2e_v2/core/fluent/entity_assert.py new file mode 100644 index 000000000000..b92e8abee3f0 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fluent/entity_assert.py @@ -0,0 +1,78 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Shared base for fluent entity-assertion classes. + +`EntityAssert[T]` hoists the 20 lines of boilerplate every entity-assert +class shared into one place: the om/fqn/runner constructor, `_fetch()` + +`exists()` + `get()`, one-shot `.eventually(timeout)`, and the ubiquitous +`has_description_containing(text)` terminal. + +Subclasses declare: + - `_entity_cls: type[T]` -- the OM Pydantic class (e.g. Table) + - `_default_fields: list[str]` -- fields to request from the OM API + +Entity-specific terminals (e.g. 
TableAssert.has_foreign_key_constraint, +ServiceAssert.has_entity_count, StoredProcedureAssert.has_code_containing) +stay on the subclass. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, ClassVar, Generic, TypeVar + +from metadata.ingestion.ometa.utils import model_str + +from .eventually import EventuallyRunner + +if TYPE_CHECKING: + from metadata.ingestion.ometa.ometa_api import OpenMetadata + +T = TypeVar("T") + + +class EntityAssert(Generic[T]): + """Base class carrying fluent terminals shared by every entity-assert.""" + + _entity_cls: type[T] + _default_fields: ClassVar[list[str]] = [] + + def __init__(self, om: OpenMetadata, fqn: str) -> None: + self._om = om + self._fqn = fqn + self._eventually = EventuallyRunner() + + def eventually(self, timeout: int = 60): + """One-shot: the next terminal polls until success/timeout.""" + self._eventually.arm(timeout) + return self + + def _fetch(self, *, fields: list[str] | None = None) -> T: + entity = self._om.get_by_name( + entity=self._entity_cls, + fqn=self._fqn, + fields=fields if fields is not None else self._default_fields, + ) + if entity is None: + raise AssertionError(f"{self._entity_cls.__name__} not found: {self._fqn}") + return entity + + def exists(self) -> None: + """Synchronous — primary API is consistent immediately post-ingest.""" + self._fetch() + + def get(self) -> T: + """Escape hatch — returns the raw Pydantic entity.""" + return self._fetch() + + def has_description_containing(self, text: str): + def _check() -> None: + entity = self._fetch() + desc = model_str(entity.description) if entity.description else "" + if text not in desc: + raise AssertionError( + f"{self._entity_cls.__name__} {self._fqn} description does not contain {text!r}. Actual: {desc!r}" + ) + + self._eventually.run(_check, name=f"has_description_containing({text!r})") + return self diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/eventually.py b/ingestion/tests/cli_e2e_v2/core/fluent/eventually.py new file mode 100644 index 000000000000..f36fbb1dcf4f --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fluent/eventually.py @@ -0,0 +1,120 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Polling primitives for fluent assertion chains. + +`retry_until` is the low-level deadline-based retry. `EventuallyRunner` is +a one-shot arming wrapper held by each fluent assert class to dispatch +terminal checks either synchronously or via `retry_until`. + +Logging levels: + - DEBUG first-attempt failure (the single "starting to retry" signal) + - INFO every attempt when E2E_POLL_VERBOSE=1 — surfaces intermittent + flakes that otherwise disappear into DEBUG. Use it in CI when + a poll is blinking without obvious cause. 
+ - ERROR final timeout +""" + +from __future__ import annotations + +import logging +import os +import time +from dataclasses import dataclass +from typing import TYPE_CHECKING, TypeVar + +if TYPE_CHECKING: + from collections.abc import Callable + +logger = logging.getLogger(__name__) + +DEFAULT_TIMEOUT_SECONDS = 60 +DEFAULT_POLL_INTERVAL_SECONDS = 2.0 + +T = TypeVar("T") + + +def _verbose_polling() -> bool: + """Reads E2E_POLL_VERBOSE at call time so the env var can be toggled + within a single pytest session via monkeypatch if needed.""" + return os.environ.get("E2E_POLL_VERBOSE", "").lower() in ("1", "true", "yes") + + +def retry_until( + check: Callable[[], T], + *, + timeout: int = DEFAULT_TIMEOUT_SECONDS, + poll_interval: float = DEFAULT_POLL_INTERVAL_SECONDS, + name: str = "check", +) -> T: + """Retry `check` until it returns without raising AssertionError. + + AssertionError signals "not ready yet"; any other exception propagates + immediately. Returns the first successful check's return value. + """ + start = time.monotonic() + deadline = start + timeout + attempts = 0 + verbose = _verbose_polling() + + while True: + attempts += 1 + try: + return check() + except AssertionError as exc: + if attempts == 1: + logger.debug( + "[eventually:%s] attempt %d failed: %s (retrying for up to %ds)", + name, + attempts, + exc, + timeout, + ) + if verbose: + elapsed = time.monotonic() - start + logger.info( + "[eventually:%s] attempt %d failed at %.1fs: %s", + name, + attempts, + elapsed, + exc, + ) + if time.monotonic() >= deadline: + elapsed = time.monotonic() - start + logger.error( + "[eventually:%s] gave up after %d attempts in %.1fs: %s", + name, + attempts, + elapsed, + exc, + ) + raise AssertionError( + f"eventually[{name}] timed out after {attempts} attempts " + f"over {elapsed:.1f}s (timeout={timeout}s, " + f"poll_interval={poll_interval}s).\n" + f"Last failure:\n{exc}" + ) from exc + time.sleep(poll_interval) + + +@dataclass +class EventuallyRunner: + """One-shot arming dispatcher shared by every fluent assert class. + + `.arm(timeout)` queues polling for the NEXT terminal; `.run` consumes + the arming and reverts to sync for subsequent calls. `.run` returns + whatever `check` returns — callers that don't need the value simply + ignore it (None-returning checks still type-check as `T=None`). + """ + + _timeout: int | None = None + + def arm(self, timeout: int) -> None: + self._timeout = timeout + + def run(self, check: Callable[[], T], *, name: str) -> T: + if self._timeout is not None: + timeout = self._timeout + self._timeout = None + return retry_until(check, timeout=timeout, name=name) + return check() diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/lineage_assert.py b/ingestion/tests/cli_e2e_v2/core/fluent/lineage_assert.py new file mode 100644 index 000000000000..296fdf73b27a --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fluent/lineage_assert.py @@ -0,0 +1,93 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. 
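+#
+# Usage sketch (FQNs illustrative):
+#
+#     om_client.table("svc.db.sch.orders").lineage.eventually(60).has_upstream("svc.db.sch.customers")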
+"""LineageAssert — polling-friendly lineage edge and column-lineage checks.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal + +from metadata.generated.schema.entity.data.table import Table + +from .eventually import EventuallyRunner + +if TYPE_CHECKING: + from metadata.ingestion.ometa.ometa_api import OpenMetadata + +_Direction = Literal["upstream", "downstream"] + + +class LineageAssert: + """Lineage namespace — reached via TableAssert.lineage. + + Lineage propagation is eventually-consistent; all terminals accept + `.eventually(timeout)` one-shot arming. + """ + + def __init__(self, om: OpenMetadata, table_fqn: str) -> None: + self._om = om + self._fqn = table_fqn + self._eventually = EventuallyRunner() + + def eventually(self, timeout: int = 60) -> LineageAssert: + self._eventually.arm(timeout) + return self + + def _lineage(self) -> dict: + return self._om.get_lineage_by_name(entity=Table, fqn=self._fqn) or {} + + def _check_edge(self, direction: _Direction, fqn: str) -> None: + """Match direction-typed edges; resolve UUID-only Edge endpoints via nodes/entity FQN map.""" + data = self._lineage() + nodes = data.get("nodes") or [] + central = data.get("entity") or {} + uuid_to_fqn: dict[str, str] = {} + for n in [*nodes, central]: + uid, ref_fqn = n.get("id"), n.get("fullyQualifiedName") + if uid and ref_fqn: + uuid_to_fqn[uid] = ref_fqn + counterpart_field = "fromEntity" if direction == "upstream" else "toEntity" + self_field = "toEntity" if direction == "upstream" else "fromEntity" + matched: set[str] = set() + for e in data.get(f"{direction}Edges") or []: + if uuid_to_fqn.get(e.get(self_field)) == self._fqn: + cp = uuid_to_fqn.get(e.get(counterpart_field)) + if cp: + matched.add(cp) + if fqn in matched: + return + nodes_fqns = sorted(uuid_to_fqn.values()) + raise AssertionError( + f"Table {self._fqn} has no {direction} {fqn!r}. " + f"{direction}Edges resolved to FQNs={sorted(matched)} nodes={nodes_fqns}" + ) + + def has_upstream(self, fqn: str) -> LineageAssert: + self._eventually.run( + lambda: self._check_edge("upstream", fqn), + name=f"has_upstream({fqn})", + ) + return self + + def has_downstream(self, fqn: str) -> LineageAssert: + self._eventually.run( + lambda: self._check_edge("downstream", fqn), + name=f"has_downstream({fqn})", + ) + return self + + def has_column_lineage(self, source: str, target: str) -> LineageAssert: + def _check() -> None: + data = self._lineage() + edges = (data.get("upstreamEdges") or []) + (data.get("downstreamEdges") or []) + for edge in edges: + lineage_details = edge.get("lineageDetails") or {} + for col_edge in lineage_details.get("columnsLineage") or []: + froms = col_edge.get("fromColumns") or [] + to = col_edge.get("toColumn") or "" + if any(source in f for f in froms) and target in to: + return + raise AssertionError(f"No column lineage {source!r} -> {target!r} on table {self._fqn}") + + self._eventually.run(_check, name=f"has_column_lineage({source}->{target})") + return self diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/om_client.py b/ingestion/tests/cli_e2e_v2/core/fluent/om_client.py new file mode 100644 index 000000000000..d40acffcbe96 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fluent/om_client.py @@ -0,0 +1,43 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Fluent entry point wrapping the existing OpenMetadata HTTP client. 
+
+Per Decision #21 of the v2 spec, OmClient is a thin facade — we do NOT build
+a new HTTP client. All actual REST calls delegate to
+metadata.ingestion.ometa.OpenMetadata, which already handles auth, retries,
+and Pydantic deserialization.
+
+OmClient's public surface is the fluent layer: .table(fqn), .service(name),
+.stored_procedure(fqn), plus .raw for escape-hatch tests that need the
+underlying client directly.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from .service_assert import ServiceAssert
+from .stored_procedure_assert import StoredProcedureAssert
+from .table_assert import TableAssert
+
+if TYPE_CHECKING:
+    from metadata.ingestion.ometa.ometa_api import OpenMetadata
+
+
+class OmClient:
+    def __init__(self, om: OpenMetadata) -> None:
+        self._om = om
+
+    @property
+    def raw(self) -> OpenMetadata:
+        return self._om
+
+    def table(self, fqn: str) -> TableAssert:
+        return TableAssert(self._om, fqn)
+
+    def service(self, name: str) -> ServiceAssert:
+        return ServiceAssert(self._om, name)
+
+    def stored_procedure(self, fqn: str) -> StoredProcedureAssert:
+        return StoredProcedureAssert(self._om, fqn)
diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/profile_assert.py b/ingestion/tests/cli_e2e_v2/core/fluent/profile_assert.py
new file mode 100644
index 000000000000..1a0dc1a88158
--- /dev/null
+++ b/ingestion/tests/cli_e2e_v2/core/fluent/profile_assert.py
@@ -0,0 +1,181 @@
+# Copyright 2026 Collate
+# Licensed under the Collate Community License, Version 1.0 (the "License");
+# you may not use this file except in compliance with the License.
+"""ProfileAssert + ColumnProfileAssert + NumericAssert.
+
+Two assertion surfaces:
+  - table-level: row count via `.profile.eventually().row_count().equals(N)`
+  - column-level: arbitrary metric subset via
+    `.profile.eventually().column(name).has_metrics(min=600, max=750, ...)`
+
+Both share the same poll-and-fetch primitive — when armed with
+`.eventually(timeout)`, the profile is polled until present. Column-
+metric assertion uses kwargs that map 1:1 to OM's ColumnProfile field
+names; an unknown kwarg raises so a typo doesn't silently pass.
+"""
+
+from __future__ import annotations
+
+from decimal import Decimal
+from typing import TYPE_CHECKING, Any
+
+from metadata.ingestion.ometa.utils import model_str
+
+from .._om_compat import unwrap_root_list
+from .eventually import EventuallyRunner
+
+if TYPE_CHECKING:
+    from metadata.generated.schema.entity.data.table import ColumnProfile, Table
+    from metadata.ingestion.ometa.ometa_api import OpenMetadata
+
+
+class NumericAssert:
+    """Terminal numeric comparators for a single metric value."""
+
+    def __init__(self, value: int | float | None, *, label: str) -> None:
+        self._value = value
+        self._label = label
+
+    def at_least(self, n: int) -> None:
+        if self._value is None or self._value < n:
+            raise AssertionError(f"{self._label}: expected >= {n}, got {self._value}")
+
+    def equals(self, n: int) -> None:
+        if self._value != n:
+            raise AssertionError(f"{self._label}: expected {n}, got {self._value}")
+
+    def between(self, lo: int, hi: int) -> None:
+        if self._value is None or not (lo <= self._value <= hi):
+            raise AssertionError(f"{self._label}: expected in [{lo}, {hi}], got {self._value}")
+
+
+class ColumnProfileAssert:
+    """Per-column profile assertions reached via
+    `.profile.eventually().column(name)`.
+
+    `has_metrics(**expected)` accepts any subset of OM's ColumnProfile
+    field names as kwargs (e.g. `min=600, max=750, distinctCount=5,
+    nullCount=0, mean=680`). Each kwarg is compared against the
+    corresponding profile field; numeric values that come back as
+    Decimal/float are normalized for the comparison.
+    """
+
+    def __init__(
+        self,
+        om: OpenMetadata,
+        table_fqn: str,
+        column_name: str,
+        runner: EventuallyRunner,
+    ) -> None:
+        self._om = om
+        self._fqn = table_fqn
+        self._column_name = column_name
+        self._eventually = runner
+
+    def has_metrics(self, **expected: Any) -> ColumnProfileAssert:
+        """Assert each given metric matches the column's actual profile.
+
+        Unknown kwargs (typos / fields the OM Pydantic model doesn't
+        carry) raise immediately so a misspelled metric name fails loud
+        rather than silently passing.
+        """
+        if not expected:
+            raise ValueError("has_metrics requires at least one kwarg")
+        label = f"column_profile({self._fqn}.{self._column_name})"
+
+        def _check() -> None:
+            col = self._fetch_column_profile()
+            mismatches: list[str] = []
+            for field, want in expected.items():
+                if not hasattr(col, field):
+                    raise AssertionError(
+                        f"{label}: unknown ColumnProfile field {field!r}. "
+                        f"Available fields: "
+                        f"{sorted(col.model_fields.keys())}"
+                    )
+                got = getattr(col, field)
+                if not _values_match(got, want):
+                    mismatches.append(f"{field}: expected {want!r}, got {got!r}")
+            if mismatches:
+                raise AssertionError(f"{label} metric mismatches:\n  " + "\n  ".join(mismatches))
+
+        self._eventually.run(_check, name=f"has_metrics({sorted(expected)})")
+        return self
+
+    def _fetch_column_profile(self) -> ColumnProfile:
+        table = self._om.get_latest_table_profile(self._fqn)
+        if table is None:
+            raise AssertionError(f"Table not found: {self._fqn}")
+        for c in unwrap_root_list(table.columns):
+            if model_str(c.name) == self._column_name:
+                if c.profile is None:
+                    raise AssertionError(f"Column {self._fqn}.{self._column_name} has no profile yet")
+                return c.profile
+        raise AssertionError(f"Column {self._column_name!r} not found on table {self._fqn}")
+
+
+class ProfileAssert:
+    """Profile namespace — reached via TableAssert.profile.
+
+    Profiler output is eventually-consistent; `.row_count()` and
+    `.column(name)` both compose with `.eventually()` by polling until
+    the data is available.
+    """
+
+    def __init__(self, om: OpenMetadata, table_fqn: str) -> None:
+        self._om = om
+        self._fqn = table_fqn
+        self._eventually = EventuallyRunner()
+
+    def eventually(self, timeout: int = 60) -> ProfileAssert:
+        self._eventually.arm(timeout)
+        return self
+
+    def _fetch_profile(self) -> Table:
+        table = self._om.get_latest_table_profile(self._fqn)
+        if table is None:
+            raise AssertionError(f"Table not found: {self._fqn}")
+        if table.profile is None:
+            raise AssertionError(f"Table {self._fqn} has no profile data")
+        return table
+
+    def row_count(self) -> NumericAssert:
+        """Extract rowCount from the profile, returning a NumericAssert.
+
+        When armed via `.eventually()`, polls until `profile.rowCount` is
+        non-None, then constructs NumericAssert with the polled value.
+        """
+        label = f"rowCount({self._fqn})"
+
+        def _get() -> int:
+            table = self._fetch_profile()
+            if table.profile.rowCount is None:
+                raise AssertionError(f"{label}: no rowCount yet")
+            return int(table.profile.rowCount)
+
+        value = self._eventually.run(_get, name=label)
+        return NumericAssert(value, label=label)
+
+    def column(self, name: str) -> ColumnProfileAssert:
+        """Reach a ColumnProfileAssert scoped to the given column.
+ + Inherits the parent ProfileAssert's arm — calling + `.profile.eventually().column(...)` makes the next column-level + terminal poll, just like `.row_count()` does. + """ + return ColumnProfileAssert(self._om, self._fqn, name, runner=self._eventually) + + +def _values_match(actual: Any, expected: Any) -> bool: + """Compare profile-metric values tolerating Decimal/float/int crossover. + + OM serializes numeric profile metrics as Decimal in some cases and + float in others; tests want to write `min=600` without thinking + about which path the value took. Falls back to == for non-numeric + types (strings, None, bools). + """ + if actual is None: + return False + if isinstance(actual, (Decimal, float, int)) and isinstance(expected, (Decimal, float, int)): + return float(actual) == float(expected) + return actual == expected diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/service_assert.py b/ingestion/tests/cli_e2e_v2/core/fluent/service_assert.py new file mode 100644 index 000000000000..2ca0b6d59713 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fluent/service_assert.py @@ -0,0 +1,63 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""ServiceAssert — database-service-level fluent checks.""" + +from __future__ import annotations + +from typing import Literal + +from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema +from metadata.generated.schema.entity.data.table import Table +from metadata.generated.schema.entity.services.databaseService import DatabaseService + +from .entity_assert import EntityAssert + +_ENTITY_COUNT_LIMIT = 1000 + + +class ServiceAssert(EntityAssert[DatabaseService]): + """Service namespace — reached via OmClient.service(name). + + Provides smoke-level checks beyond the shared base: bulk entity counts. + Inherits exists / get / eventually / has_description_containing. + """ + + _entity_cls = DatabaseService + + def _count_entities(self, kind: Literal["tables", "schemas"]) -> int: + self._fetch() + entity_cls = Table if kind == "tables" else DatabaseSchema + items = list( + self._om.list_all_entities( + entity=entity_cls, + limit=_ENTITY_COUNT_LIMIT, + params={"service": self._fqn}, + ) + ) + return len(items) + + def has_entity_count( + self, + kind: Literal["tables", "schemas"], + *, + at_least: int, + ) -> None: + """Assert the service has at least `at_least` entities of `kind`. + + Raises ValueError when `at_least` exceeds the list_all_entities cap + (pagination is not implemented at this assertion level). + """ + if at_least > _ENTITY_COUNT_LIMIT: + raise ValueError( + f"has_entity_count(at_least={at_least}) exceeds the " + f"list_all_entities cap ({_ENTITY_COUNT_LIMIT}); pagination " + f"is not implemented for this assertion." 
+ ) + + def _check() -> None: + actual = self._count_entities(kind) + if actual < at_least: + raise AssertionError(f"Service {self._fqn}: expected >= {at_least} {kind}, got {actual}") + + self._eventually.run(_check, name=f"has_entity_count({kind},{at_least})") diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/stored_procedure_assert.py b/ingestion/tests/cli_e2e_v2/core/fluent/stored_procedure_assert.py new file mode 100644 index 000000000000..50a6efeb6e07 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fluent/stored_procedure_assert.py @@ -0,0 +1,36 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""StoredProcedureAssert — fluent assertions on stored procedure entities.""" + +from __future__ import annotations + +from metadata.generated.schema.entity.data.storedProcedure import StoredProcedure + +from .entity_assert import EntityAssert + + +class StoredProcedureAssert(EntityAssert[StoredProcedure]): + """Fluent assertions on a single stored procedure by FQN. + + Inherits exists / get / eventually / has_description_containing from + EntityAssert; adds `has_code_containing` which reads the SP body. + """ + + _entity_cls = StoredProcedure + + def has_code_containing(self, text: str) -> StoredProcedureAssert: + """Assert the stored procedure's SQL body contains the given substring.""" + + def _check() -> None: + sp = self._fetch() + code = "" + if sp.storedProcedureCode is not None and sp.storedProcedureCode.code is not None: + code = sp.storedProcedureCode.code + if text not in code: + raise AssertionError( + f"StoredProcedure {self._fqn} code does not contain {text!r}. Actual code: {code!r}" + ) + + self._eventually.run(_check, name=f"has_code_containing({text!r})") + return self diff --git a/ingestion/tests/cli_e2e_v2/core/fluent/table_assert.py b/ingestion/tests/cli_e2e_v2/core/fluent/table_assert.py new file mode 100644 index 000000000000..4ecae1af58b1 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/fluent/table_assert.py @@ -0,0 +1,266 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""TableAssert + ColumnAssert — fluent assertions on Table entities. + +TableAssert inherits shared fluent surface (exists / get / eventually / +has_description_containing) from `EntityAssert[Table]`. Entity-specific +terminals (tags, owners, FK constraint, column descent, lineage/profile +namespaces) live here. + +ColumnAssert is synchronous — column checks on fresh ingests are reliable +in practice; polling chains off TableAssert. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, ClassVar + +from metadata.generated.schema.entity.data.table import ( + Column, + ConstraintType, + DataType, + Table, + TableConstraint, +) +from metadata.ingestion.ometa.utils import model_str + +from .._om_compat import unwrap_root_list +from .entity_assert import EntityAssert +from .lineage_assert import LineageAssert +from .profile_assert import ProfileAssert + +if TYPE_CHECKING: + from metadata.ingestion.ometa.ometa_api import OpenMetadata + + +def _fk_matches( + constraint: TableConstraint, + column: str, + referenced_table: str, + referenced_column: str, +) -> bool: + """True if `constraint` is a FOREIGN_KEY on `column` pointing at the + named referred column. 
+ + The referredColumns FQNs may be rendered as either the full + `service.database.schema.table.column` form or the shorter + `table.column` form depending on how OM resolved them at ingest time; + both are accepted via tail-match. + """ + if constraint.constraintType != ConstraintType.FOREIGN_KEY: + return False + own_cols = {model_str(x) for x in unwrap_root_list(constraint.columns)} + if column not in own_cols: + return False + wanted_tail = f".{referenced_table}.{referenced_column}" + wanted_short = f"{referenced_table}.{referenced_column}" + return any( + model_str(ref).endswith(wanted_tail) or model_str(ref) == wanted_short + for ref in unwrap_root_list(constraint.referredColumns) + ) + + +class TableAssert(EntityAssert[Table]): + """Fluent assertions on a single Table identified by FQN.""" + + _entity_cls = Table + _default_fields: ClassVar[list[str]] = ["tags", "owners", "columns"] + + # --- terminals ---------------------------------------------------- + + def has_tag(self, fqn: str) -> TableAssert: + def _check() -> None: + table = self._fetch() + actual = {model_str(t.tagFQN) for t in unwrap_root_list(table.tags)} + if fqn not in actual: + raise AssertionError(f"Table {self._fqn} missing tag {fqn!r}. Actual tags: {sorted(actual)}") + + self._eventually.run(_check, name=f"has_tag({fqn})") + return self + + def has_owner(self, name: str) -> TableAssert: + def _check() -> None: + table = self._fetch() + actual = {o.name for o in unwrap_root_list(table.owners)} + if name not in actual: + raise AssertionError(f"Table {self._fqn} missing owner {name!r}. Actual owners: {sorted(actual)}") + + self._eventually.run(_check, name=f"has_owner({name})") + return self + + def has_foreign_key_constraint( + self, + column: str, + referenced_table: str, + referenced_column: str, + ) -> TableAssert: + """Assert the table carries a FOREIGN_KEY TableConstraint on `column` + pointing at `referenced_table.referenced_column`. + + MySQL lands FK data here — not as a lineage edge. Matching delegates + to `_fk_matches`. + """ + + def _check() -> None: + constraints = unwrap_root_list(self._fetch(fields=["tableConstraints"]).tableConstraints) + if any(_fk_matches(c, column, referenced_table, referenced_column) for c in constraints): + return + raise AssertionError( + f"Table {self._fqn} missing FOREIGN_KEY({column}) -> " + f"{referenced_table}({referenced_column}). " + f"Constraints present: {constraints!r}" + ) + + self._eventually.run( + _check, + name=f"has_foreign_key_constraint({column}->{referenced_table}.{referenced_column})", + ) + return self + + def has_schema_definition_containing(self, text: str) -> TableAssert: + """Assert `schemaDefinition` (raw DDL stored on the entity) contains + `text` — case-insensitive substring match. + + Populated for views when metadata ingest runs with `includeDDL=True`, + and for tables when the connector emits CREATE TABLE bodies. Used as + the prerequisite check that ingest actually plumbed DDL through — + a failed lineage parse with empty `schemaDefinition` is a different + bug than a failed parse on present DDL. + + Case insensitivity matters: MySQL normalizes view DDL to lowercase + (`left join`, not `LEFT JOIN`); other dialects preserve case. The + assertion keeps tests portable across dialects without each one + having to know the specific casing. 
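+
+        Example (FQN illustrative):
+
+            om_client.table(view_fqn).has_schema_definition_containing("left join")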
+ """ + wanted_lower = text.lower() + + def _check() -> None: + entity = self._fetch(fields=["schemaDefinition"]) + actual = model_str(entity.schemaDefinition) if entity.schemaDefinition else "" + if wanted_lower not in actual.lower(): + raise AssertionError( + f"Table {self._fqn} schemaDefinition does not contain " + f"{text!r} (case-insensitive). Actual: {actual!r}" + ) + + self._eventually.run(_check, name=f"has_schema_definition_containing({text!r})") + return self + + def is_soft_deleted(self) -> TableAssert: + """Assert the table exists in OM but is marked `deleted=True`. + + Soft-deleted entities are filtered out of `get_by_name` by default. + We use `list_entities` with `include=all` to find the entity even + when soft-deleted, then check the `deleted` field. Used by + mark-deleted tests after re-ingest with `markDeletedTables=True`. + """ + + def _check() -> None: + if not self._fetch_any_state().deleted: + raise AssertionError(f"Table {self._fqn} is not soft-deleted (deleted=False)") + + self._eventually.run(_check, name="is_soft_deleted") + return self + + def is_not_deleted(self) -> TableAssert: + """Assert the table exists in OM with `deleted=False`.""" + + def _check() -> None: + entity = self._fetch_any_state() + if entity.deleted: + raise AssertionError(f"Table {self._fqn} is unexpectedly soft-deleted (deleted=True)") + + self._eventually.run(_check, name="is_not_deleted") + return self + + def _fetch_any_state(self) -> Table: + """Fetch the table including soft-deleted state (default get_by_name + filters those out).""" + entity = self._om.get_by_name( + entity=Table, + fqn=self._fqn, + fields=["deleted"], + include="all", + ) + if entity is None: + raise AssertionError(f"Table not found (in any state): {self._fqn}") + return entity + + # --- descent into column / namespaces ----------------------------- + + def column(self, name: str) -> ColumnAssert: + return ColumnAssert(self._om, self._fqn, name) + + @property + def lineage(self) -> LineageAssert: + return LineageAssert(self._om, self._fqn) + + @property + def profile(self) -> ProfileAssert: + return ProfileAssert(self._om, self._fqn) + + +class ColumnAssert: + """Synchronous assertions on a named column of a Table.""" + + def __init__(self, om: OpenMetadata, table_fqn: str, column_name: str) -> None: + self._om = om + self._table_fqn = table_fqn + self._column_name = column_name + + def _fetch_column(self) -> Column: + table = self._om.get_by_name( + entity=Table, + fqn=self._table_fqn, + fields=["tags", "columns"], + ) + if table is None: + raise AssertionError(f"Table not found: {self._table_fqn}") + for c in unwrap_root_list(table.columns): + if model_str(c.name) == self._column_name: + return c + raise AssertionError(f"Column {self._column_name!r} not found on table {self._table_fqn}") + + def has_tag(self, fqn: str) -> ColumnAssert: + column = self._fetch_column() + actual = {model_str(t.tagFQN) for t in unwrap_root_list(column.tags)} + if fqn not in actual: + raise AssertionError( + f"Column {self._table_fqn}.{self._column_name} missing tag {fqn!r}. Actual tags: {sorted(actual)}" + ) + return self + + def has_no_tag(self, fqn: str) -> ColumnAssert: + """Assert the column does NOT carry the given tag. + + Used as the negative complement to `has_tag` — guards against + regressions where a classifier becomes overconfident and tags + non-PII columns. Without this, a positive-only suite passes + cleanly even when every column gets PII-flagged. 
+ """ + column = self._fetch_column() + actual = {model_str(t.tagFQN) for t in unwrap_root_list(column.tags)} + if fqn in actual: + raise AssertionError( + f"Column {self._table_fqn}.{self._column_name} unexpectedly " + f"carries tag {fqn!r}. Actual tags: {sorted(actual)}" + ) + return self + + def has_type(self, data_type: DataType) -> ColumnAssert: + column = self._fetch_column() + if column.dataType != data_type: + raise AssertionError( + f"Column {self._table_fqn}.{self._column_name} has type {column.dataType}, expected {data_type}" + ) + return self + + def has_description_containing(self, text: str) -> ColumnAssert: + column = self._fetch_column() + desc = model_str(column.description) if column.description else "" + if text not in desc: + raise AssertionError( + f"Column {self._table_fqn}.{self._column_name} description does not contain {text!r}. Actual: {desc!r}" + ) + return self diff --git a/ingestion/tests/cli_e2e_v2/core/runner/__init__.py b/ingestion/tests/cli_e2e_v2/core/runner/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/cli_e2e_v2/core/runner/cli_runner.py b/ingestion/tests/cli_e2e_v2/core/runner/cli_runner.py new file mode 100644 index 000000000000..b923bdecab0c --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/runner/cli_runner.py @@ -0,0 +1,174 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Runs `metadata ` via subprocess and returns a typed Status. + +One CliRunner per test, bound to tmp_path. Each `.run()` writes numbered +cfg / status / stdout artifacts. Subprocess has a bounded timeout +(default 600s, kwarg override). CliExecutionError carries exit_code, +stderr, stdout, config_path, and argv for post-mortem. +""" + +from __future__ import annotations + +import json +import logging +import subprocess +from typing import TYPE_CHECKING + +from .errors import CliExecutionError +from .status import Status + +if TYPE_CHECKING: + from pathlib import Path + + from ..config.builder import WorkflowConfig + +logger = logging.getLogger(__name__) + +DEFAULT_TIMEOUT_SECONDS = 600 + +# Cap inline step failures GLOBALLY across all steps. Failures cascade — +# step 1's first failure is overwhelmingly the root cause, step 5's third +# failure is downstream noise. The full list is in the status JSON for +# deep dives. +_INLINE_FAILURES_LIMIT = 3 +_INLINE_FAILURE_CHARS = 500 + + +def _summarize_step_failures(status_path: Path) -> str | None: + """Best-effort: read the status JSON and pull out the first few step + failures (across all steps, capped globally) as a short, scannable + block. Returns None on any read / parse failure — caller falls back + to the raw stdout/stderr dump. 
+
+    Output shape (one line per failure, truncated):
+        [StepName::FailureName] first-line-of-error…
+    """
+    if not status_path.exists():
+        return None
+    try:
+        data = json.loads(status_path.read_text())
+    except (json.JSONDecodeError, OSError):
+        return None
+
+    lines: list[str] = []
+    for step in data.get("steps") or []:
+        step_name = step.get("name", "?")
+        for failure in step.get("failures") or []:
+            if len(lines) >= _INLINE_FAILURES_LIMIT:
+                return "\n".join(lines)
+            name = failure.get("name", "?")
+            # `error` may be missing, None, or empty; "".splitlines() is [],
+            # so guard before indexing the first line.
+            error_lines = (failure.get("error") or "").splitlines()
+            err = error_lines[0][:_INLINE_FAILURE_CHARS] if error_lines else ""
+            lines.append(f"  [{step_name}::{name}] {err}")
+    return "\n".join(lines) if lines else None
+
+
+class CliRunner:
+    """Runs `metadata <subcommand>` via subprocess and returns a typed Status.
+
+    Usage:
+        runner = CliRunner(tmp_path)
+        status = runner.run(cfg)  # ingest
+        status2 = runner.run(cfg.pipeline(Profiler...))  # profile
+    """
+
+    def __init__(self, tmp_path: Path) -> None:
+        self.tmp_path = tmp_path
+        self._invocation_counter: dict[str, int] = {}
+
+    def run(
+        self,
+        config: WorkflowConfig,
+        *,
+        timeout: int = DEFAULT_TIMEOUT_SECONDS,
+    ) -> Status:
+        identifier = config.pipeline_identifier
+        n = self._invocation_counter.get(identifier, 0)
+        self._invocation_counter[identifier] = n + 1
+
+        cfg_path = config.write_tmp(self.tmp_path, invocation=n)
+        status_path = self.tmp_path / f"status_{identifier}_{n}.json"
+        stdout_path = self.tmp_path / f"stdout_{identifier}_{n}.log"
+
+        command = [
+            "metadata",
+            config.cli_subcommand,
+            "-c",
+            str(cfg_path),
+            "--status-file",
+            str(status_path),
+        ]
+
+        try:
+            result = subprocess.run(
+                command,
+                capture_output=True,
+                text=True,
+                check=False,
+                timeout=timeout,
+            )
+        except subprocess.TimeoutExpired as exc:
+            # exc.stdout / exc.stderr may be bytes or str depending on
+            # capture config — normalize to str for consistent logging.
+            out = _coerce_text(exc.stdout)
+            err = _coerce_text(exc.stderr)
+            stdout_path.write_text(out)
+            raise CliExecutionError(
+                exit_code=-1,
+                stderr=(f"CLI timed out after {timeout}s.\nstderr so far:\n{err}"),
+                stdout=out,
+                config_path=cfg_path,
+                status_path=status_path,
+                command=command,
+            ) from exc
+
+        # Persist stdout unconditionally — useful for debugging both
+        # successful runs (checking warnings) and failed ones.
+        stdout_path.write_text(result.stdout or "")
+
+        # One line with the three artifact paths. Invaluable for post-mortem
+        # because pytest's tmp_path lives under a deep auto-generated dir.
+        logger.info(
+            "[cli] %s invocation=%d exit=%d cfg=%s status=%s stdout=%s",
+            identifier,
+            n,
+            result.returncode,
+            cfg_path,
+            status_path,
+            stdout_path,
+        )
+
+        if result.returncode != 0:
+            raise CliExecutionError(
+                exit_code=result.returncode,
+                stderr=result.stderr,
+                stdout=result.stdout,
+                config_path=cfg_path,
+                status_path=status_path,
+                command=command,
+                step_failures_summary=_summarize_step_failures(status_path),
+            )
+
+        # Defensive: CLI reported success but wrote no status file — something
+        # broke between workflow completion and file emission (e.g., a future
+        # BaseWorkflow.write_status_file regression).
+ if not status_path.exists(): + raise CliExecutionError( + exit_code=0, + stderr=(f"CLI exited 0 but no status file was written at {status_path}."), + stdout=result.stdout, + config_path=cfg_path, + status_path=status_path, + command=command, + ) + + return Status.from_json(status_path) + + +def _coerce_text(value: object) -> str: + if value is None: + return "" + if isinstance(value, bytes): + return value.decode("utf-8", errors="replace") + return str(value) diff --git a/ingestion/tests/cli_e2e_v2/core/runner/errors.py b/ingestion/tests/cli_e2e_v2/core/runner/errors.py new file mode 100644 index 000000000000..cf72be9865e0 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/runner/errors.py @@ -0,0 +1,105 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Errors raised by the CLI runner and baseline layers. + +`E2ESetupError` is the shared base for any exception that signals +"the test couldn't run" (CLI failure, source baseline drift in +check_only mode, JWT mint failure). It inherits `Exception` — NOT +`AssertionError` — so pytest reports these as test errors (E) rather +than test failures (F). Assertion failures (`StructuralMismatch` in +`core/expected/differ.py`) keep the `AssertionError` lineage so pytest +renders their diffs with introspection. + +Two-category rule: + - E2ESetupError (→ Exception) : infrastructure couldn't complete → E + - AssertionError : test assertion failed → F + +Downstream code that wants to catch any setup failure (e.g. a retry +wrapper, a diagnostic collector) imports E2ESetupError rather than +enumerating the concrete subclasses. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path + + +class E2ESetupError(Exception): + """Base for setup-phase failures — the test couldn't run as intended. + + Subclassed by every exception that signals infrastructure trouble: + CLI subprocess failure, source-baseline drift, JWT mint failure. + """ + + +class CliExecutionError(E2ESetupError): + """Raised when `metadata` CLI subprocess exits with a non-zero code OR + times out OR completes successfully without writing a status file. 
+ + Carries full diagnostic context so pytest's default failure rendering + surfaces everything a developer needs for post-mortem: + - exit_code: subprocess return code (-1 for a timeout) + - stderr: complete captured stderr + - stdout: complete captured stdout (often carries step-level + progress logs the CLI doesn't persist elsewhere) + - config_path: rendered YAML location — survives test teardown via + pytest's tmp_path + - status_path: path where the status JSON was expected to land — + included even when the file wasn't written so a + developer can inspect the (existing or missing) file + directly from the failure message + - command: full argv of the subprocess + """ + + def __init__( + self, + exit_code: int, + stderr: str, + config_path: Path, + command: list[str], + stdout: str = "", + status_path: Path | None = None, + step_failures_summary: str | None = None, + ) -> None: + self.exit_code = exit_code + self.stderr = stderr + self.stdout = stdout + self.config_path = config_path + self.status_path = status_path + self.command = command + self.step_failures_summary = step_failures_summary + + status_line = ( + f" status: {status_path} (exists={status_path.exists() if status_path else 'n/a'})\n" + if status_path is not None + else "" + ) + # Surface extracted step failures above the raw stdout/stderr dump so + # a developer scanning the exception sees the actionable content + # first — the wall of capture logs is still below for deep dives. + failures_block = ( + f" step failures (from status file):\n{step_failures_summary}\n" if step_failures_summary else "" + ) + super().__init__( + f"metadata CLI exited with code {exit_code}\n" + f" command: {' '.join(command)}\n" + f" config: {config_path}\n" + f"{status_line}" + f"{failures_block}" + f" stdout:\n{stdout}\n" + f" stderr:\n{stderr}" + ) + + +class SourceBaselineDrift(E2ESetupError): # noqa: N818 (intentional API surface — public exception name) + """Raised by `ensure_baseline` when source state does not match the declared + baseline in check_only mode. + + Cloud sources default to check_only so we never mutate shared resources; when + drift is detected, the test setup fails loudly with operator instructions + rather than silently diverging. + """ diff --git a/ingestion/tests/cli_e2e_v2/core/runner/status.py b/ingestion/tests/cli_e2e_v2/core/runner/status.py new file mode 100644 index 000000000000..17fa7189a2bb --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/runner/status.py @@ -0,0 +1,91 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Typed status contract between CliRunner and tests. + +Mirrors the JSON shape written by BaseWorkflow.write_status_file (see +ingestion/src/metadata/workflow/base.py). Observed live in smoke testing: + { + "pipeline_type": "mysql", + "ingestion_pipeline_fqn": null, + "success": true, + "steps": [ + {"name": "Mysql", "records": 178, "updated_records": 47, + "warnings": 0, "errors": 0, "filtered": 0, + "failures": null, "progress": null, "operationMetrics": null, + "sourceTimeMs": null, "sinkTimeMs": null}, + ... 
+ ] + } + +Parsing contract: + - required keys must be present (pipeline_type, success, steps) + - required step keys must be present (name, records, updated_records, + warnings, errors, filtered, failures) + - step `failures` may be `null` (mapped to empty list) or a list of dicts + - A schema change on the CLI side surfaces as a KeyError at parse time, + not a silent mis-count — the test halts loudly rather than passing with + zeroes it inferred from missing keys. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from pathlib import Path + + +@dataclass(frozen=True) +class StepStatus: + name: str + records: int + updated_records: int + warnings: int + errors: int + filtered: int + failures: list[dict] = field(default_factory=list) + + @classmethod + def from_dict(cls, step: dict[str, Any]) -> StepStatus: + return cls( + name=str(step["name"]), + records=int(step["records"] or 0), + updated_records=int(step["updated_records"] or 0), + warnings=int(step["warnings"] or 0), + errors=int(step["errors"] or 0), + filtered=int(step["filtered"] or 0), + failures=list(step["failures"] or []), + ) + + +@dataclass(frozen=True) +class Status: + pipeline_type: str + ingestion_pipeline_fqn: str | None + success: bool + steps: list[StepStatus] + + @classmethod + def from_json(cls, path: Path) -> Status: + data: dict[str, Any] = json.loads(path.read_text()) + return cls( + pipeline_type=str(data["pipeline_type"]), + ingestion_pipeline_fqn=data.get("ingestion_pipeline_fqn"), + success=bool(data["success"]), + steps=[StepStatus.from_dict(s) for s in (data.get("steps") or [])], + ) + + @property + def all_failures(self) -> list[dict]: + """Flat list of failure detail dicts across all steps.""" + return [f for step in self.steps for f in step.failures] + + def step(self, name: str) -> StepStatus | None: + """Look up a step by name (e.g. 'Mysql', 'OpenMetadata', 'Profiler').""" + for s in self.steps: + if s.name == name: + return s + return None diff --git a/ingestion/tests/cli_e2e_v2/core/source/__init__.py b/ingestion/tests/cli_e2e_v2/core/source/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/cli_e2e_v2/core/source/common_baseline.py b/ingestion/tests/cli_e2e_v2/core/source/common_baseline.py new file mode 100644 index 000000000000..bad6d41697f8 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/source/common_baseline.py @@ -0,0 +1,258 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Common portable baseline — tables + seed data shared across SQL dialects. + +`build_common_metadata(schema)` returns a SQLAlchemy `MetaData` with the +portable tables (`customers`, `transactions`) declared via Core types so +`metadata.create_all(conn)` emits dialect-correct DDL everywhere. + +`COMMON_CUSTOMER_ROWS` and `COMMON_TRANSACTION_ROWS` are the portable seed +data. Dialects consume them through a TableSeed with a dialect-specific +`insert_sql` template (`ON DUPLICATE KEY UPDATE` for MySQL, `ON CONFLICT +DO UPDATE` for Postgres, etc.) — the base enforcer runs the template +against these rows via SQLAlchemy's executemany binding, no dialect +branching needed. + +Dialect-specific tables (e.g., MySQL's `all_types`) live in each +connector's baseline module and are added to its extended MetaData. 
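+
+Connector-side usage sketch (the extra `all_types` table is illustrative,
+not part of the common baseline):
+
+    md = build_common_metadata(schema="e2e")
+    Table("all_types", md, Column("id", Integer, primary_key=True))
+    baseline = SqlSourceBaseline(schemas=["e2e"], metadata=md)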
+""" + +from __future__ import annotations + +from datetime import date, datetime +from decimal import Decimal +from typing import Any + +from sqlalchemy import ( + CHAR, + BigInteger, + Boolean, + Column, + Date, + DateTime, + ForeignKey, + Integer, + MetaData, + Numeric, + String, + Table, + Text, +) + + +def build_common_metadata(schema: str = "e2e") -> MetaData: + """Build a MetaData carrying portable tables (customers, transactions). + + Connector baselines call this and may add dialect-specific tables to the + returned object before handing it to `SqlSourceBaseline`. + """ + md = MetaData(schema=schema) + + Table( + "customers", + md, + Column("id", Integer, primary_key=True, nullable=False, comment="Primary key identifying the customer."), + Column("first_name", String(50), nullable=False, comment="Customer first name."), + Column("last_name", String(50), nullable=False), + Column("full_name", String(100), nullable=False), + Column("email", String(255), nullable=False, comment="Customer email address."), + Column("address", String(255), nullable=True), + Column("city", String(100), nullable=True), + Column("country", String(100), nullable=True), + Column("zipcode", String(20), nullable=True), + Column("date_of_birth", Date, nullable=True), + Column("age", Integer, nullable=True), + Column("credit_score", Integer, nullable=True), + Column("status", String(20), nullable=False), + Column("is_active", Boolean, nullable=False), + Column("bio", Text, nullable=True), + Column("joined_date", Date, nullable=False), + comment="Customer master table used by CLI E2E v2 MySQL pilot.", + ) + + Table( + "transactions", + md, + Column("id", BigInteger, primary_key=True, nullable=False), + Column( + "customer_id", + Integer, + ForeignKey(f"{schema}.customers.id"), + nullable=False, + comment="FK referencing e2e.customers.id.", + ), + Column("amount", Numeric(10, 2), nullable=False, comment="Transaction amount in the ticker currency."), + Column("currency", CHAR(3), nullable=False), + Column("exchange_rate", Numeric(10, 4), nullable=True), + Column("status", String(20), nullable=False), + Column("txn_at", DateTime, nullable=False), + Column("reference_number", CHAR(12), nullable=False), + Column("ip_address", String(45), nullable=True), + Column("notes", Text, nullable=True), + comment="Customer transaction events with FK to customers.id.", + ) + + return md + + +# ----------------------------------------------------------------------------- +# Seed data — portable Python values. Dialects bind via :key placeholders. 
+# ----------------------------------------------------------------------------- + +COMMON_CUSTOMER_ROWS: list[dict[str, Any]] = [ + { + "id": 1, + "first_name": "Alice", + "last_name": "Anderson", + "full_name": "Alice Anderson", + "email": "alice@test.com", + "address": "100 Main St", + "city": "Springfield", + "country": "USA", + "zipcode": "11111", + "date_of_birth": date(1990, 1, 15), + "age": 36, + "credit_score": 720, + "status": "active", + "is_active": True, + "bio": "Loyal customer since 2026.", + "joined_date": date(2026, 1, 1), + }, + { + "id": 2, + "first_name": "Bob", + "last_name": "Brown", + "full_name": "Bob Brown", + "email": "bob@test.com", + "address": "200 Oak Ave", + "city": "Portland", + "country": "USA", + "zipcode": "22222", + "date_of_birth": date(1985, 3, 20), + "age": 41, + "credit_score": 680, + "status": "active", + "is_active": True, + "bio": None, + "joined_date": date(2026, 1, 2), + }, + { + "id": 3, + "first_name": "Charlie", + "last_name": "Chen", + "full_name": "Charlie Chen", + "email": "charlie@test.com", + "address": "300 Pine Rd", + "city": "Seattle", + "country": "USA", + "zipcode": "33333", + "date_of_birth": date(1992, 6, 10), + "age": 34, + "credit_score": 650, + "status": "inactive", + "is_active": False, + "bio": "Churned in Q2 2026.", + "joined_date": date(2026, 1, 3), + }, + { + "id": 4, + "first_name": "Diana", + "last_name": "Davis", + "full_name": "Diana Davis", + "email": "diana@test.com", + "address": "400 Elm St", + "city": "Austin", + "country": "USA", + "zipcode": "44444", + "date_of_birth": date(1988, 11, 2), + "age": 38, + "credit_score": 750, + "status": "active", + "is_active": True, + "bio": "High-value account.", + "joined_date": date(2026, 1, 4), + }, + { + "id": 5, + "first_name": "Eve", + "last_name": "Evans", + "full_name": "Eve Evans", + "email": "eve@test.com", + "address": "500 Birch Ln", + "city": "Denver", + "country": "USA", + "zipcode": "55555", + "date_of_birth": date(2000, 5, 25), + "age": 26, + "credit_score": 600, + "status": "pending", + "is_active": True, + "bio": None, + "joined_date": date(2026, 1, 5), + }, +] + + +COMMON_TRANSACTION_ROWS: list[dict[str, Any]] = [ + { + "id": 1, + "customer_id": 1, + "amount": Decimal("125.50"), + "currency": "USD", + "exchange_rate": Decimal("1.0000"), + "status": "completed", + "txn_at": datetime(2026, 2, 1, 9, 15, 0), + "reference_number": "TXN000000001", + "ip_address": "10.0.0.1", + "notes": "Monthly subscription renewal.", + }, + { + "id": 2, + "customer_id": 1, + "amount": Decimal("49.99"), + "currency": "USD", + "exchange_rate": Decimal("1.0000"), + "status": "completed", + "txn_at": datetime(2026, 2, 5, 14, 30, 0), + "reference_number": "TXN000000002", + "ip_address": "10.0.0.1", + "notes": None, + }, + { + "id": 3, + "customer_id": 2, + "amount": Decimal("250.00"), + "currency": "USD", + "exchange_rate": Decimal("1.0000"), + "status": "completed", + "txn_at": datetime(2026, 2, 10, 11, 20, 0), + "reference_number": "TXN000000003", + "ip_address": "10.0.0.2", + "notes": "Premium upgrade.", + }, + { + "id": 4, + "customer_id": 3, + "amount": Decimal("19.99"), + "currency": "USD", + "exchange_rate": Decimal("1.0000"), + "status": "refunded", + "txn_at": datetime(2026, 2, 12, 16, 45, 0), + "reference_number": "TXN000000004", + "ip_address": "10.0.0.3", + "notes": "Customer requested refund.", + }, + { + "id": 5, + "customer_id": 4, + "amount": Decimal("125.50"), + "currency": "EUR", + "exchange_rate": Decimal("1.0850"), + "status": "completed", + "txn_at": datetime(2026, 
2, 18, 13, 10, 0), + "reference_number": "TXN000000005", + "ip_address": "10.0.0.4", + "notes": None, + }, +] diff --git a/ingestion/tests/cli_e2e_v2/core/source/orchestrator.py b/ingestion/tests/cli_e2e_v2/core/source/orchestrator.py new file mode 100644 index 000000000000..9339840531a8 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/source/orchestrator.py @@ -0,0 +1,108 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Source baseline orchestrator: ensure_baseline + EnforcementPolicy + trust mode. + +`ensure_baseline` is the uniform orchestrator for every source family. Each +per-connector baseline fixture calls it once per session; the policy decides +whether drifts apply (local Docker) or raise (shared cloud sources). + +Trust mode (policy=None and expected=None) short-circuits with a WARNING, +letting a connector migrate to v2 before its baseline is fully modeled +(Decision #18). +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from enum import Enum +from typing import TYPE_CHECKING + +from ..runner.errors import SourceBaselineDrift + +if TYPE_CHECKING: + from .types import BaselineSpec, Diff, SourceBaselineEnforcer + +logger = logging.getLogger(__name__) + + +class EnforcementMode(Enum): + """How a policy reconciles detected source-baseline drift. + + Matches the style of `MatchMode` (also an enum) so the two + comparison-lifecycle modes in the framework share one idiom. + """ + + APPLY = "apply" # drifts trigger enforcer.apply (mutates the source) + CHECK_ONLY = "check_only" # drifts raise SourceBaselineDrift + + +@dataclass(frozen=True) +class EnforcementPolicy: + """Binds an enforcer to a mode. + + APPLY: drifts trigger enforcer.apply (mutates the source). + Default for local Docker-backed connectors. + CHECK_ONLY: drifts raise SourceBaselineDrift. + Default for shared cloud sources — never mutate. + """ + + enforcer: SourceBaselineEnforcer + mode: EnforcementMode = EnforcementMode.APPLY + + +def ensure_baseline( + policy: EnforcementPolicy | None, + expected: BaselineSpec | None, + *, + connector_name: str, +) -> None: + """Three-phase lifecycle with trust-mode short-circuit. + + Trust mode: policy or expected is None → log a warning, do nothing. + Lets a connector migrate to v2 before its baseline is declared. + + Otherwise: introspect → compare → apply or raise: + - no drifts → log and return + - drifts + CHECK_ONLY → raise SourceBaselineDrift listing each drift. + The exception message tells the operator to re-run locally with + APPLY against a dedicated database — the standalone apply CLI + considered in the v2 design was deferred to the first cloud + connector (see `project-cloud-baseline-recovery-deferred.md`). + - drifts + APPLY → call enforcer.apply(drifts) + """ + if policy is None or expected is None: + logger.warning( + "[%s] running in TRUST MODE — no source baseline enforced. 
Source state is assumed correct.", + connector_name, + ) + return + + drifts = policy.enforcer.compare(expected) + + if not drifts: + logger.info("[%s] source baseline in sync", connector_name) + return + + if policy.mode is EnforcementMode.CHECK_ONLY: + raise SourceBaselineDrift( + f"[{connector_name}] baseline drift detected ({len(drifts)} items):\n" + f"{_render_drift_list(drifts)}\n\n" + f"This connector runs in check_only mode — baselines must be applied " + f"out-of-band (e.g., re-run the test suite locally against this source " + f"with EnforcementMode.APPLY on a dedicated DB). Contact the connector owner if unsure." + ) + + logger.info("[%s] applying %d baseline drift fixes", connector_name, len(drifts)) + policy.enforcer.apply(drifts) + + +def _render_drift_list(drifts: list[Diff]) -> str: + """Inline renderer for a drift list inside the check-only error message. + + Uses Diff's own `__str__` so source-side and OM-side error output share + the same ` path:\\n expected: X\\n actual: Y` shape — consistent + reading across both failure surfaces. + """ + return "\n".join(str(d) for d in drifts) diff --git a/ingestion/tests/cli_e2e_v2/core/source/sql.py b/ingestion/tests/cli_e2e_v2/core/source/sql.py new file mode 100644 index 000000000000..6d5402e8109d --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/source/sql.py @@ -0,0 +1,90 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""SQL-family baseline types. + +`SqlSourceBaseline` carries a SQLAlchemy `MetaData` (tables + columns + FKs + +comments) plus companion data for things Core doesn't model: seed rows, +view definitions, stored procedures. DDL is emitted by +`metadata.create_all(conn)` in the enforcer; seed INSERTs are dialect-specific +(the `TableSeed.insert_sql` template is supplied by each connector's +baseline). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + +from .types import BaselineSpec + +if TYPE_CHECKING: + from sqlalchemy import MetaData + + +@dataclass(frozen=True) +class TableSeed: + """Deterministic seed rows for a baseline table. + + `rows` is portable data (list of dicts). `insert_sql` is a + dialect-specific template with `:key` placeholders that SQLAlchemy binds + against each row via executemany — this is where idempotent upsert + clauses live (MySQL `ON DUPLICATE KEY UPDATE`, Postgres `ON CONFLICT DO + UPDATE`, etc.). The base enforcer runs `insert_sql` against `rows` + without knowing the dialect. + + `expected_row_count` is derived — `len(rows)` — so the seed spec has + one source of truth. + """ + + table_name: str + rows: list[dict[str, Any]] + insert_sql: str + + @property + def expected_row_count(self) -> int: + return len(self.rows) + + +@dataclass(frozen=True) +class ViewDefinition: + """A single expected view. + + `definition_sql` is executed verbatim at apply time — baselines supply a + CREATE OR REPLACE VIEW (or dialect equivalent) statement. + """ + + schema: str + name: str + definition_sql: str + + +@dataclass(frozen=True) +class StoredProcedureDefinition: + """A single expected stored procedure. + + Dialect-specific: MySQL drops + creates (no CREATE OR REPLACE PROCEDURE); + Postgres uses CREATE OR REPLACE PROCEDURE. The enforcer subclass owns + the dialect DDL; `definition_sql` carries the body as supplied by the + baseline. 
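+
+    Example (a sketch; the procedure name and body are illustrative):
+
+        StoredProcedureDefinition(
+            schema="e2e",
+            name="sp_customer_count",
+            definition_sql=(
+                "CREATE PROCEDURE e2e.sp_customer_count() "
+                "SELECT COUNT(*) FROM e2e.customers"
+            ),
+        )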
+ """ + + schema: str + name: str + definition_sql: str + + +@dataclass(frozen=True) +class SqlSourceBaseline(BaselineSpec): + """Top-level declarative spec for a SQL-based source. + + `metadata` holds the table DDL via SQLAlchemy Core — one source of truth + for column types, nullability, primary keys, foreign keys, and comments. + Seeds / views / stored procedures live alongside as companion data. + """ + + schemas: list[str] + metadata: MetaData + seeds: list[TableSeed] = field(default_factory=list) + views: list[ViewDefinition] = field(default_factory=list) + stored_procedures: list[StoredProcedureDefinition] = field(default_factory=list) diff --git a/ingestion/tests/cli_e2e_v2/core/source/sql_enforcer.py b/ingestion/tests/cli_e2e_v2/core/source/sql_enforcer.py new file mode 100644 index 000000000000..92698a3d9744 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/source/sql_enforcer.py @@ -0,0 +1,301 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Dialect-agnostic SQL baseline enforcer via SQLAlchemy Inspector + Core. + +Introspection goes through `sqlalchemy.inspect(conn)` — dialect-agnostic. +DDL emission goes through `metadata.create_all(conn)` — also dialect-aware +via SQLAlchemy Core. Seeds apply via a dialect-specific INSERT template +carried on each `TableSeed`, so the base enforcer runs them without +knowing the dialect. Stored procedures and their listing query stay +subclass responsibility (SQLAlchemy doesn't model SPs uniformly). +""" + +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, TypedDict + +from sqlalchemy import bindparam, inspect, text + +from .sql import ( + SqlSourceBaseline, + StoredProcedureDefinition, + TableSeed, + ViewDefinition, +) +from .types import BaselineSpec, Diff, DiffKind + +if TYPE_CHECKING: + from sqlalchemy.engine import Connection, Engine + from sqlalchemy.schema import Table + +logger = logging.getLogger(__name__) + + +class _TableSnapshot(TypedDict): + """Per-table metadata collected by the Inspector snapshot.""" + + columns: dict[str, dict[str, Any]] + + +class _SqlSnapshot(TypedDict): + """Typed shape of `_snapshot()`'s return payload. + + Lets the `_diff_*` methods take a real type (not `dict`) so typos like + `state["tabels"]` are caught by the type checker rather than silently + at runtime. + """ + + schemas: set[str] + tables: dict[tuple[str, str], _TableSnapshot] + views: set[tuple[str, str]] + stored_procedures: set[tuple[str, str]] + + +_TYPE_ALIASES: dict[str, str] = { + "INTEGER": "INT", + "NUMERIC": "DECIMAL", +} + +_INTEGER_TYPES: frozenset[str] = frozenset({"TINYINT", "SMALLINT", "MEDIUMINT", "INT", "BIGINT"}) + + +class SqlBaselineEnforcer(ABC): + """SQL-family SourceBaselineEnforcer via SQLAlchemy Inspector + Core. + + Subclasses customize only: + - `_stored_procedure_query_sql`: raw SQL returning `(schema, name)` + rows for procedures; binds a `:schemas` IN-list (expanding). + - `_apply_stored_procedure(conn, sp)`: dialect-specific procedure DDL. + Required override (abstract) — subclasses without stored procedures + in their baseline can implement as a `pass` no-op. + - `_apply_view` default runs `view.definition_sql` verbatim — override + only if the dialect needs special plumbing. 
+ + Tables, columns, FKs, comments, and PK come from the baseline's + SQLAlchemy `MetaData` — `metadata.create_all(conn)` emits the right + DDL per dialect. Seed INSERTs are dialect-specific templates on each + `TableSeed`, bound against the (portable) row data at apply time. + + Marking `_apply_stored_procedure` abstract surfaces missing overrides at + enforcer instantiation (fixture setup) rather than at first SP-apply + inside a running test, where the failure context is harder to triage. + """ + + _stored_procedure_query_sql: str | None = None + + def __init__(self, engine: Engine, baseline: SqlSourceBaseline) -> None: + self._engine = engine + self._baseline = baseline + + # --- internal snapshot ---------------------------------------------- + + def _snapshot(self, conn: Connection) -> _SqlSnapshot: + inspector = inspect(conn) + wanted = set(self._baseline.schemas) + logger.debug("[sql] snapshotting schemas=%s", sorted(wanted)) + + schemas = {s for s in inspector.get_schema_names() if s in wanted} + + tables: dict[tuple[str, str], _TableSnapshot] = {} + for schema in schemas: + for table in inspector.get_table_names(schema=schema): + pk_cols = set(inspector.get_pk_constraint(table, schema=schema).get("constrained_columns", [])) + tables[(schema, table)] = { + "columns": { + col["name"]: { + "sql_type": str(col["type"]).upper(), + "nullable": col["nullable"], + "primary_key": col["name"] in pk_cols, + } + for col in inspector.get_columns(table, schema=schema) + } + } + + views = {(s, v) for s in schemas for v in inspector.get_view_names(schema=s)} + + stored_procedures = self._query_stored_procedures(conn, schemas) + + return { + "schemas": schemas, + "tables": tables, + "views": views, + "stored_procedures": stored_procedures, + } + + def _query_stored_procedures(self, conn: Connection, schemas: set[str]) -> set[tuple[str, str]]: + if not self._stored_procedure_query_sql or not schemas: + return set() + query = text(self._stored_procedure_query_sql).bindparams(bindparam("schemas", expanding=True)) + return {(row[0], row[1]) for row in conn.execute(query, {"schemas": sorted(schemas)})} + + # --- compare -------------------------------------------------------- + + def compare(self, expected: BaselineSpec) -> list[Diff]: + assert isinstance(expected, SqlSourceBaseline), f"expected SqlSourceBaseline, got {type(expected).__name__}" + if not expected.schemas: + return [] + + drifts: list[Diff] = [] + with self._engine.connect() as conn: + state = self._snapshot(conn) + drifts.extend(self._diff_schemas(expected, state)) + drifts.extend(self._diff_tables(expected, state)) + drifts.extend(self._diff_seeds(expected, state, conn)) + drifts.extend(self._diff_views(expected, state)) + drifts.extend(self._diff_stored_procedures(expected, state)) + + logger.debug("[sql] compare produced %d drifts", len(drifts)) + return drifts + + @staticmethod + def _diff_schemas(expected: SqlSourceBaseline, state: _SqlSnapshot) -> list[Diff]: + return [Diff(path=f"schema[{s}]", kind=DiffKind.MISSING) for s in expected.schemas if s not in state["schemas"]] + + def _diff_tables(self, expected: SqlSourceBaseline, state: _SqlSnapshot) -> list[Diff]: + drifts: list[Diff] = [] + actual_tables = state["tables"] + for tbl in expected.metadata.sorted_tables: + fqn = tbl.fullname + actual_tbl = actual_tables.get((tbl.schema, tbl.name)) + if actual_tbl is None: + drifts.append(Diff(path=f"table[{fqn}]", kind=DiffKind.MISSING)) + continue + drifts.extend(self._diff_columns(tbl, actual_tbl["columns"], fqn)) + return drifts + 
+ @staticmethod + def _diff_columns(tbl: Table, actual_cols: dict[str, dict[str, Any]], fqn: str) -> list[Diff]: + drifts: list[Diff] = [] + for col in tbl.columns: + actual_col = actual_cols.get(col.name) + col_path = f"table[{fqn}].column[{col.name}]" + if actual_col is None: + drifts.append(Diff(path=col_path, kind=DiffKind.MISSING)) + continue + expected_type_str = str(col.type).upper() + if _normalize_type(actual_col["sql_type"]) != _normalize_type(expected_type_str): + drifts.append( + Diff( + path=f"{col_path}.type", + expected=expected_type_str, + actual=actual_col["sql_type"], + ) + ) + if actual_col["primary_key"] != col.primary_key: + drifts.append( + Diff( + path=f"{col_path}.primary_key", + expected=col.primary_key, + actual=actual_col["primary_key"], + ) + ) + return drifts + + def _diff_seeds(self, expected: SqlSourceBaseline, state: _SqlSnapshot, conn: Connection) -> list[Diff]: + """Compare seed row counts for tables that already exist. + + Skips seeds whose target table isn't in the snapshot — the missing + table is already flagged by `_diff_tables`, and issuing COUNT(*) + against a nonexistent table (or schema) would raise. The apply() + pass creates the tables + seeds them; next compare() can then + verify row counts. + """ + drifts: list[Diff] = [] + actual_tables = state["tables"] + schema = expected.metadata.schema + for seed in expected.seeds: + if (schema, seed.table_name) not in actual_tables: + continue + fqn = self._seed_fqn(seed) + count = conn.execute(text(f"SELECT COUNT(*) FROM {fqn}")).scalar_one() + if count != seed.expected_row_count: + drifts.append( + Diff( + path=f"table[{fqn}].seed.row_count", + expected=seed.expected_row_count, + actual=count, + ) + ) + return drifts + + @staticmethod + def _diff_views(expected: SqlSourceBaseline, state: _SqlSnapshot) -> list[Diff]: + return [ + Diff(path=f"view[{v.schema}.{v.name}]", kind=DiffKind.MISSING) + for v in expected.views + if (v.schema, v.name) not in state["views"] + ] + + @staticmethod + def _diff_stored_procedures(expected: SqlSourceBaseline, state: _SqlSnapshot) -> list[Diff]: + return [ + Diff(path=f"procedure[{sp.schema}.{sp.name}]", kind=DiffKind.MISSING) + for sp in expected.stored_procedures + if (sp.schema, sp.name) not in state["stored_procedures"] + ] + + # --- apply orchestration -------------------------------------------- + + def apply(self, drifts: list[Diff]) -> None: + logger.debug("[sql] applying %d drifts", len(drifts)) + with self._engine.begin() as conn: + for schema_name in self._baseline.schemas: + conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")) + # metadata.create_all emits CREATE TABLE IF NOT EXISTS + FKs + + # column comments + table comments in the engine's dialect. 
+ self._baseline.metadata.create_all(conn) + for seed in self._baseline.seeds: + self._apply_seed(conn, seed) + for view in self._baseline.views: + self._apply_view(conn, view) + for sp in self._baseline.stored_procedures: + self._apply_stored_procedure(conn, sp) + + def _apply_seed(self, conn: Connection, seed: TableSeed) -> None: + fqn = self._seed_fqn(seed) + count = conn.execute(text(f"SELECT COUNT(*) FROM {fqn}")).scalar_one() + if count == seed.expected_row_count: + return + logger.info( + "[seed] %s: inserting (current=%d, expected=%d)", + fqn, + count, + seed.expected_row_count, + ) + conn.execute(text(seed.insert_sql), seed.rows) + + def _seed_fqn(self, seed: TableSeed) -> str: + schema = self._baseline.metadata.schema + return f"{schema}.{seed.table_name}" if schema else seed.table_name + + @staticmethod + def _apply_view(conn: Connection, view: ViewDefinition) -> None: + """Default: run `view.definition_sql` verbatim.""" + conn.execute(text(view.definition_sql)) + + @abstractmethod + def _apply_stored_procedure(self, conn: Connection, sp: StoredProcedureDefinition) -> None: + """Dialect-specific procedure DDL. Implement as a `pass` no-op + if the connector's baseline declares no stored procedures.""" + + +def _normalize_type(t: str) -> str: + """Canonicalize a SQL native-type string for cross-dialect comparison. + + - upper case + - strip `UNSIGNED` + - collapse whitespace, including "DECIMAL(10, 2)" -> "DECIMAL(10,2)" + - strip single quotes (enum/set members: "ENUM('a','b')" -> "ENUM(A,B)") + - alias INTEGER -> INT, NUMERIC -> DECIMAL + - drop display width for integer family (INT(11) -> INT) + """ + raw = " ".join(t.upper().replace("UNSIGNED", "").split()) + raw = raw.replace(", ", ",").replace("'", "") + head, paren, rest = raw.partition("(") + head = _TYPE_ALIASES.get(head, head) + if head in _INTEGER_TYPES: + return head + return f"{head}({rest}" if paren else head diff --git a/ingestion/tests/cli_e2e_v2/core/source/types.py b/ingestion/tests/cli_e2e_v2/core/source/types.py new file mode 100644 index 000000000000..6fe1e717be71 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/core/source/types.py @@ -0,0 +1,87 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Protocol and base types for source baseline enforcement. + +Per Decision #18 of the v2 spec, baseline enforcement is a compare-then-apply +lifecycle that's uniform across source families (SQL, Dashboard, Pipeline). +MVP ships only the SQL family; the Protocol is defined here so future families +plug in without rework. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from typing import Any, Protocol + + +class BaselineSpec: + """Marker base for family-specific baseline specs (SqlSourceBaseline, etc.). + + Deliberately minimal — subclasses carry the real declarative shape. This + class exists so the orchestrator can type `expected: BaselineSpec` without + depending on any specific family module. + """ + + +class DiffKind(Enum): + """Why a `Diff` was produced. + + Replaces brittle string sentinels (``expected="present", actual="missing"``) + with a typed discriminator. Lets downstream code filter diffs by kind + (``[d for d in diffs if d.kind is DiffKind.MISSING]``) without re-parsing + the human-readable expected/actual fields, and lets the renderer pick a + one-liner vs. expected/actual block per kind. 
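+
+    Construction sketch (the paths are illustrative; `Diff` is defined just
+    below):
+
+        Diff(path="table[e2e.users]", kind=DiffKind.MISSING)
+        Diff(path="column[id].type", expected="BIGINT", actual="INT")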
+ """ + + MISSING = "missing" # entity declared expected, not found in actual + UNEXPECTED = "unexpected" # STRICT mode: actual entity not in expected set + VALUE_MISMATCH = "value" # both sides present, a field differs + + +@dataclass(frozen=True) +class Diff: + """One path-qualified discrepancy between expected and actual. + + Used for both source-side baseline drift (schema / tables / seeds) + and OM-side catalog diffing (service / database / schema / table / + column). Path uses bracket notation — `schema[e2e].table[users].column + [email].type` — so failure output from either domain is scannable by + eye and sortable for grouping. + + `expected` / `actual` are the human-readable values for VALUE_MISMATCH + kinds; for MISSING / UNEXPECTED they are usually omitted (the kind + itself carries the meaning). `__str__` renders accordingly. + """ + + path: str + kind: DiffKind = DiffKind.VALUE_MISMATCH + expected: Any = None + actual: Any = None + + def __str__(self) -> str: + if self.kind is DiffKind.MISSING: + return f" {self.path}: missing" + if self.kind is DiffKind.UNEXPECTED: + extra = f" ({self.actual!r})" if self.actual is not None else "" + return f" {self.path}: unexpected{extra}" + return f" {self.path}:\n expected: {self.expected!r}\n actual: {self.actual!r}" + + +class SourceBaselineEnforcer(Protocol): + """Compare-then-apply lifecycle implemented per connector family. + + Enforcers are constructed by the per-connector baseline module (e.g., + `/baseline.py`) and handed to the orchestrator via an + EnforcementPolicy. The orchestrator calls `compare` first; if drifts + are returned and the policy mode is APPLY, it then calls `apply`. + + Implementations are free to do their own internal snapshotting — the + framework doesn't prescribe a separate "introspect" phase. Engine- + specific state caching belongs inside the enforcer. + """ + + def compare(self, expected: BaselineSpec) -> list[Diff]: ... + + def apply(self, drifts: list[Diff]) -> None: ... diff --git a/ingestion/tests/cli_e2e_v2/meta/__init__.py b/ingestion/tests/cli_e2e_v2/meta/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/cli_e2e_v2/meta/test_differ.py b/ingestion/tests/cli_e2e_v2/meta/test_differ.py new file mode 100644 index 000000000000..925aa7378b1c --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/meta/test_differ.py @@ -0,0 +1,420 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Meta-tests: prove StructuralDiffer detects each documented failure mode. + +These run synthetically against a stub OM client — no testcontainers, no +network. They are the safety net that catches regressions in the differ +itself: if a real connector test ever silently passes when OM diverges +from Expected, one of these will already have failed in CI. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any + +import pytest + +from metadata.generated.schema.entity.data.database import Database +from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema +from metadata.generated.schema.entity.data.storedProcedure import StoredProcedure +from metadata.generated.schema.entity.data.table import DataType, Table +from metadata.generated.schema.entity.services.databaseService import ( + DatabaseService, + DatabaseServiceType, +) + +from ..core.expected.differ import StructuralMismatch, assert_service_matches +from ..core.expected.types import ( + ExpectedColumn, + ExpectedDatabase, + ExpectedSchema, + ExpectedService, + ExpectedStoredProcedure, + ExpectedTable, + MatchMode, +) +from ..core.fluent.om_client import OmClient +from ..core.source.types import DiffKind + +# --------------------------------------------------------------------------- # +# Stubs # +# --------------------------------------------------------------------------- # + + +class _FakeOM: + """Minimal OpenMetadata stand-in. + + Stores canned `get_by_name` responses keyed on `(entity_cls, fqn)` and + canned `list_all_entities` responses keyed on `(entity_cls, parent_key, parent_value)`. + Anything not registered returns None / []. + """ + + def __init__(self) -> None: + self.entities: dict[tuple[type, str], Any] = {} + self.listings: dict[tuple[type, str, str], list] = {} + + def register(self, entity_cls: type, fqn: str, value: Any) -> None: + self.entities[(entity_cls, fqn)] = value + + def register_list(self, entity_cls: type, parent_key: str, parent_value: str, items: list) -> None: + self.listings[(entity_cls, parent_key, parent_value)] = items + + # --- OpenMetadata API surface used by the differ ----------------------- + + def get_by_name(self, *, entity, fqn, fields=None, include=None): + return self.entities.get((entity, fqn)) + + def list_all_entities(self, *, entity, params, limit=1000): + (parent_key, parent_value) = next(iter(params.items())) + return self.listings.get((entity, parent_key, parent_value), []) + + +def _stub(**kwargs: Any) -> SimpleNamespace: + """Build a SimpleNamespace with given attributes — drop-in for Pydantic + entities the differ reads via attribute access. Defaults cover the + fields touched by the differ for any entity type.""" + defaults = { + "tags": [], + "owners": [], + "columns": [], + "description": None, + "deleted": False, + } + return SimpleNamespace(**{**defaults, **kwargs}) + + +def _column(name: str, data_type: DataType, **extra: Any) -> SimpleNamespace: + return _stub(name=name, dataType=data_type, constraint=None, **extra) + + +SERVICE_FQN = "svc" +DB_FQN = "svc.default" +SCHEMA_FQN = "svc.default.e2e" + + +def _seed_happy_path(fake: _FakeOM, expected: ExpectedService) -> None: + """Register OM responses that exactly match `expected` so the differ + sees zero drift. 
Negative tests build on top of this by overwriting + one entry to introduce a single, isolated discrepancy.""" + fake.register(DatabaseService, expected.name, _stub(serviceType=expected.service_type)) + for db in expected.databases: + db_fqn = f"{expected.name}.{db.name}" + fake.register(Database, db_fqn, _stub(name=db.name)) + for schema in db.schemas: + schema_fqn = f"{db_fqn}.{schema.name}" + fake.register(DatabaseSchema, schema_fqn, _stub(name=schema.name)) + for table in schema.tables: + fake.register( + Table, + f"{schema_fqn}.{table.name}", + _stub( + name=table.name, + columns=[_column(c.name, c.data_type) for c in table.columns], + ), + ) + for sp in schema.stored_procedures: + fake.register(StoredProcedure, f"{schema_fqn}.{sp.name}", _stub(name=sp.name)) + + +def _baseline_expected() -> ExpectedService: + """Reference Expected tree the negative tests perturb.""" + return ExpectedService( + name="svc", + service_type=DatabaseServiceType.Mysql, + databases=[ + ExpectedDatabase( + name="default", + schemas=[ + ExpectedSchema( + name="e2e", + tables=[ + ExpectedTable( + name="customers", + columns=[ + ExpectedColumn("id", DataType.BIGINT), + ExpectedColumn("email", DataType.VARCHAR), + ], + ), + ExpectedTable( + name="transactions", + columns=[ExpectedColumn("id", DataType.BIGINT)], + ), + ], + stored_procedures=[ExpectedStoredProcedure("sp_count")], + ) + ], + ) + ], + ) + + +def _client(fake: _FakeOM) -> OmClient: + return OmClient(fake) # type: ignore[arg-type] + + +# --------------------------------------------------------------------------- # +# Happy path — the differ should NOT raise when OM matches Expected. # +# --------------------------------------------------------------------------- # + + +def test_happy_path_no_diffs() -> None: + expected = _baseline_expected() + fake = _FakeOM() + _seed_happy_path(fake, expected) + assert_service_matches(expected, _client(fake)) + + +# --------------------------------------------------------------------------- # +# Each parametrize row injects ONE corruption and asserts it is caught. # +# `mutate(fake)` mutates the registered actuals; the Expected tree stays the # +# canonical baseline. `expected_path_fragment` is a substring search against # +# the rendered StructuralMismatch — looser than DiffKind matching but reads # +# closer to the failure message a developer would actually see. 
# +# --------------------------------------------------------------------------- # + + +def _drop(fake: _FakeOM, entity_cls: type, fqn: str) -> None: + fake.entities[(entity_cls, fqn)] = None + + +def _patch_table(fake: _FakeOM, fqn: str, **kwargs: Any) -> None: + table = fake.entities[(Table, fqn)] + for k, v in kwargs.items(): + setattr(table, k, v) + + +@pytest.mark.parametrize( + "label,mutate,expected_kind,path_fragment", + [ + ( + "missing_service", + lambda fake: _drop(fake, DatabaseService, SERVICE_FQN), + DiffKind.MISSING, + "service[svc]", + ), + ( + "missing_database", + lambda fake: _drop(fake, Database, DB_FQN), + DiffKind.MISSING, + "database[default]", + ), + ( + "missing_schema", + lambda fake: _drop(fake, DatabaseSchema, SCHEMA_FQN), + DiffKind.MISSING, + "schema[e2e]", + ), + ( + "missing_table", + lambda fake: _drop(fake, Table, f"{SCHEMA_FQN}.customers"), + DiffKind.MISSING, + "table[customers]", + ), + ( + "missing_stored_procedure", + lambda fake: _drop(fake, StoredProcedure, f"{SCHEMA_FQN}.sp_count"), + DiffKind.MISSING, + "procedure[sp_count]", + ), + ( + "missing_column", + lambda fake: _patch_table( + fake, + f"{SCHEMA_FQN}.customers", + columns=[_column("id", DataType.BIGINT)], + ), + DiffKind.MISSING, + "column[email]", + ), + ( + "wrong_column_type", + lambda fake: _patch_table( + fake, + f"{SCHEMA_FQN}.customers", + columns=[_column("id", DataType.INT), _column("email", DataType.VARCHAR)], + ), + DiffKind.VALUE_MISMATCH, + "column[id].dataType", + ), + ( + "wrong_service_type", + lambda fake: setattr( + fake.entities[(DatabaseService, SERVICE_FQN)], + "serviceType", + DatabaseServiceType.Postgres, + ), + DiffKind.VALUE_MISMATCH, + "service[svc].serviceType", + ), + ], + ids=lambda v: v if isinstance(v, str) else "", +) +def test_diff_detected(label, mutate, expected_kind, path_fragment) -> None: + expected = _baseline_expected() + fake = _FakeOM() + _seed_happy_path(fake, expected) + mutate(fake) + + with pytest.raises(StructuralMismatch) as exc_info: + assert_service_matches(expected, _client(fake)) + + diffs = exc_info.value.diffs + assert any(d.kind is expected_kind and path_fragment in d.path for d in diffs), ( + f"expected a {expected_kind.name} diff containing {path_fragment!r}; got: {diffs!r}" + ) + + +# --------------------------------------------------------------------------- # +# Field-level assertions that don't fit the parametrize matrix cleanly # +# (each needs additional setup: tags, descriptions, owners). # +# --------------------------------------------------------------------------- # + + +def test_missing_column_tag() -> None: + expected = ExpectedService( + name="svc", + service_type=DatabaseServiceType.Mysql, + databases=[ + ExpectedDatabase( + name="default", + schemas=[ + ExpectedSchema( + name="e2e", + tables=[ + ExpectedTable( + name="customers", + columns=[ + ExpectedColumn( + "email", + DataType.VARCHAR, + tags=frozenset({"PII.Sensitive"}), + ), + ], + ) + ], + ) + ], + ) + ], + ) + fake = _FakeOM() + _seed_happy_path(fake, expected) + # Overwrite the auto-seeded column to drop the tag. 
+ fake.entities[(Table, f"{SCHEMA_FQN}.customers")] = _stub( + name="customers", + columns=[_stub(name="email", dataType=DataType.VARCHAR, constraint=None, tags=[])], + ) + + with pytest.raises(StructuralMismatch, match=r"column\[email\].tags"): + assert_service_matches(expected, _client(fake)) + + +def test_missing_table_description() -> None: + expected = _baseline_expected() + expected = ExpectedService( + name=expected.name, + service_type=expected.service_type, + databases=[ + ExpectedDatabase( + name="default", + schemas=[ + ExpectedSchema( + name="e2e", + tables=[ + ExpectedTable( + name="customers", + columns=[ExpectedColumn("id", DataType.BIGINT)], + description="Customer records", + ) + ], + ) + ], + ) + ], + ) + fake = _FakeOM() + _seed_happy_path(fake, expected) + _patch_table(fake, f"{SCHEMA_FQN}.customers", description="other text") + + with pytest.raises(StructuralMismatch, match=r"table\[customers\].description"): + assert_service_matches(expected, _client(fake)) + + +def test_missing_owner() -> None: + expected = ExpectedService( + name="svc", + service_type=DatabaseServiceType.Mysql, + databases=[ + ExpectedDatabase( + name="default", + schemas=[ + ExpectedSchema( + name="e2e", + tables=[ + ExpectedTable( + name="customers", + columns=[ExpectedColumn("id", DataType.BIGINT)], + owner="alice", + ) + ], + ) + ], + ) + ], + ) + fake = _FakeOM() + _seed_happy_path(fake, expected) + # Default seeded owners is []; assertion requires "alice" → diff fires. + + with pytest.raises(StructuralMismatch, match=r"table\[customers\].owner"): + assert_service_matches(expected, _client(fake)) + + +# --------------------------------------------------------------------------- # +# STRICT mode catches extras that SUPERSET tolerates. # +# --------------------------------------------------------------------------- # + + +def test_strict_flags_extra_table_unexpected() -> None: + expected = _baseline_expected() + fake = _FakeOM() + _seed_happy_path(fake, expected) + fake.register_list( + Table, + "databaseSchema", + SCHEMA_FQN, + [ + _stub(name="customers"), + _stub(name="transactions"), + _stub(name="phantom"), + ], + ) + + # SUPERSET tolerates the extra. + assert_service_matches(expected, _client(fake), mode=MatchMode.SUPERSET) + + # STRICT flags it. + with pytest.raises(StructuralMismatch, match=r"phantom"): + assert_service_matches(expected, _client(fake), mode=MatchMode.STRICT) + + +def test_strict_flags_extra_column() -> None: + expected = _baseline_expected() + fake = _FakeOM() + _seed_happy_path(fake, expected) + _patch_table( + fake, + f"{SCHEMA_FQN}.customers", + columns=[ + _column("id", DataType.BIGINT), + _column("email", DataType.VARCHAR), + _column("phantom", DataType.VARCHAR), + ], + ) + + assert_service_matches(expected, _client(fake), mode=MatchMode.SUPERSET) + + with pytest.raises(StructuralMismatch, match=r"phantom"): + assert_service_matches(expected, _client(fake), mode=MatchMode.STRICT) diff --git a/ingestion/tests/cli_e2e_v2/meta/test_eventually.py b/ingestion/tests/cli_e2e_v2/meta/test_eventually.py new file mode 100644 index 000000000000..19cf28f05d82 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/meta/test_eventually.py @@ -0,0 +1,145 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Meta-tests: prove the eventually polling primitives behave correctly. 
+ +retry_until is the foundation of every fluent `.eventually()` chain in +the framework — a regression here silently turns flaky-but-eventually- +correct ingestion into spurious test passes (or false failures). +""" + +from __future__ import annotations + +import pytest + +from ..core.fluent.eventually import EventuallyRunner, retry_until + + +def _attempt_counter(): + """Return a list whose `len()` is the number of times `check` has been called. + + Mutable container so closures can append on each invocation without a + `nonlocal` declaration on every check. Tests inspect the length to + assert how many attempts retry_until made. + """ + return [] + + +# --------------------------------------------------------------------------- # +# retry_until — the low-level primitive # +# --------------------------------------------------------------------------- # + + +def test_retry_until_returns_value_on_first_success() -> None: + attempts = _attempt_counter() + + def _check() -> str: + attempts.append(None) + return "ok" + + assert retry_until(_check, timeout=2, poll_interval=0.01, name="t") == "ok" + assert len(attempts) == 1 + + +def test_retry_until_retries_until_success() -> None: + attempts = _attempt_counter() + + def _check() -> int: + attempts.append(None) + if len(attempts) < 3: + raise AssertionError("not yet") + return 42 + + assert retry_until(_check, timeout=2, poll_interval=0.01, name="converge") == 42 + assert len(attempts) == 3 + + +def test_retry_until_times_out_with_last_failure() -> None: + def _check() -> None: + raise AssertionError("specific failure text") + + with pytest.raises(AssertionError, match="specific failure text") as exc_info: + retry_until(_check, timeout=0, poll_interval=0.01, name="never") + + msg = str(exc_info.value) + assert "timed out" in msg + assert "never" in msg + + +def test_retry_until_propagates_non_assertion_errors() -> None: + def _check() -> None: + raise RuntimeError("hard error") + + with pytest.raises(RuntimeError, match="hard error"): + retry_until(_check, timeout=2, poll_interval=0.01, name="t") + + +# --------------------------------------------------------------------------- # +# EventuallyRunner — the per-assert dispatcher # +# --------------------------------------------------------------------------- # + + +def test_runner_unarmed_runs_sync() -> None: + runner = EventuallyRunner() + attempts = _attempt_counter() + + def _check() -> str: + attempts.append(None) + return "value" + + assert runner.run(_check, name="sync") == "value" + assert len(attempts) == 1 + + +def test_runner_unarmed_propagates_assertion_error_without_retry() -> None: + runner = EventuallyRunner() + attempts = _attempt_counter() + + def _check() -> None: + attempts.append(None) + raise AssertionError("immediate") + + with pytest.raises(AssertionError, match="immediate"): + runner.run(_check, name="sync") + assert len(attempts) == 1 + + +def test_runner_armed_retries_until_success() -> None: + runner = EventuallyRunner() + runner.arm(timeout=2) + attempts = _attempt_counter() + + def _check() -> str: + attempts.append(None) + if len(attempts) < 2: + raise AssertionError("not yet") + return "done" + + # Note: EventuallyRunner uses retry_until's default poll interval (2s). + # We rely on the check converging fast enough that the natural sleep + # is acceptable. Two attempts ⇒ one ~2s sleep between them. 
+ assert runner.run(_check, name="armed") == "done" + assert len(attempts) == 2 + + +def test_runner_arming_is_one_shot() -> None: + """After a successful armed run, the next call reverts to sync — the + timeout is consumed, not sticky. This is the contract that prevents + accidental cross-test polling state.""" + runner = EventuallyRunner() + runner.arm(timeout=2) + + def _ok() -> str: + return "ok" + + runner.run(_ok, name="first") # consumes the arm + + attempts = _attempt_counter() + + def _fail_once() -> None: + attempts.append(None) + raise AssertionError("immediate") + + with pytest.raises(AssertionError, match="immediate"): + runner.run(_fail_once, name="second") + assert len(attempts) == 1, "second run should have been sync, not retried" diff --git a/ingestion/tests/cli_e2e_v2/meta/test_fluent.py b/ingestion/tests/cli_e2e_v2/meta/test_fluent.py new file mode 100644 index 000000000000..5243fb82fd74 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/meta/test_fluent.py @@ -0,0 +1,296 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Meta-tests: prove the fluent assertion classes raise the right error +on mismatched OM state — and pass on matching state. + +Runs synthetically against a stub OM client. Each test pairs a positive +case (correct state → no raise) with a negative case (mismatched state → +AssertionError with a useful message). +""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any + +import pytest + +from metadata.generated.schema.entity.data.storedProcedure import StoredProcedure +from metadata.generated.schema.entity.data.table import ( + ConstraintType, + DataType, + Table, +) + +from ..core.fluent.stored_procedure_assert import StoredProcedureAssert +from ..core.fluent.table_assert import TableAssert + +# --------------------------------------------------------------------------- # +# Stubs # +# --------------------------------------------------------------------------- # + + +class _FakeOM: + """Stub for the OpenMetadata client. + + Stores canned entities keyed on `(entity_cls, fqn, include_filter)`. The + `include` kwarg distinguishes default `get_by_name` (deleted=False) from + `include="all"` used by `is_soft_deleted` / `is_not_deleted` so each + test can set the right view independently. 
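+
+    Hypothetical usage (mirrors the tests below):
+
+        fake = _FakeOM()
+        fake.register(Table, FQN, _table(deleted=True), include="all")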
+ """ + + def __init__(self) -> None: + self.entities: dict[tuple[type, str, str | None], Any] = {} + + def register(self, entity_cls: type, fqn: str, value: Any, *, include: str | None = None) -> None: + self.entities[(entity_cls, fqn, include)] = value + + def get_by_name(self, *, entity, fqn, fields=None, include=None): + return self.entities.get((entity, fqn, include)) + + +def _table( + *, + columns: list[Any] | None = None, + tags: list[str] | None = None, + owners: list[str] | None = None, + description: str | None = None, + constraints: list[Any] | None = None, + schema_definition: str | None = None, + deleted: bool = False, +) -> SimpleNamespace: + return SimpleNamespace( + columns=columns or [], + tags=[SimpleNamespace(tagFQN=t) for t in (tags or [])], + owners=[SimpleNamespace(name=o) for o in (owners or [])], + description=description, + tableConstraints=constraints or [], + schemaDefinition=schema_definition, + deleted=deleted, + ) + + +def _column(name: str, data_type: DataType, *, tags: list[str] | None = None, description: str | None = None): + return SimpleNamespace( + name=name, + dataType=data_type, + tags=[SimpleNamespace(tagFQN=t) for t in (tags or [])], + description=description, + ) + + +def _fk(column: str, ref_table: str, ref_column: str) -> SimpleNamespace: + return SimpleNamespace( + constraintType=ConstraintType.FOREIGN_KEY, + columns=[column], + referredColumns=[f"{ref_table}.{ref_column}"], + ) + + +FQN = "svc.default.e2e.customers" + + +# --------------------------------------------------------------------------- # +# TableAssert — entity-level terminals # +# --------------------------------------------------------------------------- # + + +def test_exists_raises_when_entity_missing() -> None: + fake = _FakeOM() + # Nothing registered → get_by_name returns None → exists() raises. 
+ with pytest.raises(AssertionError, match=r"not found"): + TableAssert(fake, FQN).exists() + + +def test_exists_passes_when_entity_present() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table()) + TableAssert(fake, FQN).exists() + + +def test_has_description_containing_passes_on_match() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(description="Customer records — primary table")) + TableAssert(fake, FQN).has_description_containing("Customer records") + + +def test_has_description_containing_raises_on_mismatch() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(description="something else")) + with pytest.raises(AssertionError, match=r"does not contain"): + TableAssert(fake, FQN).has_description_containing("Customer records") + + +def test_has_tag_passes_on_match() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(tags=["PII.Sensitive"])) + TableAssert(fake, FQN).has_tag("PII.Sensitive") + + +def test_has_tag_raises_when_tag_missing() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(tags=["Other.Tag"])) + with pytest.raises(AssertionError, match=r"missing tag 'PII.Sensitive'"): + TableAssert(fake, FQN).has_tag("PII.Sensitive") + + +def test_has_owner_passes_on_match() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(owners=["alice"])) + TableAssert(fake, FQN).has_owner("alice") + + +def test_has_owner_raises_when_owner_missing() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(owners=["bob"])) + with pytest.raises(AssertionError, match=r"missing owner 'alice'"): + TableAssert(fake, FQN).has_owner("alice") + + +def test_has_foreign_key_passes_on_match() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(constraints=[_fk("customer_id", "customers", "id")])) + TableAssert(fake, FQN).has_foreign_key_constraint("customer_id", "customers", "id") + + +def test_has_foreign_key_raises_when_constraint_absent() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(constraints=[])) + with pytest.raises(AssertionError, match=r"missing FOREIGN_KEY"): + TableAssert(fake, FQN).has_foreign_key_constraint("customer_id", "customers", "id") + + +def test_has_schema_definition_containing_is_case_insensitive() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(schema_definition="SELECT * FROM customers LEFT JOIN transactions")) + # MySQL emits lowercase keywords — assertion's lower-cased substring match handles it. 
+ TableAssert(fake, FQN).has_schema_definition_containing("left join") + + +def test_has_schema_definition_containing_raises_on_mismatch() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(schema_definition="SELECT 1")) + with pytest.raises(AssertionError, match=r"does not contain"): + TableAssert(fake, FQN).has_schema_definition_containing("LEFT JOIN") + + +def test_is_soft_deleted_passes_when_deleted() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(deleted=True), include="all") + TableAssert(fake, FQN).is_soft_deleted() + + +def test_is_soft_deleted_raises_when_alive() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(deleted=False), include="all") + with pytest.raises(AssertionError, match=r"not soft-deleted"): + TableAssert(fake, FQN).is_soft_deleted() + + +def test_is_not_deleted_passes_when_alive() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(deleted=False), include="all") + TableAssert(fake, FQN).is_not_deleted() + + +def test_is_not_deleted_raises_when_soft_deleted() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(deleted=True), include="all") + with pytest.raises(AssertionError, match=r"unexpectedly soft-deleted"): + TableAssert(fake, FQN).is_not_deleted() + + +# --------------------------------------------------------------------------- # +# ColumnAssert — descended via TableAssert.column(name) # +# --------------------------------------------------------------------------- # + + +def test_column_has_type_passes_on_match() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(columns=[_column("id", DataType.BIGINT)])) + TableAssert(fake, FQN).column("id").has_type(DataType.BIGINT) + + +def test_column_has_type_raises_on_mismatch() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(columns=[_column("id", DataType.INT)])) + with pytest.raises(AssertionError, match=r"has type DataType.INT"): + TableAssert(fake, FQN).column("id").has_type(DataType.BIGINT) + + +def test_column_lookup_raises_when_column_missing() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(columns=[_column("id", DataType.BIGINT)])) + with pytest.raises(AssertionError, match=r"not found on table"): + TableAssert(fake, FQN).column("missing").has_type(DataType.BIGINT) + + +def test_column_has_tag_passes_on_match() -> None: + fake = _FakeOM() + fake.register( + Table, + FQN, + _table(columns=[_column("email", DataType.VARCHAR, tags=["PII.Sensitive"])]), + ) + TableAssert(fake, FQN).column("email").has_tag("PII.Sensitive") + + +def test_column_has_tag_raises_when_missing() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(columns=[_column("email", DataType.VARCHAR)])) + with pytest.raises(AssertionError, match=r"missing tag 'PII.Sensitive'"): + TableAssert(fake, FQN).column("email").has_tag("PII.Sensitive") + + +def test_column_has_no_tag_raises_when_unexpectedly_present() -> None: + """has_no_tag is the negative complement — guards against + over-classification by PII recognizers.""" + fake = _FakeOM() + fake.register( + Table, + FQN, + _table(columns=[_column("id", DataType.BIGINT, tags=["PII.Sensitive"])]), + ) + with pytest.raises(AssertionError, match=r"unexpectedly carries tag 'PII.Sensitive'"): + TableAssert(fake, FQN).column("id").has_no_tag("PII.Sensitive") + + +def test_column_has_no_tag_passes_when_absent() -> None: + fake = _FakeOM() + fake.register(Table, FQN, _table(columns=[_column("id", DataType.BIGINT)])) + TableAssert(fake, FQN).column("id").has_no_tag("PII.Sensitive") + + +# 
--------------------------------------------------------------------------- # +# StoredProcedureAssert.has_code_containing # +# --------------------------------------------------------------------------- # + + +def _sp(*, code: str | None) -> SimpleNamespace: + return SimpleNamespace( + storedProcedureCode=SimpleNamespace(code=code) if code is not None else None, + ) + + +def test_sp_has_code_containing_passes_on_match() -> None: + fake = _FakeOM() + fake.register(StoredProcedure, "svc.default.e2e.sp_count", _sp(code="SELECT COUNT(*) FROM customers")) + StoredProcedureAssert(fake, "svc.default.e2e.sp_count").has_code_containing("SELECT COUNT(*)") + + +def test_sp_has_code_containing_raises_on_empty_body() -> None: + """The exact regression that motivated `SHOW_ROUTINE` in conftest.py: + body returns empty string when the OM connector lacks routine-read + privilege.""" + fake = _FakeOM() + fake.register(StoredProcedure, "svc.default.e2e.sp_count", _sp(code="")) + with pytest.raises(AssertionError, match=r"code does not contain 'SELECT COUNT"): + StoredProcedureAssert(fake, "svc.default.e2e.sp_count").has_code_containing("SELECT COUNT(*)") + + +def test_sp_has_code_containing_raises_on_missing_body() -> None: + fake = _FakeOM() + fake.register(StoredProcedure, "svc.default.e2e.sp_count", _sp(code=None)) + with pytest.raises(AssertionError, match=r"code does not contain"): + StoredProcedureAssert(fake, "svc.default.e2e.sp_count").has_code_containing("BEGIN") diff --git a/ingestion/tests/cli_e2e_v2/mysql/__init__.py b/ingestion/tests/cli_e2e_v2/mysql/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/tests/cli_e2e_v2/mysql/baseline.py b/ingestion/tests/cli_e2e_v2/mysql/baseline.py new file mode 100644 index 000000000000..e45d2eb3d2d0 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/mysql/baseline.py @@ -0,0 +1,298 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""MySQL source baseline — common portable tables + MySQL-specific all_types. + +Structure: + - Portable tables (customers, transactions) + their seed rows come from + `core/source/common_baseline.py`. + - MySQL adds a dialect-specific `all_types` table exercising every native + type the connector maps (TINYINT / MEDIUMINT / TEXT variants / blobs / + BIT / ENUM / SET etc.). Seed is trivial — id=1..3 with everything else + NULL; tests only assert on row count + type mappings. + - INSERT templates carry MySQL's `ON DUPLICATE KEY UPDATE` idempotency; + the base enforcer binds them against common row data via executemany. + - One view + one stored procedure for lineage and SP-ingestion coverage. + +Schema evolution caveat: + metadata.create_all uses CREATE TABLE IF NOT EXISTS — no ALTER migration. 
+ When baseline shape changes (column add/drop, FK, comments), drop first: + DROP SCHEMA IF EXISTS e2e; +""" + +from __future__ import annotations + +from functools import lru_cache +from typing import Any + +from sqlalchemy import ( + BigInteger, + Column, + Date, + DateTime, + Float, + Integer, + MetaData, + Numeric, + SmallInteger, + Table, + Time, + create_engine, +) +from sqlalchemy.dialects import mysql +from sqlalchemy.engine import URL, Engine + +from ..core.config.env import Env +from ..core.source.common_baseline import ( + COMMON_CUSTOMER_ROWS, + COMMON_TRANSACTION_ROWS, + build_common_metadata, +) +from ..core.source.orchestrator import EnforcementMode, EnforcementPolicy +from ..core.source.sql import ( + SqlSourceBaseline, + StoredProcedureDefinition, + TableSeed, + ViewDefinition, +) +from .enforcer import MySqlEnforcer + +# ----------------------------------------------------------------------------- +# all_types — MySQL-specific native types (exercises connector type mapping) +# ----------------------------------------------------------------------------- + + +def _declare_all_types(md: MetaData) -> Table: + return Table( + "all_types", + md, + Column("id", Integer, primary_key=True, nullable=False), + Column("tiny_int_col", mysql.TINYINT, nullable=True), + Column("small_int_col", SmallInteger, nullable=True), + Column("medium_int_col", mysql.MEDIUMINT, nullable=True), + Column("int_col", Integer, nullable=True), + Column("big_int_col", BigInteger, nullable=True), + Column("float_col", Float, nullable=True), + Column("double_col", mysql.DOUBLE, nullable=True), + Column("decimal_col", Numeric(10, 2), nullable=True), + Column("char_col", mysql.CHAR(10), nullable=True), + Column("varchar_col", mysql.VARCHAR(255), nullable=True), + Column("tinytext_col", mysql.TINYTEXT, nullable=True), + Column("text_col", mysql.TEXT, nullable=True), + Column("mediumtext_col", mysql.MEDIUMTEXT, nullable=True), + Column("longtext_col", mysql.LONGTEXT, nullable=True), + Column("binary_col", mysql.BINARY(16), nullable=True), + Column("varbinary_col", mysql.VARBINARY(255), nullable=True), + Column("tinyblob_col", mysql.TINYBLOB, nullable=True), + Column("blob_col", mysql.BLOB, nullable=True), + Column("mediumblob_col", mysql.MEDIUMBLOB, nullable=True), + Column("longblob_col", mysql.LONGBLOB, nullable=True), + Column("date_col", Date, nullable=True), + Column("time_col", Time, nullable=True), + Column("datetime_col", DateTime, nullable=True), + Column("timestamp_col", mysql.TIMESTAMP, nullable=True), + Column("year_col", mysql.YEAR, nullable=True), + Column("bit_col", mysql.BIT(8), nullable=True), + Column("json_col", mysql.JSON, nullable=True), + Column("enum_col", mysql.ENUM("alpha", "beta", "gamma"), nullable=True), + Column("set_col", mysql.SET("x", "y", "z"), nullable=True), + ) + + +# all_types seed — one row per id, NULL elsewhere. Tests assert row count +# and column type mappings, not cell content, so this is sufficient. 
+_ALL_TYPES_ROWS: list[dict[str, Any]] = [{"id": 1}, {"id": 2}, {"id": 3}] + + +# ----------------------------------------------------------------------------- +# Dialect-specific INSERT templates (MySQL `ON DUPLICATE KEY UPDATE` idempotency) +# ----------------------------------------------------------------------------- + + +_MYSQL_CUSTOMERS_INSERT = """ +INSERT INTO e2e.customers + (id, first_name, last_name, full_name, email, + address, city, country, zipcode, date_of_birth, age, + credit_score, status, is_active, bio, joined_date) +VALUES + (:id, :first_name, :last_name, :full_name, :email, + :address, :city, :country, :zipcode, :date_of_birth, :age, + :credit_score, :status, :is_active, :bio, :joined_date) +ON DUPLICATE KEY UPDATE + first_name = VALUES(first_name), last_name = VALUES(last_name), + full_name = VALUES(full_name), email = VALUES(email), + address = VALUES(address), city = VALUES(city), + country = VALUES(country), zipcode = VALUES(zipcode), + date_of_birth = VALUES(date_of_birth), age = VALUES(age), + credit_score = VALUES(credit_score), status = VALUES(status), + is_active = VALUES(is_active), bio = VALUES(bio), + joined_date = VALUES(joined_date) +""" + +_MYSQL_TRANSACTIONS_INSERT = """ +INSERT INTO e2e.transactions + (id, customer_id, amount, currency, exchange_rate, status, + txn_at, reference_number, ip_address, notes) +VALUES + (:id, :customer_id, :amount, :currency, :exchange_rate, :status, + :txn_at, :reference_number, :ip_address, :notes) +ON DUPLICATE KEY UPDATE + customer_id = VALUES(customer_id), amount = VALUES(amount), + currency = VALUES(currency), exchange_rate = VALUES(exchange_rate), + status = VALUES(status), txn_at = VALUES(txn_at), + reference_number = VALUES(reference_number), + ip_address = VALUES(ip_address), notes = VALUES(notes) +""" + +_MYSQL_ALL_TYPES_INSERT = """ +INSERT INTO e2e.all_types (id) VALUES (:id) +ON DUPLICATE KEY UPDATE id = VALUES(id) +""" + + +# ----------------------------------------------------------------------------- +# View + stored procedure (dialect-specific DDL) +# ----------------------------------------------------------------------------- + + +_CUSTOMER_TXN_SUMMARY_VIEW = ViewDefinition( + schema="e2e", + name="customer_txn_summary", + definition_sql=""" + CREATE OR REPLACE VIEW e2e.customer_txn_summary AS + SELECT + c.id AS customer_id, + c.full_name, + c.status AS customer_status, + COUNT(t.id) AS txn_count, + COALESCE(SUM(t.amount), 0) AS total_amount + FROM e2e.customers c + LEFT JOIN e2e.transactions t ON c.id = t.customer_id + GROUP BY c.id, c.full_name, c.status + """, +) + + +_SP_ACTIVE_CUSTOMER_COUNT = StoredProcedureDefinition( + schema="e2e", + name="sp_active_customer_count", + definition_sql=""" + CREATE PROCEDURE e2e.sp_active_customer_count() + BEGIN + SELECT COUNT(*) AS active_count + FROM e2e.customers + WHERE status = 'active'; + END + """, +) + + +# A second SP exercising parameterized DML — covers a different code path +# than the read-only `sp_active_customer_count`. The body intentionally +# carries an UPDATE statement so OM's stored-procedure ingestion stores +# DML text, not just SELECT text. 
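+# Illustrative invocation (not executed by the tests):
+#   CALL e2e.sp_update_customer_status(1, 'inactive');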
+_SP_UPDATE_CUSTOMER_STATUS = StoredProcedureDefinition( + schema="e2e", + name="sp_update_customer_status", + definition_sql=""" + CREATE PROCEDURE e2e.sp_update_customer_status( + IN p_customer_id INT, + IN p_status VARCHAR(20) + ) + BEGIN + UPDATE e2e.customers + SET status = p_status + WHERE id = p_customer_id; + END + """, +) + + +# ----------------------------------------------------------------------------- +# Top-level baseline +# ----------------------------------------------------------------------------- + + +def _build_metadata() -> MetaData: + """Common portable tables + MySQL-specific all_types.""" + md = build_common_metadata("e2e") + _declare_all_types(md) + return md + + +MYSQL_BASELINE = SqlSourceBaseline( + schemas=["e2e"], + metadata=_build_metadata(), + seeds=[ + TableSeed( + table_name="customers", + rows=COMMON_CUSTOMER_ROWS, + insert_sql=_MYSQL_CUSTOMERS_INSERT, + ), + TableSeed( + table_name="transactions", + rows=COMMON_TRANSACTION_ROWS, + insert_sql=_MYSQL_TRANSACTIONS_INSERT, + ), + TableSeed( + table_name="all_types", + rows=_ALL_TYPES_ROWS, + insert_sql=_MYSQL_ALL_TYPES_INSERT, + ), + ], + views=[_CUSTOMER_TXN_SUMMARY_VIEW], + stored_procedures=[_SP_ACTIVE_CUSTOMER_COUNT, _SP_UPDATE_CUSTOMER_STATUS], +) + + +# ----------------------------------------------------------------------------- +# Policy factory +# ----------------------------------------------------------------------------- + + +@lru_cache(maxsize=1) +def get_admin_engine() -> Engine: + """Build (and cache) the SQLAlchemy engine bound to ADMIN credentials. + + Distinct from the ingest credentials `build_mysql_config` uses for the + CLI subprocess: the ingest user (`om_user`) is a scoped account whose + GRANTs match the OM MySQL connector's documented minimum (SELECT, + SHOW VIEW, EXECUTE on the target schema; PROCESS globally). ADMIN + credentials are the container's `root` user, which the enforcer needs + for CREATE SCHEMA / CREATE TABLE / INSERT / SELECT. + + Tests that need to mutate the source out-of-band (e.g. drop a table + to test mark-deleted; create a poisoned view to test error + containment) consume this helper directly — keeps engine construction + centralized so admin DSN never lives in two places. + + E2E_MYSQL_ADMIN_USER / E2E_MYSQL_ADMIN_PASSWORD / E2E_MYSQL_HOST_PORT + are populated automatically by the session-scoped `mysql_container` + fixture in conftest.py, which boots a dedicated MySQL via + testcontainers and creates the scoped `om_user` post-startup. + Teammates do not set these vars manually. + """ + user = Env("E2E_MYSQL_ADMIN_USER", default="root").get() + password = Env("E2E_MYSQL_ADMIN_PASSWORD", default="password").get() + host_port = Env("E2E_MYSQL_HOST_PORT").get() + host, _, port_str = host_port.partition(":") + port = int(port_str) if port_str else None + url = URL.create( + drivername="mysql+pymysql", + username=user, + password=password, + host=host, + port=port, + ) + return create_engine(url) + + +@lru_cache(maxsize=1) +def get_policy() -> EnforcementPolicy: + """Lazy-build and cache the MySQL EnforcementPolicy. + + Reuses `get_admin_engine` for engine construction so the admin DSN + has a single source of truth. 
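+
+    Usage (as the conftest `mysql_source_ready` fixture consumes it):
+
+        run_source_baseline(get_policy, MYSQL_BASELINE, connector_name="mysql")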
+ """ + enforcer = MySqlEnforcer(get_admin_engine(), MYSQL_BASELINE) + return EnforcementPolicy(enforcer=enforcer, mode=EnforcementMode.APPLY) diff --git a/ingestion/tests/cli_e2e_v2/mysql/conftest.py b/ingestion/tests/cli_e2e_v2/mysql/conftest.py new file mode 100644 index 000000000000..60ec8e4a7107 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/mysql/conftest.py @@ -0,0 +1,231 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""MySQL-specific pytest fixtures. + +Pytest auto-discovers this conftest for tests under `tests/cli_e2e_v2/mysql/`. +Session-scoped `mysql_container` boots a dedicated MySQL via testcontainers +(no shared infra dependency, no teammate-managed admin creds), bootstraps +the `e2e` target schema, and creates a scoped ingest user `om_user` with +the production-minimum permissions documented for the OpenMetadata MySQL +connector. Subsequent fixtures consume that container. + +Two users live inside the container: + + - `root` (testcontainers default) — used by the framework's + ``SqlBaselineEnforcer`` to seed and reconcile the ``e2e`` schema + (CREATE TABLE / DROP / INSERT / SELECT). Ephemeral and disposable. + - ``om_user`` — the scoped ingest account whose GRANTs match the minimum + OM MySQL connector permissions: + + GRANT SELECT, SHOW VIEW, EXECUTE ON e2e.* TO 'om_user'@'%'; + GRANT PROCESS, SHOW_ROUTINE ON *.* TO 'om_user'@'%'; + + Used by the CLI metadata subprocess so ingestion is exercised against + a production-realistic privilege set, not against the framework's + DDL-capable admin user. + +The ``mysql_container`` fixture also populates the ``E2E_MYSQL_*`` environment +variables so the existing ``Env(key).ref()`` config-builder pattern keeps +rendering ``${E2E_MYSQL_*}`` placeholders into the workflow YAML — secrets +never leak to tmp_path even though they are now generated per-session. + +Filter tests that need isolated services do NOT use ``mysql_cfg`` or +``mysql_metadata_ingested`` — they call ``build_mysql_config(mysql_service_name( +session_uuid, variant="..."), om_server_config)`` directly and run their own +ingest with the variant filter config. + +Depends on ``session_uuid``, ``om_server_config``, and ``registered_services`` +fixtures from the top-level conftest.py. 
+""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING + +import pytest +from sqlalchemy import create_engine, text +from testcontainers.mysql import MySqlContainer + +from ..core.config.pipelines import MetadataPipeline +from ..core.fixtures import metadata_ingest_once, run_source_baseline +from .baseline import MYSQL_BASELINE, get_admin_engine, get_policy +from .connector import build_mysql_config, mysql_service_name +from .expected import mysql_expected + +if TYPE_CHECKING: + from collections.abc import Callable, Generator + + from sqlalchemy.engine import Engine + + from ..core.config.builder import WorkflowConfig + from ..core.config.server import ServerConfig + from ..core.expected.types import ExpectedService + + +_INGEST_USER = "om_user" +_INGEST_PASSWORD = "om_password" +_TARGET_SCHEMA = "e2e" +_MYSQL_IMAGE = "mysql:8.0" + +_ENV_VARS = ( + "E2E_MYSQL_USER", + "E2E_MYSQL_PASSWORD", + "E2E_MYSQL_HOST_PORT", + "E2E_MYSQL_ADMIN_USER", + "E2E_MYSQL_ADMIN_PASSWORD", + "E2E_MYSQL_DATABASE", +) + + +@pytest.fixture(scope="session") +def mysql_container() -> Generator[MySqlContainer, None, None]: + """Boot a dedicated MySQL via testcontainers and bootstrap the OM-doc users. + + Creates ``e2e`` and a scoped ``om_user`` whose GRANTs match the minimum + OM MySQL connector documentation (SELECT, SHOW VIEW, EXECUTE on the + target schema; PROCESS globally for connection-test; SHOW_ROUTINE + globally so stored-procedure bodies are readable). Also populates + ``E2E_MYSQL_*`` environment variables for the rest of the session so + the existing ``Env(key).ref()`` YAML pattern is preserved unchanged. + """ + container = MySqlContainer(_MYSQL_IMAGE) + with container as running: + host = running.get_container_host_ip() + port = running.get_exposed_port(3306) + # MySqlContainer wires MYSQL_ROOT_PASSWORD to the same value as + # the user password, so `running.password` IS the root password. + root_url = f"mysql+pymysql://root:{running.password}@{host}:{port}/" + engine = create_engine(root_url) + try: + with engine.begin() as conn: + conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {_TARGET_SCHEMA}")) + conn.execute(text(f"CREATE USER IF NOT EXISTS '{_INGEST_USER}'@'%' IDENTIFIED BY '{_INGEST_PASSWORD}'")) + conn.execute(text(f"GRANT SELECT, SHOW VIEW, EXECUTE ON {_TARGET_SCHEMA}.* TO '{_INGEST_USER}'@'%'")) + conn.execute(text(f"GRANT PROCESS, SHOW_ROUTINE ON *.* TO '{_INGEST_USER}'@'%'")) + conn.execute(text("FLUSH PRIVILEGES")) + finally: + engine.dispose() + + # Populate Env-readable vars from the running container so neither + # connector.py (Env(...).ref()) nor baseline.py:get_admin_engine + # (Env(...).get()) needs to know about testcontainers. + previous: dict[str, str | None] = {var: os.environ.get(var) for var in _ENV_VARS} + os.environ["E2E_MYSQL_USER"] = _INGEST_USER + os.environ["E2E_MYSQL_PASSWORD"] = _INGEST_PASSWORD + os.environ["E2E_MYSQL_HOST_PORT"] = f"{host}:{port}" + os.environ["E2E_MYSQL_ADMIN_USER"] = "root" + os.environ["E2E_MYSQL_ADMIN_PASSWORD"] = running.password + os.environ["E2E_MYSQL_DATABASE"] = _TARGET_SCHEMA + try: + yield running + finally: + for var, prev in previous.items(): + if prev is None: + os.environ.pop(var, None) + else: + os.environ[var] = prev + # Clear the lru_cache'd engine so a second pytest run in the + # same Python process rebuilds against a freshly booted + # container instead of reusing a stale URL. 
+ get_admin_engine.cache_clear() + get_policy.cache_clear() + + +@pytest.fixture(scope="session") +def mysql_service(session_uuid: str) -> str: + """Session-shared MySQL service name (``e2e_mysql_``). + + Eliminates ``service = mysql_service_name(session_uuid)`` from every + test body. Filter tests still build their own variant-named services + via ``mysql_service_name(session_uuid, variant=...)`` directly — this + fixture is only the default, session-shared name. + """ + return mysql_service_name(session_uuid) + + +@pytest.fixture(scope="module") +def mysql_expected_factory( + mysql_service: str, +) -> Callable[..., ExpectedService]: + """Factory for ExpectedService trees bound to the session's service name. + + Usage: ``mysql_expected_factory()`` returns the full expected catalog; + ``mysql_expected_factory(tables=[...])`` returns a projection (used by + filter tests to pass a pre-built expected tree into the differ). + """ + + def _factory(*, tables: list[str] | None = None) -> ExpectedService: + return mysql_expected(mysql_service, tables=tables) + + return _factory + + +@pytest.fixture(scope="session") +def mysql_admin_engine(mysql_container: MySqlContainer) -> Engine: + """Admin-credentials SQLAlchemy engine for tests that need to mutate + the source out-of-band (drop a baseline table to test mark-deleted, + create a poisoned view to test error containment, etc.). + + Shares the cached engine that ``get_policy()`` builds — single DSN + source of truth — and depends on ``mysql_container`` so the env vars + feeding ``get_admin_engine`` are populated before first use. + """ + return get_admin_engine() + + +@pytest.fixture(scope="session") +def mysql_source_ready(mysql_container: MySqlContainer) -> None: + """Reconcile MySQL source with MYSQL_BASELINE once per pytest session. + + Fires before any MySQL test runs because ``mysql_cfg`` (and test-local + variant configs) declare this as a dependency. Depends on + ``mysql_container`` so admin creds + schema exist before the enforcer + runs CREATE TABLE. + """ + run_source_baseline(get_policy, MYSQL_BASELINE, connector_name="mysql") + + +@pytest.fixture(scope="module") +def mysql_cfg( + om_server_config: ServerConfig, + mysql_service: str, + mysql_source_ready: None, +) -> WorkflowConfig: + """Default module-scoped MySQL config, using the session-shared service name. + + For tests that can share service state across the module (vanilla ingest, + profiler — both operate on the same ingested entities). Filter tests + should build their own variant-named config via build_mysql_config rather + than relying on this shared fixture. + """ + return build_mysql_config(mysql_service, om_server_config) + + +@pytest.fixture(scope="module") +def mysql_metadata_ingested( + tmp_path_factory: pytest.TempPathFactory, + mysql_cfg: WorkflowConfig, + mysql_service: str, + registered_services: list[str], +) -> None: + """Run the MySQL metadata CLI once per module against the shared service. + + Cuts ~6 redundant CLI subprocess runs per module pass. Tests that just + need entities ingested (profiler, lineage, classification, structural, + stored-procedure, descriptions) depend on this fixture instead of + invoking their own metadata ingest. 
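+
+    Hypothetical consumer (the dependency pattern test_mysql.py uses):
+
+        def test_something(om_client, mysql_metadata_ingested: None) -> None:
+            ...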
+ """ + metadata_ingest_once( + tmp_path_factory, + mysql_cfg, + registered_services, + service_name=mysql_service, + pipeline_options=MetadataPipeline( + includeStoredProcedures=True, + includeDDL=True, # parses view definitions for view->table lineage + ), + filter_kwargs={"schemas_include": ["e2e"]}, + label="mysql", + ) diff --git a/ingestion/tests/cli_e2e_v2/mysql/connector.py b/ingestion/tests/cli_e2e_v2/mysql/connector.py new file mode 100644 index 000000000000..0026a8ddbf16 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/mysql/connector.py @@ -0,0 +1,67 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Helpers for building MySQL WorkflowConfigs and deriving service names. + +Split out from conftest.py because pytest discourages importing from +conftest modules; filter tests need build_mysql_config to construct +variant-named services for isolation. + +Secrets handling: every env-backed YAML field uses Env(key).ref() — the +rendered cfg_*.yaml carries ${E2E_MYSQL_*} literal references, not real +credentials. Env's construction validates presence (raises EnvLoadError +at build time if a required var is unset). The metadata CLI expands the +references at subprocess load time via os.path.expandvars. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..core.config.builder import WorkflowConfig +from ..core.config.env import Env + +if TYPE_CHECKING: + from ..core.config.server import ServerConfig + + +def mysql_service_name(session_uuid: str, variant: str = "") -> str: + """Build the MySQL service name for a given pytest session and optional variant. + + Default variant "" returns the session-shared service (used by tests that + accept shared state across the test module, e.g. vanilla ingest + profiler). + + A non-empty variant (e.g., "filter_inc") produces a sibling service + (e.g., e2e_mysql_abc123_filter_inc) — filter tests use this for isolation + so prior-test residue doesn't pollute "extras" assertions. + """ + base = f"e2e_mysql_{session_uuid}" + return f"{base}_{variant}" if variant else base + + +def build_mysql_config(service_name: str, server: ServerConfig) -> WorkflowConfig: + """Build a base MySQL WorkflowConfig with the given service name. + + All env-backed fields emit ${E2E_MYSQL_*} references. Presence validation + happens in Env's constructor; missing required vars raise EnvLoadError at + build time with a clear message. Real values never enter the dict. + + E2E_MYSQL_DATABASE is optional — instance constructs without raising; + the field is added to the config only when the env var is actually set. 
+ """ + service_connection: dict = { + "type": "Mysql", + "username": Env("E2E_MYSQL_USER").ref(), + "authType": {"password": Env("E2E_MYSQL_PASSWORD").ref()}, + "hostPort": Env("E2E_MYSQL_HOST_PORT").ref(), + } + db = Env("E2E_MYSQL_DATABASE", required=False) + if db.get(): + service_connection["databaseSchema"] = db.ref() + + return WorkflowConfig.build( + source_type="mysql", + service_name=service_name, + service_connection=service_connection, + server=server, + ) diff --git a/ingestion/tests/cli_e2e_v2/mysql/enforcer.py b/ingestion/tests/cli_e2e_v2/mysql/enforcer.py new file mode 100644 index 000000000000..e77d07a5647e --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/mysql/enforcer.py @@ -0,0 +1,45 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""MySQL dialect specifics for SqlBaselineEnforcer. + +Introspection + table DDL (CREATE TABLE + FK + COMMENT) live in the base +via SQLAlchemy Inspector + `metadata.create_all`. This subclass supplies +only what Core doesn't model: + - stored-procedure listing (`INFORMATION_SCHEMA.ROUTINES`) + - DROP + CREATE for procedures (MySQL has no CREATE OR REPLACE PROCEDURE) +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from sqlalchemy import create_engine, text + +from ..core.source.sql_enforcer import SqlBaselineEnforcer + +if TYPE_CHECKING: + from sqlalchemy.engine import URL, Connection + + from ..core.source.sql import SqlSourceBaseline, StoredProcedureDefinition + +logger = logging.getLogger(__name__) + + +class MySqlEnforcer(SqlBaselineEnforcer): + _stored_procedure_query_sql = ( + "SELECT ROUTINE_SCHEMA, ROUTINE_NAME " + "FROM INFORMATION_SCHEMA.ROUTINES " + "WHERE ROUTINE_SCHEMA IN :schemas AND ROUTINE_TYPE = 'PROCEDURE'" + ) + + @classmethod + def from_url(cls, url: str | URL, baseline: SqlSourceBaseline) -> MySqlEnforcer: + """Construct with a SQLAlchemy engine built from a connection URL.""" + return cls(create_engine(url), baseline) + + def _apply_stored_procedure(self, conn: Connection, sp: StoredProcedureDefinition) -> None: + logger.debug("[mysql] DROP+CREATE PROCEDURE %s.%s", sp.schema, sp.name) + conn.execute(text(f"DROP PROCEDURE IF EXISTS {sp.schema}.{sp.name}")) + conn.execute(text(sp.definition_sql)) diff --git a/ingestion/tests/cli_e2e_v2/mysql/expected.py b/ingestion/tests/cli_e2e_v2/mysql/expected.py new file mode 100644 index 000000000000..bcb5845bed7d --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/mysql/expected.py @@ -0,0 +1,129 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""Expected OM-side catalog for the MySQL baseline. + +Derived from `MYSQL_BASELINE.metadata` via `MYSQL_TYPE_MAP`. The hand-authored +column lists disappear — column types, descriptions, constraints, and +primary keys all come off the SQLAlchemy Column declarations in +`core/source/common_baseline.py` + `mysql/baseline.py`. + +Views aren't in MetaData (they're raw SQL), so the view's ExpectedTable is +appended manually. Stored procedures are passed in as a hand-authored list +to `derive_expected_service`. + +Entries in `MYSQL_TYPE_MAP` that may need adjustment after Task 25's live +run are marked inline. 
+""" + +from __future__ import annotations + +from sqlalchemy import Boolean +from sqlalchemy.dialects import mysql + +from metadata.generated.schema.entity.data.table import DataType +from metadata.generated.schema.entity.services.databaseService import ( + DatabaseServiceType, +) + +from ..core.expected.derive import derive_expected_service +from ..core.expected.type_map import CORE_TYPE_MAP, TypeMap +from ..core.expected.types import ( + ExpectedColumn, + ExpectedService, + ExpectedStoredProcedure, + ExpectedTable, +) +from .baseline import MYSQL_BASELINE + +# ----------------------------------------------------------------------------- +# MYSQL_TYPE_MAP — extends CORE with MySQL dialect classes + Boolean override. +# Entries flagged TASK25 may need correction after the first live ingest. +# ----------------------------------------------------------------------------- + +MYSQL_TYPE_MAP: TypeMap = { + **CORE_TYPE_MAP, + # Core overrides (dialect behaves differently than the generic mapping). + Boolean: DataType.TINYINT, # MySQL stores BOOL as TINYINT(1); TASK25 + # Integer variants — MRO walks through Integer first, so we must + # override before it resolves to DataType.INT. + mysql.TINYINT: DataType.TINYINT, + mysql.MEDIUMINT: DataType.INT, # no MEDIUMINT in OM DataType + # Float variants — same reasoning; mysql.DOUBLE extends Float. + mysql.DOUBLE: DataType.DOUBLE, + # String-family size variants — mysql.MEDIUMTEXT / LONGTEXT / TINYTEXT + # extend `_StringType`, which MRO-walks to String (not Text), so CORE's + # `String → VARCHAR` would give the wrong answer without these entries. + mysql.TINYTEXT: DataType.TEXT, # no TINYTEXT in OM DataType + mysql.MEDIUMTEXT: DataType.MEDIUMTEXT, + mysql.LONGTEXT: DataType.TEXT, # LONGTEXT absent from enum; TASK25 + # Binary-family — mysql.BINARY / VARBINARY / *BLOB extend `_Binary`, + # which MRO skips past `LargeBinary`, so CORE's `LargeBinary → BLOB` + # doesn't help the binary/varbinary/tiny/medium/long variants. + mysql.BINARY: DataType.BINARY, + mysql.VARBINARY: DataType.VARBINARY, + mysql.TINYBLOB: DataType.BLOB, # no TINYBLOB in OM DataType + mysql.MEDIUMBLOB: DataType.MEDIUMBLOB, + mysql.LONGBLOB: DataType.LONGBLOB, + # Dialect-only types with no generic SQLAlchemy parent in CORE. + mysql.YEAR: DataType.YEAR, + mysql.BIT: DataType.BIT, + mysql.SET: DataType.SET, + # NOTE: mysql.JSON / mysql.ENUM / mysql.BLOB / mysql.TIMESTAMP are + # resolved via CORE_TYPE_MAP through the MRO walk (see type_map.py). + # mysql.VARCHAR / mysql.CHAR / mysql.TEXT likewise — no entries needed. +} + + +def mysql_expected( + service_name: str, + *, + tables: list[str] | None = None, +) -> ExpectedService: + """Build the expected MySQL catalog for a given service name. + + Structural portion (tables + columns + types + PKs + comments) is + derived from `MYSQL_BASELINE.metadata`. The view and stored procedure + are appended since neither lives in MetaData. + + `tables=None` -> full catalog. `tables=[...]` -> only the named tables + survive (used by filter tests with MatchMode.STRICT). 
+ """ + expected = derive_expected_service( + service_name=service_name, + service_type=DatabaseServiceType.Mysql, + metadata=MYSQL_BASELINE.metadata, + type_map=MYSQL_TYPE_MAP, + database="default", + views=[_expected_customer_txn_summary_view()], + stored_procedures=[ + ExpectedStoredProcedure(name="sp_active_customer_count"), + ExpectedStoredProcedure(name="sp_update_customer_status"), + ], + ) + + if tables is not None: + kept = set(tables) + schema = expected.databases[0].schemas[0] + schema.tables[:] = [t for t in schema.tables if t.name in kept] + + return expected + + +def _expected_customer_txn_summary_view() -> ExpectedTable: + """View treated as Table entity (OM uses tableType=View). + + View columns are declared manually since the view body is raw SQL and + not in our SQLAlchemy MetaData. MySQL's `COUNT(*)` returns BIGINT; + `COALESCE(SUM(DECIMAL), 0)` returns DECIMAL. + """ + return ExpectedTable( + name="customer_txn_summary", + columns=[ + ExpectedColumn("customer_id", DataType.INT), + ExpectedColumn("full_name", DataType.VARCHAR), + ExpectedColumn("customer_status", DataType.VARCHAR), + ExpectedColumn("txn_count", DataType.BIGINT), + ExpectedColumn("total_amount", DataType.DECIMAL), + ], + ) diff --git a/ingestion/tests/cli_e2e_v2/mysql/test_mysql.py b/ingestion/tests/cli_e2e_v2/mysql/test_mysql.py new file mode 100644 index 000000000000..c59065820e75 --- /dev/null +++ b/ingestion/tests/cli_e2e_v2/mysql/test_mysql.py @@ -0,0 +1,530 @@ +# Copyright 2026 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +"""MySQL pilot — CLI E2E v2 tests. + +Exercises the v2 framework end-to-end against a MySQL source. Covers all +the pipelines the MVP ships (metadata, profiler, auto-classification, and +view lineage via SQL parsing; DQ deferred to post-MVP) plus four filter +scenarios, FK/description coverage, mark-deleted re-ingest, error +containment, and column-level lineage. + +Lineage note: MySQL FK constraints produce TableConstraint entries on the +table entity, not lineage edges (see `project-mysql-fk-no-lineage.md`). +The only real lineage MySQL surfaces is view-to-table lineage derived from +parsing the view definition SQL. The FK assertion targets +`tableConstraints`, not upstream edges. + +Module-scoped `mysql_metadata_ingested` runs the metadata CLI once for +tests that consume the shared service — profiler, lineage, +classification, structural, description, FK. That fixture also registers +the service name for session-end cleanup. + +Tests that mutate the source state (mark-deleted, error containment) use +their own isolated services and clean up after themselves so they +don't perturb the shared fixture. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest +from sqlalchemy import text + +from metadata.generated.schema.configuration.profilerConfiguration import MetricType + +from ..core.config.pipelines import ( + AutoClassificationPipeline, + LineagePipeline, + MetadataPipeline, + ProfilerPipeline, +) +from ..core.expected.differ import MatchMode, assert_service_matches +from ..core.filter_scenarios import ( + COMMON_FILTER_SCENARIOS, + FilterScenario, + expected_tables_for, +) +from .connector import build_mysql_config, mysql_service_name +from .expected import mysql_expected + +if TYPE_CHECKING: + from collections.abc import Callable + + from sqlalchemy.engine import Engine + + from ..core.config.builder import WorkflowConfig + from ..core.config.server import ServerConfig + from ..core.expected.types import ExpectedService + from ..core.fluent.om_client import OmClient + from ..core.runner.cli_runner import CliRunner + +# --------------------------------------------------------------------------- +# Structural (metadata pipeline) — full Expected* tree walk +# --------------------------------------------------------------------------- + + +def test_vanilla_ingest_structural( + om_client: OmClient, + mysql_expected_factory: Callable[..., ExpectedService], + # `mysql_metadata_ingested: None` is a pytest idiom: declaring the + # fixture as a parameter triggers its setup side-effect (here, running + # the metadata CLI once per module). The value itself is always None. + # Every test below that asserts on OM state does the same. + mysql_metadata_ingested: None, +) -> None: + """Metadata ingest produces the declared OM catalog (SUPERSET). + + Walks the full Expected* tree — table structure, every column's + DataType, constraints, descriptions, stored procedures. Subsumes + per-column type spot-checks and per-entity count smoke tests, so we + don't repeat those at the test-function level. + """ + assert_service_matches(mysql_expected_factory(), om_client) + + +# --------------------------------------------------------------------------- +# Profiler — exhaustive metric coverage on representative columns +# --------------------------------------------------------------------------- + + +# Explicit "compute all stat-type metrics" list. Default profiler +# metrics (`get_default_metrics` in OM) omit minLength/maxLength so +# string-length stats come back None — passing this list overrides the +# default. We exclude parameterized metrics (countInSet, *LikeCount, +# regexCount, etc.) that need user-supplied values; they're applicable +# for DQ-style checks, not the "compute everything we can off raw +# rows" coverage this test wants. 
+_ALL_PROFILER_METRICS: list[MetricType] = [ + # Table-level + MetricType.rowCount, + MetricType.columnCount, + MetricType.columnNames, + # Column counts / proportions + MetricType.valuesCount, + MetricType.nullCount, + MetricType.nullProportion, + MetricType.distinctCount, + MetricType.distinctProportion, + MetricType.uniqueCount, + MetricType.uniqueProportion, + MetricType.duplicateCount, + # Numeric stats + MetricType.min, + MetricType.max, + MetricType.mean, + MetricType.sum, + MetricType.stddev, + MetricType.median, + MetricType.firstQuartile, + MetricType.thirdQuartile, + MetricType.interQuartileRange, + MetricType.nonParametricSkew, + MetricType.histogram, + # String stats + MetricType.minLength, + MetricType.maxLength, +] + + +def test_profiler_metrics( + cli_runner: CliRunner, + om_client: OmClient, + mysql_cfg: WorkflowConfig, + mysql_service: str, + mysql_metadata_ingested: None, +) -> None: + """Profiler emits the full metric suite — table-level + per-column. + + Lean: ONE pipeline run, multiple assertions off the produced state. + Exhaustive: covers numeric + string + count metric paths in one pass. + + Scope: + - Table-level rowCount on three seeded tables. + - Numeric metrics on `customers.credit_score` (deterministic ints + 720, 680, 650, 750, 600 → min=600, max=750, mean=680, sum=3400, + median=680, distinct=5, unique=5, null=0). + - String length metrics on `customers.first_name` (5 values, min + length 3 ("Bob"/"Eve"), max length 7 ("Charlie")). + + Pipeline runs with `metrics=_ALL_PROFILER_METRICS` so OM doesn't + fall through to the default-set which omits minLength/maxLength. + """ + status = cli_runner.run( + mysql_cfg.pipeline(ProfilerPipeline(metrics=_ALL_PROFILER_METRICS)).with_filter(schemas_include=["e2e"]) + ) + assert status.success, f"profiler failures: {status.all_failures}" + + customers_fqn = f"{mysql_service}.default.e2e.customers" + transactions_fqn = f"{mysql_service}.default.e2e.transactions" + all_types_fqn = f"{mysql_service}.default.e2e.all_types" + + # Table-level row counts (seeded determinism). + om_client.table(customers_fqn).profile.eventually().row_count().equals(5) + om_client.table(transactions_fqn).profile.eventually().row_count().equals(5) + om_client.table(all_types_fqn).profile.eventually().row_count().equals(3) + + # Numeric column — credit_score sorted: [600, 650, 680, 720, 750]. + # min=600, max=750, mean=680, sum=3400, distinct=5, unique=5, null=0, + # median=680 (textbook middle of 5-element sample). + om_client.table(customers_fqn).profile.eventually().column("credit_score").has_metrics( + valuesCount=5, + nullCount=0, + distinctCount=5, + uniqueCount=5, + min=600, + max=750, + mean=680, + sum=3400, + median=680, + ) + + # String column — first_name: Alice(5), Bob(3), Charlie(7), Diana(5), Eve(3). + om_client.table(customers_fqn).profile.eventually().column("first_name").has_metrics( + valuesCount=5, + nullCount=0, + minLength=3, + maxLength=7, + ) + + +# --------------------------------------------------------------------------- +# Stored procedures — body content (presence covered by structural walk) +# --------------------------------------------------------------------------- + + +def test_stored_procedure_bodies( + om_client: OmClient, + mysql_service: str, + mysql_metadata_ingested: None, +) -> None: + """Both SP bodies survive ingestion intact. 
+
+    Existence of each SP is asserted by the structural walk via
+    `ExpectedStoredProcedure` entries; this test adds the body-content
+    coverage that the structural walk doesn't do (and exercises the
+    parameterized-SP code path on `sp_update_customer_status`).
+    """
+    base = f"{mysql_service}.default.e2e"
+
+    om_client.stored_procedure(f"{base}.sp_active_customer_count").has_code_containing("SELECT COUNT(*)")
+
+    # Parameterized SP with DML body — different code path than the
+    # parameterless SELECT-only procedure above.
+    sp_update = om_client.stored_procedure(f"{base}.sp_update_customer_status")
+    sp_update.has_code_containing("p_customer_id")
+    sp_update.has_code_containing("UPDATE")
+
+
+# ---------------------------------------------------------------------------
+# Lineage — table-level + column-level + schemaDefinition
+# ---------------------------------------------------------------------------
+
+
+def test_lineage_view_references_tables(
+    cli_runner: CliRunner,
+    om_client: OmClient,
+    mysql_cfg: WorkflowConfig,
+    mysql_service: str,
+    mysql_metadata_ingested: None,
+) -> None:
+    """View → base-table lineage (table-level + column-level) and view DDL.
+
+    The view's `schemaDefinition` is the prerequisite for OM's SQL parser
+    to produce lineage at all — assert it's present BEFORE asserting the
+    parsed edges, so a "DDL didn't plumb through" regression points at
+    the right root cause instead of looking like a parser bug.
+    """
+    view_fqn = f"{mysql_service}.default.e2e.customer_txn_summary"
+    customers_fqn = f"{mysql_service}.default.e2e.customers"
+    transactions_fqn = f"{mysql_service}.default.e2e.transactions"
+
+    # Prereq: includeDDL=True actually plumbed the CREATE VIEW body into OM.
+    om_client.table(view_fqn).has_schema_definition_containing("LEFT JOIN")
+
+    status = cli_runner.run(
+        mysql_cfg.pipeline(
+            # processQueryLineage defaults True and needs SELECT on
+            # mysql.general_log (the general query log table) — a privilege
+            # the scoped ingest user deliberately doesn't hold. View lineage
+            # is what we care about; disable the query-log path.
+            LineagePipeline(processQueryLineage=False)
+        ).with_filter(schemas_include=["e2e"])
+    )
+    assert status.success, f"lineage failures: {status.all_failures}"
+
+    # Table-level lineage edges.
+    om_client.table(view_fqn).lineage.eventually().has_upstream(customers_fqn)
+    om_client.table(view_fqn).lineage.eventually().has_upstream(transactions_fqn)
+
+    # Column-level lineage — proves the SQL parser actually parsed,
+    # not just that "some lineage edge was emitted" via a fallback.
+    # `customer_id` is `c.id AS customer_id` (identity); `total_amount`
+    # is `COALESCE(SUM(t.amount), 0)` (aggregate over transactions.amount).
+    om_client.table(view_fqn).lineage.eventually().has_column_lineage(source="customers.id", target="customer_id")
+    om_client.table(view_fqn).lineage.eventually().has_column_lineage(
+        source="transactions.amount", target="total_amount"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Foreign key TableConstraint (no lineage edge for MySQL)
+# ---------------------------------------------------------------------------
+
+
+def test_transactions_foreign_key_constraint(
+    om_client: OmClient,
+    mysql_service: str,
+    mysql_metadata_ingested: None,
+) -> None:
+    """FK on transactions.customer_id -> customers.id lands as TableConstraint.
+ + Uses eventually because OM processes FK constraints as a post-ingest + PATCH (connector iterates tables, defers FK when referenced table isn't + yet in OM, then patches at end). + """ + transactions_fqn = f"{mysql_service}.default.e2e.transactions" + om_client.table(transactions_fqn).eventually(60).has_foreign_key_constraint( + column="customer_id", + referenced_table="customers", + referenced_column="id", + ) + + +# --------------------------------------------------------------------------- +# Auto-classification (PII via column-name regex) + negative assertion +# --------------------------------------------------------------------------- + + +def test_auto_classification_tags_pii_columns( + cli_runner: CliRunner, + om_client: OmClient, + mysql_cfg: WorkflowConfig, + mysql_service: str, + mysql_metadata_ingested: None, +) -> None: + """Auto-classification tags PII columns AND leaves non-PII columns alone. + + Positive: `email` and `date_of_birth` get the expected PII tags. + Negative: `id` and `status` stay untagged — guards against a + "classifier became trigger-happy" regression that would silently + pass a positive-only suite. + """ + status = cli_runner.run( + mysql_cfg.pipeline( + AutoClassificationPipeline( + storeSampleData=True, + enableAutoClassification=True, + # Lowered from default 80; with only 5 seed rows per + # column the combined score sits at the edge of 80%. + # 60 aligns with PII's server-side `minimumConfidence`. + confidence=60, + ) + ).with_filter(schemas_include=["e2e"]) + ) + assert status.success, f"auto-classification failures: {status.all_failures}" + + customers_fqn = f"{mysql_service}.default.e2e.customers" + + # Positive — deterministic regex-based recognizers. + om_client.table(customers_fqn).column("email").has_tag("PII.Sensitive") + om_client.table(customers_fqn).column("date_of_birth").has_tag("PII.NonSensitive") + + # Negative — primary key and status enum should never be PII-flagged. + # Catches regressions where the classifier becomes overconfident on + # column-name matching across non-PII columns. + om_client.table(customers_fqn).column("id").has_no_tag("PII.Sensitive") + om_client.table(customers_fqn).column("id").has_no_tag("PII.NonSensitive") + om_client.table(customers_fqn).column("status").has_no_tag("PII.Sensitive") + om_client.table(customers_fqn).column("status").has_no_tag("PII.NonSensitive") + + +# --------------------------------------------------------------------------- +# Mark-deleted on re-ingest +# --------------------------------------------------------------------------- + + +def test_mark_deleted_tables_on_reingest( + cli_runner: CliRunner, + om_client: OmClient, + om_server_config: ServerConfig, + session_uuid: str, + registered_services: list[str], + mysql_admin_engine: Engine, + mysql_source_ready: None, +) -> None: + """Dropping a source table + re-ingesting marks the OM entity deleted. + + Lifecycle, end-to-end: + 1. Ingest baseline → all_types present in OM, deleted=False. + 2. Drop e2e.all_types via admin engine (out-of-band of the framework). + 3. Re-ingest with markDeletedTables=True (fixture default). + 4. Assert all_types now has deleted=True in OM. + 5. Restore e2e.all_types via the baseline policy (apply re-runs full + baseline DDL + seeds — idempotent CREATE IF NOT EXISTS path). + + Uses an isolated service so the shared `mysql_metadata_ingested` + fixture's catalog is untouched. 
+ """ + service = mysql_service_name(session_uuid, variant="mark_deleted") + registered_services.append(service) + cfg = build_mysql_config(service, om_server_config) + pipeline_options = MetadataPipeline( + markDeletedTables=True, + includeStoredProcedures=False, # not needed for this test; cuts run time + ) + + all_types_fqn = f"{service}.default.e2e.all_types" + + # Phase 1: initial ingest — all_types present, alive. + status = cli_runner.run(cfg.pipeline(pipeline_options).with_filter(schemas_include=["e2e"])) + assert status.success, f"initial ingest: {status.all_failures}" + om_client.table(all_types_fqn).is_not_deleted() + + # Phase 2: drop the source table — out-of-band mutation via admin engine. + with mysql_admin_engine.begin() as conn: + conn.execute(text("DROP TABLE e2e.all_types")) + + try: + # Phase 3: re-ingest — markDeletedTables flips the entity to deleted=True. + status = cli_runner.run(cfg.pipeline(pipeline_options).with_filter(schemas_include=["e2e"])) + assert status.success, f"re-ingest after drop: {status.all_failures}" + + # Phase 4: verify the soft-delete landed on the OM entity. + om_client.table(all_types_fqn).eventually(30).is_soft_deleted() + finally: + # Phase 5: restore the source so subsequent test sessions start + # from a clean baseline. The policy's apply() is idempotent — + # CREATE TABLE IF NOT EXISTS + the seed insert template's + # ON DUPLICATE KEY UPDATE handle the re-create path. + from .baseline import get_policy + + get_policy().enforcer.apply([]) + + +# --------------------------------------------------------------------------- +# Error containment — one broken view doesn't tank the rest of ingest +# --------------------------------------------------------------------------- + + +def test_error_containment_one_broken_view( + cli_runner: CliRunner, + om_client: OmClient, + om_server_config: ServerConfig, + session_uuid: str, + registered_services: list[str], + mysql_admin_engine: Engine, + mysql_source_ready: None, +) -> None: + """A broken view doesn't abort the whole metadata pipeline. + + Setup: create a helper table, create a view referencing one of its + columns, then DROP that column. The view becomes "invalid" — MySQL + blocks DESCRIBE on it but SHOW CREATE VIEW still works. OM's + metadata ingestion should: + - successfully ingest customers, transactions, all_types + - log an error / fail on the broken view + - NOT crash the whole workflow + + Uses an isolated service so the broken view doesn't pollute other + tests' OM state. Cleans up the source-side artefacts in `finally`. + """ + service = mysql_service_name(session_uuid, variant="error_containment") + registered_services.append(service) + cfg = build_mysql_config(service, om_server_config) + + # Phase 1: synthesize a broken view. + with mysql_admin_engine.begin() as conn: + conn.execute( + text("CREATE TABLE IF NOT EXISTS e2e._helper_for_broken_view (id INT PRIMARY KEY, doomed_col INT)") + ) + conn.execute( + text("CREATE OR REPLACE VIEW e2e._broken_view AS SELECT id, doomed_col FROM e2e._helper_for_broken_view") + ) + conn.execute(text("ALTER TABLE e2e._helper_for_broken_view DROP COLUMN doomed_col")) + # _broken_view now references a non-existent column — DESCRIBE fails. + + try: + # Phase 2: run metadata ingest. Don't assert success — the broken + # view is expected to surface as a step error. Run the pipeline + # and inspect what landed. 
+ try: + status = cli_runner.run( + cfg.pipeline(MetadataPipeline(includeStoredProcedures=False)).with_filter(schemas_include=["e2e"]) + ) + except Exception: + status = None + + # Phase 3: regardless of overall status, the unaffected baseline + # tables must be in OM. That's the whole point of "error + # containment" — one bad apple doesn't drop the rest. + for table in ("customers", "transactions", "all_types"): + om_client.table(f"{service}.default.e2e.{table}").eventually(30).exists() + + # Phase 4: optionally check the broken view was either (a) reported + # as a failure in the status JSON or (b) ingested with no columns. + # We accept either outcome — the key invariant is that the rest + # of the catalog made it. + if status is not None and status.all_failures: + failure_text = " ".join(str(f.get("error", "")) for f in status.all_failures).lower() + assert "_broken_view" in failure_text or "doomed_col" in failure_text or "invalid" in failure_text, ( + f"broken view didn't surface in failures: {status.all_failures}" + ) + finally: + # Cleanup: drop the synthetic objects. + with mysql_admin_engine.begin() as conn: + conn.execute(text("DROP VIEW IF EXISTS e2e._broken_view")) + conn.execute(text("DROP TABLE IF EXISTS e2e._helper_for_broken_view")) + + +# --------------------------------------------------------------------------- +# Filter scenarios — isolated services, STRICT mode catches "extras" +# --------------------------------------------------------------------------- + + +# Per-variant expected-tables for this connector's baseline. The common +# baseline (customers, transactions) is present in every variant unless +# excluded; dialect-specific tables (all_types) and the view +# (customer_txn_summary) are MySQL-only and listed here. +_EXPECTED_TABLES_BY_VARIANT: dict[str, list[str] | None] = { + "inc_exact": ["customers"], + "exc_exact": ["customers", "all_types", "customer_txn_summary"], + "sch_inc": None, # None = full baseline + "regex_prio": ["customers"], +} + + +@pytest.mark.parametrize("scenario", COMMON_FILTER_SCENARIOS, ids=lambda s: s.id) +def test_filter( + scenario: FilterScenario, + cli_runner: CliRunner, + om_client: OmClient, + om_server_config: ServerConfig, + session_uuid: str, + registered_services: list[str], + mysql_source_ready: None, +) -> None: + """Filter patterns — include exact / exclude exact / schema include / + regex include+exclude with exclude priority. + + Each variant builds an isolated service so STRICT-mode extras detection + doesn't cross-contaminate. Expected-tables for this connector's + baseline live in `_EXPECTED_TABLES_BY_VARIANT` above. 
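+
+    Illustrative shape (an assumption for this docstring; the real
+    patterns live in COMMON_FILTER_SCENARIOS): "inc_exact" includes only
+    the literal table name "customers", so under STRICT matching exactly
+    ["customers"] may land in OM and any extra table fails the run.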
+    """
+    expected_tables = expected_tables_for(scenario, _EXPECTED_TABLES_BY_VARIANT, connector="mysql")
+
+    service = mysql_service_name(session_uuid, variant=f"filter_{scenario.variant}")
+    registered_services.append(service)
+
+    cfg = build_mysql_config(service, om_server_config)
+    status = cli_runner.run(
+        cfg.pipeline(MetadataPipeline(includeStoredProcedures=True)).with_filter(**scenario.filter_kwargs)
+    )
+    assert status.success, f"filter[{scenario.variant}] failures: {status.all_failures}"
+
+    assert_service_matches(
+        mysql_expected(service, tables=expected_tables),
+        om_client,
+        mode=MatchMode.STRICT,
+    )
diff --git a/ingestion/tests/unit/workflow/test_base_workflow.py b/ingestion/tests/unit/workflow/test_base_workflow.py
index 14d73fe6aec2..f58f247963fa 100644
--- a/ingestion/tests/unit/workflow/test_base_workflow.py
+++ b/ingestion/tests/unit/workflow/test_base_workflow.py
@@ -12,6 +12,7 @@
 Validate the logic and status handling of the base workflow
 """
 
+import json
 from typing import Iterable, Tuple  # noqa: UP035
 from unittest import TestCase
 from unittest.mock import MagicMock, patch
@@ -107,6 +108,20 @@ def close(self) -> None:
         """Nothing to do"""
 
 
+class OkSink(Sink):
+    """Sink that never produces failures — every element succeeds."""
+
+    def _run(self, element: int) -> Either:
+        return Either(right=element)
+
+    @classmethod
+    def create(cls, _: dict, __: OpenMetadataConnection) -> "OkSink":
+        return cls()
+
+    def close(self) -> None:
+        """Nothing to do"""
+
+
 class SimpleWorkflow(IngestionWorkflow):
     """
     Simple Workflow for testing
@@ -118,6 +133,14 @@ def set_steps(self):
         self.steps: Tuple[Step] = (SimpleSink(),)  # noqa: UP006
 
 
+class OkWorkflow(IngestionWorkflow):
+    """Workflow wired to OkSink — produces zero failures."""
+
+    def set_steps(self):
+        self.source = SimpleSource()
+        self.steps: tuple[Step] = (OkSink(),)
+
+
 class BrokenWorkflow(IngestionWorkflow):
     """
     Simple Workflow for testing
@@ -235,3 +258,63 @@
 
         mock_print_status.assert_called_once()
         mock_stop.assert_called_once()
+
+
+def test_write_status_file_writes_expected_shape(tmp_path):
+    workflow = OkWorkflow(config=config)
+    workflow.execute()
+
+    status_file = tmp_path / "status.json"
+    workflow.write_status_file(status_file)
+
+    assert status_file.exists()
+    payload = json.loads(status_file.read_text())
+
+    assert payload["pipeline_type"] == "simple"
+    assert payload["ingestion_pipeline_fqn"] is None
+    assert payload["success"] is True
+    assert isinstance(payload["steps"], list)
+    assert len(payload["steps"]) >= 2
+
+
+def test_write_status_file_reports_failure_shape_with_sink_errors(tmp_path):
+    workflow = SimpleWorkflow(config=config)
+    workflow.execute()
+
+    status_file = tmp_path / "status.json"
+    workflow.write_status_file(status_file)
+
+    payload = json.loads(status_file.read_text())
+
+    assert isinstance(payload["steps"], list)
+    assert len(payload["steps"]) >= 2
+    sink_steps_with_failures = [s for s in payload["steps"] if s.get("failures")]
+    assert len(sink_steps_with_failures) >= 1
+    # SimpleSink fails 1 of 5 records (80% success, below the default 90% threshold)
+    assert payload["success"] is False
+
+
+def test_write_status_file_reports_failure_when_source_fails(tmp_path):
+    workflow = BrokenWorkflow(config=config)
+    workflow.execute()
+
+    status_file = tmp_path / "status.json"
+    workflow.write_status_file(status_file)
+
+    payload = json.loads(status_file.read_text())
+
+    # BrokenSource yields non-Either values → source failures → result_status FAILURE
+    assert payload["success"] is False
+
+
+def test_write_status_file_includes_ingestion_pipeline_fqn(tmp_path):
+    fqn_config = config.model_copy(update={"ingestionPipelineFQN": "test_service.test_pipeline"})
+    workflow = SimpleWorkflow(config=fqn_config)
+    workflow.execute()
+
+    status_file = tmp_path / "status.json"
+    workflow.write_status_file(status_file)
+
+    payload = json.loads(status_file.read_text())
+
+    assert payload["ingestion_pipeline_fqn"] == "test_service.test_pipeline"
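+
+
+# To run just these status-file tests locally (from ingestion/):
+#   pytest tests/unit/workflow/test_base_workflow.py -k write_status_file -v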