open-metadata · ulixius9 · May 7, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
@@ -1,6 +1,6 @@
 """Base class for Databricks-based data diff implementations"""
 
-from typing import Optional, Union
+from typing import Any, Optional, Union
 
 from metadata.data_quality.validations.runtime_param_setter.base_diff_params_setter import (
     BaseTableParameter,
@@ -13,7 +13,7 @@ class DatabricksBaseTableParameter(BaseTableParameter):
     @classmethod
     def _get_service_connection_config(
         cls,
-        service_connection_config,
+        service_connection_config: Any,
     ) -> Optional[Union[str, dict]]:  # noqa: UP007, UP045
         """Build connection URL for Databricks-based connections"""
         if not service_connection_config:
@@ -25,8 +25,7 @@ def _get_service_connection_config(
             scheme = scheme.value
 
         host_port = getattr(service_connection_config, "hostPort", "localhost:443")
-        token = getattr(service_connection_config, "token", "")
-        token_value = token.get_secret_value() if hasattr(token, "get_secret_value") else str(token)
+        token_value = cls._extract_pat_token(service_connection_config)
 
         # Include httpPath if available (required for data_diff library)
         http_path = getattr(service_connection_config, "httpPath", "")
@@ -36,3 +35,28 @@ def _get_service_connection_config(
                 http_path = "/" + http_path
             return f"{scheme}://:{token_value}@{host_port}{http_path}"
         return f"{scheme}://:{token_value}@{host_port}"
+
+    @staticmethod
+    def _extract_pat_token(service_connection_config: Any) -> str:
+        """Extract the personal access token for URL-based data-diff auth.
+
+        DatabricksConnection / UnityCatalogConnection nest the token under
+        `authType.token` (PersonalAccessToken auth path). A legacy flat
+        `token` attribute is also honored for backwards compatibility.
+
+        Raises ValueError when no token is found instead of returning ""
+        — the empty-token URL silently falls back to OAuth U2M in the
+        Databricks SQL driver, which opens an interactive browser and hangs
+        non-interactive runs.
+        """
+        auth_type = getattr(service_connection_config, "authType", None)
+        token = getattr(auth_type, "token", None) if auth_type is not None else None
+        if token is None:
+            token = getattr(service_connection_config, "token", None)
+        if token is None:
+            raise ValueError(
+                "Databricks data diff requires Personal Access Token authentication; "
+                "no token found on the service connection. OAuth and Azure AD auth "
+                "types are not supported by the URL-based data-diff connection."
+            )
+        return token.get_secret_value() if hasattr(token, "get_secret_value") else str(token)
@@ -68,10 +68,13 @@ def assert_for_vanilla_ingestion(self, source_status: Status, sink_status: Statu
             )
             self.assertEqual(len(sink_status.failures), 0)
             self.assertEqual(len(sink_status.warnings), 0)
-            self.assertGreater(
-                (len(sink_status.records) + len(sink_status.updated_records)),
-                self.expected_tables(),
-            )
+            # Sink record count is intentionally NOT asserted: the
+            # metadata-rest sink batches PUTs and final-flushes on workflow
+            # close, but the Status object is logged before that flush — so
+            # sink_status.records understates the true count. We treat
+            # source records + zero sink failures as the correctness signal.
+            # Connectors that want stronger end-to-end coverage should
+            # override this method and add an API-level retrieve_table check.
 
         def assert_for_table_with_profiler(self, source_status: Status, sink_status: Status):
             self.assertEqual(len(source_status.failures), 0)
@@ -80,10 +83,8 @@ def assert_for_table_with_profiler(self, source_status: Status, sink_status: Sta
                 self.expected_profiled_tables(),
             )
             self.assertEqual(len(sink_status.failures), 0)
-            self.assertGreaterEqual(
-                (len(sink_status.records) + len(sink_status.updated_records)),
-                self.expected_profiled_tables(),
-            )
+            # Sink record count omitted for the same batched-flush reason
+            # documented in assert_for_vanilla_ingestion.
             # Since we removed view lineage from metadata workflow as part
             # of https://github.com/open-metadata/OpenMetadata/pull/18558
             # we need to introduce Lineage E2E base and add view lineage check there.

@@ -0,0 +1,36 @@
+source:
+  type: databricks
+  serviceName: e2e_databricks
+  serviceConnection:
+    config:
+      type: Databricks
+      hostPort: $E2E_DATABRICKS_HOST_PORT
+      httpPath: $E2E_DATABRICKS_HTTP_PATH
+      authType:
+        token: $E2E_DATABRICKS_TOKEN
+      catalog: $E2E_DATABRICKS_CATALOG
+      connectionTimeout: 120
+      queryHistoryTable: system.query.history
+      connectionOptions: {}
+      connectionArguments: {}
+  sourceConfig:
+    config:
+      type: DatabaseMetadata
+      markDeletedTables: true
+      includeTables: true
+      includeViews: true
+      includeStoredProcedures: false
+      includeDDL: true
+      schemaFilterPattern:
+        excludes:
+        - information_schema.*
+sink:
+  type: metadata-rest
+  config: {}
+workflowConfig:
+  loggerLevel: DEBUG
+  openMetadataServerConfig:
+    hostPort: http://localhost:8585/api
+    authProvider: openmetadata
+    securityConfig:
+      jwtToken: "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"