Merged
Changes from 12 commits
@@ -19,16 +19,5 @@ under the License.

# Aggregate Expressions

## Incompatible Aggregates

- **CollectSet**: Comet deduplicates NaN values (it treats `NaN == NaN` as true), while Spark treats each NaN as a distinct value.
When `spark.comet.exec.strictFloatingPoint=true`, `collect_set` on floating-point types falls back to Spark unless
`spark.comet.expression.CollectSet.allowIncompatible=true` is set.
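The two NaN equality semantics involved can be reproduced on a plain JVM, without Spark (a minimal sketch; the object name is illustrative):

```scala
// Minimal sketch (plain JVM, no Spark) of the two NaN equality semantics.
object NaNSemantics extends App {
  // Boxed java.lang.Double.equals treats NaN as equal to itself -- the
  // deduplicating comparison Comet effectively applies in collect_set.
  val boxedEqual =
    java.lang.Double.valueOf(Double.NaN).equals(java.lang.Double.valueOf(Double.NaN))
  // The primitive IEEE 754 comparison says NaN != NaN, so every NaN looks
  // distinct -- matching the Spark behavior described above.
  val primitiveEqual = Double.NaN == Double.NaN
  println(s"boxed: $boxedEqual, primitive: $primitiveEqual") // boxed: true, primitive: false
}
```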

## ANSI Mode

Comet will fall back to Spark for the following aggregate expressions when ANSI mode is enabled. Individual expressions can still be accelerated by setting `spark.comet.expression.EXPRNAME.allowIncompatible=true`, where `EXPRNAME` is the Spark expression class name. See the [Comet Supported Expressions Guide](../../expressions.md) for more information on this configuration setting.

- Average (supports all numeric inputs except decimal types)

There is an [epic](https://github.com/apache/datafusion-comet/issues/313) where we are tracking the work to fully implement ANSI support.
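For example, to accept these differences and keep `Average` accelerated under ANSI mode, the opt-in is a single setting (a sketch assuming `spark` is an active SparkSession with the Comet plugin loaded; not a recommendation to enable it blindly):

```scala
// Sketch: opt in to Comet acceleration for Average despite ANSI-mode differences.
spark.conf.set("spark.comet.expression.Average.allowIncompatible", "true")
```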
<!--BEGIN:EXPR_COMPAT[aggregate]-->
<!--END:EXPR_COMPAT-->
@@ -19,4 +19,5 @@ under the License.

# Array Expressions

- **SortArray**: Nested arrays with `Struct` or `Null` child values are not supported natively and will fall back to Spark.
<!--BEGIN:EXPR_COMPAT[array]-->
<!--END:EXPR_COMPAT-->
@@ -19,9 +19,5 @@ under the License.

# Date/Time Expressions

- **Hour, Minute, Second**: Incorrectly apply timezone conversion to TimestampNTZ inputs. TimestampNTZ stores local
time without timezone, so no conversion should be applied. These expressions work correctly with Timestamp inputs.
[#3180](https://github.com/apache/datafusion-comet/issues/3180)
- **TruncTimestamp (date_trunc)**: Produces incorrect results when used with non-UTC timezones. Compatible when
timezone is UTC.
[#2649](https://github.com/apache/datafusion-comet/issues/2649)
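The `TimestampNTZ` problem above boils down to plain `java.time` arithmetic (a minimal sketch; the `America/Los_Angeles` session timezone is just an example):

```scala
import java.time.{LocalDateTime, ZoneId}

// TimestampNTZ stores a wall-clock value, so hour() should read the field as-is.
val ntz = LocalDateTime.of(2024, 1, 1, 23, 30, 0)
println(ntz.getHour) // 23 -- correct: no timezone conversion

// Applying a UTC -> session-timezone shift, as described in #3180,
// silently changes the extracted hour:
val shifted = ntz
  .atZone(ZoneId.of("UTC"))
  .withZoneSameInstant(ZoneId.of("America/Los_Angeles"))
println(shifted.getHour) // 15 -- wrong for a timezone-less value
```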
<!--BEGIN:EXPR_COMPAT[datetime]-->
<!--END:EXPR_COMPAT-->
@@ -31,6 +31,7 @@ Compatibility notes are grouped by expression category:
aggregate
array
datetime
math
struct
cast
```
23 changes: 23 additions & 0 deletions docs/source/user-guide/latest/compatibility/expressions/math.md
@@ -0,0 +1,23 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Math Expressions

<!--BEGIN:EXPR_COMPAT[math]-->
<!--END:EXPR_COMPAT-->
@@ -19,5 +19,5 @@ under the License.

# Struct Expressions

- **StructsToJson (to_json)**: Does not support `+Infinity` and `-Infinity` for numeric types (float, double).
[#3016](https://github.com/apache/datafusion-comet/issues/3016)
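The root issue is that JSON (RFC 8259) has no literal for IEEE 754 infinities or NaN, while the JVM renders them as non-JSON tokens (a minimal illustration):

```scala
// IEEE 754 special values have no representation as JSON numbers (RFC 8259);
// the JVM's default rendering below is not valid JSON, which is why
// serializing these values with to_json is problematic.
println(Double.PositiveInfinity.toString) // Infinity
println(Double.NegativeInfinity.toString) // -Infinity
println(Double.NaN.toString)              // NaN
```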
<!--BEGIN:EXPR_COMPAT[struct]-->
<!--END:EXPR_COMPAT-->
78 changes: 78 additions & 0 deletions spark/src/main/scala/org/apache/comet/GenerateDocs.scala
@@ -39,10 +39,48 @@ object GenerateDocs {

private val publicConfigs: Set[ConfigEntry[_]] = CometConf.allConfs.filter(_.isPublic).toSet

/** (expression class simple name, incompatible reasons, unsupported reasons) */
private type CategoryNotes = Seq[(String, Seq[String], Seq[String])]

/**
* Mapping from expression category to the compatibility guide page where that category's
* auto-generated notes should be written, along with a function that produces the notes for
* that category from the serde maps in `QueryPlanSerde`.
*/
private def categoryPages: Map[String, (String, () => CategoryNotes)] = Map(
"array" -> ("compatibility/expressions/array.md",
() =>
QueryPlanSerde.arrayExpressions.toSeq.map { case (cls, serde) =>
(cls.getSimpleName, serde.getIncompatibleReasons(), serde.getUnsupportedReasons())
}),
"datetime" -> ("compatibility/expressions/datetime.md",
() =>
QueryPlanSerde.temporalExpressions.toSeq.map { case (cls, serde) =>
(cls.getSimpleName, serde.getIncompatibleReasons(), serde.getUnsupportedReasons())
}),
"math" -> ("compatibility/expressions/math.md",
() =>
QueryPlanSerde.mathExpressions.toSeq.map { case (cls, serde) =>
(cls.getSimpleName, serde.getIncompatibleReasons(), serde.getUnsupportedReasons())
}),
"struct" -> ("compatibility/expressions/struct.md",
() =>
QueryPlanSerde.structExpressions.toSeq.map { case (cls, serde) =>
(cls.getSimpleName, serde.getIncompatibleReasons(), serde.getUnsupportedReasons())
}),
"aggregate" -> ("compatibility/expressions/aggregate.md",
() =>
QueryPlanSerde.aggrSerdeMap.toSeq.map { case (cls, serde) =>
(cls.getSimpleName, serde.getIncompatibleReasons(), serde.getUnsupportedReasons())
}))

def main(args: Array[String]): Unit = {
val userGuideLocation = args(0)
generateConfigReference(s"$userGuideLocation/configs.md")
generateCompatibilityGuide(s"$userGuideLocation/compatibility/expressions/cast.md")
for ((category, (page, notesFn)) <- categoryPages) {
generateExpressionCompatNotes(s"$userGuideLocation/$page", category, notesFn())
}
}

private def generateConfigReference(filename: String): Unit = {
@@ -121,6 +159,46 @@ object GenerateDocs {
w.close()
}

private def generateExpressionCompatNotes(
    filename: String,
    category: String,
    notes: CategoryNotes): Unit = {
  val beginTag = s"<!--BEGIN:EXPR_COMPAT[$category]-->"
  val endTag = "<!--END:EXPR_COMPAT-->"
  val lines = readFile(filename)
  val w = new BufferedOutputStream(new FileOutputStream(filename))
  // Skip any previously generated lines between the markers so that
  // regeneration is idempotent instead of duplicating earlier output.
  var inGeneratedSection = false
  for (line <- lines) {
    if (line.trim == endTag) {
      inGeneratedSection = false
    }
    if (!inGeneratedSection) {
      w.write(s"${line.stripTrailing()}\n".getBytes)
    }
    if (line.trim == beginTag) {
      writeExpressionCompatNotes(w, notes)
      inGeneratedSection = true
    }
  }
  w.close()
}

private def writeExpressionCompatNotes(w: BufferedOutputStream, notes: CategoryNotes): Unit = {
val sorted = notes.sortBy(_._1).filter { case (_, incompat, unsupported) =>
incompat.nonEmpty || unsupported.nonEmpty
}
for ((name, incompat, unsupported) <- sorted) {
w.write(s"\n### $name\n".getBytes)
if (incompat.nonEmpty) {
w.write(
(s"\nThe following incompatibilities cause `$name` to fall back to Spark by default." +
s" Set `spark.comet.expression.$name.allowIncompatible=true` to enable Comet" +
" acceleration despite these differences.\n\n").getBytes)
for (reason <- incompat) {
w.write(s"- $reason\n".getBytes)
}
}
if (unsupported.nonEmpty) {
w.write("\nThe following cases are not supported by Comet:\n\n".getBytes)
for (reason <- unsupported) {
w.write(s"- $reason\n".getBytes)
}
}
}
}

private def writeCastMatrixForMode(w: BufferedOutputStream, mode: CometEvalMode.Value): Unit = {
val sortedTypes = CometCast.supportedTypes.sortBy(_.typeName)
val typeNames = sortedTypes.map(_.typeName.replace("(10,2)", ""))
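The marker-based regeneration used here can be exercised in isolation. Below is a minimal standalone sketch (function name and markers are illustrative, and this variant also drops any previously generated lines between the markers):

```scala
// Minimal sketch of marker-based regeneration: everything outside the
// BEGIN/END markers is preserved; the section between them is rewritten.
def splice(lines: Seq[String], begin: String, end: String, body: Seq[String]): Seq[String] = {
  val out = Seq.newBuilder[String]
  var inGenerated = false
  for (line <- lines) {
    if (line.trim == end) inGenerated = false
    if (!inGenerated) out += line
    if (line.trim == begin) {
      out ++= body
      inGenerated = true
    }
  }
  out.result()
}

val doc = Seq("# Math Expressions", "<!--BEGIN-->", "stale note", "<!--END-->")
println(splice(doc, "<!--BEGIN-->", "<!--END-->", Seq("- fresh note")))
// List(# Math Expressions, <!--BEGIN-->, - fresh note, <!--END-->)
```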
@@ -39,6 +39,26 @@ trait CometAggregateExpressionSerde[T <: AggregateFunction] {
*/
def getExprConfigName(expr: T): String = expr.getClass.getSimpleName

/**
* Get documentation for usages where this expression may be incompatible with Spark. This is
* called from GenerateDocs when generating the Compatibility Guide. Each reason should be
* written in Markdown and may span multiple lines.
*
* @return
* List of reasons, defaulting to an empty list.
*/
def getIncompatibleReasons(): Seq[String] = Seq.empty

/**
Get documentation for usages where this expression is not supported by Comet and will fall back to Spark. This is called
* from GenerateDocs when generating the Compatibility Guide. Each reason should be written in
* Markdown and may span multiple lines.
*
* @return
* List of reasons, defaulting to an empty list.
*/
def getUnsupportedReasons(): Seq[String] = Seq.empty

/**
* Determine the support level of the expression based on its attributes.
*
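A serde implementation surfaces its notes by overriding these hooks. A minimal standalone sketch (the `DocNotes` trait is a stand-in for the real serde traits, and the reason text is illustrative):

```scala
// Stand-in for the documentation hooks added to the serde traits above.
trait DocNotes {
  def getIncompatibleReasons(): Seq[String] = Seq.empty
  def getUnsupportedReasons(): Seq[String] = Seq.empty
}

// Hypothetical serde implementation: each reason becomes a Markdown bullet
// in the generated compatibility guide.
object CollectSetNotes extends DocNotes {
  override def getIncompatibleReasons(): Seq[String] =
    Seq("Comet deduplicates NaN values while Spark treats each NaN as distinct.")
}

println(CollectSetNotes.getIncompatibleReasons().head)
println(CollectSetNotes.getUnsupportedReasons().isEmpty) // true
```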
@@ -37,6 +37,26 @@ trait CometExpressionSerde[T <: Expression] {
*/
def getExprConfigName(expr: T): String = expr.getClass.getSimpleName

/**
* Get documentation for usages where this expression may be incompatible with Spark. This is
* called from GenerateDocs when generating the Compatibility Guide. Each reason should be
* written in Markdown and may span multiple lines.
*
* @return
* List of reasons, defaulting to an empty list.
*/
def getIncompatibleReasons(): Seq[String] = Seq.empty

/**
Get documentation for usages where this expression is not supported by Comet and will fall back to Spark. This is called
* from GenerateDocs when generating the Compatibility Guide. Each reason should be written in
* Markdown and may span multiple lines.
*
* @return
* List of reasons, defaulting to an empty list.
*/
def getUnsupportedReasons(): Seq[String] = Seq.empty

/**
* Determine the support level of the expression based on its attributes.
*