@@ -24,13 +24,13 @@ import org.apache.spark.sql.catalyst.expressions.json.StructsToJsonEvaluator
2424import org .apache .spark .sql .catalyst .expressions .objects .{Invoke , StaticInvoke }
2525import org .apache .spark .sql .internal .SQLConf
2626import org .apache .spark .sql .internal .types .StringTypeWithCollation
27- import org .apache .spark .sql .types .{BinaryType , BooleanType , DataTypes , StringType }
27+ import org .apache .spark .sql .types .{ArrayType , BinaryType , BooleanType , DataTypes , StringType }
2828
2929import org .apache .comet .CometSparkSessionExtensions .withInfo
3030import org .apache .comet .expressions .{CometCast , CometEvalMode }
3131import org .apache .comet .serde .{CommonStringExprs , Compatible , ExprOuterClass , Incompatible }
3232import org .apache .comet .serde .ExprOuterClass .{BinaryOutputStyle , Expr }
33- import org .apache .comet .serde .QueryPlanSerde .{exprToProtoInternal , optExprWithInfo , scalarFunctionExprToProto }
33+ import org .apache .comet .serde .QueryPlanSerde .{exprToProtoInternal , optExprWithInfo , scalarFunctionExprToProto , scalarFunctionExprToProtoWithReturnType }
3434
3535/**
3636 * `CometExprShim` acts as a shim for parsing expressions from different Spark versions.
@@ -56,6 +56,28 @@ trait CometExprShim extends CommonStringExprs {
5656 inputs : Seq [Attribute ],
5757 binding : Boolean ): Option [Expr ] = {
5858 expr match {
59+ case knc : KnownNotContainsNull =>
60+ // On Spark 4.0, array_compact rewrites to KnownNotContainsNull(ArrayFilter(IsNotNull)).
61+ // Strip the wrapper and serialize the inner ArrayFilter as spark_array_compact.
62+ knc.child match {
63+ case filter : ArrayFilter =>
64+ filter.function.children.headOption match {
65+ case Some (_ : IsNotNull ) =>
66+ val arrayChild = filter.left
67+ val elementType = arrayChild.dataType.asInstanceOf [ArrayType ].elementType
68+ val arrayExprProto = exprToProtoInternal(arrayChild, inputs, binding)
69+ val returnType = ArrayType (elementType)
70+ val scalarExpr = scalarFunctionExprToProtoWithReturnType(
71+ " spark_array_compact" ,
72+ returnType,
73+ false ,
74+ arrayExprProto)
75+ optExprWithInfo(scalarExpr, knc, arrayChild)
76+ case _ => exprToProtoInternal(knc.child, inputs, binding)
77+ }
78+ case _ => exprToProtoInternal(knc.child, inputs, binding)
79+ }
80+
5981 case s : StaticInvoke
6082 if s.staticObject == classOf [StringDecode ] &&
6183 s.dataType.isInstanceOf [StringType ] &&
@@ -109,12 +131,6 @@ trait CometExprShim extends CommonStringExprs {
109131 val optExpr = scalarFunctionExprToProto(" width_bucket" , childExprs : _* )
110132 optExprWithInfo(optExpr, wb, wb.children: _* )
111133
112- // KnownNotContainsNull is a TaggingExpression added in Spark 4.0 that only
113- // changes schema metadata (containsNull = false). It has no runtime effect,
114- // so we pass through to the child expression.
115- case k : KnownNotContainsNull =>
116- exprToProtoInternal(k.child, inputs, binding)
117-
118134 // In Spark 4.0, StructsToJson is a RuntimeReplaceable whose replacement is
119135 // Invoke(Literal(StructsToJsonEvaluator), "evaluate", ...). Reconstruct the
120136 // original StructsToJson and recurse so support-level checks apply.
0 commit comments