1515-- specific language governing permissions and limitations
1616-- under the License.
1717
-- Base fixture: two array<int> columns. The five rows cover overlapping
-- values, an empty array on each side, a NULL left-hand array, and NULL
-- elements present on both sides.
statement
CREATE TABLE test_array_union (a array<int>, b array<int>) USING parquet

statement
INSERT INTO test_array_union VALUES (array(1, 2, 3), array(3, 4, 5)), (array(1, 2), array()), (array(), array(1)), (NULL, array(1)), (array(1, NULL), array(NULL, 2))
2523
-- column + column: both operands are read from test_array_union
query
SELECT array_union(a, b) FROM test_array_union
2826
-- column + literal: the constant right-hand array shares the value 3 with
-- the first row of column a
query
SELECT array_union(a, array(3, 4, 5)) FROM test_array_union
3230
3331-- literal + column
@@ -37,3 +35,156 @@ SELECT array_union(array(1, 2, 3), b) FROM test_array_union
-- literal + literal: no table is referenced; the four expressions repeat the
-- overlapping, empty-right, empty-left, and NULL-left cases with constants
query
SELECT array_union(array(1, 2, 3), array(3, 4, 5)), array_union(array(1, 2), array()), array_union(array(), array(1)), array_union(cast(NULL as array<int>), array(1))
38+
-- NULL element deduplication (NULLs treated as values, kept once in result)
-- Rows place NULL in the middle, alone, repeated within one array, last,
-- first, and on one side only.
statement
CREATE TABLE test_union_nulls (a array<int>, b array<int>) USING parquet

statement
INSERT INTO test_union_nulls VALUES (array(1, NULL, 3), array(4, NULL, 5)), (array(NULL), array(NULL)), (array(NULL, NULL), array(NULL, NULL)), (array(1, NULL), array(2, NULL)), (array(NULL, 2), array(1, NULL)), (array(1, NULL, 3), array(1, 5))

-- inputs are projected next to the result so each output row is self-describing
query
SELECT a, b, array_union(a, b) FROM test_union_nulls
48+
-- empty array combinations
-- All operands are constants; test_union_nulls only supplies the rows the
-- expression is evaluated over.

-- empty + empty
query
SELECT array_union(array(), array()) FROM test_union_nulls

-- empty + non-empty
query
SELECT array_union(array(), array(1, 2)) FROM test_union_nulls

-- non-empty + empty
query
SELECT array_union(array(1, 2), array()) FROM test_union_nulls

-- empty + array holding a single NULL element
query
SELECT array_union(array(), array(NULL)) FROM test_union_nulls
61+
-- both-NULL arrays: each operand is a NULL of type array<int>, not an empty
-- array and not an array containing NULL
query
SELECT array_union(cast(NULL as array<int>), cast(NULL as array<int>)) FROM test_union_nulls
65+
-- self-union (deduplication): the same column is passed as both operands
query
SELECT a, array_union(a, a) FROM test_union_nulls
69+
-- duplicate elements within and across arrays
-- Rows cover repeats within each side only, repeats that also overlap across
-- sides, interleaved repeats (disjoint and overlapping), and identical arrays.
statement
CREATE TABLE test_union_dups (a array<int>, b array<int>) USING parquet

statement
INSERT INTO test_union_dups VALUES (array(1, 1, 1), array(2, 2, 2)), (array(1, 1, 1), array(1, 2, 2)), (array(1, 2, 1, 2), array(3, 4, 3, 4)), (array(1, 2, 1, 2), array(2, 3, 2, 3)), (array(1, 2, 3), array(1, 2, 3))

query
SELECT a, b, array_union(a, b) FROM test_union_dups
79+
-- single element arrays

-- identical single elements
query
SELECT array_union(array(1), array(1)) FROM test_union_dups

-- distinct single elements
query
SELECT array_union(array(1), array(2)) FROM test_union_dups
86+
-- string arrays
-- Rows cover overlapping and disjoint values, NULL elements on both sides and
-- on one side, a NULL left-hand array, and the empty string '' both alone and
-- next to a NULL element.
statement
CREATE TABLE test_union_str (a array<string>, b array<string>) USING parquet

statement
INSERT INTO test_union_str VALUES (array('a', 'b', 'c'), array('c', 'd')), (array('a', 'b'), array('c', 'd')), (array('a', NULL), array('b', NULL)), (array('a', NULL), array('a', 'b')), (NULL, array('a')), (array(''), array('')), (array('', NULL), array('x'))

query
SELECT a, b, array_union(a, b) FROM test_union_str
96+
-- empty string handling: '' appears in both operands and must be a zero-length
-- string (not a single space) for this case to test what it names
query
SELECT array_union(array('', 'a'), array('', 'b')) FROM test_union_str
100+
-- double arrays with special values
-- Rows cover ordinary overlap, NaN on both sides, Infinity on both sides,
-- -Infinity against Infinity, 0.0 against -0.0, and NULL elements on both sides.
statement
CREATE TABLE test_union_dbl (a array<double>, b array<double>) USING parquet

statement
INSERT INTO test_union_dbl VALUES (array(1.0, 2.0), array(2.0, 3.0)), (array(1.0, double('NaN')), array(double('NaN'), 2.0)), (array(double('Infinity'), 1.0), array(double('Infinity'))), (array(double('-Infinity')), array(double('Infinity'))), (array(0.0), array(-0.0)), (array(1.0, NULL), array(2.0, NULL))

query
SELECT a, b, array_union(a, b) FROM test_union_dbl
110+
-- boolean arrays
-- Operands are constants; test_union_dbl only supplies the rows to evaluate over.

-- overlapping on false
query
SELECT array_union(array(true, false), array(false)) FROM test_union_dbl

-- disjoint
query
SELECT array_union(array(true), array(false)) FROM test_union_dbl

-- NULL element on both sides
query
SELECT array_union(array(true, NULL), array(false, NULL)) FROM test_union_dbl
120+
-- bigint arrays
-- Uses 9223372036854775807 and -9223372036854775808 as the shared values in
-- the first two rows, plus a small disjoint pair.
statement
CREATE TABLE test_union_long (a array<bigint>, b array<bigint>) USING parquet

statement
INSERT INTO test_union_long VALUES (array(9223372036854775807, 1), array(9223372036854775807)), (array(-9223372036854775808), array(-9223372036854775808)), (array(0), array(1))

query
SELECT a, b, array_union(a, b) FROM test_union_long
130+
-- decimal arrays
-- decimal(10,2) elements: one overlapping row, one disjoint row, and one row
-- with a NULL element on both sides.
statement
CREATE TABLE test_union_dec (a array<decimal(10,2)>, b array<decimal(10,2)>) USING parquet

statement
INSERT INTO test_union_dec VALUES (array(1.00, 2.50), array(2.50, 3.00)), (array(1.00, 2.00), array(3.00, 4.00)), (array(1.10, NULL), array(2.20, NULL))

query
SELECT a, b, array_union(a, b) FROM test_union_dec
140+
-- date arrays
-- One overlapping row, one disjoint row, and one row with a NULL element on
-- the left side only.
statement
CREATE TABLE test_union_date (a array<date>, b array<date>) USING parquet

statement
INSERT INTO test_union_date VALUES (array(date '2024-01-01', date '2024-06-15'), array(date '2024-06-15', date '2024-12-31')), (array(date '2024-01-01'), array(date '2024-12-31')), (array(date '2024-01-01', NULL), array(date '2024-12-31'))

query
SELECT a, b, array_union(a, b) FROM test_union_date
150+
-- timestamp arrays
-- One row where the right side repeats a left-side timestamp, and one
-- disjoint row.
statement
CREATE TABLE test_union_ts (a array<timestamp>, b array<timestamp>) USING parquet

statement
INSERT INTO test_union_ts VALUES (array(timestamp '2024-01-01 00:00:00', timestamp '2024-06-15 12:00:00'), array(timestamp '2024-06-15 12:00:00')), (array(timestamp '2024-01-01 00:00:00'), array(timestamp '2024-12-31 23:59:59'))

query
SELECT a, b, array_union(a, b) FROM test_union_ts
160+
-- nested arrays
-- Elements are themselves array<int>. Rows cover a shared inner array,
-- disjoint inner arrays, a NULL inner array on both sides, and identical
-- inner arrays that contain a NULL element.
statement
CREATE TABLE test_union_nested (a array<array<int>>, b array<array<int>>) USING parquet

statement
INSERT INTO test_union_nested VALUES (array(array(1, 2), array(3, 4)), array(array(3, 4), array(5, 6))), (array(array(1, 2)), array(array(3, 4))), (array(array(1, 2), cast(NULL as array<int>)), array(array(3, 4), cast(NULL as array<int>))), (array(array(1, NULL)), array(array(1, NULL)))

query
SELECT a, b, array_union(a, b) FROM test_union_nested
170+
-- struct element arrays
-- Elements are struct<x:int, y:int>. Rows cover identical structs, different
-- structs, identical structs with a NULL field, and a NULL struct on both sides.
statement
CREATE TABLE test_union_struct (a array<struct<x:int, y:int>>, b array<struct<x:int, y:int>>) USING parquet

statement
INSERT INTO test_union_struct VALUES (array(named_struct('x', 1, 'y', 2)), array(named_struct('x', 1, 'y', 2))), (array(named_struct('x', 1, 'y', 2)), array(named_struct('x', 3, 'y', 4))), (array(named_struct('x', 1, 'y', cast(NULL as int))), array(named_struct('x', 1, 'y', cast(NULL as int)))), (array(cast(NULL as struct<x:int, y:int>)), array(cast(NULL as struct<x:int, y:int>)))

query
SELECT a, b, array_union(a, b) FROM test_union_struct
180+
-- mixed column and literal with NULL elements

-- column on the left, constant array ending in NULL on the right
query
SELECT array_union(a, array(99, NULL)) FROM test_array_union

-- constant array starting with NULL on the left, column on the right
query
SELECT array_union(array(NULL, 99), b) FROM test_array_union
187+
-- conditional (CASE WHEN) arrays: the left operand is a computed expression
-- that substitutes array(0) for the row where column a is NULL
query
SELECT array_union(CASE WHEN a IS NOT NULL THEN a ELSE array(0) END, b) FROM test_array_union
0 commit comments