Skip to content

Commit f172b7c

Browse files
authored
test: Add SQL file tests for left and right expressions (#3463)
1 parent 1d49074 commit f172b7c

3 files changed

Lines changed: 123 additions & 117 deletions

File tree

spark/src/test/resources/sql-tests/expressions/string/left.sql

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,37 @@ SELECT left(s, n) FROM test_str_left
3030
query
3131
SELECT left(s, 3) FROM test_str_left
3232

33+
-- column + literal: edge cases
34+
query
35+
SELECT left(s, 0) FROM test_str_left
36+
37+
query
38+
SELECT left(s, -1) FROM test_str_left
39+
40+
query
41+
-- n exceeds length of 'hello' (5 chars)
42+
SELECT left(s, 10) FROM test_str_left
43+
3344
-- literal + column
3445
query expect_fallback(Substring pos and len must be literals)
3546
SELECT left('hello', n) FROM test_str_left
3647

3748
-- literal + literal
3849
query ignore(https://github.com/apache/datafusion-comet/issues/3337)
3950
SELECT left('hello', 3), left('hello', 0), left('hello', -1), left('', 3), left(NULL, 3)
51+
52+
-- unicode
53+
statement
54+
CREATE TABLE test_str_left_unicode(s string) USING parquet
55+
56+
statement
57+
INSERT INTO test_str_left_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL)
58+
59+
query
60+
SELECT s, left(s, 2) FROM test_str_left_unicode
61+
62+
query
63+
SELECT s, left(s, 4) FROM test_str_left_unicode
64+
65+
query
66+
SELECT s, left(s, 0) FROM test_str_left_unicode
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- Note: Right is a RuntimeReplaceable expression. Spark replaces it with
19+
-- If(IsNull(str), null, If(len <= 0, "", Substring(str, -len, len)))
20+
-- before Comet sees it. CometRight handles the serde, but the optimizer
21+
-- may replace it first, so we use spark_answer_only to verify correctness.
22+
23+
-- ConfigMatrix: parquet.enable.dictionary=false,true
24+
25+
statement
26+
CREATE TABLE test_str_right(s string, n int) USING parquet
27+
28+
statement
29+
INSERT INTO test_str_right VALUES ('hello', 3), ('hello', 0), ('hello', -1), ('hello', 10), ('', 3), (NULL, 3), ('hello', NULL)
30+
31+
-- both columns: len must be a literal, so this falls back to Spark
32+
query spark_answer_only
33+
SELECT right(s, n) FROM test_str_right
34+
35+
-- column + literal: basic
36+
query spark_answer_only
37+
SELECT right(s, 3) FROM test_str_right
38+
39+
-- column + literal: edge cases
40+
query spark_answer_only
41+
SELECT right(s, 0) FROM test_str_right
42+
43+
query spark_answer_only
44+
SELECT right(s, -1) FROM test_str_right
45+
46+
query spark_answer_only
47+
-- n exceeds length of 'hello' (5 chars)
48+
SELECT right(s, 10) FROM test_str_right
49+
50+
-- literal + column: len is not a literal, so this falls back to Spark
51+
query spark_answer_only
52+
SELECT right('hello', n) FROM test_str_right
53+
54+
-- literal + literal
55+
query spark_answer_only
56+
SELECT right('hello', 3), right('hello', 0), right('hello', -1), right('', 3), right(NULL, 3)
57+
58+
-- null propagation with len <= 0 (critical: NULL str with non-positive len must return NULL, not empty string)
59+
query spark_answer_only
60+
SELECT right(CAST(NULL AS STRING), 0), right(CAST(NULL AS STRING), -1), right(CAST(NULL AS STRING), 2)
61+
62+
-- mixed null and non-null values with len <= 0
63+
statement
64+
CREATE TABLE test_str_right_nulls(s string) USING parquet
65+
66+
statement
67+
INSERT INTO test_str_right_nulls VALUES ('hello'), (NULL), (''), ('world')
68+
69+
query spark_answer_only
70+
SELECT s, right(s, 0) FROM test_str_right_nulls
71+
72+
query spark_answer_only
73+
SELECT s, right(s, -1) FROM test_str_right_nulls
74+
75+
query spark_answer_only
76+
SELECT s, right(s, 2) FROM test_str_right_nulls
77+
78+
-- equivalence with substring
79+
query spark_answer_only
80+
SELECT s, right(s, 3), substring(s, -3, 3) FROM test_str_right_nulls
81+
82+
-- unicode
83+
statement
84+
CREATE TABLE test_str_right_unicode(s string) USING parquet
85+
86+
statement
87+
INSERT INTO test_str_right_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL)
88+
89+
query spark_answer_only
90+
SELECT s, right(s, 2) FROM test_str_right_unicode
91+
92+
query spark_answer_only
93+
SELECT s, right(s, 4) FROM test_str_right_unicode
94+
95+
query spark_answer_only
96+
SELECT s, right(s, 0) FROM test_str_right_unicode

spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala

Lines changed: 0 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -523,123 +523,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
523523
}
524524
}
525525

526-
test("LEFT function") {
527-
withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") {
528-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM tbl")
529-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 4) FROM tbl")
530-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM tbl")
531-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, -1) FROM tbl")
532-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 100) FROM tbl")
533-
checkSparkAnswerAndOperator("SELECT LEFT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1")
534-
}
535-
}
536-
537-
test("LEFT function with unicode") {
538-
val data = Seq("café", "hello世界", "😀emoji", "తెలుగు")
539-
withParquetTable(data.zipWithIndex, "unicode_tbl") {
540-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM unicode_tbl")
541-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM unicode_tbl")
542-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM unicode_tbl")
543-
}
544-
}
545-
546-
test("LEFT function equivalence with SUBSTRING") {
547-
withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") {
548-
val df = spark.sql("""
549-
SELECT _1,
550-
LEFT(_1, 3) as left_result,
551-
SUBSTRING(_1, 1, 3) as substring_result
552-
FROM equiv_tbl
553-
""")
554-
checkAnswer(
555-
df.filter(
556-
"left_result != substring_result OR " +
557-
"(left_result IS NULL AND substring_result IS NOT NULL) OR " +
558-
"(left_result IS NOT NULL AND substring_result IS NULL)"),
559-
Seq.empty)
560-
}
561-
}
562-
563-
test("LEFT function with dictionary") {
564-
val data = (0 until 1000)
565-
.map(_ % 5)
566-
.map(i => s"value$i")
567-
withParquetTable(data.zipWithIndex, "dict_tbl") {
568-
checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM dict_tbl")
569-
}
570-
}
571-
572-
test("RIGHT function") {
573-
withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") {
574-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM tbl")
575-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 4) FROM tbl")
576-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM tbl")
577-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM tbl")
578-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 100) FROM tbl")
579-
checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1")
580-
}
581-
}
582-
583-
test("RIGHT function with unicode") {
584-
val data = Seq("café", "hello世界", "😀emoji", "తెలుగు")
585-
withParquetTable(data.zipWithIndex, "unicode_tbl") {
586-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM unicode_tbl")
587-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM unicode_tbl")
588-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM unicode_tbl")
589-
}
590-
}
591-
592-
test("RIGHT function equivalence with SUBSTRING negative pos") {
593-
withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") {
594-
val df = spark.sql("""
595-
SELECT _1,
596-
RIGHT(_1, 3) as right_result,
597-
SUBSTRING(_1, -3, 3) as substring_result
598-
FROM equiv_tbl
599-
""")
600-
checkAnswer(
601-
df.filter(
602-
"right_result != substring_result OR " +
603-
"(right_result IS NULL AND substring_result IS NOT NULL) OR " +
604-
"(right_result IS NOT NULL AND substring_result IS NULL)"),
605-
Seq.empty)
606-
}
607-
}
608-
609-
test("RIGHT function with dictionary") {
610-
val data = (0 until 1000)
611-
.map(_ % 5)
612-
.map(i => s"value$i")
613-
withParquetTable(data.zipWithIndex, "dict_tbl") {
614-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM dict_tbl")
615-
}
616-
}
617-
618-
test("RIGHT function NULL handling") {
619-
// Test NULL propagation with len = 0 (critical edge case)
620-
withParquetTable((0 until 5).map(i => (s"test$i", i)), "null_tbl") {
621-
checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 0) FROM null_tbl LIMIT 1")
622-
checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -1) FROM null_tbl LIMIT 1")
623-
checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -5) FROM null_tbl LIMIT 1")
624-
}
625-
626-
// Test non-NULL strings with len <= 0 (should return empty string)
627-
withParquetTable((0 until 5).map(i => (s"test$i", i)), "edge_tbl") {
628-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM edge_tbl")
629-
checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM edge_tbl")
630-
}
631-
632-
// Test mixed NULL and non-NULL values with a table
633-
val table = "right_null_edge"
634-
withTable(table) {
635-
sql(s"create table $table(str string) using parquet")
636-
sql(s"insert into $table values('hello'), (NULL), (''), ('world')")
637-
checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 0) FROM $table")
638-
checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, -1) FROM $table")
639-
checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 2) FROM $table")
640-
}
641-
}
642-
643526
test("hour, minute, second") {
644527
Seq(true, false).foreach { dictionaryEnabled =>
645528
withTempDir { dir =>

0 commit comments

Comments
 (0)