Skip to content

Commit 7465fe3

Browse files
hsiang-cclaude
andauthored
fix: Fix space() with negative input (#3347)
* fix: handle negative lengths in string_space expression The string_space function previously failed with overflow when given negative input values. Now treats negative lengths as zero, returning empty strings. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * Fix a typo * Call max() once; more comments --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent de633c8 commit 7465fe3

3 files changed

Lines changed: 31 additions & 5 deletions

File tree

native/spark-expr/src/string_funcs/string_space.rs

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,14 @@ fn generic_string_space<OffsetSize: OffsetSizeTrait>(length: &Int32Array) -> Arr
119119
let null_bit_buffer = length.to_data().nulls().map(|b| b.buffer().clone());
120120

121121
// Gets slice of length array to access it directly for performance.
122+
// Negative length values are set to zero to match Spark behavior
122123
let length_data = length.to_data();
123-
let lengths = length_data.buffers()[0].typed_data::<i32>();
124-
let total = lengths.iter().map(|l| *l as usize).sum::<usize>();
124+
let lengths: Vec<_> = length_data.buffers()[0]
125+
.typed_data::<i32>()
126+
.iter()
127+
.map(|l| (*l).max(0) as usize)
128+
.collect();
129+
let total = lengths.iter().sum::<usize>();
125130
let mut values = MutableBuffer::new(total);
126131

127132
offsets.push(length_so_far);
@@ -130,7 +135,7 @@ fn generic_string_space<OffsetSize: OffsetSizeTrait>(length: &Int32Array) -> Arr
130135
values.resize(total, blank);
131136

132137
(0..array_len).for_each(|i| {
133-
let current_len = lengths[i] as usize;
138+
let current_len = lengths[i];
134139

135140
length_so_far += OffsetSize::from_usize(current_len).unwrap();
136141
offsets.push(length_so_far);
@@ -149,3 +154,24 @@ fn generic_string_space<OffsetSize: OffsetSizeTrait>(length: &Int32Array) -> Arr
149154
};
150155
make_array(data)
151156
}
157+
158+
#[cfg(test)]
159+
mod tests {
160+
use super::*;
161+
use arrow::array::StringArray;
162+
use datafusion::common::cast::as_string_array;
163+
164+
#[test]
165+
fn test_negative_length() {
166+
let input = Int32Array::from(vec![Some(-1), Some(-2), None]);
167+
let args = ColumnarValue::Array(Arc::new(input));
168+
match spark_string_space(&[args]) {
169+
Ok(ColumnarValue::Array(result)) => {
170+
let actual = as_string_array(&result).unwrap();
171+
let expected = StringArray::from(vec![Some(""), Some(""), None]);
172+
assert_eq!(actual, &expected)
173+
}
174+
_ => unreachable!(),
175+
}
176+
}
177+
}

native/spark-expr/src/string_funcs/substring.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ impl Display for SubstringExpr {
6161
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
6262
write!(
6363
f,
64-
"StringSpace [start: {}, len: {}, child: {}]",
64+
"Substring [start: {}, len: {}, child: {}]",
6565
self.start, self.len, self.child
6666
)
6767
}

spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ class CometStringExpressionSuite extends CometTestBase {
376376
}
377377

378378
test("string_space") {
379-
withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl") {
379+
withParquetTable((0 until 5).map(i => (-i, i + 1)), "tbl") {
380380
checkSparkAnswerAndOperator("SELECT space(_1), space(_2) FROM tbl")
381381
}
382382
}

0 commit comments

Comments
 (0)