diff --git a/src/cast_to_variant.rs b/src/cast_to_variant.rs index dfce849..8513d7e 100644 --- a/src/cast_to_variant.rs +++ b/src/cast_to_variant.rs @@ -191,86 +191,13 @@ impl ScalarUDFImpl for CastToVariantUdf { #[cfg(test)] mod tests { - - use arrow::array::{FixedSizeBinaryBuilder, Int32Array, StringArray, StringViewArray}; - use arrow_schema::Fields; - use parquet_variant::Variant; - use parquet_variant_compute::{VariantArray, VariantType}; - - use crate::shared::{build_variant_array_from_json, build_variant_array_from_json_array}; - use super::*; - - #[test] - fn test_scalar_float64() { - let udf = CastToVariantUdf::default(); - - let arg_field = Arc::new(Field::new("input", DataType::Float64, true)); - let return_field = Arc::new(Field::new( - "res", - udf.return_type(&[DataType::Float64]).unwrap(), - true, - )); - - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(3.25)))], - return_field, - arg_fields: vec![arg_field], - number_rows: Default::default(), - config_options: Default::default(), - }; - - let res = udf.invoke_with_args(args).unwrap(); - - let ColumnarValue::Scalar(ScalarValue::Struct(variant_array)) = res else { - panic!("expected struct scalar") - }; - - let variant_array = VariantArray::try_new(variant_array.as_ref()).unwrap(); - - assert_eq!(variant_array.value(0), Variant::Double(3.25)); - } - - #[test] - fn test_array_int32() { - let udf = CastToVariantUdf::default(); - - let arg_field = Arc::new(Field::new("input", DataType::Int32, true)); - let return_field = Arc::new(Field::new( - "res", - udf.return_type(&[DataType::Int32]).unwrap(), - true, - )); - - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Array(Arc::new(Int32Array::from(vec![ - Some(1), - None, - Some(-5), - ])) as ArrayRef)], - return_field, - arg_fields: vec![arg_field], - number_rows: Default::default(), - config_options: Default::default(), - }; - - let res = udf.invoke_with_args(args).unwrap(); - - let 
ColumnarValue::Array(arr) = res else { - panic!("expected array output") - }; - - let variant_array = VariantArray::try_new(arr.as_ref()).unwrap(); - - assert_eq!(variant_array.value(0), Variant::Int32(1)); - assert!(variant_array.is_null(1)); - assert_eq!(variant_array.value(2), Variant::Int32(-5)); - } + use arrow_schema::Fields; + use parquet_variant_compute::VariantType; #[test] fn test_return_field_extension_type() { let udf = CastToVariantUdf::default(); - let arg_field = Arc::new(Field::new("input", DataType::Utf8, true)); let return_field = udf @@ -289,217 +216,4 @@ mod tests { ])) ); } - - #[test] - fn test_scalar_binary_views() { - let expected_variant_array = build_variant_array_from_json(&serde_json::json!({ - "name": "norm", - })); - - let (input_metadata, input_value) = { - let metadata = expected_variant_array.metadata_field().value(0); - let value = expected_variant_array.value_field().unwrap().value(0); - - (metadata, value) - }; - - let udf = CastToVariantUdf::default(); - - let metadata_field = Arc::new(Field::new("metadata", DataType::BinaryView, true)); - let variant_field = Arc::new(Field::new("value", DataType::BinaryView, true)); - - let return_field = Arc::new(Field::new( - "res", - udf.return_type(&[DataType::BinaryView, DataType::BinaryView]) - .unwrap(), - true, - )); - - let args = ScalarFunctionArgs { - args: vec![ - ColumnarValue::Scalar(ScalarValue::BinaryView(Some(input_metadata.to_vec()))), - ColumnarValue::Scalar(ScalarValue::BinaryView(Some(input_value.to_vec()))), - ], - return_field, - arg_fields: vec![metadata_field, variant_field], - number_rows: Default::default(), - config_options: Default::default(), - }; - - let res = udf.invoke_with_args(args).unwrap(); - - let ColumnarValue::Scalar(ScalarValue::Struct(variant_array)) = res else { - panic!("expected scalar value struct array") - }; - - let variant_array = VariantArray::try_new(variant_array.as_ref()).unwrap(); - - assert_eq!(&variant_array, &expected_variant_array); - } 
- - #[test] - fn test_array_string() { - let udf = CastToVariantUdf::default(); - - let arg_field = Arc::new(Field::new("input", DataType::Utf8, true)); - let return_field = Arc::new(Field::new( - "res", - udf.return_type(&[DataType::Utf8]).unwrap(), - true, - )); - - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![ - Some("abcdefghijklmnop"), - None, - Some("hello world"), - ])) as ArrayRef)], - return_field, - arg_fields: vec![arg_field], - number_rows: Default::default(), - config_options: Default::default(), - }; - - let res = udf.invoke_with_args(args).unwrap(); - - let ColumnarValue::Array(arr) = res else { - panic!("expected array output") - }; - - let variant_array = VariantArray::try_new(arr.as_ref()).unwrap(); - - assert_eq!(variant_array.value(0), Variant::from("abcdefghijklmnop")); - assert!(variant_array.is_null(1)); - assert_eq!(variant_array.value(2), Variant::from("hello world")); - } - - #[test] - fn test_fixed_size_binary_uuid_like() { - let udf = CastToVariantUdf::default(); - - let arg_field = Arc::new(Field::new("input", DataType::FixedSizeBinary(16), true)); - let return_field = Arc::new(Field::new( - "res", - udf.return_type(&[DataType::FixedSizeBinary(16)]).unwrap(), - true, - )); - - let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 16); - builder.append_value([1u8; 16]).unwrap(); - builder.append_null(); - builder.append_value([2u8; 16]).unwrap(); - let array = builder.finish(); - - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Array(Arc::new(array) as ArrayRef)], - return_field, - arg_fields: vec![arg_field], - number_rows: Default::default(), - config_options: Default::default(), - }; - - let res = udf.invoke_with_args(args).unwrap(); - - let ColumnarValue::Array(arr) = res else { - panic!("expected array output") - }; - - let variant_array = VariantArray::try_new(arr.as_ref()).unwrap(); - - assert_eq!(variant_array.value(0), Variant::Binary(&[1u8; 16])); - 
assert!(variant_array.is_null(1)); - assert_eq!(variant_array.value(2), Variant::Binary(&[2u8; 16])); - } - - #[test] - fn test_array_binary_views() { - let expected_variant_array = build_variant_array_from_json_array(&[ - Some(serde_json::json!({ - "name": "norm", - })), - None, - None, - Some(serde_json::json!({ - "id": 1, - "parent_id": 0, - "child_ids": [2, 3, 4, 5] - })), - ]); - - let (input_metadata_array, input_value_array) = { - let metadata = expected_variant_array.metadata_field().clone(); - let value = expected_variant_array.value_field().unwrap().clone(); - - (metadata, value) - }; - - let udf = CastToVariantUdf::default(); - - let metadata_field = Arc::new(Field::new("metadata", DataType::BinaryView, true)); - let variant_field = Arc::new(Field::new("value", DataType::BinaryView, true)); - - let return_field = Arc::new(Field::new( - "res", - udf.return_type(&[DataType::BinaryView, DataType::BinaryView]) - .unwrap(), - true, - )); - - let args = ScalarFunctionArgs { - args: vec![ - ColumnarValue::Array(Arc::new(input_metadata_array) as ArrayRef), - ColumnarValue::Array(Arc::new(input_value_array) as ArrayRef), - ], - return_field, - arg_fields: vec![metadata_field, variant_field], - number_rows: Default::default(), - config_options: Default::default(), - }; - - let res = udf.invoke_with_args(args).unwrap(); - - let ColumnarValue::Array(variant_array) = res else { - panic!("expected scalar value struct array") - }; - - let variant_array = VariantArray::try_new(variant_array.as_ref()).unwrap(); - - assert_eq!(&variant_array, &expected_variant_array); - } - - #[test] - fn test_array_string_view() { - let udf = CastToVariantUdf::default(); - - let arg_field = Arc::new(Field::new("input", DataType::Utf8View, true)); - let return_field = Arc::new(Field::new( - "res", - udf.return_type(&[DataType::Utf8View]).unwrap(), - true, - )); - - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Array(Arc::new(StringViewArray::from(vec![ - Some("short"), - 
None, - Some("another"), - ])) as ArrayRef)], - return_field, - arg_fields: vec![arg_field], - number_rows: Default::default(), - config_options: Default::default(), - }; - - let res = udf.invoke_with_args(args).unwrap(); - - let ColumnarValue::Array(arr) = res else { - panic!("expected array output") - }; - - let variant_array = VariantArray::try_new(arr.as_ref()).unwrap(); - - assert_eq!(variant_array.value(0), Variant::from("short")); - assert!(variant_array.is_null(1)); - assert_eq!(variant_array.value(2), Variant::from("another")); - } } diff --git a/tests/sqllogictests.rs b/tests/sqllogictests.rs index 0e7136c..d10cbdd 100644 --- a/tests/sqllogictests.rs +++ b/tests/sqllogictests.rs @@ -30,7 +30,7 @@ async fn run_sqllogictests() -> Result<(), Box> { test_files.sort(); for test_file in test_files { - println!("Running test file: {:?}", test_file); + println!("Running test file: {test_file:?}"); let relative_path = test_file .strip_prefix(&test_files_dir) diff --git a/tests/test_files/cast_to_variant.slt b/tests/test_files/cast_to_variant.slt new file mode 100644 index 0000000..29e6f95 --- /dev/null +++ b/tests/test_files/cast_to_variant.slt @@ -0,0 +1,104 @@ +# tests the cast_to_variant udf +# this function takes a scalar or array value (or an existing Variant) and converts it into the corresponding Variant value +# you can also use metadata and value to create a Variant + +# one-arg path (from_scalar_value) +# test_scalar_float64 +query T +SELECT variant_pretty(cast_to_variant(3.25)); +---- +Double(3.25) + +# one-arg path (from_array) +# test_array_int32 +statement ok +CREATE TABLE cast_int_data (id INT, i INT) AS VALUES + (1, 1), + (2, NULL), + (3, -5); + +query IT +SELECT id, variant_pretty(cast_to_variant(i)) FROM cast_int_data ORDER BY id; +---- +1 Int32(1) +2 NULL +3 Int32(-5) + +# test_array_string +statement ok +CREATE TABLE cast_string_data (id INT, s TEXT) AS VALUES + (1, 'abcdefghijklmnop'), + (2, NULL), + (3, 'hello world'); + +query IT +SELECT id, 
variant_pretty(cast_to_variant(s)) FROM cast_string_data ORDER BY id; +---- +1 ShortString(ShortString("abcdefghijklmnop")) +2 NULL +3 ShortString(ShortString("hello world")) + +# test_array_string_view +query IT +SELECT id, variant_pretty(cast_to_variant(arrow_cast(s, 'Utf8View'))) FROM cast_string_data ORDER BY id; +---- +1 ShortString(ShortString("abcdefghijklmnop")) +2 NULL +3 ShortString(ShortString("hello world")) + +# test_fixed_size_binary_uuid_like +query IT +SELECT id, variant_pretty(cast_to_variant(b)) +FROM ( + SELECT + 1 AS id, + arrow_cast(X'01010101010101010101010101010101', 'FixedSizeBinary(16)') AS b + UNION ALL + SELECT + 2 AS id, + arrow_cast(NULL, 'FixedSizeBinary(16)') AS b + UNION ALL + SELECT + 3 AS id, + arrow_cast(X'02020202020202020202020202020202', 'FixedSizeBinary(16)') AS b +) t +ORDER BY id; +---- +1 Binary(01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01) +2 NULL +3 Binary(02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02) + +# one-arg path (already Variant): pass-through should be unchanged +query T +SELECT variant_pretty(cast_to_variant(json_to_variant('{"name": "norm"}'))); +---- +{"name": ShortString(ShortString("norm"))} + +# two-arg path (from_metadata_value) +# test_scalar_binary_views +query T +WITH input AS ( + SELECT json_to_variant('{"name": "norm"}') AS v +) +SELECT variant_pretty(cast_to_variant(v.metadata, v.value)) FROM input; +---- +{"name": ShortString(ShortString("norm"))} + +# test_array_binary_views +statement ok +CREATE TABLE cast_json_data (id INT, json_str TEXT) AS VALUES + (1, '{"name": "norm"}'), + (2, NULL), + (3, NULL), + (4, '{"id": 1, "parent_id": 0, "child_ids": [2, 3, 4, 5]}'); + +query IT +WITH input AS ( + SELECT id, json_to_variant(json_str) AS v FROM cast_json_data +) +SELECT id, variant_to_json(cast_to_variant(v.metadata, v.value)) FROM input ORDER BY id; +---- +1 {"name":"norm"} +2 NULL +3 NULL +4 {"child_ids":[2,3,4,5],"id":1,"parent_id":0}