Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src/common_union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,24 @@ use datafusion::common::ScalarValue;
///
/// Attach this to any Arrow `Field` whose values are JSON-encoded strings so
/// downstream consumers can recognize them as JSON rather than opaque text.
///
/// Emits Arrow's canonical JSON extension type keys
/// (`ARROW:extension:name` = `arrow.json`, `ARROW:extension:metadata` = `{}`),
/// see <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>.
///
/// Also emits a legacy `is_json` = `true` key. This key predates this crate's
/// adoption of the canonical extension and is non-standard — no other Arrow
/// tool recognizes it. It is kept only for back-compat with existing
/// downstream consumers of this crate and will be removed in a future
/// release; new consumers should key off `ARROW:extension:name` instead.
#[must_use]
pub fn json_field_metadata() -> HashMap<String, String> {
    HashMap::from([
        ("ARROW:extension:name".to_string(), "arrow.json".to_string()),
        ("ARROW:extension:metadata".to_string(), "{}".to_string()),
        // Legacy, non-standard. Remove in a future release — see doc comment above.
        ("is_json".to_string(), "true".to_string()),
    ])
}

pub fn is_json_union(data_type: &DataType) -> bool {
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ mod json_length;
mod json_object_keys;
mod rewrite;

pub use common_union::{JsonUnionEncoder, JsonUnionValue, JSON_UNION_DATA_TYPE};
pub use common_union::{json_field_metadata, JsonUnionEncoder, JsonUnionValue, JSON_UNION_DATA_TYPE};

pub mod functions {
pub use crate::json_as_text::json_as_text;
Expand Down
28 changes: 14 additions & 14 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use datafusion::common::ScalarValue;
use datafusion::config::ConfigOptions;
use datafusion::logical_expr::{ColumnarValue, ScalarFunctionArgs};
use datafusion::prelude::SessionContext;
use datafusion_functions_json::json_field_metadata;
use datafusion_functions_json::udfs::json_get_str_udf;
use utils::{create_context, display_val, logical_plan, run_query, run_query_params};

Expand Down Expand Up @@ -162,15 +163,15 @@ async fn test_json_get_array_with_path() {
}

#[tokio::test]
async fn test_json_get_array_inner_field_is_json_metadata() {
async fn test_json_get_array_inner_field_json_metadata() {
let sql = r#"select json_get_array('[{"a": 1}, {"b": 2}]') as v"#;
let batches = run_query(sql).await.unwrap();
let schema = batches[0].schema();
let field = schema.field(0);
let DataType::List(inner_field) = field.data_type() else {
panic!("expected List, got {:?}", field.data_type());
};
assert_eq!(inner_field.metadata().get("is_json").map(String::as_str), Some("true"));
assert_json_field_metadata(inner_field.metadata());

let array_field = batches[0]
.column(0)
Expand All @@ -180,10 +181,15 @@ async fn test_json_get_array_inner_field_is_json_metadata() {
let DataType::List(produced_inner) = array_field.data_type() else {
panic!("expected List in produced array");
};
assert_json_field_metadata(produced_inner.metadata());
}

/// Assert that `metadata` carries Arrow's canonical JSON extension keys
/// (`ARROW:extension:name` = `arrow.json`, `ARROW:extension:metadata` = `{}`),
/// as emitted by `json_field_metadata()`.
fn assert_json_field_metadata(metadata: &HashMap<String, String>) {
    assert_eq!(
        metadata.get("ARROW:extension:name").map(String::as_str),
        Some("arrow.json")
    );
    assert_eq!(metadata.get("ARROW:extension:metadata").map(String::as_str), Some("{}"));
}

#[tokio::test]
Expand Down Expand Up @@ -437,12 +443,12 @@ async fn test_json_get_json_float() {
}

#[tokio::test]
async fn test_json_get_json_is_json_metadata() {
async fn test_json_get_json_json_metadata() {
let sql = r#"select json_get_json('{"x": [1, 2]}', 'x') as v"#;
let batches = run_query(sql).await.unwrap();
let schema = batches[0].schema();
let field = schema.field(0);
assert_eq!(field.metadata().get("is_json").map(String::as_str), Some("true"));
assert_json_field_metadata(field.metadata());
}

#[tokio::test]
Expand Down Expand Up @@ -631,10 +637,7 @@ fn test_json_get_utf8() {
Arc::new(Field::new("arg_3", DataType::LargeUtf8, false)),
],
number_rows: 1,
return_field: Arc::new(
Field::new("ret_field", DataType::Utf8, false)
.with_metadata(HashMap::from_iter(vec![("is_json".to_string(), "true".to_string())])),
),
return_field: Arc::new(Field::new("ret_field", DataType::Utf8, false).with_metadata(json_field_metadata())),
config_options: Arc::new(ConfigOptions::default()),
})
.unwrap()
Expand Down Expand Up @@ -665,10 +668,7 @@ fn test_json_get_large_utf8() {
Arc::new(Field::new("arg_3", DataType::LargeUtf8, false)),
],
number_rows: 1,
return_field: Arc::new(
Field::new("ret_field", DataType::Utf8, false)
.with_metadata(HashMap::from_iter(vec![("is_json".to_string(), "true".to_string())])),
),
return_field: Arc::new(Field::new("ret_field", DataType::Utf8, false).with_metadata(json_field_metadata())),
config_options: Arc::new(ConfigOptions::default()),
})
.unwrap()
Expand Down
Loading