Skip to content

Commit ad838ad

Browse files
authored
feat: add native support for get_json_object expression (#3747)
1 parent a341a57 commit ad838ad

11 files changed

Lines changed: 901 additions & 3 deletions

File tree

docs/source/user-guide/latest/expressions.md

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,12 @@ Expressions that are not Spark-compatible will fall back to Spark by default and
9090
| Substring | Yes | |
9191
| Upper | No | Results can vary depending on locale and character set. Requires `spark.comet.caseConversion.enabled=true` |
9292

93+
## JSON Functions
94+
95+
| Expression | Spark-Compatible? | Compatibility Notes |
96+
| ------------- | ----------------- | --------------------------------------------------------------------------------------------- |
97+
| GetJsonObject | No | Spark allows single-quoted JSON and unescaped control characters, which Comet does not support |
98+
9399
## Date/Time Functions
94100

95101
| Expression | SQL | Spark-Compatible? | Compatibility Notes |

docs/spark_expressions_support.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@
245245
### json_funcs
246246

247247
- [ ] from_json
248-
- [ ] get_json_object
248+
- [x] get_json_object
249249
- [ ] json_array_length
250250
- [ ] json_object_keys
251251
- [ ] json_tuple

native/Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

native/spark-expr/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ datafusion = { workspace = true }
3333
chrono-tz = { workspace = true }
3434
num = { workspace = true }
3535
regex = { workspace = true }
36-
serde_json = "1.0"
36+
# preserve_order: needed for get_json_object to match Spark's JSON key ordering
37+
serde_json = { version = "1.0", features = ["preserve_order"] }
3738
datafusion-comet-common = { workspace = true }
3839
futures = { workspace = true }
3940
twox-hash = "2.1.2"

native/spark-expr/src/comet_scalar_funcs.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,10 @@ pub fn create_comet_physical_fun_with_eval_mode(
186186
let func = Arc::new(crate::string_funcs::spark_split);
187187
make_comet_scalar_udf!("split", func, without data_type)
188188
}
189+
"get_json_object" => {
190+
let func = Arc::new(crate::string_funcs::spark_get_json_object);
191+
make_comet_scalar_udf!("get_json_object", func, without data_type)
192+
}
189193
_ => registry.udf(fun_name).map_err(|e| {
190194
DataFusionError::Execution(format!(
191195
"Function {fun_name} not found in the registry: {e}",

0 commit comments

Comments
 (0)