@@ -78,11 +78,11 @@ object CometConf extends ShimCometConf {
7878
7979 val COMET_PREFIX = " spark.comet" ;
8080
81- val COMET_EXEC_CONFIG_PREFIX : String = s " $COMET_PREFIX.exec " ;
81+ val COMET_EXEC_CONFIG_PREFIX : String = s " $COMET_PREFIX.exec "
8282
83- val COMET_EXPR_CONFIG_PREFIX : String = s " $COMET_PREFIX.expression " ;
83+ val COMET_EXPR_CONFIG_PREFIX : String = s " $COMET_PREFIX.expression "
8484
85- val COMET_OPERATOR_CONFIG_PREFIX : String = s " $COMET_PREFIX.operator " ;
85+ val COMET_OPERATOR_CONFIG_PREFIX : String = s " $COMET_PREFIX.operator "
8686
8787 val COMET_ENABLED : ConfigEntry [Boolean ] = conf(" spark.comet.enabled" )
8888 .category(CATEGORY_EXEC )
@@ -112,7 +112,7 @@ object CometConf extends ShimCometConf {
112112 " feature is highly experimental and only partially implemented. It should not " +
113113 " be used in production." )
114114 .booleanConf
115- .createWithDefault( false )
115+ .createWithEnvVarOrDefault( " ENABLE_COMET_WRITE " , false )
116116
117117 // Deprecated: native_comet uses mutable buffers incompatible with Arrow FFI best practices
118118 // and does not support complex types. Use native_iceberg_compat or auto instead.
@@ -488,13 +488,23 @@ object CometConf extends ShimCometConf {
488488 " Ensure that Comet shuffle memory overhead factor is a double greater than 0" )
489489 .createWithDefault(1.0 )
490490
491+ val COMET_BATCH_SIZE : ConfigEntry [Int ] = conf(" spark.comet.batchSize" )
492+ .category(CATEGORY_TUNING )
493+ .doc(" The columnar batch size, i.e., the maximum number of rows that a batch can contain." )
494+ .intConf
495+ .checkValue(v => v > 0 , " Batch size must be positive" )
496+ .createWithDefault(8192 )
497+
491498 val COMET_COLUMNAR_SHUFFLE_BATCH_SIZE : ConfigEntry [Int ] =
492499 conf(" spark.comet.columnar.shuffle.batch.size" )
493500 .category(CATEGORY_SHUFFLE )
494501 .doc(" Batch size when writing out sorted spill files on the native side. Note that " +
495502 " this should not be larger than batch size (i.e., `spark.comet.batchSize`). Otherwise " +
496503 " it will produce larger batches than expected in the native operator after shuffle." )
497504 .intConf
505+ .checkValue(
506+ v => v <= COMET_BATCH_SIZE .get(),
507+ " Should not be larger than batch size `spark.comet.batchSize`" )
498508 .createWithDefault(8192 )
499509
500510 val COMET_SHUFFLE_WRITE_BUFFER_SIZE : ConfigEntry [Long ] =
@@ -550,6 +560,7 @@ object CometConf extends ShimCometConf {
550560 .booleanConf
551561 .createWithDefault(false )
552562
563+ // Used on the native side. See spark_config.rs for how this config is used.
553564 val COMET_DEBUG_MEMORY_ENABLED : ConfigEntry [Boolean ] =
554565 conf(s " $COMET_PREFIX.debug.memory " )
555566 .category(CATEGORY_TESTING )
@@ -608,12 +619,6 @@ object CometConf extends ShimCometConf {
608619 .booleanConf
609620 .createWithDefault(false )
610621
611- val COMET_BATCH_SIZE : ConfigEntry [Int ] = conf(" spark.comet.batchSize" )
612- .category(CATEGORY_TUNING )
613- .doc(" The columnar batch size, i.e., the maximum number of rows that a batch can contain." )
614- .intConf
615- .createWithDefault(8192 )
616-
617622 val COMET_PARQUET_ENABLE_DIRECT_BUFFER : ConfigEntry [Boolean ] =
618623 conf(" spark.comet.parquet.enable.directBuffer" )
619624 .category(CATEGORY_PARQUET )
@@ -793,14 +798,6 @@ object CometConf extends ShimCometConf {
793798 .booleanConf
794799 .createWithDefault(false )
795800
796- val COMET_REGEXP_ALLOW_INCOMPATIBLE : ConfigEntry [Boolean ] =
797- conf(" spark.comet.regexp.allowIncompatible" )
798- .category(CATEGORY_EXEC )
799- .doc(" Comet is not currently fully compatible with Spark for all regular expressions. " +
800- s " Set this config to true to allow them anyway. $COMPAT_GUIDE. " )
801- .booleanConf
802- .createWithDefault(false )
803-
804801 val COMET_METRICS_UPDATE_INTERVAL : ConfigEntry [Long ] =
805802 conf(" spark.comet.metrics.updateInterval" )
806803 .category(CATEGORY_EXEC )
@@ -819,6 +816,7 @@ object CometConf extends ShimCometConf {
819816 .stringConf
820817 .createOptional
821818
819+ // Used on the native side. See spark_config.rs for how this config is used.
822820 val COMET_MAX_TEMP_DIRECTORY_SIZE : ConfigEntry [Long ] =
823821 conf(" spark.comet.maxTempDirectorySize" )
824822 .category(CATEGORY_EXEC )
@@ -843,6 +841,9 @@ object CometConf extends ShimCometConf {
843841 .booleanConf
844842 .createWithEnvVarOrDefault(" ENABLE_COMET_STRICT_TESTING" , false )
845843
844+ val COMET_OPERATOR_DATA_WRITING_COMMAND_ALLOW_INCOMPAT : ConfigEntry [Boolean ] =
845+ createOperatorIncompatConfig(" DataWritingCommandExec" )
846+
846847 /** Create a config to enable a specific operator */
847848 private def createExecEnabledConfig (
848849 exec : String ,
@@ -858,6 +859,25 @@ object CometConf extends ShimCometConf {
858859 .createWithDefault(defaultValue)
859860 }
860861
862+ /**
863+ * Converts a config key to a valid environment variable name. Example:
864+ * "spark.comet.operator.DataWritingCommandExec.allowIncompatible" ->
865+ * "SPARK_COMET_OPERATOR_DATAWRITINGCOMMANDEXEC_ALLOWINCOMPATIBLE"
866+ */
867+ private def configKeyToEnvVar (configKey : String ): String =
868+ configKey.toUpperCase(Locale .ROOT ).replace('.' , '_' )
869+
870+ private def createOperatorIncompatConfig (name : String ): ConfigEntry [Boolean ] = {
871+ val configKey = getOperatorAllowIncompatConfigKey(name)
872+ val envVar = configKeyToEnvVar(configKey)
873+ conf(configKey)
874+ .category(CATEGORY_EXEC )
875+ .doc(s " Whether to allow incompatibility for operator: $name. " +
876+ s " False by default. Can be overridden with $envVar env variable " )
877+ .booleanConf
878+ .createWithEnvVarOrDefault(envVar, false )
879+ }
880+
861881 def isExprEnabled (name : String , conf : SQLConf = SQLConf .get): Boolean = {
862882 getBooleanConf(getExprEnabledConfigKey(name), defaultValue = true , conf)
863883 }
0 commit comments