@@ -32,6 +32,7 @@ use datafusion::physical_expr::PhysicalExpr;
3232use datafusion:: prelude:: SessionContext ;
3333use datafusion:: scalar:: ScalarValue ;
3434use datafusion_comet_spark_expr:: EvalMode ;
35+ use datafusion_datasource:: TableSchema ;
3536use itertools:: Itertools ;
3637use std:: collections:: HashMap ;
3738use std:: sync:: Arc ;
@@ -78,7 +79,24 @@ pub(crate) fn init_datasource_exec(
7879 encryption_enabled,
7980 ) ;
8081
81- let mut parquet_source = ParquetSource :: new ( table_parquet_options) ;
82+ // Determine the schema to use for ParquetSource
83+ let table_schema = if let Some ( ref data_schema) = data_schema {
84+ if let Some ( ref partition_schema) = partition_schema {
85+ let partition_fields: Vec < _ > = partition_schema
86+ . fields ( )
87+ . iter ( )
88+ . map ( |f| Arc :: new ( Field :: new ( f. name ( ) , f. data_type ( ) . clone ( ) , f. is_nullable ( ) ) ) as _ )
89+ . collect ( ) ;
90+ TableSchema :: new ( Arc :: clone ( data_schema) , partition_fields)
91+ } else {
92+ TableSchema :: from_file_schema ( Arc :: clone ( data_schema) )
93+ }
94+ } else {
95+ TableSchema :: from_file_schema ( Arc :: clone ( & required_schema) )
96+ } ;
97+
98+ let mut parquet_source = ParquetSource :: new ( table_schema)
99+ . with_table_parquet_options ( table_parquet_options) ;
82100
83101 // Create a conjunctive form of the vector because ParquetExecBuilder takes
84102 // a single expression
@@ -104,37 +122,21 @@ pub(crate) fn init_datasource_exec(
104122 ) ;
105123 }
106124
107- let file_source = parquet_source. with_schema_adapter_factory ( Arc :: new (
108- SparkSchemaAdapterFactory :: new ( spark_parquet_options, default_values) ,
109- ) ) ?;
125+ let file_source = Arc :: new ( parquet_source) as Arc < dyn FileSource > ;
110126
111127 let file_groups = file_groups
112128 . iter ( )
113129 . map ( |files| FileGroup :: new ( files. clone ( ) ) )
114130 . collect ( ) ;
115131
116- let file_scan_config = match ( data_schema, projection_vector, partition_fields) {
117- ( Some ( data_schema) , Some ( projection_vector) , Some ( partition_fields) ) => {
118- get_file_config_builder (
119- data_schema,
120- partition_schema,
121- file_groups,
122- object_store_url,
123- file_source,
124- )
125- . with_projection_indices ( Some ( projection_vector) )
126- . with_table_partition_cols ( partition_fields)
127- . build ( )
128- }
129- _ => get_file_config_builder (
130- required_schema,
131- partition_schema,
132- file_groups,
133- object_store_url,
134- file_source,
135- )
136- . build ( ) ,
137- } ;
132+ let mut file_scan_config_builder = FileScanConfigBuilder :: new ( object_store_url, file_source)
133+ . with_file_groups ( file_groups) ;
134+
135+ if let Some ( projection_vector) = projection_vector {
136+ file_scan_config_builder = file_scan_config_builder. with_projection_indices ( Some ( projection_vector) ) ?;
137+ }
138+
139+ let file_scan_config = file_scan_config_builder. build ( ) ;
138140
139141 Ok ( Arc :: new ( DataSourceExec :: new ( Arc :: new ( file_scan_config) ) ) )
140142}
@@ -165,28 +167,3 @@ fn get_options(
165167
166168 ( table_parquet_options, spark_parquet_options)
167169}
168-
169- fn get_file_config_builder (
170- schema : SchemaRef ,
171- partition_schema : Option < SchemaRef > ,
172- file_groups : Vec < FileGroup > ,
173- object_store_url : ObjectStoreUrl ,
174- file_source : Arc < dyn FileSource > ,
175- ) -> FileScanConfigBuilder {
176- match partition_schema {
177- Some ( partition_schema) => {
178- let partition_fields: Vec < Field > = partition_schema
179- . fields ( )
180- . iter ( )
181- . map ( |field| {
182- Field :: new ( field. name ( ) , field. data_type ( ) . clone ( ) , field. is_nullable ( ) )
183- } )
184- . collect_vec ( ) ;
185- FileScanConfigBuilder :: new ( object_store_url, Arc :: clone ( & schema) , file_source)
186- . with_file_groups ( file_groups)
187- . with_table_partition_cols ( partition_fields)
188- }
189- _ => FileScanConfigBuilder :: new ( object_store_url, Arc :: clone ( & schema) , file_source)
190- . with_file_groups ( file_groups) ,
191- }
192- }