@@ -529,20 +529,22 @@ impl MultiPartitionShuffleRepartitioner {
529529 let mut iter = partitioned_batches. produce ( partition_id) ;
530530
531531 let offset = spill_file. stream_position ( ) ?;
532- let bytes_written = partition_writer. write_to (
532+ partition_writer. write_to (
533533 & mut iter,
534534 & mut spill_file,
535535 & self . metrics ,
536536 self . write_buffer_size ,
537537 self . batch_size ,
538538 ) ?;
539+ let end_offset = spill_file. stream_position ( ) ?;
540+ let actual_bytes = ( end_offset - offset) as usize ;
539541
540- if bytes_written > 0 {
542+ if actual_bytes > 0 {
541543 partition_ranges. push ( Some ( PartitionSpillRange {
542544 offset,
543- length : bytes_written as u64 ,
545+ length : actual_bytes as u64 ,
544546 } ) ) ;
545- spilled_bytes += bytes_written ;
547+ spilled_bytes += actual_bytes ;
546548 } else {
547549 partition_ranges. push ( None ) ;
548550 }
@@ -612,16 +614,24 @@ impl ShufflePartitioner for MultiPartitionShuffleRepartitioner {
612614
613615 let mut output_data = BufWriter :: new ( output_data) ;
614616
617+ // Pre-open all spill files once to avoid repeated File::open() calls.
618+ // With N partitions and S spill files, this reduces open() calls from
619+ // N*S to S.
620+ let mut spill_handles: Vec < _ > = self
621+ . spill_infos
622+ . iter ( )
623+ . map ( |info| info. open_for_read ( ) )
624+ . collect :: < datafusion:: common:: Result < Vec < _ > > > ( ) ?;
625+
615626 #[ allow( clippy:: needless_range_loop) ]
616627 for i in 0 ..num_output_partitions {
617628 offsets[ i] = output_data. stream_position ( ) ?;
618629
619- // Copy spilled data for this partition from each spill file.
620- // Each SpillInfo is a single file containing data from all partitions
621- // ordered by partition ID, with byte ranges tracked per partition.
622- for spill_info in & self . spill_infos {
630+ // Copy spilled data for this partition from each spill file
631+ // using pre-opened file handles.
632+ for ( spill_info, handle) in self . spill_infos . iter ( ) . zip ( spill_handles. iter_mut ( ) ) {
623633 let mut write_timer = self . metrics . write_time . timer ( ) ;
624- spill_info. copy_partition_to ( i , & mut output_data) ?;
634+ spill_info. copy_partition_with_handle ( i , handle , & mut output_data) ?;
625635 write_timer. stop ( ) ;
626636 }
627637
0 commit comments