Skip to content

Commit d937479

Browse files
authored
perf: remove BufReader wrapper when copying spill files to shuffle output (#3861)
Remove the `BufReader` wrapper around spill-file reads in `MultiPartitionShuffleRepartitioner::shuffle_write()`. `std::io::copy` already buffers internally, and wrapping the source file in a `BufReader` defeats its `copy_file_range`/`sendfile` zero-copy specialization on Linux.
1 parent 7878f0d commit d937479

1 file changed

Lines changed: 4 additions & 2 deletions

File tree

native/shuffle/src/partitioners/multi_partition.rs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ use itertools::Itertools;
3535
use std::fmt;
3636
use std::fmt::{Debug, Formatter};
3737
use std::fs::{File, OpenOptions};
38-
use std::io::{BufReader, BufWriter, Seek, Write};
38+
use std::io::{BufWriter, Seek, Write};
3939
use std::sync::Arc;
4040
use tokio::time::Instant;
4141

@@ -582,7 +582,9 @@ impl ShufflePartitioner for MultiPartitionShuffleRepartitioner {
582582
// if we wrote a spill file for this partition then copy the
583583
// contents into the shuffle file
584584
if let Some(spill_path) = self.partition_writers[i].path() {
585-
let mut spill_file = BufReader::new(File::open(spill_path)?);
585+
// Use raw File handle (not BufReader) so that std::io::copy
586+
// can use copy_file_range/sendfile for zero-copy on Linux.
587+
let mut spill_file = File::open(spill_path)?;
586588
let mut write_timer = self.metrics.write_time.timer();
587589
std::io::copy(&mut spill_file, &mut output_data)?;
588590
write_timer.stop();

0 commit comments

Comments
 (0)