Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ impl<T: GraceMemoryJoin> GraceHashJoin<T> {
)?;

for hash in hashes.iter_mut() {
*hash = ((*hash << self.shift_bits) >> 60) & 0b1111;
*hash = Self::get_partition_id(*hash, self.shift_bits);
}

Ok(self.build_partition_stream.partition(hashes, data, true))
Expand All @@ -324,7 +324,7 @@ impl<T: GraceMemoryJoin> GraceHashJoin<T> {
)?;

for hash in hashes.iter_mut() {
*hash = ((*hash << self.shift_bits) >> 60) & 0b1111;
*hash = Self::get_partition_id(*hash, self.shift_bits);
}

Ok(self.probe_partition_stream.partition(hashes, data, true))
Expand Down Expand Up @@ -385,6 +385,20 @@ impl<T: GraceMemoryJoin> GraceHashJoin<T> {

Ok(())
}

/// Maps a row hash to a partition id in the range `0..=15`.
///
/// The hash is first shifted left by `shift_bits`; four bits of the shifted value are then
/// selected as the partition id. Which four bits are selected depends on the build target.
#[inline(always)]
fn get_partition_id(hash: u64, shift_bits: usize) -> u64 {
    // On SSE4.2 builds, `_mm_crc32_u64` only sets the low 32 bits of the hash (the high
    // 32 bits are always 0), so the id is read from the top nibble of the low 32-bit half;
    // reading the top nibble of the full word would send every row to partition 0.
    // On all other builds the top nibble of the full 64-bit value is used.
    const NIBBLE_OFFSET: u32 = if cfg!(target_feature = "sse4.2") {
        28
    } else {
        60
    };
    const PARTITION_MASK: u64 = 0b1111;

    let shifted = hash << shift_bits;
    (shifted >> NIBBLE_OFFSET) & PARTITION_MASK
}
}

pub enum RestoreStage {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,15 @@ impl HybridHashJoinState {
level: usize,
factory: Arc<HashJoinFactory>,
) -> Result<Arc<HybridHashJoinState>> {
// On SSE4.2, fast_hash (_mm_crc32_u64) only sets the low 32 bits.
#[cfg(target_feature = "sse4.2")]
const HASH_JOIN_SPILL_MAX_LEVEL: usize = 7;
#[cfg(not(target_feature = "sse4.2"))]
const HASH_JOIN_SPILL_MAX_LEVEL: usize = 15;

let settings = ctx.get_settings();
let max_level = settings.get_max_hash_join_spill_level()? as usize;
let max_spill_level = settings.get_max_hash_join_spill_level()? as usize;
let max_level = (max_spill_level).min(HASH_JOIN_SPILL_MAX_LEVEL);

Ok(Arc::new(HybridHashJoinState {
ctx,
Expand Down
6 changes: 6 additions & 0 deletions src/query/sql/src/planner/optimizer/ir/stats/selectivity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ impl SelectivityVisitor<'_> {
let column_stat = self
.ensure_column_stat(column_index)
.expect("checked above");

return Self::compute_comparison_with_stat(
column_stat,
constant,
Expand Down Expand Up @@ -367,6 +368,11 @@ impl SelectivityVisitor<'_> {
let selectivity =
Self::compute_histogram_comparison(histogram, op, &const_datum)?;
if let Selectivity::N(n) = selectivity {
// Too low selectivity in an unreliable histogram.
if !histogram.accuracy && n < 0.05 {
return Ok(Selectivity::LowerBound);
Comment thread
zhang2014 marked this conversation as resolved.
Outdated
}

let (new_min, new_max) = match op {
ComparisonOp::GT | ComparisonOp::GTE => {
(const_datum.clone(), column_stat.max.clone())
Expand Down
Loading