Skip to content
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,12 +532,21 @@ bool IntegralRange::Contains(int64_t value) const
// CAST(ulong/long <- int) - [INT_MIN..INT_MAX]
if (!cast->gtOverflow())
{
if ((fromType == TYP_INT) && fromUnsigned)
IntegralRange typeRange = (fromType == TYP_INT) && fromUnsigned
? IntegralRange{SymbolicIntegerValue::Zero, SymbolicIntegerValue::UIntMax}
: IntegralRange{SymbolicIntegerValue::IntMin, SymbolicIntegerValue::IntMax};

// Refine using the operand's known range: when the operand fits entirely
// inside what the cast can represent, the cast preserves the value, so we
// can tighten the output range to the operand's range. This handles cases
// like CAST(int <- ulong) of a value already known to be in uint16 range.
IntegralRange operandRange = ForNode(cast->CastOp(), compiler);
if (typeRange.Contains(operandRange))
{
return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::UIntMax};
return operandRange;
}

return {SymbolicIntegerValue::IntMin, SymbolicIntegerValue::IntMax};
return typeRange;
}

SymbolicIntegerValue lowerBound;
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7884,6 +7884,7 @@ class Compiler
#define OMF_HAS_STACK_ARRAY 0x00100000 // Method contains stack allocated arrays
#define OMF_HAS_BOUNDS_CHECKS 0x00200000 // Method contains bounds checks
#define OMF_HAS_EARLY_QMARKS 0x00400000 // Method contains early expandable QMARKs
#define OMF_HAS_UMOD_BY_CONST_UINT16_CANDIDATE 0x00800000 // Method contains a MOD/UMOD that may turn into a uint16 FastMod candidate during the range check phase

// clang-format on

Expand Down Expand Up @@ -7924,6 +7925,16 @@ class Compiler
optMethodFlags |= OMF_HAS_BOUNDS_CHECKS;
}

bool doesMethodHaveUModByConstUInt16Candidate()
{
return (optMethodFlags & OMF_HAS_UMOD_BY_CONST_UINT16_CANDIDATE) != 0;
}

void setMethodHasUModByConstUInt16Candidate()
{
optMethodFlags |= OMF_HAS_UMOD_BY_CONST_UINT16_CANDIDATE;
}

bool doesMethodHaveExpandableCasts()
{
return (optMethodFlags & OMF_HAS_EXPANDABLE_CAST) != 0;
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2672,8 +2672,8 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK)
}
if (op1->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV))
{
if ((op1->gtFlags & (GTF_DIV_MOD_NO_BY_ZERO | GTF_DIV_MOD_NO_OVERFLOW)) !=
(op2->gtFlags & (GTF_DIV_MOD_NO_BY_ZERO | GTF_DIV_MOD_NO_OVERFLOW)))
if ((op1->gtFlags & (GTF_DIV_MOD_NO_BY_ZERO | GTF_DIV_MOD_NO_OVERFLOW | GTF_UMOD_UINT16_OPERANDS)) !=
(op2->gtFlags & (GTF_DIV_MOD_NO_BY_ZERO | GTF_DIV_MOD_NO_OVERFLOW | GTF_UMOD_UINT16_OPERANDS)))
{
return false;
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,8 @@ enum GenTreeFlags : unsigned

GTF_DIV_MOD_NO_OVERFLOW = 0x40000000, // GT_DIV, GT_MOD -- Div or mod definitely does not overflow.

GTF_UMOD_UINT16_OPERANDS = 0x80000000, // GT_UMOD -- Both operands to a mod are in uint16 range. The divisor is a non-zero constant.

GTF_CHK_INDEX_INBND = 0x80000000, // GT_BOUNDS_CHECK -- have proven this check is always in-bounds

GTF_ARRLEN_NONFAULTING = 0x20000000, // GT_ARR_LENGTH -- An array length operation that cannot fault. Same as GT_IND_NONFAULTING.
Expand Down
75 changes: 73 additions & 2 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8088,7 +8088,10 @@ bool Lowering::TryLowerConstIntUDivOrUMod(GenTreeOp* divMod)
assert(varTypeIsFloating(divMod->TypeGet()));
#endif // USE_HELPERS_FOR_INT_DIV
#if defined(TARGET_ARM64)
assert(!divMod->OperIs(GT_UMOD));
// ARM64 has no remainder instruction. Morph rewrites every non-pow2 MOD/UMOD
// into a SUB-MUL-DIV form except when the FastMod path here can apply, which
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been thinking about this particular problem a bit recently.

That is, the general problem where we have some operation X which has to be morphed into a different pattern for VN/CSE or other purposes, particularly when part of the expression is complex and/or expensive.

I wonder if it would be beneficial in such cases to either have a flag on the "root" of the expression - i.e. a flag on the SUB in SUB(x, MUL(DIV(x, y), y)) indicating say GTF_MOD_ROOT or even a custom node that exists only in HIR (you could identify it as a MOD with only 1 operand, for example).

This would allow us to still do the morph but also allow any other code to trivially see "hey, this is actually a MOD expression and so you can extract the x/y and do what you need optimization wise with it"

The same would also apply to some other cases where we have data we want to CSE, like say -float where it has to emit as XOR(x, -0.0) on xarch or Abs(float) where it is ANDN(x, -0.0)

// requires a constant uint16 divisor (and a uint16-range dividend).
assert(!divMod->OperIs(GT_UMOD) || divMod->gtGetOp2()->IsCnsIntOrI());
#endif // TARGET_ARM64

GenTree* dividend = divMod->gtGetOp1();
Expand Down Expand Up @@ -8170,10 +8173,78 @@ bool Lowering::TryLowerConstIntUDivOrUMod(GenTreeOp* divMod)
}
}

assert(divisorValue >= 3);

// TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32
#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
if (!m_compiler->opts.MinOpts() && (divisorValue >= 3))
if (!m_compiler->opts.MinOpts())
{
#ifdef TARGET_64BIT
// Replace (uint16 % uint16) with a cheaper variant of FastMod, specialized for 16-bit operands.
//
// uint multiplier = uint.MaxValue / divisor + 1;
// ulong result = ((ulong)(dividend * multiplier) * divisor) >> 32;
// return (int)result;
//
// The dividend is first truncated to TYP_INT (safe because we know it fits in
// uint16), and the final value (always in [0, divisor)) is widened back to the
// mod's result type. The dividend's range may have been recovered either
// statically by IntegralRange::ForNode at this point, or earlier by the range
// check phase (which leaves GTF_UMOD_UINT16_OPERANDS for us).
if (!isDiv && FitsIn<uint16_t>(divisorValue) &&
(((divMod->gtFlags & GTF_UMOD_UINT16_OPERANDS) != 0) ||
IntegralRange::ForType(TYP_USHORT).Contains(IntegralRange::ForNode(dividend, m_compiler))))
{
GenTree* dividendInt = dividend;
if (genActualType(dividend) != TYP_INT)
{
assert(genActualType(dividend) == TYP_LONG);
dividendInt = m_compiler->gtNewCastNode(TYP_INT, dividend, /* unsigned */ true, TYP_INT);
BlockRange().InsertBefore(divMod, dividendInt);
}

GenTree* multiplier =
m_compiler->gtNewIconNode(static_cast<ssize_t>((UINT32_MAX / divisorValue) + 1), TYP_INT);
GenTree* mul1 = m_compiler->gtNewOperNode(GT_MUL, TYP_INT, dividendInt, multiplier);
mul1->SetUnsigned();
GenTree* castUp = m_compiler->gtNewCastNode(TYP_LONG, mul1, /* unsigned */ true, TYP_LONG);
BlockRange().InsertBefore(divMod, multiplier, mul1, castUp);

// Reuse the existing constant divisor as a TYP_LONG operand for the second multiply.
divisor->BashToConst(static_cast<int64_t>(divisorValue), TYP_LONG);

GenTree* mul2 = m_compiler->gtNewOperNode(GT_MUL, TYP_LONG, castUp, divisor);
mul2->SetUnsigned();
GenTree* shiftAmount = m_compiler->gtNewIconNode(32);
BlockRange().InsertBefore(divMod, mul2, shiftAmount);

GenTree* firstNode = (dividendInt == dividend) ? multiplier : dividendInt;

if (type == TYP_LONG)
{
// The shift result is already TYP_LONG; turn divMod itself into the shift.
divMod->ChangeOper(GT_RSZ);
divMod->gtOp1 = mul2;
divMod->gtOp2 = shiftAmount;
}
else
{
assert(type == TYP_INT);
GenTree* shift = m_compiler->gtNewOperNode(GT_RSZ, TYP_LONG, mul2, shiftAmount);
BlockRange().InsertBefore(divMod, shift);

divMod->ChangeOper(GT_CAST);
divMod->AsCast()->gtCastType = TYP_INT;
divMod->gtOp1 = shift;
divMod->gtOp2 = nullptr;
divMod->SetUnsigned();
}
Comment on lines +8223 to +8241

ContainCheckRange(firstNode, divMod);
return true;
}
#endif // TARGET_64BIT

size_t magic;
bool increment;
int preShift;
Expand Down
42 changes: 42 additions & 0 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7424,11 +7424,53 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, bool* optAssertionPropDone)
else if (tree->OperIs(GT_MOD) && op2->IsIntegralConst() && !op2->IsIntegralConstAbsPow2())
#endif
{
#if defined(TARGET_64BIT)
// Skip the SUB-MUL-DIV rewrite below when lowering may apply
// the cheaper uint16 FastMod sequence. That requires a non-zero
// constant uint16 divisor AND a dividend provable as uint16.
// The dividend's range may be provable statically here, or only
// later via assertion/VN information from the range check phase;
// defer to that phase by leaving the node as MOD and setting
// OMF_HAS_UMOD_BY_CONST_UINT16_CANDIDATE when we can't decide yet.
if (!opts.MinOpts() && op2->IsCnsIntOrI())
{
ssize_t modDivisor = op2->AsIntCon()->IconValue();
if ((modDivisor > 0) && FitsIn<uint16_t>(modDivisor))
{
if (IntegralRange::ForType(TYP_USHORT).Contains(IntegralRange::ForNode(op1, this)))
{
if (tree->OperIs(GT_MOD))
{
tree->SetOper(GT_UMOD);
Comment thread
MihaZupan marked this conversation as resolved.
Outdated
}
break;
}

// Defer to the range check phase: leave the MOD/UMOD
// node intact so that VN- and assertion-based range
// analysis can decide whether the dividend fits in
// uint16.
setMethodHasUModByConstUInt16Candidate();
break;
}
}
#endif // TARGET_64BIT

// Transformation: a % b = a - (a / b) * b;
tree = fgMorphModToSubMulDiv(tree->AsOp());
op1 = tree->AsOp()->gtOp1;
op2 = tree->AsOp()->gtOp2;
}
#if defined(TARGET_64BIT) && !defined(TARGET_ARM64)
else if (!opts.MinOpts() && tree->OperIs(GT_MOD, GT_UMOD) && !op2->IsIntegralConst() &&
((typ == TYP_INT) || (typ == TYP_LONG)))
{
// The divisor may fold to a uint16 constant later (e.g. a
// span length that is computed from an RVA static). Let the
// range check phase re-examine such nodes after VN/CSE.
setMethodHasUModByConstUInt16Candidate();
}
#endif // TARGET_64BIT && !TARGET_ARM64
break;

USE_HELPER_FOR_ARITH:
Expand Down
101 changes: 99 additions & 2 deletions src/coreclr/jit/rangecheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,21 @@
//
PhaseStatus Compiler::rangeCheckPhase()
{
if (!doesMethodHaveBoundsChecks() || (fgSsaPassesCompleted == 0))
if ((!doesMethodHaveBoundsChecks() && !doesMethodHaveUModByConstUInt16Candidate()) || (fgSsaPassesCompleted == 0))
{
return PhaseStatus::MODIFIED_NOTHING;
}

const bool madeChanges = GetRangeCheck()->OptimizeRangeChecks();
RangeCheck* rc = GetRangeCheck();
bool madeChanges = false;
if (doesMethodHaveBoundsChecks())
{
madeChanges = rc->OptimizeRangeChecks();
}
if (doesMethodHaveUModByConstUInt16Candidate())
{
madeChanges |= rc->TryMarkUModUInt16Operands();
}
return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

Expand Down Expand Up @@ -2401,3 +2410,91 @@ bool RangeCheck::OptimizeRangeChecks()

return madeChanges;
}

//------------------------------------------------------------------------
// TryMarkUModUInt16Operands: detect GT_MOD/GT_UMOD nodes where the divisor is
// a positive uint16 constant and the dividend is provably in uint16 range.
// Such nodes are converted to GT_UMOD (if signed) and tagged with
// GTF_UMOD_UINT16_OPERANDS so that lowering can emit a cheaper FastMod
// sequence specialized for 16-bit operands.
//
// Return Value:
// True if any node was modified.
//
bool RangeCheck::TryMarkUModUInt16Operands()
{
#ifdef TARGET_64BIT
bool madeChanges = false;

for (BasicBlock* const block : m_compiler->Blocks())
{
for (Statement* const stmt : block->Statements())
{
for (GenTree* const tree : stmt->TreeList())
{
if (!tree->OperIs(GT_MOD, GT_UMOD))
{
continue;
}

if ((tree->gtFlags & GTF_UMOD_UINT16_OPERANDS) != 0)
{
continue;
}

if (!tree->TypeIs(TYP_INT, TYP_LONG))
{
continue;
}

GenTree* divisor = tree->gtGetOp2();
if (!divisor->IsCnsIntOrI())
{
continue;
}

ssize_t divisorValue = divisor->AsIntCon()->IconValue();
if ((divisorValue <= 0) || !FitsIn<uint16_t>(divisorValue))
{
continue;
}

GenTree* dividend = tree->gtGetOp1();

bool dividendFitsUint16 =
IntegralRange::ForType(TYP_USHORT).Contains(IntegralRange::ForNode(dividend, m_compiler));

if (!dividendFitsUint16 && (m_compiler->vnStore != nullptr))
{
// Fall back to a richer VN-/assertion-based range analysis.
Range range = Range(Limit(Limit::keUndef));
if (TryGetRange(block, dividend, &range) && range.IsConstantRange())
{
const int lower = range.LowerLimit().GetConstant();
const int upper = range.UpperLimit().GetConstant();
dividendFitsUint16 = (lower >= 0) && (upper <= UINT16_MAX);
}
}

if (!dividendFitsUint16)
{
continue;
}

JITDUMP("Marking [%06u] as a uint16 UMOD candidate\n", Compiler::dspTreeID(tree));

if (tree->OperIs(GT_MOD))
{
tree->SetOper(GT_UMOD);
Comment thread
MihaZupan marked this conversation as resolved.
}
tree->gtFlags |= GTF_UMOD_UINT16_OPERANDS;
madeChanges = true;
}
}
}

return madeChanges;
#else
return false;
#endif // TARGET_64BIT
}
6 changes: 6 additions & 0 deletions src/coreclr/jit/rangecheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,12 @@ class RangeCheck
// and assertion prop phases are completed.
bool OptimizeRangeChecks();

// Walks the IR looking for "uint16 % const-uint16" patterns and tags eligible
// GT_UMOD nodes with GTF_UMOD_UINT16_OPERANDS so lowering can emit a cheaper
// FastMod sequence. GT_MOD nodes whose dividend is non-negative and whose
// divisor is a positive uint16 constant are converted to GT_UMOD.
bool TryMarkUModUInt16Operands();

bool TryGetRange(BasicBlock* block, GenTree* expr, Range* pRange);

// Cheaper version of TryGetRange that is based only on incoming assertions.
Expand Down
Loading
Loading