Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,31 @@ bool IntegralRange::Contains(int64_t value) const
rangeType = compiler->lvaGetDesc(node->AsLclVar())->TypeGet();
}

// If the local's conservative VN identifies it as the result of a CAST
// to a small type, the value stored is bounded by that type's range.
//
// This refinement is only sound when the local's storage is fully
// normalized to the cast's destination type. For "normalize on load"
// small-type locals the store writes only the low bits, leaving the
// upper bits stale; tightening the range here would let downstream
// optimizations (e.g. fgOptimizeCast) drop a required sign/zero
// extending load and read those stale bits.
if ((compiler->vnStore != nullptr) && (!varTypeIsSmall(varDsc->TypeGet()) || varDsc->lvNormalizeOnStore()))
{
ValueNum vn = compiler->vnStore->VNConservativeNormalValue(node->gtVNPair);
Copy link
Copy Markdown
Member

@jakobbotsch jakobbotsch May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think IntegralRange::ForNode should depend on VNs. VNs are a flow sensitive concept that is only valid during specific phases. I think this is too general a utility to start using VNs.

I would be ok with introducing specific helpers that can refine integral ranges based on VNs though, and then using it explicitly from phases where VNs are known to be valid. But in the end you would probably want to use range check instead.

VNFuncApp funcApp;
if (compiler->vnStore->GetVNFunc(vn, &funcApp) && (funcApp.m_func == VNF_Cast))
{
var_types castToType;
bool srcIsUnsigned;
compiler->vnStore->GetCastOperFromVN(funcApp.m_args[1], &castToType, &srcIsUnsigned);
if (varTypeIsSmall(castToType))
{
return ForType(castToType);
}
}
}

if (varDsc->IsNeverNegative())
{
return {SymbolicIntegerValue::Zero, UpperBoundForType(rangeType)};
Expand Down Expand Up @@ -532,12 +557,21 @@ bool IntegralRange::Contains(int64_t value) const
// CAST(ulong/long <- int) - [INT_MIN..INT_MAX]
if (!cast->gtOverflow())
{
if ((fromType == TYP_INT) && fromUnsigned)
IntegralRange typeRange = (fromType == TYP_INT) && fromUnsigned
? IntegralRange{SymbolicIntegerValue::Zero, SymbolicIntegerValue::UIntMax}
: IntegralRange{SymbolicIntegerValue::IntMin, SymbolicIntegerValue::IntMax};

// Refine using the operand's known range: when the operand fits entirely
// inside what the cast can represent, the cast preserves the value, so we
// can tighten the output range to the operand's range. This handles cases
// like CAST(int <- ulong) of a value already known to be in uint16 range.
IntegralRange operandRange = ForNode(cast->CastOp(), compiler);
if (typeRange.Contains(operandRange))
{
return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::UIntMax};
return operandRange;
}

return {SymbolicIntegerValue::IntMin, SymbolicIntegerValue::IntMax};
return typeRange;
}

SymbolicIntegerValue lowerBound;
Expand Down Expand Up @@ -4044,6 +4078,7 @@ GenTree* Compiler::optAssertionProp_AddMulSub(ASSERT_VALARG_TP assertions, GenTr
// 1) Convert DIV/MOD to UDIV/UMOD if both operands are proven to be never negative
// 2) Marks DIV/UDIV/MOD/UMOD with GTF_DIV_MOD_NO_BY_ZERO if divisor is proven to be never zero
// 3) Marks DIV/UDIV/MOD/UMOD with GTF_DIV_MOD_NO_OVERFLOW if both operands are proven to be never negative
// 4) Marks UMOD with GTF_UMOD_UINT16_OPERANDS if both operands are proven to be in uint16 range
//
// Arguments:
// assertions - set of live assertions
Expand Down Expand Up @@ -4091,6 +4126,22 @@ GenTree* Compiler::optAssertionProp_ModDiv(ASSERT_VALARG_TP assertions,
changed = true;
}

#ifdef TARGET_64BIT
// Detect "uint16 % const-uint16" patterns so lowering can emit a cheaper FastMod sequence.
if (((tree->gtFlags & GTF_UMOD_UINT16_OPERANDS) == 0) && tree->OperIs(GT_UMOD) && !opts.MinOpts() &&
op2->IsCnsIntOrI())
{
ssize_t divisorValue = op2->AsIntCon()->IconValue();
if ((divisorValue > 0) && FitsIn<uint16_t>(divisorValue) &&
IntegralRange::ForType(TYP_USHORT).Contains(IntegralRange::ForNode(op1, this)))
{
JITDUMP("Both operands for UMOD are in uint16 range...\n")
tree->gtFlags |= GTF_UMOD_UINT16_OPERANDS;
changed = true;
}
}
#endif // TARGET_64BIT

return changed ? optAssertionProp_Update(tree, tree, stmt) : nullptr;
}

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2672,8 +2672,8 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK)
}
if (op1->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV))
{
if ((op1->gtFlags & (GTF_DIV_MOD_NO_BY_ZERO | GTF_DIV_MOD_NO_OVERFLOW)) !=
(op2->gtFlags & (GTF_DIV_MOD_NO_BY_ZERO | GTF_DIV_MOD_NO_OVERFLOW)))
if ((op1->gtFlags & (GTF_DIV_MOD_NO_BY_ZERO | GTF_DIV_MOD_NO_OVERFLOW | GTF_UMOD_UINT16_OPERANDS)) !=
(op2->gtFlags & (GTF_DIV_MOD_NO_BY_ZERO | GTF_DIV_MOD_NO_OVERFLOW | GTF_UMOD_UINT16_OPERANDS)))
{
return false;
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,8 @@ enum GenTreeFlags : unsigned

GTF_DIV_MOD_NO_OVERFLOW = 0x40000000, // GT_DIV, GT_MOD -- Div or mod definitely does not overflow.

GTF_UMOD_UINT16_OPERANDS = 0x80000000, // UMOD -- Both operands to a mod are in uint16 range. The divisor is non-zero constant.

GTF_CHK_INDEX_INBND = 0x80000000, // GT_BOUNDS_CHECK -- have proven this check is always in-bounds

GTF_ARRLEN_NONFAULTING = 0x20000000, // GT_ARR_LENGTH -- An array length operation that cannot fault. Same as GT_IND_NONFAULTING.
Expand Down
75 changes: 73 additions & 2 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8088,7 +8088,10 @@ bool Lowering::TryLowerConstIntUDivOrUMod(GenTreeOp* divMod)
assert(varTypeIsFloating(divMod->TypeGet()));
#endif // USE_HELPERS_FOR_INT_DIV
#if defined(TARGET_ARM64)
assert(!divMod->OperIs(GT_UMOD));
// ARM64 has no remainder instruction. Morph rewrites every non-pow2 MOD/UMOD
// into a SUB-MUL-DIV form except when the FastMod path here can apply, which
// requires a constant uint16 divisor (and a uint16-range dividend).
assert(!divMod->OperIs(GT_UMOD) || divMod->gtGetOp2()->IsCnsIntOrI());
#endif // TARGET_ARM64

GenTree* dividend = divMod->gtGetOp1();
Expand Down Expand Up @@ -8170,10 +8173,78 @@ bool Lowering::TryLowerConstIntUDivOrUMod(GenTreeOp* divMod)
}
}

assert(divisorValue >= 3);

// TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32
#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
if (!m_compiler->opts.MinOpts() && (divisorValue >= 3))
if (!m_compiler->opts.MinOpts())
{
#ifdef TARGET_64BIT
// Replace (uint16 % uint16) with a cheaper variant of FastMod, specialized for 16-bit operands.
//
// uint multiplier = uint.MaxValue / divisor + 1;
// ulong result = ((ulong)(dividend * multiplier) * divisor) >> 32;
// return (int)result;
//
// The dividend is first truncated to TYP_INT (safe because we know it fits in
// uint16), and the final value (always in [0, divisor)) is widened back to the
// mod's result type. The dividend's range may have been recovered either
// statically by IntegralRange::ForNode at this point, or earlier by assertion
// propagation (which leaves GTF_UMOD_UINT16_OPERANDS for us).
Comment thread
MihaZupan marked this conversation as resolved.
if (!isDiv && FitsIn<uint16_t>(divisorValue) &&
(((divMod->gtFlags & GTF_UMOD_UINT16_OPERANDS) != 0) ||
IntegralRange::ForType(TYP_USHORT).Contains(IntegralRange::ForNode(dividend, m_compiler))))
{
GenTree* dividendInt = dividend;
if (genActualType(dividend) != TYP_INT)
{
assert(genActualType(dividend) == TYP_LONG);
dividendInt = m_compiler->gtNewCastNode(TYP_INT, dividend, /* unsigned */ true, TYP_INT);
BlockRange().InsertBefore(divMod, dividendInt);
}

GenTree* multiplier =
m_compiler->gtNewIconNode(static_cast<ssize_t>((UINT32_MAX / divisorValue) + 1), TYP_INT);
GenTree* mul1 = m_compiler->gtNewOperNode(GT_MUL, TYP_INT, dividendInt, multiplier);
mul1->SetUnsigned();
GenTree* castUp = m_compiler->gtNewCastNode(TYP_LONG, mul1, /* unsigned */ true, TYP_LONG);
BlockRange().InsertBefore(divMod, multiplier, mul1, castUp);

// Reuse the existing constant divisor as a TYP_LONG operand for the second multiply.
divisor->BashToConst(static_cast<int64_t>(divisorValue));

GenTree* mul2 = m_compiler->gtNewOperNode(GT_MUL, TYP_LONG, castUp, divisor);
mul2->SetUnsigned();
GenTree* shiftAmount = m_compiler->gtNewIconNode(32);
BlockRange().InsertBefore(divMod, mul2, shiftAmount);

GenTree* firstNode = (dividendInt == dividend) ? multiplier : dividendInt;

if (type == TYP_LONG)
{
// The shift result is already TYP_LONG; turn divMod itself into the shift.
divMod->ChangeOper(GT_RSZ);
divMod->gtOp1 = mul2;
divMod->gtOp2 = shiftAmount;
}
else
{
assert(type == TYP_INT);
GenTree* shift = m_compiler->gtNewOperNode(GT_RSZ, TYP_LONG, mul2, shiftAmount);
BlockRange().InsertBefore(divMod, shift);

divMod->ChangeOper(GT_CAST);
divMod->AsCast()->gtCastType = TYP_INT;
divMod->gtOp1 = shift;
divMod->gtOp2 = nullptr;
divMod->SetUnsigned();
}

ContainCheckRange(firstNode, divMod);
return true;
}
#endif // TARGET_64BIT

size_t magic;
bool increment;
int preShift;
Expand Down
20 changes: 20 additions & 0 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7424,6 +7424,26 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, bool* optAssertionPropDone)
else if (tree->OperIs(GT_MOD) && op2->IsIntegralConst() && !op2->IsIntegralConstAbsPow2())
#endif
{
#if defined(TARGET_64BIT)
// Skip this transform when lowering will be able to apply the
// cheaper uint16 FastMod sequence. This requires the divisor to
// be a non-zero constant in uint16 range and the dividend's
Comment thread
MihaZupan marked this conversation as resolved.
// static IntegralRange to already fit in uint16.
if (!opts.MinOpts() && op2->IsCnsIntOrI())
{
ssize_t modDivisor = op2->AsIntCon()->IconValue();
if ((modDivisor > 0) && FitsIn<uint16_t>(modDivisor) &&
IntegralRange::ForType(TYP_USHORT).Contains(IntegralRange::ForNode(op1, this)))
{
if (tree->OperIs(GT_MOD))
{
tree->SetOper(GT_UMOD);
}
break;
}
}
#endif // TARGET_64BIT

// Transformation: a % b = a - (a / b) * b;
tree = fgMorphModToSubMulDiv(tree->AsOp());
op1 = tree->AsOp()->gtOp1;
Expand Down
169 changes: 169 additions & 0 deletions src/tests/JIT/opt/Divide/Regressions/Regression4_Divide.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics.X86;
using Xunit;

public class Program
{
private static ushort s_field1;
private static ulong s_field2;

[MethodImpl(MethodImplOptions.NoInlining)]
public static uint Umod_U4_CharByZero(char c)
{
return (uint)c % 0;
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static ushort Umod_U2_CharByConst(char c)
{
return (ushort)(c % 42);
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static int Umod_I4_CharByConst(char c)
{
return c % 42;
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static uint Umod_U4_CharByConst(char c)
{
return (uint)c % 42;
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static long Umod_I8_CharByConst(char c)
{
return (long)c % 42;
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static ulong Umod_U8_CharByConst(char c)
{
return (ulong)c % 42;
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static bool TestIsWhiteSpace(char c)
{
ReadOnlySpan<char> HashEntries = [' ', ' ', '\u00A0', ' ', ' ', ' ', ' ', ' ', ' ', '\t', '\n', '\v', '\f', '\r', ' ', ' ', '\u2028', '\u2029', ' ', ' ', ' ', ' ', ' ', '\u202F', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\u3000', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\u0085', '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', ' ', ' ', ' ', ' ', ' ', '\u205F', '\u1680', ' ', ' ', ' ', ' ', ' ', ' '];
return HashEntries[c % HashEntries.Length] == c;
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static ushort Umod_TZC(ulong value)
{
return (ushort)(BitOperations.TrailingZeroCount(value) % 42);
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static ushort Umod_TZC_Intrinsic(ulong value)
{
return (ushort)(Bmi1.X64.TrailingZeroCount(value) % 42);
}

[MethodImpl(MethodImplOptions.NoInlining)]
public static int Umod_UInt16Range_ByConst(int value)
{
if (value is > 0 and < 1234)
{
return value % 123;
}

return -1;
}

[MethodImpl(MethodImplOptions.AggressiveOptimization)]
private static void Test1()
{
for (int i = 0; i < 2; i++)
{
Core();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
static void Core()
{
s_field1 = (ushort)(Bmi1.X64.TrailingZeroCount(s_field2) % 42);
}
}

[Fact]
public static int TestEntryPoint()
{
try
{
Umod_U4_CharByZero('a');
return 0;
}
catch (DivideByZeroException) { }

if (Umod_U2_CharByConst('a') != 13)
return 0;

if (Umod_I4_CharByConst('a') != 13)
return 0;

if (Umod_U4_CharByConst('a') != 13)
return 0;

if (Umod_I8_CharByConst('a') != 13)
return 0;

if (Umod_U8_CharByConst('a') != 13)
return 0;

if (!TestIsWhiteSpace(' '))
return 0;

if (!TestIsWhiteSpace('\u2029'))
return 0;

if (TestIsWhiteSpace('\0'))
return 0;

if (TestIsWhiteSpace('a'))
return 0;

if (Umod_TZC(1L << 40) != 40)
return 0;

if (Umod_TZC(1L << 50) != 8)
return 0;

if (Bmi1.X64.IsSupported)
{
if (Umod_TZC_Intrinsic(1L << 40) != 40)
return 0;

if (Umod_TZC_Intrinsic(1L << 50) != 8)
return 0;
}

if (Umod_UInt16Range_ByConst(0) != -1)
return 0;

if (Umod_UInt16Range_ByConst(42) != 42)
return 0;

if (Umod_UInt16Range_ByConst(123) != 0)
return 0;

if (Bmi1.X64.IsSupported)
{
s_field1 = 1;
s_field2 = 1L << 50;
Test1();

if (s_field1 != 8)
return 0;
}

return 100;
}
}
Loading
Loading