Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions llvm/include/llvm/Analysis/IVDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
OrderedFCmpSelect, ///< FP max implemented in terms of select(cmp()), but
/// without any fast-math flags. Users need to handle NaNs
/// and signed zeros when generating code.
FMinNum, ///< FP min with llvm.minnum semantics including NaNs.
FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs.
FMinimum, ///< FP min with llvm.minimum semantics
Expand Down Expand Up @@ -252,9 +255,10 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum ||
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
Kind == RecurKind::OrderedFCmpSelect || Kind == RecurKind::FMinNum ||
Kind == RecurKind::FMaxNum || Kind == RecurKind::FMinimum ||
Kind == RecurKind::FMaximum || Kind == RecurKind::FMinimumNum ||
Kind == RecurKind::FMaximumNum;
}

/// Returns true if the recurrence kind is any min/max kind.
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Analysis/IVDescriptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,8 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
if (match(I, m_OrdOrUnordFMin(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_OrdOrUnordFMax(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
return InstDesc(
Kind == RecurKind::FMax || Kind == RecurKind::OrderedFCmpSelect, I);
if (match(I, m_FMinNum(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_FMaxNum(m_Value(), m_Value())))
Expand Down Expand Up @@ -962,6 +963,14 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
"unexpected recurrence kind for minnum");
return InstDesc(I, RecurKind::FMinNum);
}
if (Kind == RecurKind::FMax || Kind == RecurKind::OrderedFCmpSelect) {
if (isa<SelectInst>(I))
return InstDesc(I, RecurKind::OrderedFCmpSelect);
auto *Cmp = dyn_cast<FCmpInst>(I);
if (Cmp && FCmpInst::isOrdered(Cmp->getPredicate()) &&
isMinMaxPattern(I, Kind, Prev).isRecurrence())
return InstDesc(I, RecurKind::OrderedFCmpSelect);
}
return InstDesc(false, I);
}
if (isFMulAddIntrinsic(I))
Expand Down Expand Up @@ -1227,6 +1236,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::UMin:
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::OrderedFCmpSelect:
case RecurKind::FMin:
case RecurKind::FMaximum:
case RecurKind::FMinimum:
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,7 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
return Intrinsic::vector_reduce_umax;
case RecurKind::UMin:
return Intrinsic::vector_reduce_umin;
case RecurKind::OrderedFCmpSelect:
case RecurKind::FMax:
case RecurKind::FMaxNum:
return Intrinsic::vector_reduce_fmax;
Expand Down Expand Up @@ -1088,6 +1089,7 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
return CmpInst::ICMP_SGT;
case RecurKind::FMin:
return CmpInst::FCMP_OLT;
case RecurKind::OrderedFCmpSelect:
case RecurKind::FMax:
return CmpInst::FCMP_OGT;
// We do not add FMinimum/FMaximum recurrence kind here since there is no
Expand Down Expand Up @@ -1310,6 +1312,7 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
case RecurKind::OrderedFCmpSelect:
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMinNum:
Expand Down
18 changes: 10 additions & 8 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4405,13 +4405,15 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {

bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
// reductions need special handling and are currently unsupported.
// Cross iteration phis such as fixed-order recurrences and
// OrderedFCmpSelect/FMaxNum/FMinNum reductions need special handling and are
// currently unsupported.
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
if (!Legal->isReductionVariable(&Phi))
return Legal->isFixedOrderRecurrence(&Phi);
RecurKind RK = Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
return RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum;
return RK == RecurKind::OrderedFCmpSelect || RK == RecurKind::FMinNum ||
RK == RecurKind::FMaxNum;
}))
return false;

Expand Down Expand Up @@ -8847,11 +8849,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(

// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
*Plan))
// Apply mandatory transformation to handle FP maxnum/minnum/OrderedFCmpSelect
// reduction with NaNs and signed-zeros if possible, bail out otherwise.
if (!VPlanTransforms::runPass(
VPlanTransforms::handleMaxMinNumAndOrderedFCmpSelectReductions,
*Plan))
return nullptr;

// Transform recipes to abstract recipes if it is legal and beneficial and
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23705,6 +23705,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::OrderedFCmpSelect:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
Expand Down Expand Up @@ -23844,6 +23845,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::OrderedFCmpSelect:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
Expand Down Expand Up @@ -23948,6 +23950,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::OrderedFCmpSelect:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
Expand Down
112 changes: 110 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,105 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
}
}

bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
/// Try to update \p Plan so that the OrderedFCmpSelect reduction rooted at
/// \p RedPhiR can be vectorized without fast-math flags.
///
/// Next to the existing max reduction, a FindFirstIVUMin reduction is added
/// that tracks the value of the canonical wide induction at which the running
/// maximum was last selected. In the block containing the reduction's
/// ComputeReductionResult, the lanes comparing equal (FCMP_OEQ) to the reduced
/// maximum are identified, the first tracked index among them is computed, and
/// the final value is extracted from the corresponding lane.
///
/// \returns true if the plan was updated. Returns false if the pattern is not
/// supported: the backedge value is not a suitable select, the compare
/// predicate is not strict, no canonical wide induction exists, or no unique
/// ComputeReductionResult user is found. All of these checks run before any
/// recipe is added, so \p Plan is left untouched on failure.
static bool handleOrderedFCmpSelect(VPlan &Plan,
                                    VPReductionPHIRecipe *RedPhiR) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPWidenIntOrFpInductionRecipe *WideIV = nullptr;

  // MaxOp feeding the reduction phi must be a select (either wide or a
  // replicate recipe), where the phi is the last operand, and the compare
  // predicate is strict. This ensures NaNs won't get propagated unless the
  // initial value is NaN.
  auto *MaxOp = dyn_cast<VPRecipeWithIRFlags>(
      RedPhiR->getBackedgeValue()->getDefiningRecipe());
  if (!MaxOp)
    return false;
  auto *RepR = dyn_cast<VPReplicateRecipe>(MaxOp);
  if (!isa<VPWidenSelectRecipe>(MaxOp) &&
      !(RepR && (isa<SelectInst>(RepR->getUnderlyingInstr()))))
    return false;

  // Bail out instead of asserting if the select condition is not produced by
  // a recipe carrying IR flags (and hence a predicate).
  auto *Cmp = dyn_cast<VPRecipeWithIRFlags>(MaxOp->getOperand(0));
  if (!Cmp || MaxOp->getOperand(1) == RedPhiR ||
      !CmpInst::isStrictPredicate(Cmp->getPredicate()))
    return false;

  for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
    // We need a wide canonical IV
    if (auto *CurIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
      if (CurIV->isCanonical()) {
        WideIV = CurIV;
        break;
      }
    }
  }

  // A wide canonical IV is currently required.
  // TODO: Create an induction if no suitable existing one is available.
  if (!WideIV)
    return false;

  // Locate the unique ComputeReductionResult using the reduction phi. This is
  // done before any recipe is created so that we can bail out without leaving
  // a half-transformed plan behind if it cannot be found.
  auto *MaxResult = find_singleton<VPSingleDefRecipe>(
      RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
        auto *VPI = dyn_cast<VPInstruction>(U);
        if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
          return VPI;
        return nullptr;
      });
  if (!MaxResult)
    return false;

  // Create a reduction that tracks the first indices where the latest maximum
  // value has been selected. This is later used to select the max value from
  // the partial reductions in a way that correctly handles signed zeros and
  // NaNs in the input.
  // Note that we do not need to check if the induction may hit the sentinel
  // value. If the sentinel value gets hit, the final reduction value is at the
  // last index or the maximum was never set and all lanes contain the start
  // value. In either case, the correct value is selected.
  unsigned IVWidth =
      VPTypeAnalysis(Plan).inferScalarType(WideIV)->getScalarSizeInBits();
  LLVMContext &Ctx = Plan.getScalarHeader()->getIRBasicBlock()->getContext();
  VPValue *UMinSentinel =
      Plan.getOrAddLiveIn(ConstantInt::get(Ctx, APInt::getMaxValue(IVWidth)));
  auto *IdxPhi = new VPReductionPHIRecipe(nullptr, RecurKind::FindFirstIVUMin,
                                          *UMinSentinel, false, false, 1);
  IdxPhi->insertBefore(RedPhiR);
  // The index select reuses the condition of the max select, so the index is
  // updated exactly when the max select picks its first value operand.
  auto *MinIdxSel = new VPInstruction(Instruction::Select,
                                      {MaxOp->getOperand(0), WideIV, IdxPhi});
  MinIdxSel->insertAfter(MaxOp);
  IdxPhi->addOperand(MinIdxSel);

  // Find the first index holding the maximum value. This is used to extract
  // the lane with the final max value and is needed to handle signed zeros and
  // NaNs in the input.
  VPBuilder Builder(MaxResult->getParent(),
                    std::next(MaxResult->getIterator()));

  // Create mask for lanes that have the max value and use it to mask out
  // indices that don't contain maximum values.
  auto *MaskFinalMaxValue = Builder.createNaryOp(
      Instruction::FCmp, {MaxResult->getOperand(1), MaxResult},
      VPIRFlags(CmpInst::FCMP_OEQ));
  auto *IndicesWithMaxValue = Builder.createNaryOp(
      Instruction::Select, {MaskFinalMaxValue, MinIdxSel, UMinSentinel});
  auto *FirstMaxIdx = Builder.createNaryOp(
      VPInstruction::ComputeFindIVResult,
      {IdxPhi, WideIV->getStartValue(), UMinSentinel, IndicesWithMaxValue});
  // Convert the index of the first max value to an index in the vector lanes of
  // the partial reduction results. This ensures we select the first max value
  // and acts as a tie-breaker if the partial reductions contain signed zeros.
  auto *FirstMaxLane =
      Builder.createNaryOp(Instruction::URem, {FirstMaxIdx, &Plan.getVFxUF()});

  // Extract the final max value and update the users. The compare creating the
  // mask must keep using the original reduction result.
  auto *Res = Builder.createNaryOp(VPInstruction::ExtractLane,
                                   {FirstMaxLane, MaxResult->getOperand(1)});
  MaxResult->replaceUsesWithIf(Res, [MaskFinalMaxValue](VPUser &U, unsigned) {
    return &U != MaskFinalMaxValue;
  });
  return true;
}

bool VPlanTransforms::handleMaxMinNumAndOrderedFCmpSelectReductions(
VPlan &Plan) {
auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
RedPhiR->getBackedgeValue()->getDefiningRecipe());
Expand Down Expand Up @@ -703,7 +801,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
if (RedPhiR)
return false;
if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
Cur->getRecurrenceKind() != RecurKind::FMinNum) {
Cur->getRecurrenceKind() != RecurKind::FMinNum &&
Cur->getRecurrenceKind() != RecurKind::OrderedFCmpSelect) {
HasUnsupportedPhi = true;
continue;
}
Expand All @@ -713,6 +812,15 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
if (!RedPhiR)
return true;

if (HasUnsupportedPhi)
return false;

if (RedPhiR->getRecurrenceKind() == RecurKind::OrderedFCmpSelect)
return handleOrderedFCmpSelect(Plan, RedPhiR);

// Try to update the vector loop to exit early if any input is NaN and resume
// executing in the scalar loop to handle the NaNs there.

// We won't be able to resume execution in the scalar tail, if there are
// unsupported header phis or there is no scalar tail at all, due to
// tail-folding.
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
return true;
switch (Opcode) {
case Instruction::Freeze:
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::PHI:
case Instruction::Select:
Expand Down Expand Up @@ -599,7 +600,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
llvm_unreachable("should be handled by VPPhi::execute");
}
case Instruction::Select: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
bool OnlyFirstLaneUsed =
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this);
Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
Expand Down Expand Up @@ -1015,7 +1017,8 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
getOpcode() == VPInstruction::AnyOf ||
getOpcode() == VPInstruction::ExtractLane;
}

bool VPInstruction::isSingleScalar() const {
Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,10 @@ struct VPlanTransforms {
/// not valid.
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);

/// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do,
/// try to update the vector loop to exit early if any input is NaN and resume
/// executing in the scalar loop to handle the NaNs there. Return false if
/// this attempt was unsuccessful.
static bool handleMaxMinNumReductions(VPlan &Plan);
/// Check if \p Plan contains any FMaxNum, FMinNum or OrderedFCmpSelect
/// reductions. If it does, try to update the vector loop to account for NaNs
/// and signed zeros as needed. Return false if this attempt was unsuccessful.
static bool handleMaxMinNumAndOrderedFCmpSelectReductions(VPlan &Plan);

/// Clear NSW/NUW flags from reduction instructions if necessary.
static void clearReductionWrapFlags(VPlan &Plan);
Expand Down
Loading