diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index a8ab92f5c668e1..2ac9de809d6f65 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1783,25 +1783,6 @@ BBehfDesc::BBehfDesc(Compiler* comp, const BBehfDesc* other) : bbeCount(other->b } } -//------------------------------------------------------------------------ -// unmarkLoopAlign: Unmarks the LOOP_ALIGN flag from the block and reduce the -// loop alignment count. -// -// Arguments: -// compiler - Compiler instance -// reason - Reason to print in JITDUMP -// -void BasicBlock::unmarkLoopAlign(Compiler* compiler DEBUG_ARG(const char* reason)) -{ - // Make sure we unmark and count just once. - if (isLoopAlign()) - { - compiler->loopAlignCandidates--; - RemoveFlags(BBF_LOOP_ALIGN); - JITDUMP("Unmarking LOOP_ALIGN from " FMT_BB ". Reason= %s.\n", bbNum, reason); - } -} - //------------------------------------------------------------------------ // getCalledCount: get the value used to normalized weights for this method // diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 159bcd61f54703..f3d83cbfa6a5d0 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -430,6 +430,7 @@ enum BasicBlockFlags : unsigned __int64 // and should be treated as if it falls through. // This is just to reduce diffs from removing BBJ_NONE. // (TODO: Remove this quirk after refactoring Compiler::fgFindInsertPoint) + BBF_OLD_LOOP_HEADER_QUIRK = MAKE_BBFLAG(42), // Block was the header ('entry') of a loop recognized by old loop finding // The following are sets of flags. @@ -440,7 +441,7 @@ enum BasicBlockFlags : unsigned __int64 // Flags to update when two blocks are compacted BBF_COMPACT_UPD = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_BACKWARD_JUMP | \ - BBF_HAS_NEWOBJ | BBF_HAS_NULLCHECK | BBF_HAS_MDARRAYREF | BBF_LOOP_PREHEADER, + BBF_HAS_NEWOBJ | BBF_HAS_NULLCHECK | BBF_HAS_MDARRAYREF | BBF_LOOP_PREHEADER | BBF_OLD_LOOP_HEADER_QUIRK, // Flags a block should not have had before it is split. @@ -873,8 +874,6 @@ struct BasicBlock : private LIR::Range return HasFlag(BBF_LOOP_ALIGN); } - void unmarkLoopAlign(Compiler* comp DEBUG_ARG(const char* reason)); - bool hasAlign() const { return HasFlag(BBF_HAS_ALIGN); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1ecb1723c40dfe..f00e3f74ef8314 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4008,7 +4008,8 @@ void Compiler::compSetOptimizationLevel() codeGen->setFrameRequired(true); #endif - if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && !IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + if (opts.OptimizationDisabled() || + (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && !IsTargetAbi(CORINFO_NATIVEAOT_ABI))) { // The JIT doesn't currently support loop alignment for prejitted images outside NativeAOT. // (The JIT doesn't know the final address of the code, hence @@ -5228,6 +5229,165 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl #if FEATURE_LOOP_ALIGN +// TODO-Quirk: Remove +static bool HasOldChildLoop(Compiler* comp, FlowGraphNaturalLoop* loop) +{ + for (FlowGraphNaturalLoop* child = loop->GetChild(); child != nullptr; child = child->GetSibling()) + { + if (child->GetHeader()->HasFlag(BBF_OLD_LOOP_HEADER_QUIRK)) + return true; + + if (HasOldChildLoop(comp, child)) + return true; + } + + return false; +} + +//------------------------------------------------------------------------ +// optIdentifyLoopsForAlignment: Determine which loops should be considered for alignment. +// +// Parameters: +// loops - Identified natural loops +// blockToLoop - Map from block back to its most-nested containing natural loop +// +// Remarks: +// All innermost loops whose block weight meets a threshold are candidates for alignment. +// The `top` block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this +// (the loop table itself is not changed). +// +// Depends on the loop table, and on block weights being set. +// +// Sets `loopAlignCandidates` to the number of loop candidates for alignment. +// +void Compiler::optIdentifyLoopsForAlignment(FlowGraphNaturalLoops* loops, BlockToNaturalLoopMap* blockToLoop) +{ + loopAlignCandidates = 0; + + // TODO-Cleanup: We cannot currently renumber blocks after LSRA, so we need + // a side map for the lexically top most block here. + // Since placeLoopAlignInstructions already does a lexical visit we can + // merge it with the identification of candidates to make all of this + // cheaper. + BasicBlock** topMostBlocks = new (this, CMK_LoopOpt) BasicBlock*[loops->NumLoops()]{}; + for (BasicBlock* block : Blocks()) + { + FlowGraphNaturalLoop* loop = blockToLoop->GetLoop(block); + if (loop == nullptr) + { + continue; + } + + if (topMostBlocks[loop->GetIndex()] == nullptr) + { + topMostBlocks[loop->GetIndex()] = block; + } + } + + for (FlowGraphNaturalLoop* loop : loops->InReversePostOrder()) + { + // TODO-Quirk: Remove. When removing we will likely need to add some + // form of "lexicality" heuristic here: only align loops whose blocks + // are fairly tightly packed together physically. + if (!loop->GetHeader()->HasFlag(BBF_OLD_LOOP_HEADER_QUIRK)) + { + continue; + } + + // TODO-Quirk: Switch to loop->GetChild() != nullptr + if (HasOldChildLoop(this, loop)) + { + JITDUMP("Skipping alignment for " FMT_LP "; not an innermost loop\n", loop->GetIndex()); + continue; + } + + BasicBlock* top = topMostBlocks[loop->GetIndex()]; + if (top == fgFirstBB) + { + // Adding align instruction in prolog is not supported. + // TODO: Insert an empty block before the loop, if want to align it, so we have a place to put + // the align instruction. + JITDUMP("Skipping alignment for " FMT_LP "; loop starts in first block\n", loop->GetIndex()); + continue; + } + + if (top->HasFlag(BBF_COLD)) + { + JITDUMP("Skipping alignment for cold loop " FMT_LP "\n", loop->GetIndex()); + continue; + } + + bool hasCall = loop->VisitLoopBlocks([](BasicBlock* block) { + for (GenTree* tree : LIR::AsRange(block)) + { + if (tree->IsCall()) + { + return BasicBlockVisit::Abort; + } + } + + return BasicBlockVisit::Continue; + }) == BasicBlockVisit::Abort; + + if (hasCall) + { + // Heuristic: it is not valuable to align loops with calls. + JITDUMP("Skipping alignment for " FMT_LP "; loop contains call\n", loop->GetIndex()); + continue; + } + + if (!top->IsFirst()) + { +#if FEATURE_EH_CALLFINALLY_THUNKS + if (top->Prev()->KindIs(BBJ_CALLFINALLY)) + { + // It must be a retless BBJ_CALLFINALLY if we get here. + assert(!top->Prev()->isBBCallAlwaysPair()); + + // If the block before the loop start is a retless BBJ_CALLFINALLY + // with FEATURE_EH_CALLFINALLY_THUNKS, we can't add alignment + // because it will affect reported EH region range. For x86 (where + // !FEATURE_EH_CALLFINALLY_THUNKS), we can allow this. + + JITDUMP("Skipping alignment for " FMT_LP "; its top block follows a CALLFINALLY block\n", + loop->GetIndex()); + continue; + } +#endif // FEATURE_EH_CALLFINALLY_THUNKS + + if (top->Prev()->isBBCallAlwaysPairTail()) + { + // If the previous block is the BBJ_ALWAYS of a + // BBJ_CALLFINALLY/BBJ_ALWAYS pair, then we can't add alignment + // because we can't add instructions in that block. In the + // FEATURE_EH_CALLFINALLY_THUNKS case, it would affect the + // reported EH, as above. + JITDUMP("Skipping alignment for " FMT_LP "; its top block follows a CALLFINALLY/ALWAYS pair\n", + loop->GetIndex()); + continue; + } + } + + // Now we have an innerloop candidate that might need alignment + + weight_t topWeight = top->getBBWeight(this); + weight_t compareWeight = opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT; + if (topWeight >= compareWeight) + { + loopAlignCandidates++; + assert(!top->isLoopAlign()); + top->SetFlags(BBF_LOOP_ALIGN); + JITDUMP("Aligning " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " >= " FMT_WT ".\n", + loop->GetIndex(), top->bbNum, topWeight, compareWeight); + } + else + { + JITDUMP("Skipping alignment for " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " < " FMT_WT ".\n", + loop->GetIndex(), top->bbNum, topWeight, compareWeight); + } + } +} + //------------------------------------------------------------------------ // placeLoopAlignInstructions: determine where to place alignment padding // @@ -5246,44 +5406,69 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // PhaseStatus Compiler::placeLoopAlignInstructions() { - // Add align only if there were any loops that needed alignment - if (loopAlignCandidates == 0) + JITDUMP("*************** In placeLoopAlignInstructions()\n"); + + if (!codeGen->ShouldAlignLoops()) { + JITDUMP("Not aligning loops; ShouldAlignLoops is false\n"); return PhaseStatus::MODIFIED_NOTHING; } - JITDUMP("Inside placeLoopAlignInstructions for %d loops.\n", loopAlignCandidates); - - bool madeChanges = false; - weight_t minBlockSoFar = BB_MAX_WEIGHT; - BasicBlock* bbHavingAlign = nullptr; - BasicBlock::loopNumber currentAlignedLoopNum = BasicBlock::NOT_IN_LOOP; - bool visitedLoopNum[BasicBlock::MAX_LOOP_NUM]; - memset(visitedLoopNum, false, sizeof(visitedLoopNum)); - + if (!fgMightHaveNaturalLoops) + { #ifdef DEBUG - unsigned visitedBlockForLoopNum[BasicBlock::MAX_LOOP_NUM]; - memset(visitedBlockForLoopNum, 0, sizeof(visitedBlockForLoopNum)); + // If false, then we expect there to be no natural loops. Phases + // between loop finding and loop alignment that may introduce loops + // should conservatively set fgMightHaveNaturalLoops. + FlowGraphDfsTree* dfsTree = fgComputeDfs(); + FlowGraphNaturalLoops* loops = FlowGraphNaturalLoops::Find(dfsTree); + assert(loops->NumLoops() == 0); #endif - if ((fgFirstBB != nullptr) && fgFirstBB->isLoopAlign()) + JITDUMP("Not checking for any loops as fgMightHaveNaturalLoops is false\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + FlowGraphDfsTree* dfsTree = fgComputeDfs(); + FlowGraphNaturalLoops* loops = FlowGraphNaturalLoops::Find(dfsTree); + + // fgMightHaveNaturalLoops being true does not necessarily mean there's still any more loops left. + if (loops->NumLoops() == 0) + { + JITDUMP("No natural loops found\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + + BlockToNaturalLoopMap* blockToLoop = BlockToNaturalLoopMap::Build(loops); + + optIdentifyLoopsForAlignment(loops, blockToLoop); + + // Add align only if there were any loops that needed alignment + if (loopAlignCandidates == 0) { - // Adding align instruction in prolog is not supported - // hence just remove that loop from our list. - fgFirstBB->unmarkLoopAlign(this DEBUG_ARG("prolog block")); - madeChanges = true; + JITDUMP("Found no candidates for loop alignment\n"); + return PhaseStatus::MODIFIED_NOTHING; } + JITDUMP("Inside placeLoopAlignInstructions for %d loop candidates.\n", loopAlignCandidates); + + bool madeChanges = false; + weight_t minBlockSoFar = BB_MAX_WEIGHT; + BasicBlock* bbHavingAlign = nullptr; + FlowGraphNaturalLoop* currentAlignedLoop = nullptr; + int loopsToProcess = loopAlignCandidates; for (BasicBlock* const block : Blocks()) { - if (currentAlignedLoopNum != BasicBlock::NOT_IN_LOOP) + FlowGraphNaturalLoop* loop = blockToLoop->GetLoop(block); + + if (currentAlignedLoop != nullptr) { // We've been processing blocks within an aligned loop. Are we out of that loop now? - if (currentAlignedLoopNum != block->bbNatLoopNum) + if (currentAlignedLoop != loop) { - currentAlignedLoopNum = BasicBlock::NOT_IN_LOOP; + currentAlignedLoop = nullptr; } } @@ -5294,7 +5479,7 @@ PhaseStatus Compiler::placeLoopAlignInstructions() // Track the lower weight blocks if (block->bbWeight < minBlockSoFar) { - if (currentAlignedLoopNum == BasicBlock::NOT_IN_LOOP) + if (currentAlignedLoop == nullptr) { // Ok to insert align instruction in this block because it is not part of any aligned loop. minBlockSoFar = block->bbWeight; @@ -5309,100 +5494,36 @@ PhaseStatus Compiler::placeLoopAlignInstructions() { // Loop alignment is disabled for cold blocks assert(!block->HasFlag(BBF_COLD)); - BasicBlock* const loopTop = block->Next(); - bool isSpecialCallFinally = block->isBBCallAlwaysPairTail(); - bool unmarkedLoopAlign = false; - -#if FEATURE_EH_CALLFINALLY_THUNKS - if (block->KindIs(BBJ_CALLFINALLY)) - { - // It must be a retless BBJ_CALLFINALLY if we get here. - assert(!block->isBBCallAlwaysPair()); - - // In the case of FEATURE_EH_CALLFINALLY_THUNKS, we can't put the align instruction in a retless - // BBJ_CALLFINALLY either, because it alters the "cloned finally" region reported to the VM. - // In the x86 case (the only !FEATURE_EH_CALLFINALLY_THUNKS that supports retless - // BBJ_CALLFINALLY), we allow it. - isSpecialCallFinally = true; - } -#endif // FEATURE_EH_CALLFINALLY_THUNKS + BasicBlock* const loopTop = block->Next(); - if (isSpecialCallFinally) + if (bbHavingAlign == nullptr) { - // There are two special cases: - // 1. If the block before the loop start is a retless BBJ_CALLFINALLY with - // FEATURE_EH_CALLFINALLY_THUNKS, we can't add alignment because it will affect reported EH - // region range. - // 2. If the previous block is the BBJ_ALWAYS of a BBJ_CALLFINALLY/BBJ_ALWAYS pair, then we - // can't add alignment because we can't add instructions in that block. In the - // FEATURE_EH_CALLFINALLY_THUNKS case, it would affect the reported EH, as above. - // Currently, we don't align loops for these cases. + // If jmp was not found, then block before the loop start is where align instruction will be added. - loopTop->unmarkLoopAlign(this DEBUG_ARG("block before loop is special callfinally/always block")); - madeChanges = true; - unmarkedLoopAlign = true; + bbHavingAlign = block; + JITDUMP("Marking " FMT_BB " before the loop with BBF_HAS_ALIGN for loop at " FMT_BB "\n", block->bbNum, + loopTop->bbNum); } - else if ((loopTop->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) && visitedLoopNum[loopTop->bbNatLoopNum]) + else { -#ifdef DEBUG - char buffer[100]; - sprintf_s(buffer, 100, "loop block " FMT_BB " appears before top of loop", - visitedBlockForLoopNum[loopTop->bbNatLoopNum]); -#endif - - // In some odd cases we may see blocks within the loop before we see the - // top block of the loop. Just bail on aligning such loops. - // - - loopTop->unmarkLoopAlign(this DEBUG_ARG(buffer)); - madeChanges = true; - unmarkedLoopAlign = true; + JITDUMP("Marking " FMT_BB " that ends with unconditional jump with BBF_HAS_ALIGN for loop at " FMT_BB + "\n", + bbHavingAlign->bbNum, loopTop->bbNum); } - if (!unmarkedLoopAlign) - { - if (bbHavingAlign == nullptr) - { - // If jmp was not found, then block before the loop start is where align instruction will be added. - - bbHavingAlign = block; - JITDUMP("Marking " FMT_BB " before the loop with BBF_HAS_ALIGN for loop at " FMT_BB "\n", - block->bbNum, loopTop->bbNum); - } - else - { - JITDUMP("Marking " FMT_BB - " that ends with unconditional jump with BBF_HAS_ALIGN for loop at " FMT_BB "\n", - bbHavingAlign->bbNum, loopTop->bbNum); - } - - madeChanges = true; - bbHavingAlign->SetFlags(BBF_HAS_ALIGN); - } + madeChanges = true; + bbHavingAlign->SetFlags(BBF_HAS_ALIGN); - minBlockSoFar = BB_MAX_WEIGHT; - bbHavingAlign = nullptr; - currentAlignedLoopNum = loopTop->bbNatLoopNum; + minBlockSoFar = BB_MAX_WEIGHT; + bbHavingAlign = nullptr; + currentAlignedLoop = blockToLoop->GetLoop(loopTop); + assert(currentAlignedLoop != nullptr); if (--loopsToProcess == 0) { break; } } - - if (block->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) - { -#ifdef DEBUG - if (!visitedLoopNum[block->bbNatLoopNum]) - { - // Record the first block for which bbNatLoopNum was seen for - // debugging purpose. - visitedBlockForLoopNum[block->bbNatLoopNum] = block->bbNum; - } -#endif - // If this block is part of loop, mark the loopNum as visited. - visitedLoopNum[block->bbNatLoopNum] = true; - } } assert(loopsToProcess == 0); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2d7d1e23357c51..b93a69e67c387e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4944,6 +4944,11 @@ class Compiler unsigned fgBBcountAtCodegen; // # of BBs in the method at the start of codegen jitstd::vector* fgBBOrder; // ordered vector of BBs #endif + // Used as a quick check for whether loop alignment should look for natural loops. + // If true: there may or may not be any natural loops in the flow graph, so try to find them + // If false: there's definitely not any natural loops in the flow graph + bool fgMightHaveNaturalLoops; + unsigned fgBBNumMax; // The max bbNum that has been assigned to basic blocks unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder @@ -5348,6 +5353,7 @@ class Compiler bool fgSimpleLowerCastOfSmpOp(LIR::Range& range, GenTreeCast* cast); #if FEATURE_LOOP_ALIGN + void optIdentifyLoopsForAlignment(FlowGraphNaturalLoops* loops, BlockToNaturalLoopMap* blockToLoop); PhaseStatus placeLoopAlignInstructions(); #endif @@ -7076,7 +7082,7 @@ class Compiler bool optLoopTableValid; // info in loop table should be valid bool optLoopsRequirePreHeaders; // Do we require that all loops (in the loop table) have pre-headers? unsigned char optLoopCount; // number of tracked loops - unsigned char loopAlignCandidates; // number of loops identified for alignment + unsigned loopAlignCandidates; // number of loops identified for alignment // Every time we rebuild the loop table, we increase the global "loop epoch". Any loop indices or // loop table pointers from the previous epoch are invalid. @@ -7140,8 +7146,6 @@ class Compiler void optFindNaturalLoops(); - void optIdentifyLoopsForAlignment(); - // Ensures that all the loops in the loop nest rooted at "loopInd" (an index into the loop table) are 'canonical' -- // each loop has a unique "top." Returns "true" iff the flowgraph has been modified. bool optCanonicalizeLoopNest(unsigned char loopInd); @@ -10150,7 +10154,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // based on experimenting with various benchmarks. // Default minimum loop block weight required to enable loop alignment. -#define DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT 4 +#define DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT 3 // By default a loop will be aligned at 32B address boundary to get better // performance as per architecture manuals. diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 8388afb3eaec6f..c36bf5656809ab 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -59,11 +59,12 @@ void Compiler::fgInit() fgBBOrder = nullptr; #endif // DEBUG - fgBBNumMax = 0; - fgEdgeCount = 0; - fgDomBBcount = 0; - fgBBVarSetsInited = false; - fgReturnCount = 0; + fgMightHaveNaturalLoops = false; + fgBBNumMax = 0; + fgEdgeCount = 0; + fgDomBBcount = 0; + fgBBVarSetsInited = false; + fgReturnCount = 0; m_dfsTree = nullptr; m_loops = nullptr; @@ -5464,9 +5465,6 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) block->SetFlags(BBF_REMOVED); } - // If this was marked for alignment, remove it - block->unmarkLoopAlign(this DEBUG_ARG("Removed block")); - if (bPrev != nullptr) { switch (bPrev->GetKind()) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 6cfa8b38c908c6..75d9ce7672905d 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -1947,13 +1947,6 @@ bool Compiler::fgCanCompactBlocks(BasicBlock* block, BasicBlock* bNext) } } - // We cannot compact a block that participates in loop alignment. - // - if ((bNext->countOfInEdges() > 1) && bNext->isLoopAlign()) - { - return false; - } - // Don't compact blocks from different loops. // if ((block->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) && (bNext->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) && @@ -2353,30 +2346,6 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext) assert(block->KindIs(bNext->GetKind())); - if (bNext->KindIs(BBJ_ALWAYS) && bNext->GetTarget()->isLoopAlign()) - { - // `bNext` has a backward target to some block which mean bNext is part of a loop. - // `block` into which `bNext` is compacted should be updated with its loop number - JITDUMP("Updating loop number for " FMT_BB " from " FMT_LP " to " FMT_LP ".\n", block->bbNum, - block->bbNatLoopNum, bNext->bbNatLoopNum); - block->bbNatLoopNum = bNext->bbNatLoopNum; - } - else if (bNext->KindIs(BBJ_COND) && bNext->GetTrueTarget()->isLoopAlign()) - { - // `bNext` has a backward target to some block which mean bNext is part of a loop. - // `block` into which `bNext` is compacted should be updated with its loop number - JITDUMP("Updating loop number for " FMT_BB " from " FMT_LP " to " FMT_LP ".\n", block->bbNum, - block->bbNatLoopNum, bNext->bbNatLoopNum); - block->bbNatLoopNum = bNext->bbNatLoopNum; - } - - if (bNext->isLoopAlign()) - { - block->SetFlags(BBF_LOOP_ALIGN); - JITDUMP("Propagating LOOP_ALIGN flag from " FMT_BB " to " FMT_BB " during compacting.\n", bNext->bbNum, - block->bbNum); - } - // If we're collapsing a block created after the dominators are // computed, copy block number the block and reuse dominator // information from bNext to block. @@ -6261,9 +6230,6 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication, bool isPhase) // Update the loop table if we removed the bottom of a loop, for example. fgUpdateLoopsAfterCompacting(block, bNext); - // If this block was aligned, unmark it - bNext->unmarkLoopAlign(this DEBUG_ARG("Optimized jump")); - // If this is the first Cold basic block update fgFirstColdBlock if (bNext->IsFirstColdBlock(this)) { diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index e7a176dab63122..6873aebe8f952b 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -3237,12 +3237,6 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock() } } - for (block = firstColdBlock; block != nullptr; block = block->Next()) - { - block->SetFlags(BBF_COLD); - block->unmarkLoopAlign(this DEBUG_ARG("Loop alignment disabled for cold blocks")); - } - EXIT:; #ifdef DEBUG diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index f1a027de6f0ed4..d9490685b5dafb 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2138,18 +2138,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex newBlk->scaleBBWeight(slowPathWeightScaleFactor); blk->scaleBBWeight(fastPathWeightScaleFactor); -// TODO: scale the pred edges of `blk`? - -#if FEATURE_LOOP_ALIGN - // If the original loop is aligned, do not align the cloned loop because cloned loop will be executed in - // rare scenario. Additionally, having to align cloned loop will force us to disable some VEX prefix encoding - // and adding compensation for over-estimated instructions. - if (newBlk->isLoopAlign()) - { - newBlk->RemoveFlags(BBF_LOOP_ALIGN); - JITDUMP("Removing LOOP_ALIGN flag from cloned loop in " FMT_BB "\n", newBlk->bbNum); - } -#endif + // TODO: scale the pred edges of `blk`? // If the loop we're cloning contains nested loops, we need to clear the pre-header bit on // any nested loop pre-header blocks, since they will no longer be loop pre-headers. (This is because diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a8e78a477411b3..b9b7b97d80416b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -13332,7 +13332,6 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) loopNum, loop.lpTop->bbNum, loop.lpBottom->bbNum); optMarkLoopRemoved(loopNum); - loop.lpTop->unmarkLoopAlign(this DEBUG_ARG("removed loop")); } } @@ -13346,7 +13345,6 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) loopNum, loop.lpTop->bbNum, loop.lpBottom->bbNum); optMarkLoopRemoved(loopNum); - loop.lpTop->unmarkLoopAlign(this DEBUG_ARG("removed loop")); } } } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 286b56ea997b14..145c98c64a88a5 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -373,8 +373,6 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) } JITDUMP("\n"); - - begBlk->unmarkLoopAlign(this DEBUG_ARG("Removed loop")); } /***************************************************************************************************** @@ -2638,50 +2636,6 @@ void Compiler::optFindNaturalLoops() #endif // DEBUG } -//------------------------------------------------------------------------ -// optIdentifyLoopsForAlignment: Determine which loops should be considered for alignment. -// -// All innermost loops whose block weight meets a threshold are candidates for alignment. -// The `first` block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this -// (the loop table itself is not changed). -// -// Depends on the loop table, and on block weights being set. -// -void Compiler::optIdentifyLoopsForAlignment() -{ -#if FEATURE_LOOP_ALIGN - if (codeGen->ShouldAlignLoops()) - { - for (BasicBlock::loopNumber loopInd = 0; loopInd < optLoopCount; loopInd++) - { - // An innerloop candidate that might need alignment - if (optLoopTable[loopInd].lpChild == BasicBlock::NOT_IN_LOOP) - { - BasicBlock* top = optLoopTable[loopInd].lpTop; - weight_t topWeight = top->getBBWeight(this); - if (topWeight >= (opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT)) - { - // Sometimes with JitOptRepeat > 1, we might end up finding the loops twice. In such - // cases, make sure to count them just once. - if (!top->isLoopAlign()) - { - loopAlignCandidates++; - top->SetFlags(BBF_LOOP_ALIGN); - JITDUMP(FMT_LP " that starts at " FMT_BB " needs alignment, weight=" FMT_WT ".\n", loopInd, - top->bbNum, top->getBBWeight(this)); - } - } - else - { - JITDUMP(";; Skip alignment for " FMT_LP " that starts at " FMT_BB " weight=" FMT_WT ".\n", loopInd, - top->bbNum, topWeight); - } - } - } - } -#endif -} - //------------------------------------------------------------------------ // optRedirectBlock: Replace the branch successors of a block based on a block map. // @@ -4345,13 +4299,6 @@ PhaseStatus Compiler::optUnrollLoops() #endif } -#if FEATURE_LOOP_ALIGN - for (BasicBlock* const block : loop.LoopBlocks()) - { - block->unmarkLoopAlign(this DEBUG_ARG("Unrolled loop")); - } -#endif - // Create the unrolled loop statement list. { // When unrolling a loop, that loop disappears (and will be removed from the loop table). Each unrolled @@ -5564,7 +5511,6 @@ void Compiler::optFindLoops() { optFindNaturalLoops(); optFindAndScaleGeneralLoopBlocks(); - optIdentifyLoopsForAlignment(); // Check if any of the loops need alignment } optLoopTableValid = true; @@ -5593,6 +5539,10 @@ void Compiler::optFindNewLoops() m_newToOldLoop = m_loops->NumLoops() == 0 ? nullptr : (new (this, CMK_Loops) LoopDsc*[m_loops->NumLoops()]{}); m_oldToNewLoop = new (this, CMK_Loops) FlowGraphNaturalLoop*[BasicBlock::MAX_LOOP_NUM]{}; + // Unnatural loops can quickly become natural if we manage to remove some + // edges, so be conservative here. + fgMightHaveNaturalLoops = (m_loops->NumLoops() > 0) || m_loops->HaveNonNaturalLoopCycles(); + for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder()) { BasicBlock* head = loop->GetHeader(); @@ -5607,6 +5557,7 @@ void Compiler::optFindNewLoops() assert(m_newToOldLoop[loop->GetIndex()] == nullptr); m_oldToNewLoop[head->bbNatLoopNum] = loop; m_newToOldLoop[loop->GetIndex()] = dsc; + head->SetFlags(BBF_OLD_LOOP_HEADER_QUIRK); } } @@ -8748,37 +8699,9 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk, FlowGraphNatura } } -// TODO-Quirk: Remove -static bool HasOldChildLoop(Compiler* comp, FlowGraphNaturalLoop* loop) -{ - for (FlowGraphNaturalLoop* child = loop->GetChild(); child != nullptr; child = child->GetSibling()) - { - if (comp->m_newToOldLoop[child->GetIndex()] != nullptr) - return true; - - if (HasOldChildLoop(comp, child)) - return true; - } - - return false; -} - // Marks the containsCall information to "loop" and any parent loops. void Compiler::AddContainsCallAllContainingLoops(FlowGraphNaturalLoop* loop) { - -#if FEATURE_LOOP_ALIGN - // If this is the inner most loop, reset the LOOP_ALIGN flag - // because a loop having call will not likely to benefit from - // alignment - if (!HasOldChildLoop(this, loop)) - { - BasicBlock* top = loop->GetLexicallyTopMostBlock(); - - top->unmarkLoopAlign(this DEBUG_ARG("Loop with call")); - } -#endif - do { m_loopSideEffects[loop->GetIndex()].ContainsCall = true;