@@ -5257,147 +5257,117 @@ static bool HasOldChildLoop(Compiler* comp, FlowGraphNaturalLoop* loop)
52575257}
52585258
52595259// ------------------------------------------------------------------------
5260- // optIdentifyLoopsForAlignment: Determine which loops should be considered for alignment .
5260+ // shouldAlignLoop: Check if it is legal and profitable to align a loop .
52615261//
52625262// Parameters:
5263- // loops - Identified natural loops
5264- // blockToLoop - Map from block back to its most-nested containing natural loop
5263+ // loop - The loop
5264+ // top - First block of the loop that appears in lexical order
5265+ //
5266+ // Returns:
5267+ // True if it is legal and profitable to align this loop.
52655268//
52665269// Remarks:
52675270// All innermost loops whose block weight meets a threshold are candidates for alignment.
5268- // The ` top` block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this
5271+ // The top block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this
52695272// (the loop table itself is not changed).
52705273//
52715274// Depends on the loop table, and on block weights being set.
52725275//
5273- // Sets `loopAlignCandidates` to the number of loop candidates for alignment.
5274- //
5275- void Compiler::optIdentifyLoopsForAlignment (FlowGraphNaturalLoops* loops, BlockToNaturalLoopMap* blockToLoop)
5276+ bool Compiler::shouldAlignLoop (FlowGraphNaturalLoop* loop, BasicBlock* top)
52765277{
5277- loopAlignCandidates = 0 ;
5278-
5279- // TODO-Cleanup: We cannot currently renumber blocks after LSRA, so we need
5280- // a side map for the lexically top most block here.
5281- // Since placeLoopAlignInstructions already does a lexical visit we can
5282- // merge it with the identification of candidates to make all of this
5283- // cheaper.
5284- BasicBlock** topMostBlocks = new (this , CMK_LoopOpt) BasicBlock*[loops->NumLoops ()]{};
5285- for (BasicBlock* block : Blocks ())
5278+ // TODO-Quirk: Remove. When removing we will likely need to add some
5279+ // form of "lexicality" heuristic here: only align loops whose blocks
5280+ // are fairly tightly packed together physically.
5281+ if (!loop->GetHeader ()->HasFlag (BBF_OLD_LOOP_HEADER_QUIRK))
52865282 {
5287- FlowGraphNaturalLoop* loop = blockToLoop->GetLoop (block);
5288- if (loop == nullptr )
5289- {
5290- continue ;
5291- }
5292-
5293- if (topMostBlocks[loop->GetIndex ()] == nullptr )
5294- {
5295- topMostBlocks[loop->GetIndex ()] = block;
5296- }
5283+ return false ;
52975284 }
52985285
5299- for (FlowGraphNaturalLoop* loop : loops->InReversePostOrder ())
5286+ // TODO-Quirk: Switch to loop->GetChild() != nullptr
5287+ if (HasOldChildLoop (this , loop))
53005288 {
5301- // TODO-Quirk: Remove. When removing we will likely need to add some
5302- // form of "lexicality" heuristic here: only align loops whose blocks
5303- // are fairly tightly packed together physically.
5304- if (!loop->GetHeader ()->HasFlag (BBF_OLD_LOOP_HEADER_QUIRK))
5305- {
5306- continue ;
5307- }
5289+ JITDUMP (" Skipping alignment for " FMT_LP " ; not an innermost loop\n " , loop->GetIndex ());
5290+ return false ;
5291+ }
53085292
5309- // TODO-Quirk: Switch to loop->GetChild() != nullptr
5310- if (HasOldChildLoop (this , loop))
5311- {
5312- JITDUMP (" Skipping alignment for " FMT_LP " ; not an innermost loop\n " , loop->GetIndex ());
5313- continue ;
5314- }
5293+ if (top == fgFirstBB)
5294+ {
5295+ // Adding align instruction in prolog is not supported.
5296+ // TODO: Insert an empty block before the loop, if want to align it, so we have a place to put
5297+ // the align instruction.
5298+ JITDUMP (" Skipping alignment for " FMT_LP " ; loop starts in first block\n " , loop->GetIndex ());
5299+ return false ;
5300+ }
53155301
5316- BasicBlock* top = topMostBlocks[loop->GetIndex ()];
5317- if (top == fgFirstBB)
5318- {
5319- // Adding align instruction in prolog is not supported.
5320- // TODO: Insert an empty block before the loop, if want to align it, so we have a place to put
5321- // the align instruction.
5322- JITDUMP (" Skipping alignment for " FMT_LP " ; loop starts in first block\n " , loop->GetIndex ());
5323- continue ;
5324- }
5302+ if (top->HasFlag (BBF_COLD))
5303+ {
5304+ JITDUMP (" Skipping alignment for cold loop " FMT_LP " \n " , loop->GetIndex ());
5305+ return false ;
5306+ }
53255307
5326- if (top->HasFlag (BBF_COLD))
5308+ bool hasCall = loop->VisitLoopBlocks ([](BasicBlock* block) {
5309+ for (GenTree* tree : LIR::AsRange (block))
53275310 {
5328- JITDUMP (" Skipping alignment for cold loop " FMT_LP " \n " , loop->GetIndex ());
5329- continue ;
5330- }
5331-
5332- bool hasCall = loop->VisitLoopBlocks ([](BasicBlock* block) {
5333- for (GenTree* tree : LIR::AsRange (block))
5311+ if (tree->IsCall ())
53345312 {
5335- if (tree->IsCall ())
5336- {
5337- return BasicBlockVisit::Abort;
5338- }
5313+ return BasicBlockVisit::Abort;
53395314 }
5315+ }
53405316
5341- return BasicBlockVisit::Continue;
5342- }) == BasicBlockVisit::Abort;
5317+ return BasicBlockVisit::Continue;
5318+ }) == BasicBlockVisit::Abort;
53435319
5344- if (hasCall)
5345- {
5346- // Heuristic: it is not valuable to align loops with calls.
5347- JITDUMP (" Skipping alignment for " FMT_LP " ; loop contains call\n " , loop->GetIndex ());
5348- continue ;
5349- }
5320+ if (hasCall)
5321+ {
5322+ // Heuristic: it is not valuable to align loops with calls.
5323+ JITDUMP (" Skipping alignment for " FMT_LP " ; loop contains call\n " , loop->GetIndex ());
5324+ return false ;
5325+ }
5326+
5327+ assert (!top->IsFirst ());
53505328
5351- if (!top->IsFirst ())
5352- {
53535329#if FEATURE_EH_CALLFINALLY_THUNKS
5354- if (top->Prev ()->KindIs (BBJ_CALLFINALLY))
5355- {
5356- // It must be a retless BBJ_CALLFINALLY if we get here.
5357- assert (!top->Prev ()->isBBCallFinallyPair ());
5330+ if (top->Prev ()->KindIs (BBJ_CALLFINALLY))
5331+ {
5332+ // It must be a retless BBJ_CALLFINALLY if we get here.
5333+ assert (!top->Prev ()->isBBCallFinallyPair ());
53585334
5359- // If the block before the loop start is a retless BBJ_CALLFINALLY
5360- // with FEATURE_EH_CALLFINALLY_THUNKS, we can't add alignment
5361- // because it will affect reported EH region range. For x86 (where
5362- // !FEATURE_EH_CALLFINALLY_THUNKS), we can allow this.
5335+ // If the block before the loop start is a retless BBJ_CALLFINALLY
5336+ // with FEATURE_EH_CALLFINALLY_THUNKS, we can't add alignment
5337+ // because it will affect reported EH region range. For x86 (where
5338+ // !FEATURE_EH_CALLFINALLY_THUNKS), we can allow this.
53635339
5364- JITDUMP (" Skipping alignment for " FMT_LP " ; its top block follows a CALLFINALLY block\n " ,
5365- loop->GetIndex ());
5366- continue ;
5367- }
5340+ JITDUMP (" Skipping alignment for " FMT_LP " ; its top block follows a CALLFINALLY block\n " , loop->GetIndex ());
5341+ return false ;
5342+ }
53685343#endif // FEATURE_EH_CALLFINALLY_THUNKS
53695344
5370- if (top->Prev ()->isBBCallFinallyPairTail ())
5371- {
5372- // If the previous block is the BBJ_CALLFINALLYRET of a
5373- // BBJ_CALLFINALLY/BBJ_CALLFINALLYRET pair, then we can't add alignment
5374- // because we can't add instructions in that block. In the
5375- // FEATURE_EH_CALLFINALLY_THUNKS case, it would affect the
5376- // reported EH, as above.
5377- JITDUMP (" Skipping alignment for " FMT_LP " ; its top block follows a CALLFINALLY/ALWAYS pair\n " ,
5378- loop->GetIndex ());
5379- continue ;
5380- }
5381- }
5345+ if (top->Prev ()->isBBCallFinallyPairTail ())
5346+ {
5347+ // If the previous block is the BBJ_ALWAYS of a
5348+ // BBJ_CALLFINALLY/BBJ_ALWAYS pair, then we can't add alignment
5349+ // because we can't add instructions in that block. In the
5350+ // FEATURE_EH_CALLFINALLY_THUNKS case, it would affect the
5351+ // reported EH, as above.
5352+ JITDUMP (" Skipping alignment for " FMT_LP " ; its top block follows a CALLFINALLY/ALWAYS pair\n " ,
5353+ loop->GetIndex ());
5354+ return false ;
5355+ }
53825356
5383- // Now we have an innerloop candidate that might need alignment
5357+ // Now we have an innerloop candidate that might need alignment
53845358
5385- weight_t topWeight = top->getBBWeight (this );
5386- weight_t compareWeight = opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT;
5387- if (topWeight >= compareWeight)
5388- {
5389- loopAlignCandidates++;
5390- assert (!top->isLoopAlign ());
5391- top->SetFlags (BBF_LOOP_ALIGN);
5392- JITDUMP (" Aligning " FMT_LP " that starts at " FMT_BB " , weight=" FMT_WT " >= " FMT_WT " .\n " ,
5393- loop->GetIndex (), top->bbNum , topWeight, compareWeight);
5394- }
5395- else
5396- {
5397- JITDUMP (" Skipping alignment for " FMT_LP " that starts at " FMT_BB " , weight=" FMT_WT " < " FMT_WT " .\n " ,
5398- loop->GetIndex (), top->bbNum , topWeight, compareWeight);
5399- }
5359+ weight_t topWeight = top->getBBWeight (this );
5360+ weight_t compareWeight = opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT;
5361+ if (topWeight < compareWeight)
5362+ {
5363+ JITDUMP (" Skipping alignment for " FMT_LP " that starts at " FMT_BB " , weight=" FMT_WT " < " FMT_WT " .\n " ,
5364+ loop->GetIndex (), top->bbNum , topWeight, compareWeight);
5365+ return false ;
54005366 }
5367+
5368+ JITDUMP (" Aligning " FMT_LP " that starts at " FMT_BB " , weight=" FMT_WT " >= " FMT_WT " .\n " , loop->GetIndex (),
5369+ top->bbNum , topWeight, compareWeight);
5370+ return true ;
54015371}
54025372
54035373// ------------------------------------------------------------------------
@@ -5453,94 +5423,78 @@ PhaseStatus Compiler::placeLoopAlignInstructions()
54535423
54545424 BlockToNaturalLoopMap* blockToLoop = BlockToNaturalLoopMap::Build (loops);
54555425
5456- optIdentifyLoopsForAlignment (loops, blockToLoop);
5426+ BitVecTraits loopTraits ((unsigned )loops->NumLoops (), this );
5427+ BitVec seenLoops (BitVecOps::MakeEmpty (&loopTraits));
5428+ BitVec alignedLoops (BitVecOps::MakeEmpty (&loopTraits));
54575429
5458- // Add align only if there were any loops that needed alignment
5459- if (loopAlignCandidates == 0 )
5460- {
5461- JITDUMP (" Found no candidates for loop alignment\n " );
5462- return PhaseStatus::MODIFIED_NOTHING;
5463- }
5464-
5465- JITDUMP (" Inside placeLoopAlignInstructions for %d loop candidates.\n " , loopAlignCandidates);
5466-
5467- bool madeChanges = false ;
5468- weight_t minBlockSoFar = BB_MAX_WEIGHT;
5469- BasicBlock* bbHavingAlign = nullptr ;
5470- FlowGraphNaturalLoop* currentAlignedLoop = nullptr ;
5471-
5472- int loopsToProcess = loopAlignCandidates;
5430+ bool madeChanges = false ;
5431+ weight_t minBlockSoFar = BB_MAX_WEIGHT;
5432+ BasicBlock* bbHavingAlign = nullptr ;
54735433
54745434 for (BasicBlock* const block : Blocks ())
54755435 {
54765436 FlowGraphNaturalLoop* loop = blockToLoop->GetLoop (block);
54775437
5478- if (currentAlignedLoop != nullptr )
5438+ // If this is the first block of a loop then see if we should align it
5439+ if ((loop != nullptr ) && BitVecOps::TryAddElemD (&loopTraits, seenLoops, loop->GetIndex ()) &&
5440+ shouldAlignLoop (loop, block))
54795441 {
5480- // We've been processing blocks within an aligned loop. Are we out of that loop now?
5481- if (currentAlignedLoop != loop)
5482- {
5483- currentAlignedLoop = nullptr ;
5484- }
5485- }
5486-
5487- // If there is an unconditional jump that won't be removed
5488- if (opts.compJitHideAlignBehindJmp && block->KindIs (BBJ_ALWAYS) && !block->CanRemoveJumpToNext (this ))
5489- {
5490- // Track the lower weight blocks
5491- if (block->bbWeight < minBlockSoFar)
5492- {
5493- if (currentAlignedLoop == nullptr )
5494- {
5495- // Ok to insert align instruction in this block because it is not part of any aligned loop.
5496- minBlockSoFar = block->bbWeight ;
5497- bbHavingAlign = block;
5498- JITDUMP (FMT_BB " , bbWeight=" FMT_WT " ends with unconditional 'jmp' \n " , block->bbNum ,
5499- block->bbWeight );
5500- }
5501- }
5502- }
5442+ block->SetFlags (BBF_LOOP_ALIGN);
5443+ BitVecOps::AddElemD (&loopTraits, alignedLoops, loop->GetIndex ());
5444+ INDEBUG (loopAlignCandidates++);
55035445
5504- if (!block->IsLast () && block->Next ()->isLoopAlign ())
5505- {
5506- // Loop alignment is disabled for cold blocks
5507- assert (!block->HasFlag (BBF_COLD));
5508- BasicBlock* const loopTop = block->Next ();
5446+ BasicBlock* prev = block->Prev ();
5447+ // shouldAlignLoop should have guaranteed these properties.
5448+ assert ((prev != nullptr ) && !prev->HasFlag (BBF_COLD));
55095449
55105450 if (bbHavingAlign == nullptr )
55115451 {
55125452 // If jmp was not found, then block before the loop start is where align instruction will be added.
55135453
5514- bbHavingAlign = block ;
5515- JITDUMP (" Marking " FMT_BB " before the loop with BBF_HAS_ALIGN for loop at " FMT_BB " \n " , block ->bbNum ,
5516- loopTop ->bbNum );
5454+ bbHavingAlign = prev ;
5455+ JITDUMP (" Marking " FMT_BB " before the loop with BBF_HAS_ALIGN for loop at " FMT_BB " \n " , prev ->bbNum ,
5456+ block ->bbNum );
55175457 }
55185458 else
55195459 {
55205460 JITDUMP (" Marking " FMT_BB " that ends with unconditional jump with BBF_HAS_ALIGN for loop at " FMT_BB
55215461 " \n " ,
5522- bbHavingAlign->bbNum , loopTop ->bbNum );
5462+ bbHavingAlign->bbNum , block ->bbNum );
55235463 }
55245464
55255465 madeChanges = true ;
55265466 bbHavingAlign->SetFlags (BBF_HAS_ALIGN);
55275467
5528- minBlockSoFar = BB_MAX_WEIGHT;
5529- bbHavingAlign = nullptr ;
5530- currentAlignedLoop = blockToLoop->GetLoop (loopTop);
5531- assert (currentAlignedLoop != nullptr );
5468+ minBlockSoFar = BB_MAX_WEIGHT;
5469+ bbHavingAlign = nullptr ;
55325470
5533- if (--loopsToProcess == 0 )
5471+ continue ;
5472+ }
5473+
5474+ // Otherwise check if this is a candidate block for placing alignment for a future loop.
5475+ // If there is an unconditional jump that won't be removed
5476+ if (opts.compJitHideAlignBehindJmp && block->KindIs (BBJ_ALWAYS) && !block->CanRemoveJumpToNext (this ))
5477+ {
5478+ // Track the lower weight blocks
5479+ if (block->bbWeight < minBlockSoFar)
55345480 {
5535- break ;
5481+ if ((loop == nullptr ) || !BitVecOps::IsMember (&loopTraits, alignedLoops, loop->GetIndex ()))
5482+ {
5483+ // Ok to insert align instruction in this block because it is not part of any aligned loop.
5484+ minBlockSoFar = block->bbWeight ;
5485+ bbHavingAlign = block;
5486+ JITDUMP (FMT_BB " , bbWeight=" FMT_WT " ends with unconditional 'jmp' \n " , block->bbNum ,
5487+ block->bbWeight );
5488+ }
55365489 }
55375490 }
55385491 }
55395492
5540- assert (loopsToProcess == 0 );
5493+ JITDUMP ( " Found %u candidates for loop alignment \n " , loopAlignCandidates );
55415494
55425495 return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
55435496}
5497+
55445498#endif
55455499
55465500// ------------------------------------------------------------------------
0 commit comments