Skip to content

Commit e565285

Browse files
authored
JIT: Further clean up loop alignment placement (#95917)
Merge the candidate identification and marking into one single lexical pass over the basic block. Use a couple of bit vectors to track which loops we have seen blocks of (to know when we've found the top most blocks), and to know which loops we have decided to align.
1 parent 8269482 commit e565285

3 files changed

Lines changed: 127 additions & 174 deletions

File tree

src/coreclr/jit/compiler.cpp

Lines changed: 120 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -5257,147 +5257,117 @@ static bool HasOldChildLoop(Compiler* comp, FlowGraphNaturalLoop* loop)
52575257
}
52585258

52595259
//------------------------------------------------------------------------
5260-
// optIdentifyLoopsForAlignment: Determine which loops should be considered for alignment.
5260+
// shouldAlignLoop: Check if it is legal and profitable to align a loop.
52615261
//
52625262
// Parameters:
5263-
// loops - Identified natural loops
5264-
// blockToLoop - Map from block back to its most-nested containing natural loop
5263+
// loop - The loop
5264+
// top - First block of the loop that appears in lexical order
5265+
//
5266+
// Returns:
5267+
// True if it is legal and profitable to align this loop.
52655268
//
52665269
// Remarks:
52675270
// All innermost loops whose block weight meets a threshold are candidates for alignment.
5268-
// The `top` block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this
5271+
// The top block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this
52695272
// (the loop table itself is not changed).
52705273
//
52715274
// Depends on the loop table, and on block weights being set.
52725275
//
5273-
// Sets `loopAlignCandidates` to the number of loop candidates for alignment.
5274-
//
5275-
void Compiler::optIdentifyLoopsForAlignment(FlowGraphNaturalLoops* loops, BlockToNaturalLoopMap* blockToLoop)
5276+
bool Compiler::shouldAlignLoop(FlowGraphNaturalLoop* loop, BasicBlock* top)
52765277
{
5277-
loopAlignCandidates = 0;
5278-
5279-
// TODO-Cleanup: We cannot currently renumber blocks after LSRA, so we need
5280-
// a side map for the lexically top most block here.
5281-
// Since placeLoopAlignInstructions already does a lexical visit we can
5282-
// merge it with the identification of candidates to make all of this
5283-
// cheaper.
5284-
BasicBlock** topMostBlocks = new (this, CMK_LoopOpt) BasicBlock*[loops->NumLoops()]{};
5285-
for (BasicBlock* block : Blocks())
5278+
// TODO-Quirk: Remove. When removing we will likely need to add some
5279+
// form of "lexicality" heuristic here: only align loops whose blocks
5280+
// are fairly tightly packed together physically.
5281+
if (!loop->GetHeader()->HasFlag(BBF_OLD_LOOP_HEADER_QUIRK))
52865282
{
5287-
FlowGraphNaturalLoop* loop = blockToLoop->GetLoop(block);
5288-
if (loop == nullptr)
5289-
{
5290-
continue;
5291-
}
5292-
5293-
if (topMostBlocks[loop->GetIndex()] == nullptr)
5294-
{
5295-
topMostBlocks[loop->GetIndex()] = block;
5296-
}
5283+
return false;
52975284
}
52985285

5299-
for (FlowGraphNaturalLoop* loop : loops->InReversePostOrder())
5286+
// TODO-Quirk: Switch to loop->GetChild() != nullptr
5287+
if (HasOldChildLoop(this, loop))
53005288
{
5301-
// TODO-Quirk: Remove. When removing we will likely need to add some
5302-
// form of "lexicality" heuristic here: only align loops whose blocks
5303-
// are fairly tightly packed together physically.
5304-
if (!loop->GetHeader()->HasFlag(BBF_OLD_LOOP_HEADER_QUIRK))
5305-
{
5306-
continue;
5307-
}
5289+
JITDUMP("Skipping alignment for " FMT_LP "; not an innermost loop\n", loop->GetIndex());
5290+
return false;
5291+
}
53085292

5309-
// TODO-Quirk: Switch to loop->GetChild() != nullptr
5310-
if (HasOldChildLoop(this, loop))
5311-
{
5312-
JITDUMP("Skipping alignment for " FMT_LP "; not an innermost loop\n", loop->GetIndex());
5313-
continue;
5314-
}
5293+
if (top == fgFirstBB)
5294+
{
5295+
// Adding align instruction in prolog is not supported.
5296+
// TODO: Insert an empty block before the loop, if want to align it, so we have a place to put
5297+
// the align instruction.
5298+
JITDUMP("Skipping alignment for " FMT_LP "; loop starts in first block\n", loop->GetIndex());
5299+
return false;
5300+
}
53155301

5316-
BasicBlock* top = topMostBlocks[loop->GetIndex()];
5317-
if (top == fgFirstBB)
5318-
{
5319-
// Adding align instruction in prolog is not supported.
5320-
// TODO: Insert an empty block before the loop, if want to align it, so we have a place to put
5321-
// the align instruction.
5322-
JITDUMP("Skipping alignment for " FMT_LP "; loop starts in first block\n", loop->GetIndex());
5323-
continue;
5324-
}
5302+
if (top->HasFlag(BBF_COLD))
5303+
{
5304+
JITDUMP("Skipping alignment for cold loop " FMT_LP "\n", loop->GetIndex());
5305+
return false;
5306+
}
53255307

5326-
if (top->HasFlag(BBF_COLD))
5308+
bool hasCall = loop->VisitLoopBlocks([](BasicBlock* block) {
5309+
for (GenTree* tree : LIR::AsRange(block))
53275310
{
5328-
JITDUMP("Skipping alignment for cold loop " FMT_LP "\n", loop->GetIndex());
5329-
continue;
5330-
}
5331-
5332-
bool hasCall = loop->VisitLoopBlocks([](BasicBlock* block) {
5333-
for (GenTree* tree : LIR::AsRange(block))
5311+
if (tree->IsCall())
53345312
{
5335-
if (tree->IsCall())
5336-
{
5337-
return BasicBlockVisit::Abort;
5338-
}
5313+
return BasicBlockVisit::Abort;
53395314
}
5315+
}
53405316

5341-
return BasicBlockVisit::Continue;
5342-
}) == BasicBlockVisit::Abort;
5317+
return BasicBlockVisit::Continue;
5318+
}) == BasicBlockVisit::Abort;
53435319

5344-
if (hasCall)
5345-
{
5346-
// Heuristic: it is not valuable to align loops with calls.
5347-
JITDUMP("Skipping alignment for " FMT_LP "; loop contains call\n", loop->GetIndex());
5348-
continue;
5349-
}
5320+
if (hasCall)
5321+
{
5322+
// Heuristic: it is not valuable to align loops with calls.
5323+
JITDUMP("Skipping alignment for " FMT_LP "; loop contains call\n", loop->GetIndex());
5324+
return false;
5325+
}
5326+
5327+
assert(!top->IsFirst());
53505328

5351-
if (!top->IsFirst())
5352-
{
53535329
#if FEATURE_EH_CALLFINALLY_THUNKS
5354-
if (top->Prev()->KindIs(BBJ_CALLFINALLY))
5355-
{
5356-
// It must be a retless BBJ_CALLFINALLY if we get here.
5357-
assert(!top->Prev()->isBBCallFinallyPair());
5330+
if (top->Prev()->KindIs(BBJ_CALLFINALLY))
5331+
{
5332+
// It must be a retless BBJ_CALLFINALLY if we get here.
5333+
assert(!top->Prev()->isBBCallFinallyPair());
53585334

5359-
// If the block before the loop start is a retless BBJ_CALLFINALLY
5360-
// with FEATURE_EH_CALLFINALLY_THUNKS, we can't add alignment
5361-
// because it will affect reported EH region range. For x86 (where
5362-
// !FEATURE_EH_CALLFINALLY_THUNKS), we can allow this.
5335+
// If the block before the loop start is a retless BBJ_CALLFINALLY
5336+
// with FEATURE_EH_CALLFINALLY_THUNKS, we can't add alignment
5337+
// because it will affect reported EH region range. For x86 (where
5338+
// !FEATURE_EH_CALLFINALLY_THUNKS), we can allow this.
53635339

5364-
JITDUMP("Skipping alignment for " FMT_LP "; its top block follows a CALLFINALLY block\n",
5365-
loop->GetIndex());
5366-
continue;
5367-
}
5340+
JITDUMP("Skipping alignment for " FMT_LP "; its top block follows a CALLFINALLY block\n", loop->GetIndex());
5341+
return false;
5342+
}
53685343
#endif // FEATURE_EH_CALLFINALLY_THUNKS
53695344

5370-
if (top->Prev()->isBBCallFinallyPairTail())
5371-
{
5372-
// If the previous block is the BBJ_CALLFINALLYRET of a
5373-
// BBJ_CALLFINALLY/BBJ_CALLFINALLYRET pair, then we can't add alignment
5374-
// because we can't add instructions in that block. In the
5375-
// FEATURE_EH_CALLFINALLY_THUNKS case, it would affect the
5376-
// reported EH, as above.
5377-
JITDUMP("Skipping alignment for " FMT_LP "; its top block follows a CALLFINALLY/ALWAYS pair\n",
5378-
loop->GetIndex());
5379-
continue;
5380-
}
5381-
}
5345+
if (top->Prev()->isBBCallFinallyPairTail())
5346+
{
5347+
// If the previous block is the BBJ_ALWAYS of a
5348+
// BBJ_CALLFINALLY/BBJ_ALWAYS pair, then we can't add alignment
5349+
// because we can't add instructions in that block. In the
5350+
// FEATURE_EH_CALLFINALLY_THUNKS case, it would affect the
5351+
// reported EH, as above.
5352+
JITDUMP("Skipping alignment for " FMT_LP "; its top block follows a CALLFINALLY/ALWAYS pair\n",
5353+
loop->GetIndex());
5354+
return false;
5355+
}
53825356

5383-
// Now we have an innerloop candidate that might need alignment
5357+
// Now we have an innerloop candidate that might need alignment
53845358

5385-
weight_t topWeight = top->getBBWeight(this);
5386-
weight_t compareWeight = opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT;
5387-
if (topWeight >= compareWeight)
5388-
{
5389-
loopAlignCandidates++;
5390-
assert(!top->isLoopAlign());
5391-
top->SetFlags(BBF_LOOP_ALIGN);
5392-
JITDUMP("Aligning " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " >= " FMT_WT ".\n",
5393-
loop->GetIndex(), top->bbNum, topWeight, compareWeight);
5394-
}
5395-
else
5396-
{
5397-
JITDUMP("Skipping alignment for " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " < " FMT_WT ".\n",
5398-
loop->GetIndex(), top->bbNum, topWeight, compareWeight);
5399-
}
5359+
weight_t topWeight = top->getBBWeight(this);
5360+
weight_t compareWeight = opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT;
5361+
if (topWeight < compareWeight)
5362+
{
5363+
JITDUMP("Skipping alignment for " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " < " FMT_WT ".\n",
5364+
loop->GetIndex(), top->bbNum, topWeight, compareWeight);
5365+
return false;
54005366
}
5367+
5368+
JITDUMP("Aligning " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " >= " FMT_WT ".\n", loop->GetIndex(),
5369+
top->bbNum, topWeight, compareWeight);
5370+
return true;
54015371
}
54025372

54035373
//------------------------------------------------------------------------
@@ -5453,94 +5423,78 @@ PhaseStatus Compiler::placeLoopAlignInstructions()
54535423

54545424
BlockToNaturalLoopMap* blockToLoop = BlockToNaturalLoopMap::Build(loops);
54555425

5456-
optIdentifyLoopsForAlignment(loops, blockToLoop);
5426+
BitVecTraits loopTraits((unsigned)loops->NumLoops(), this);
5427+
BitVec seenLoops(BitVecOps::MakeEmpty(&loopTraits));
5428+
BitVec alignedLoops(BitVecOps::MakeEmpty(&loopTraits));
54575429

5458-
// Add align only if there were any loops that needed alignment
5459-
if (loopAlignCandidates == 0)
5460-
{
5461-
JITDUMP("Found no candidates for loop alignment\n");
5462-
return PhaseStatus::MODIFIED_NOTHING;
5463-
}
5464-
5465-
JITDUMP("Inside placeLoopAlignInstructions for %d loop candidates.\n", loopAlignCandidates);
5466-
5467-
bool madeChanges = false;
5468-
weight_t minBlockSoFar = BB_MAX_WEIGHT;
5469-
BasicBlock* bbHavingAlign = nullptr;
5470-
FlowGraphNaturalLoop* currentAlignedLoop = nullptr;
5471-
5472-
int loopsToProcess = loopAlignCandidates;
5430+
bool madeChanges = false;
5431+
weight_t minBlockSoFar = BB_MAX_WEIGHT;
5432+
BasicBlock* bbHavingAlign = nullptr;
54735433

54745434
for (BasicBlock* const block : Blocks())
54755435
{
54765436
FlowGraphNaturalLoop* loop = blockToLoop->GetLoop(block);
54775437

5478-
if (currentAlignedLoop != nullptr)
5438+
// If this is the first block of a loop then see if we should align it
5439+
if ((loop != nullptr) && BitVecOps::TryAddElemD(&loopTraits, seenLoops, loop->GetIndex()) &&
5440+
shouldAlignLoop(loop, block))
54795441
{
5480-
// We've been processing blocks within an aligned loop. Are we out of that loop now?
5481-
if (currentAlignedLoop != loop)
5482-
{
5483-
currentAlignedLoop = nullptr;
5484-
}
5485-
}
5486-
5487-
// If there is an unconditional jump that won't be removed
5488-
if (opts.compJitHideAlignBehindJmp && block->KindIs(BBJ_ALWAYS) && !block->CanRemoveJumpToNext(this))
5489-
{
5490-
// Track the lower weight blocks
5491-
if (block->bbWeight < minBlockSoFar)
5492-
{
5493-
if (currentAlignedLoop == nullptr)
5494-
{
5495-
// Ok to insert align instruction in this block because it is not part of any aligned loop.
5496-
minBlockSoFar = block->bbWeight;
5497-
bbHavingAlign = block;
5498-
JITDUMP(FMT_BB ", bbWeight=" FMT_WT " ends with unconditional 'jmp' \n", block->bbNum,
5499-
block->bbWeight);
5500-
}
5501-
}
5502-
}
5442+
block->SetFlags(BBF_LOOP_ALIGN);
5443+
BitVecOps::AddElemD(&loopTraits, alignedLoops, loop->GetIndex());
5444+
INDEBUG(loopAlignCandidates++);
55035445

5504-
if (!block->IsLast() && block->Next()->isLoopAlign())
5505-
{
5506-
// Loop alignment is disabled for cold blocks
5507-
assert(!block->HasFlag(BBF_COLD));
5508-
BasicBlock* const loopTop = block->Next();
5446+
BasicBlock* prev = block->Prev();
5447+
// shouldAlignLoop should have guaranteed these properties.
5448+
assert((prev != nullptr) && !prev->HasFlag(BBF_COLD));
55095449

55105450
if (bbHavingAlign == nullptr)
55115451
{
55125452
// If jmp was not found, then block before the loop start is where align instruction will be added.
55135453

5514-
bbHavingAlign = block;
5515-
JITDUMP("Marking " FMT_BB " before the loop with BBF_HAS_ALIGN for loop at " FMT_BB "\n", block->bbNum,
5516-
loopTop->bbNum);
5454+
bbHavingAlign = prev;
5455+
JITDUMP("Marking " FMT_BB " before the loop with BBF_HAS_ALIGN for loop at " FMT_BB "\n", prev->bbNum,
5456+
block->bbNum);
55175457
}
55185458
else
55195459
{
55205460
JITDUMP("Marking " FMT_BB " that ends with unconditional jump with BBF_HAS_ALIGN for loop at " FMT_BB
55215461
"\n",
5522-
bbHavingAlign->bbNum, loopTop->bbNum);
5462+
bbHavingAlign->bbNum, block->bbNum);
55235463
}
55245464

55255465
madeChanges = true;
55265466
bbHavingAlign->SetFlags(BBF_HAS_ALIGN);
55275467

5528-
minBlockSoFar = BB_MAX_WEIGHT;
5529-
bbHavingAlign = nullptr;
5530-
currentAlignedLoop = blockToLoop->GetLoop(loopTop);
5531-
assert(currentAlignedLoop != nullptr);
5468+
minBlockSoFar = BB_MAX_WEIGHT;
5469+
bbHavingAlign = nullptr;
55325470

5533-
if (--loopsToProcess == 0)
5471+
continue;
5472+
}
5473+
5474+
// Otherwise check if this is a candidate block for placing alignment for a future loop.
5475+
// If there is an unconditional jump that won't be removed
5476+
if (opts.compJitHideAlignBehindJmp && block->KindIs(BBJ_ALWAYS) && !block->CanRemoveJumpToNext(this))
5477+
{
5478+
// Track the lower weight blocks
5479+
if (block->bbWeight < minBlockSoFar)
55345480
{
5535-
break;
5481+
if ((loop == nullptr) || !BitVecOps::IsMember(&loopTraits, alignedLoops, loop->GetIndex()))
5482+
{
5483+
// Ok to insert align instruction in this block because it is not part of any aligned loop.
5484+
minBlockSoFar = block->bbWeight;
5485+
bbHavingAlign = block;
5486+
JITDUMP(FMT_BB ", bbWeight=" FMT_WT " ends with unconditional 'jmp' \n", block->bbNum,
5487+
block->bbWeight);
5488+
}
55365489
}
55375490
}
55385491
}
55395492

5540-
assert(loopsToProcess == 0);
5493+
JITDUMP("Found %u candidates for loop alignment\n", loopAlignCandidates);
55415494

55425495
return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
55435496
}
5497+
55445498
#endif
55455499

55465500
//------------------------------------------------------------------------

src/coreclr/jit/compiler.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5375,7 +5375,7 @@ class Compiler
53755375
bool fgSimpleLowerCastOfSmpOp(LIR::Range& range, GenTreeCast* cast);
53765376

53775377
#if FEATURE_LOOP_ALIGN
5378-
void optIdentifyLoopsForAlignment(FlowGraphNaturalLoops* loops, BlockToNaturalLoopMap* blockToLoop);
5378+
bool shouldAlignLoop(FlowGraphNaturalLoop* loop, BasicBlock* top);
53795379
PhaseStatus placeLoopAlignInstructions();
53805380
#endif
53815381

@@ -7104,7 +7104,6 @@ class Compiler
71047104
bool optLoopTableValid; // info in loop table should be valid
71057105
bool optLoopsRequirePreHeaders; // Do we require that all loops (in the loop table) have pre-headers?
71067106
unsigned char optLoopCount; // number of tracked loops
7107-
unsigned loopAlignCandidates; // number of loops identified for alignment
71087107

71097108
// Every time we rebuild the loop table, we increase the global "loop epoch". Any loop indices or
71107109
// loop table pointers from the previous epoch are invalid.
@@ -7118,7 +7117,8 @@ class Compiler
71187117
}
71197118

71207119
#ifdef DEBUG
7121-
unsigned char loopsAligned; // number of loops actually aligned
7120+
unsigned loopAlignCandidates; // number of candidates identified by placeLoopAlignInstructions
7121+
unsigned loopsAligned; // number of loops actually aligned
71227122
#endif // DEBUG
71237123

71247124
bool optRecordLoop(BasicBlock* head,

0 commit comments

Comments
 (0)