Skip to content

Commit a5ff7e6

Browse files
authored
Intrinsify Interlocked.And and Interlocked.Or on XARCH (#96258)
1 parent 51f6d8d commit a5ff7e6

4 files changed

Lines changed: 75 additions & 11 deletions

File tree

src/coreclr/jit/codegenxarch.cpp

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2047,12 +2047,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
20472047

20482048
case GT_XCHG:
20492049
case GT_XADD:
2050-
genLockedInstructions(treeNode->AsOp());
2051-
break;
2052-
20532050
case GT_XORR:
20542051
case GT_XAND:
2055-
NYI("Interlocked.Or and Interlocked.And aren't implemented for x86 yet.");
2052+
genLockedInstructions(treeNode->AsOp());
20562053
break;
20572054

20582055
case GT_MEMORYBARRIER:
@@ -4413,7 +4410,7 @@ void CodeGen::genCodeForLockAdd(GenTreeOp* node)
44134410
//
44144411
void CodeGen::genLockedInstructions(GenTreeOp* node)
44154412
{
4416-
assert(node->OperIs(GT_XADD, GT_XCHG));
4413+
assert(node->OperIs(GT_XADD, GT_XCHG, GT_XORR, GT_XAND));
44174414

44184415
GenTree* addr = node->gtGetOp1();
44194416
GenTree* data = node->gtGetOp2();
@@ -4425,6 +4422,56 @@ void CodeGen::genLockedInstructions(GenTreeOp* node)
44254422

44264423
genConsumeOperands(node);
44274424

4425+
if (node->OperIs(GT_XORR, GT_XAND))
4426+
{
4427+
const instruction ins = node->OperIs(GT_XORR) ? INS_or : INS_and;
4428+
4429+
if (node->IsUnusedValue())
4430+
{
4431+
// If value is not used we can emit a short form:
4432+
//
4433+
// lock
4434+
// or/and dword ptr [addrReg], val
4435+
//
4436+
instGen(INS_lock);
4437+
GetEmitter()->emitIns_AR_R(ins, size, data->GetRegNum(), addr->GetRegNum(), 0);
4438+
}
4439+
else
4440+
{
4441+
// When value is used (it's the original value of the memory location)
4442+
// we fall back to the cmpxchg-loop idiom.
4443+
4444+
// for cmpxchg we need to keep the original value in RAX
4445+
assert(node->GetRegNum() == REG_RAX);
4446+
4447+
// mov RAX, dword ptr [addrReg]
4448+
//.LOOP:
4449+
// mov tmp, RAX
4450+
// or/and tmp, val
4451+
// lock
4452+
// cmpxchg dword ptr [addrReg], tmp
4453+
// jne .LOOP
4454+
// ret
4455+
4456+
// Extend liveness of addr
4457+
gcInfo.gcMarkRegPtrVal(addr->GetRegNum(), addr->TypeGet());
4458+
4459+
const regNumber tmpReg = node->GetSingleTempReg();
4460+
GetEmitter()->emitIns_R_AR(INS_mov, size, REG_RAX, addr->GetRegNum(), 0);
4461+
BasicBlock* loop = genCreateTempLabel();
4462+
genDefineTempLabel(loop);
4463+
GetEmitter()->emitIns_Mov(INS_mov, size, tmpReg, REG_RAX, false);
4464+
GetEmitter()->emitIns_R_R(ins, size, tmpReg, data->GetRegNum());
4465+
instGen(INS_lock);
4466+
GetEmitter()->emitIns_AR_R(INS_cmpxchg, size, tmpReg, addr->GetRegNum(), 0);
4467+
inst_JMP(EJ_jne, loop);
4468+
4469+
gcInfo.gcMarkRegSetNpt(genRegMask(addr->GetRegNum()));
4470+
genProduceReg(node);
4471+
}
4472+
return;
4473+
}
4474+
44284475
// If the destination register is different from the data register then we need
44294476
// to first move the data to the target register. Make sure we don't overwrite
44304477
// the address, the register allocator should have taken care of this.

src/coreclr/jit/importercalls.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3231,14 +3231,20 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
32313231
break;
32323232
}
32333233

3234-
#if defined(TARGET_ARM64) || defined(TARGET_RISCV64)
3235-
// Intrinsify Interlocked.Or and Interlocked.And only for arm64-v8.1 (and newer) and for RV64A
3236-
// TODO-CQ: Implement for XArch (https://github.com/dotnet/runtime/issues/32239).
3234+
#if defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_XARCH)
32373235
case NI_System_Threading_Interlocked_Or:
32383236
case NI_System_Threading_Interlocked_And:
32393237
{
3240-
ARM64_ONLY(if (compOpportunisticallyDependsOn(InstructionSet_Atomics)))
3238+
#if defined(TARGET_ARM64)
3239+
if (compOpportunisticallyDependsOn(InstructionSet_Atomics))
3240+
#endif
32413241
{
3242+
#if defined(TARGET_X86)
3243+
if (genActualType(callType) == TYP_LONG)
3244+
{
3245+
break;
3246+
}
3247+
#endif
32423248
assert(sig->numArgs == 2);
32433249
GenTree* op2 = impPopStack().val;
32443250
GenTree* op1 = impPopStack().val;

src/coreclr/jit/lower.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -640,8 +640,6 @@ GenTree* Lowering::LowerNode(GenTree* node)
640640
CheckImmedAndMakeContained(node, node->AsOp()->gtOp2);
641641
break;
642642
#elif defined(TARGET_XARCH)
643-
case GT_XORR:
644-
case GT_XAND:
645643
case GT_XADD:
646644
if (node->IsUnusedValue())
647645
{

src/coreclr/jit/lsraxarch.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,19 @@ int LinearScan::BuildNode(GenTree* tree)
436436

437437
case GT_XORR:
438438
case GT_XAND:
439+
if (!tree->IsUnusedValue())
440+
{
441+
// if tree's value is used, we'll emit a cmpxchg-loop idiom (requires RAX)
442+
buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX);
443+
BuildUse(tree->gtGetOp1(), availableIntRegs & ~RBM_RAX);
444+
BuildUse(tree->gtGetOp2(), availableIntRegs & ~RBM_RAX);
445+
BuildDef(tree, RBM_RAX);
446+
buildInternalRegisterUses();
447+
srcCount = 2;
448+
assert(dstCount == 1);
449+
break;
450+
}
451+
FALLTHROUGH;
439452
case GT_XADD:
440453
case GT_XCHG:
441454
{

0 commit comments

Comments
 (0)