Skip to content

Commit 7bc7693

Browse files
authored
Merge pull request #19280 from hrydgard/ir-interpreter-rounding
Implement FPU rounding mode support in the IR interpreter
2 parents 03b68fd + 06e636b commit 7bc7693

File tree

6 files changed

+74
-7
lines changed

6 files changed

+74
-7
lines changed

Core/HLE/sceKernel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ void sceKernelExitGame()
322322
__KernelSwitchOffThread("game exited");
323323
Core_Stop();
324324

325-
g_OSD.Show(OSDType::MESSAGE_INFO, "sceKernelExitGame()");
325+
g_OSD.Show(OSDType::MESSAGE_INFO, "sceKernelExitGame()", 0.0f, "kernelexit");
326326
}
327327

328328
void sceKernelExitGameWithStatus()

Core/MIPS/IR/IRCompFPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ void IRFrontend::Comp_mxc1(MIPSOpcode op) {
223223
// Set rounding mode
224224
RestoreRoundingMode();
225225
ir.Write(IROp::FpCtrlFromReg, 0, rt);
226+
// TODO: Do the UpdateRoundingMode check at runtime?
226227
UpdateRoundingMode();
227228
ApplyRoundingMode();
228229
} else {

Core/MIPS/IR/IRFrontend.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,9 @@ bool IRFrontend::CheckRounding(u32 blockAddress) {
133133
js.startDefaultPrefix = false;
134134
cleanSlate = true;
135135
}
136-
137136
return cleanSlate;
138137
}
139138

140-
141139
void IRFrontend::Comp_ReplacementFunc(MIPSOpcode op) {
142140
int index = op.encoding & MIPS_EMUHACK_VALUE_MASK;
143141

Core/MIPS/IR/IRInterpreter.cpp

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,64 @@ u32 IRRunMemCheck(u32 pc, u32 addr) {
8989
return coreState != CORE_RUNNING ? 1 : 0;
9090
}
9191

92+
// Applies the guest FPU control state held in mips->fcr31 to the host FPU control register.
//
// Only bits 0-1 (rounding mode) and bit 24 (flush-to-zero) of fcr31 are looked at. If all of
// them are zero, the host register is left untouched. Otherwise the host rounding mode and
// flush-to-zero bits are rewritten from fcr31, so the result does not depend on whatever
// rounding/FTZ state happened to be set on the host before the call.
//
// Implemented for SSE2 (MXCSR) and for ARM64 on non-Windows platforms (FPCR); on any other
// target no host register is written.
void IRApplyRounding(MIPSState *mips) {
	const u32 fcr31Bits = mips->fcr31 & 0x01000003;
	// If these are 0, we just leave things as they are.
	if (!fcr31Bits) {
		return;
	}

	int rmode = fcr31Bits & 3;
	const bool ftz = (fcr31Bits & 0x01000000) != 0;
#if PPSSPP_ARCH(SSE2)
	// Clear the rounding mode bits (13-14) and the flush-to-zero bit (15) together, so a stale
	// FTZ bit can't survive when fcr31 asks for a rounding mode without flush-to-zero.
	u32 csr = _mm_getcsr() & ~0xE000u;
	// Translate the rounding mode bits to X86, the same way as in Asm.cpp.
	// This swaps modes 1 and 3 and leaves 0 and 2 alone.
	if (rmode & 1) {
		rmode ^= 2;
	}
	csr |= (u32)rmode << 13;

	if (ftz) {
		// Flush to zero
		csr |= 0x8000;
	}
	_mm_setcsr(csr);
#elif PPSSPP_ARCH(ARM64) && !PPSSPP_PLATFORM(WINDOWS)
	// On ARM64 we need to use inline assembly for a portable solution.
	// Unfortunately we don't have this possibility on Windows with MSVC, so ifdeffed out above.
	// Note that in the JIT, for fcvts, we use specific conversions. We could use the FCVTS variants
	// directly through inline assembly.
	u64 fpcr;  // not really 64-bit, just to match the register size.
	asm volatile ("mrs %0, fpcr" : "=r" (fpcr));

	// Translate MIPS to ARM rounding mode
	static const u8 lookup[4] = {0, 3, 1, 2};

	// Clear bits [23:22] (rounding mode) and bit 24 (flush-to-zero) before applying the new state.
	fpcr &= ~((u64)7 << 22);
	fpcr |= (u64)lookup[rmode] << 22;

	if (ftz) {
		fpcr |= (u64)1 << 24;
	}
	// Write back the modified FPCR
	asm volatile ("msr fpcr, %0" : : "r" (fpcr));
#endif
}
133+
134+
// Resets the host FPU control register to its default rounding behavior by clearing the
// rounding mode and flush-to-zero bits. Implemented for SSE2 (MXCSR) and for ARM64 on
// non-Windows platforms (FPCR); on any other target no host register is written.
void IRRestoreRounding() {
#if PPSSPP_ARCH(SSE2)
	// TODO: We should avoid this if we didn't apply rounding in the first place.
	// In the meantime, clear out FTZ and rounding mode bits (MXCSR bits 13-15).
	_mm_setcsr(_mm_getcsr() & ~(7 << 13));
#elif PPSSPP_ARCH(ARM64) && !PPSSPP_PLATFORM(WINDOWS)
	// The value isn't really 64-bit; the variable is only that wide to match the register size.
	u64 control;
	asm volatile ("mrs %0, fpcr" : "=r" (control));
	// Bits [23:22] hold the rounding mode, bit 24 is flush-to-zero.
	control &= ~(7 << 22);
	// Store the cleaned-up value back into FPCR.
	asm volatile ("msr fpcr, %0" : : "r" (control));
#endif
}
149+
92150
// We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64.
93151
u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
94152
while (true) {
@@ -565,9 +623,11 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
565623
}
566624
break;
567625

568-
// Not quickly implementable on all platforms, unfortunately.
569626
case IROp::Vec4Dot:
570627
{
628+
// Not quickly implementable on all platforms, unfortunately.
629+
// Though, this is still pretty fast compared to one split into multiple IR instructions.
630+
// This might be good though: https://stackoverflow.com/a/17004629
571631
float dot = mips->f[inst->src1] * mips->f[inst->src2];
572632
for (int i = 1; i < 4; i++)
573633
dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
@@ -826,9 +886,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
826886
mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], -1.0f, 1.0f);
827887
break;
828888

829-
// Bitwise trickery
830889
case IROp::FSign:
831890
{
891+
// Bitwise trickery
832892
u32 val;
833893
memcpy(&val, &mips->f[inst->src1], sizeof(u32));
834894
if (val == 0 || val == 0x80000000)
@@ -956,6 +1016,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
9561016
mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL;
9571017
break;
9581018
}
1019+
// TODO: Using inline assembly here would be better.
9591020
switch (IRRoundMode(mips->fcr31 & 3)) {
9601021
case IRRoundMode::RINT_0: mips->fs[inst->dest] = (int)round_ieee_754(src); break;
9611022
case IRRoundMode::CAST_1: mips->fs[inst->dest] = (int)src; break;
@@ -1097,10 +1158,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
10971158
break;
10981159

10991160
case IROp::ApplyRoundingMode:
1100-
// TODO: Implement
1161+
IRApplyRounding(mips);
11011162
break;
11021163
case IROp::RestoreRoundingMode:
1103-
// TODO: Implement
1164+
IRRestoreRounding();
11041165
break;
11051166
case IROp::UpdateRoundingMode:
11061167
// TODO: Implement

Core/MIPS/IR/IRInterpreter.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ u32 IRRunBreakpoint(u32 pc);
1111
u32 IRRunMemCheck(u32 pc, u32 addr);
1212
u32 IRInterpret(MIPSState *ms, const IRInst *inst);
1313

14+
void IRApplyRounding();
15+
void IRRestoreRounding();
16+
1417
template <uint32_t alignment>
1518
u32 RunValidateAddress(u32 pc, u32 addr, u32 isWrite) {
1619
const auto toss = [&](MemoryExceptionType t) {

android/jni/TestRunner.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ bool TestsAvailable() {
6666
// Hack to easily run the tests on Windows from the submodule
6767
if (File::IsDirectory(Path("../pspautotests"))) {
6868
testDirectory = Path("..");
69+
} else if (File::IsDirectory(Path("pspautotests"))) {
70+
testDirectory = Path(".");
6971
}
7072
return File::Exists(testDirectory / "pspautotests" / "tests");
7173
}
@@ -80,6 +82,8 @@ bool RunTests() {
8082
// Hack to easily run the tests on Windows from the submodule
8183
if (File::IsDirectory(Path("../pspautotests"))) {
8284
baseDirectory = Path("..");
85+
} else if (File::IsDirectory(Path("pspautotests"))) {
86+
baseDirectory = Path(".");
8387
}
8488
#endif
8589

0 commit comments

Comments
 (0)