@@ -89,6 +89,64 @@ u32 IRRunMemCheck(u32 pc, u32 addr) {
8989 return coreState != CORE_RUNNING ? 1 : 0 ;
9090}
9191
92+ void IRApplyRounding (MIPSState *mips) {
93+ u32 fcr1Bits = mips->fcr31 & 0x01000003 ;
94+ // If these are 0, we just leave things as they are.
95+ if (fcr1Bits) {
96+ int rmode = fcr1Bits & 3 ;
97+ bool ftz = (fcr1Bits & 0x01000000 ) != 0 ;
98+ #if PPSSPP_ARCH(SSE2)
99+ u32 csr = _mm_getcsr () & ~0x6000 ;
100+ // Translate the rounding mode bits to X86, the same way as in Asm.cpp.
101+ if (rmode & 1 ) {
102+ rmode ^= 2 ;
103+ }
104+ csr |= rmode << 13 ;
105+
106+ if (ftz) {
107+ // Flush to zero
108+ csr |= 0x8000 ;
109+ }
110+ _mm_setcsr (csr);
111+ #elif PPSSPP_ARCH(ARM64) && !PPSSPP_PLATFORM(WINDOWS)
112+ // On ARM64 we need to use inline assembly for a portable solution.
113+ // Unfortunately we don't have this possibility on Windows with MSVC, so ifdeffed out above.
114+ // Note that in the JIT, for fcvts, we use specific conversions. We could use the FCVTS variants
115+ // directly through inline assembly.
116+ u64 fpcr; // not really 64-bit, just to match the register size.
117+ asm volatile (" mrs %0, fpcr" : " =r" (fpcr));
118+
119+ // Translate MIPS to ARM rounding mode
120+ static const u8 lookup[4 ] = {0 , 3 , 1 , 2 };
121+
122+ fpcr &= ~(3 << 22 ); // Clear bits [23:22]
123+ fpcr |= (lookup[rmode] << 22 );
124+
125+ if (ftz) {
126+ fpcr |= 1 << 24 ;
127+ }
128+ // Write back the modified FPCR
129+ asm volatile (" msr fpcr, %0" : : " r" (fpcr));
130+ #endif
131+ }
132+ }
133+
134+ void IRRestoreRounding () {
135+ #if PPSSPP_ARCH(SSE2)
136+ // TODO: We should avoid this if we didn't apply rounding in the first place.
137+ // In the meantime, clear out FTZ and rounding mode bits.
138+ u32 csr = _mm_getcsr ();
139+ csr &= ~(7 << 13 );
140+ _mm_setcsr (csr);
141+ #elif PPSSPP_ARCH(ARM64) && !PPSSPP_PLATFORM(WINDOWS)
142+ u64 fpcr; // not really 64-bit, just to match the regsiter size.
143+ asm volatile (" mrs %0, fpcr" : " =r" (fpcr));
144+ fpcr &= ~(7 << 22 ); // Clear bits [23:22] for rounding, 24 for FTZ
145+ // Write back the modified FPCR
146+ asm volatile (" msr fpcr, %0" : : " r" (fpcr));
147+ #endif
148+ }
149+
92150// We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64.
93151u32 IRInterpret (MIPSState *mips, const IRInst *inst) {
94152 while (true ) {
@@ -565,9 +623,11 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
565623 }
566624 break ;
567625
568- // Not quickly implementable on all platforms, unfortunately.
569626 case IROp::Vec4Dot:
570627 {
628+ // Not quickly implementable on all platforms, unfortunately.
629+ // Though, this is still pretty fast compared to one split into multiple IR instructions.
630+ // This might be good though: https://stackoverflow.com/a/17004629
571631 float dot = mips->f [inst->src1 ] * mips->f [inst->src2 ];
572632 for (int i = 1 ; i < 4 ; i++)
573633 dot += mips->f [inst->src1 + i] * mips->f [inst->src2 + i];
@@ -826,9 +886,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
826886 mips->f [inst->dest ] = vfpu_clamp (mips->f [inst->src1 ], -1 .0f , 1 .0f );
827887 break ;
828888
829- // Bitwise trickery
830889 case IROp::FSign:
831890 {
891+ // Bitwise trickery
832892 u32 val;
833893 memcpy (&val, &mips->f [inst->src1 ], sizeof (u32 ));
834894 if (val == 0 || val == 0x80000000 )
@@ -956,6 +1016,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
9561016 mips->fs [inst->dest ] = my_isinf (src) && src < 0 .0f ? -2147483648LL : 2147483647LL ;
9571017 break ;
9581018 }
1019+ // TODO: Inline assembly to use here would be better.
9591020 switch (IRRoundMode (mips->fcr31 & 3 )) {
9601021 case IRRoundMode::RINT_0: mips->fs [inst->dest ] = (int )round_ieee_754 (src); break ;
9611022 case IRRoundMode::CAST_1: mips->fs [inst->dest ] = (int )src; break ;
@@ -1097,10 +1158,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
10971158 break ;
10981159
10991160 case IROp::ApplyRoundingMode:
1100- // TODO: Implement
1161+ IRApplyRounding (mips);
11011162 break ;
11021163 case IROp::RestoreRoundingMode:
1103- // TODO: Implement
1164+ IRRestoreRounding ();
11041165 break ;
11051166 case IROp::UpdateRoundingMode:
11061167 // TODO: Implement
0 commit comments