From c81da30538bf7ff749b857dfb9659a51062c1766 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sun, 10 Nov 2024 15:55:26 -0500 Subject: [PATCH] GXPerf OK --- configure.py | 4 +-- include/dolphin/gx/GX.h | 22 +++++++++--- src/dolphin/gx/GXBump.c | 4 +-- src/dolphin/gx/GXFifo.c | 16 ++++----- src/dolphin/gx/GXInit.c | 80 ++++++++++------------------------------- src/dolphin/gx/GXPerf.c | 29 +++++++-------- src/dolphin/gx/GXTev.c | 44 +++++++++++------------ 7 files changed, 85 insertions(+), 114 deletions(-) diff --git a/configure.py b/configure.py index 770a1f6d..82db35ad 100644 --- a/configure.py +++ b/configure.py @@ -375,7 +375,7 @@ def MatchingFor(*versions): Object(Matching, "f_ap/f_ap_game.cpp"), # f_op - Object(Matching, "f_op/f_op_actor.cpp", extra_cflags=['-pragma "nosyminline on"']), + Object(Matching, "f_op/f_op_actor.cpp", extra_cflags=["-sym off"]), Object(Matching, "f_op/f_op_actor_iter.cpp"), Object(Matching, "f_op/f_op_actor_tag.cpp"), Object(Matching, "f_op/f_op_actor_mng.cpp", extra_cflags=['-pragma "nosyminline on"']), @@ -1165,7 +1165,7 @@ def MatchingFor(*versions): Object(NonMatching, "dolphin/gx/GXStubs.c"), Object(NonMatching, "dolphin/gx/GXDisplayList.c"), Object(NonMatching, "dolphin/gx/GXTransform.c", extra_cflags=["-fp_contract off"]), - Object(NonMatching, "dolphin/gx/GXPerf.c"), + Object(Matching, "dolphin/gx/GXPerf.c"), ], ), DolphinLib( diff --git a/include/dolphin/gx/GX.h b/include/dolphin/gx/GX.h index ffc57848..dbd7f0345 100644 --- a/include/dolphin/gx/GX.h +++ b/include/dolphin/gx/GX.h @@ -40,10 +40,8 @@ extern "C" { (reg) = ((u32) (reg) & ~(((1 << (nbits)) - 1) << (shift))) | \ ((u32) (value) << (shift)); -#define FAST_FLAG_SET(regOrg, newFlag, shift, size) \ - do { \ - (regOrg) = (u32)__rlwimi((int)(regOrg), (int)(newFlag), (shift), (32 - (shift) - (size)), (31 - (shift))); \ - } while (0); +#define SET_REG_FIELD(reg, size, shift, val) \ + (reg) = ((u32)(reg) & ~(((1 << (size)) - 1) << (shift))) | ((u32)(val) << (shift)); \ #define GX_LOAD_BP_REG 0x61 #define GX_NOP 0 @@ -287,6 +285,22 @@ do { \ regAddr = addr; \ } while (0) +static inline u32 __GXReadCPCounterU32(u32 regAddrL, u32 regAddrH) { + u32 ctrH0; + u32 ctrH1; + u32 ctrL; + + ctrH0 = GX_GET_CP_REG(regAddrH); + + do { + ctrH1 = ctrH0; + ctrL = GX_GET_CP_REG(regAddrL); + ctrH0 = GX_GET_CP_REG(regAddrH); + } while (ctrH0 != ctrH1); + + return (ctrH0 << 0x10) | ctrL; +} + #ifdef __cplusplus }; #endif diff --git a/src/dolphin/gx/GXBump.c b/src/dolphin/gx/GXBump.c index 8e0c8e44..1d7aa812 100644 --- a/src/dolphin/gx/GXBump.c +++ b/src/dolphin/gx/GXBump.c @@ -10,7 +10,6 @@ void GXSetTevIndirect(GXTevStageID tevStage, GXIndTexStageID texStage, GXIndTexF GXIndTexBiasSel biasSel, GXIndTexMtxID mtxID, GXIndTexWrap wrapS, GXIndTexWrap wrapT, u8 addPrev, u8 utcLod, GXIndTexAlphaSel alphaSel) { u32 field = 0; - u32 stage = tevStage + 0x10; GX_BITFIELD_SET(field, 30, 2, texStage); GX_BITFIELD_SET(field, 28, 2, texFmt); @@ -21,7 +20,7 @@ void GXSetTevIndirect(GXTevStageID tevStage, GXIndTexStageID texStage, GXIndTexF GX_BITFIELD_SET(field, 13, 3, wrapT); GX_BITFIELD_SET(field, 12, 1, utcLod); GX_BITFIELD_SET(field, 11, 1, addPrev); - GX_BITFIELD_SET(field, 0, 8, stage); + GX_BITFIELD_SET(field, 0, 8, tevStage + 0x10); GXFIFO.u8 = 0x61; GXFIFO.s32 = field; @@ -33,6 +32,7 @@ void GXSetIndTexMtx(GXIndTexMtxID mtxID, f32 offset[6], s8 scale_exp) { u32 val; u32 field; f32 mtx2[6]; + u32 stack_padding[6]; scale_exp += 17; diff --git a/src/dolphin/gx/GXFifo.c b/src/dolphin/gx/GXFifo.c index 6cf71457..13e33336 100644 --- a/src/dolphin/gx/GXFifo.c +++ b/src/dolphin/gx/GXFifo.c @@ -37,7 +37,7 @@ static void GXUnderflowHandler() { static void GXBreakPointHandler(OSContext* context) { OSContext bpContext; - FAST_FLAG_SET(gx->cpEnable, 0, 5, 1); + SET_REG_FIELD(gx->cpEnable, 1, 5, 2); GX_SET_CP_REG(1, gx->cpEnable); if (BreakPointCB) { @@ -233,12 +233,12 @@ void __GXFifoInit(void) { } void __GXFifoReadEnable(void) { - FAST_FLAG_SET(gx->cpEnable, 1, 0, 1); + SET_REG_FIELD(gx->cpEnable, 1, 0, 2); GX_SET_CP_REG(1, gx->cpEnable); } void __GXFifoReadDisable(void) { - FAST_FLAG_SET(gx->cpEnable, 0, 0, 1); + SET_REG_FIELD(gx->cpEnable, 1, 0, 2); GX_SET_CP_REG(1, gx->cpEnable); } @@ -249,19 +249,19 @@ void __GXFifoLink(u8 link) { } else { b = 0; } - FAST_FLAG_SET(gx->cpEnable, b, 4, 1); + SET_REG_FIELD(gx->cpEnable, 1, 4, 2); GX_SET_CP_REG(1, gx->cpEnable); } void __GXWriteFifoIntEnable(u32 p1, u32 p2) { - FAST_FLAG_SET(gx->cpEnable, p1, 2, 1); - FAST_FLAG_SET(gx->cpEnable, (u8)p2, 3, 1); + SET_REG_FIELD(gx->cpEnable, 1, 2, 2); + SET_REG_FIELD(gx->cpEnable, 1, 3, 2); GX_SET_CP_REG(1, gx->cpEnable); } void __GXWriteFifoIntReset(u32 p1, u32 p2) { - FAST_FLAG_SET(gx->cpClr, p1, 0, 1); - FAST_FLAG_SET(gx->cpClr, (u8)p2, 1, 1); + SET_REG_FIELD(gx->cpClr, 1, 0, 2); + SET_REG_FIELD(gx->cpClr, 1, 1, 2); GX_SET_CP_REG(2, gx->cpClr); } diff --git a/src/dolphin/gx/GXInit.c b/src/dolphin/gx/GXInit.c index db232c22..0ee66d1b 100644 --- a/src/dolphin/gx/GXInit.c +++ b/src/dolphin/gx/GXInit.c @@ -48,48 +48,6 @@ u16* __cpReg; /* ############################################################################################## */ u32* __piReg; -inline void __GXInitRevisionBits(void) { - u32 i; - for (i = 0; i < 8; i++) { - FAST_FLAG_SET(gx->vatA[i], 1, 30, 33); - FAST_FLAG_SET(gx->vatB[i], 1, 31, 33); - - GXFIFO.u8 = 0x8; - GXFIFO.u8 = i | 0x80; - GXFIFO.u32 = gx->vatB[i]; - } - - { - u32 reg1 = 0; - u32 reg2 = 0; - - FAST_FLAG_SET(reg1, 1, 0, 1); - FAST_FLAG_SET(reg1, 1, 1, 1); - FAST_FLAG_SET(reg1, 1, 2, 1); - FAST_FLAG_SET(reg1, 1, 3, 1); - FAST_FLAG_SET(reg1, 1, 4, 1); - FAST_FLAG_SET(reg1, 1, 5, 1); - GXFIFO.u8 = 0x10; - GXFIFO.u32 = 0x1000; - GXFIFO.u32 = reg1; - - FAST_FLAG_SET(reg2, 1, 0, 1); - GXFIFO.u8 = 0x10; - GXFIFO.u32 = 0x1012; - GXFIFO.u32 = reg2; - } - - { - u32 reg = 0; - FAST_FLAG_SET(reg, 1, 0, 1); - FAST_FLAG_SET(reg, 1, 1, 1); - FAST_FLAG_SET(reg, 1, 2, 1); - FAST_FLAG_SET(reg, 1, 3, 1); - FAST_FLAG_SET(reg, 0x58, 24, 8); - GFWriteBPCmd(reg); - } -} - static u16 DefaultTexData[] ALIGN_DECL(32) = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, @@ -157,13 +115,13 @@ GXFifoObj* GXInit(void* base, u32 size) { EnableWriteGatherPipe(); gx->genMode = 0; - FAST_FLAG_SET(gx->genMode, 0, 24, 8); + SET_REG_FIELD(gx->genMode, 8, 24, 2); gx->bpMask = 255; - FAST_FLAG_SET(gx->bpMask, 0xF, 24, 8); + SET_REG_FIELD(gx->bpMask, 8, 24, 2); gx->lpSize = 0; - FAST_FLAG_SET(gx->lpSize, 34, 24, 8); + SET_REG_FIELD(gx->lpSize, 8, 24, 2); for (i = 0; i < GX_MAX_TEVSTAGE; i++) { gx->tevc[i] = 0; @@ -171,33 +129,33 @@ GXFifoObj* GXInit(void* base, u32 size) { gx->tref[i / 2] = 0; gx->texmapId[i] = GX_TEXMAP_NULL; - FAST_FLAG_SET(gx->tevc[i], 0xC0 + i * 2, 24, 8); - FAST_FLAG_SET(gx->teva[i], 0xC1 + i * 2, 24, 8); - FAST_FLAG_SET(gx->tevKsel[i / 2], 0xF6 + i / 2, 24, 8); - FAST_FLAG_SET(gx->tref[i / 2], 0x28 + i / 2, 24, 8); + SET_REG_FIELD(gx->tevc[i], 8, 24, 2); + SET_REG_FIELD(gx->teva[i], 8, 24, 2); + SET_REG_FIELD(gx->tevKsel[i / 2], 8, 24, 2); + SET_REG_FIELD(gx->tref[i / 2], 8, 24, 2); } gx->iref = 0; - FAST_FLAG_SET(gx->iref, 0x27, 24, 8); + SET_REG_FIELD(gx->iref, 8, 24, 2); for (i = 0; i < GX_MAXCOORD; i++) { gx->suTs0[i] = 0; gx->suTs1[i] = 0; - FAST_FLAG_SET(gx->suTs0[i], 0x30 + i * 2, 24, 8); - FAST_FLAG_SET(gx->suTs1[i], 0x31 + i * 2, 24, 8); + SET_REG_FIELD(gx->suTs0[i], 8, 24, 2); + SET_REG_FIELD(gx->suTs1[i], 8, 24, 2); } - FAST_FLAG_SET(gx->suScis0, 0x20, 24, 8); - FAST_FLAG_SET(gx->suScis1, 0x21, 24, 8); + SET_REG_FIELD(gx->suScis0, 8, 24, 2); + SET_REG_FIELD(gx->suScis1, 8, 24, 2); - FAST_FLAG_SET(gx->cmode0, 0x41, 24, 8); - FAST_FLAG_SET(gx->cmode1, 0x42, 24, 8); + SET_REG_FIELD(gx->cmode0, 8, 24, 2); + SET_REG_FIELD(gx->cmode1, 8, 24, 2); - FAST_FLAG_SET(gx->zmode, 0x40, 24, 8); - FAST_FLAG_SET(gx->peCtrl, 0x43, 24, 8); + SET_REG_FIELD(gx->zmode, 8, 24, 2); + SET_REG_FIELD(gx->peCtrl, 8, 24, 2); - FAST_FLAG_SET(gx->cpTex, 0, 7, 2); + SET_REG_FIELD(gx->cpTex, 2, 7, 2); gx->zScale = 1.6777216E7f; gx->zOffset = 0.0f; @@ -223,8 +181,6 @@ GXFifoObj* GXInit(void* base, u32 size) { GFWriteBPCmd(val1); } - __GXInitRevisionBits(); - for (i = 0; i < GX_MAX_TEXMAP; i++) { GXInitTexCacheRegion(&gx->TexRegions0[i], GX_FALSE, GXTexRegionAddrTable[i], GX_TEXCACHE_32K, GXTexRegionAddrTable[i + 8], GX_TEXCACHE_32K); @@ -244,7 +200,7 @@ GXFifoObj* GXInit(void* base, u32 size) { GX_SET_CP_REG(3, 0); - FAST_FLAG_SET(gx->perfSel, 0, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 2); GXFIFO.u8 = 0x8; GXFIFO.u8 = 0x20; diff --git a/src/dolphin/gx/GXPerf.c b/src/dolphin/gx/GXPerf.c index 5ff6d6a5..bb7cf584 100644 --- a/src/dolphin/gx/GXPerf.c +++ b/src/dolphin/gx/GXPerf.c @@ -75,7 +75,7 @@ void GXSetGPMetric(GXPerf0 perf0, GXPerf1 perf1) { case GX_PERF1_VC_STREAMBUF_LOW: case GX_PERF1_VC_ALL_STALLS: case GX_PERF1_VERTICES: - FAST_FLAG_SET(gx->perfSel, 0, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 0); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; @@ -266,49 +266,49 @@ void GXSetGPMetric(GXPerf0 perf0, GXPerf1 perf1) { break; case GX_PERF1_VC_ELEMQ_FULL: - FAST_FLAG_SET(gx->perfSel, 2, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 2); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; break; case GX_PERF1_VC_MISSQ_FULL: - FAST_FLAG_SET(gx->perfSel, 3, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 3); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; break; case GX_PERF1_VC_MEMREQ_FULL: - FAST_FLAG_SET(gx->perfSel, 4, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 4); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; break; case GX_PERF1_VC_STATUS7: - FAST_FLAG_SET(gx->perfSel, 5, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 5); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; break; case GX_PERF1_VC_MISSREP_FULL: - FAST_FLAG_SET(gx->perfSel, 6, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 6); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; break; case GX_PERF1_VC_STREAMBUF_LOW: - FAST_FLAG_SET(gx->perfSel, 7, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 7); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; break; case GX_PERF1_VC_ALL_STALLS: - FAST_FLAG_SET(gx->perfSel, 9, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 9); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; break; case GX_PERF1_VERTICES: - FAST_FLAG_SET(gx->perfSel, 8, 4, 4); + SET_REG_FIELD(gx->perfSel, 4, 4, 8); GXFIFO.u8 = 8; GXFIFO.u8 = 32; GXFIFO.u32 = gx->perfSel; @@ -339,10 +339,11 @@ void GXClearGPMetric(void) { } #pragma scheduling off -void GXReadXfRasMetric(u32* xfWaitIn, u32* xfWaitOut, u32* rasBusy, u32* clocks) { - // *rasBusy = GXReadCPReg(32, 33); - // *clocks = GXReadCPReg(34, 35); - // *xfWaitIn = GXReadCPReg(36, 37); - // *xfWaitOut = GXReadCPReg(38, 39); +void GXReadXfRasMetric(u32 *xf_wait_in, u32 *xf_wait_out, u32 *ras_busy, u32 *clocks) +{ + *ras_busy = __GXReadCPCounterU32(32, 33); + *clocks = __GXReadCPCounterU32(34, 35); + *xf_wait_in = __GXReadCPCounterU32(36, 37); + *xf_wait_out = __GXReadCPCounterU32(38, 39); } #pragma scheduling reset diff --git a/src/dolphin/gx/GXTev.c b/src/dolphin/gx/GXTev.c index b3c2946a..7ede4e9a 100644 --- a/src/dolphin/gx/GXTev.c +++ b/src/dolphin/gx/GXTev.c @@ -70,10 +70,10 @@ void GXSetTevColorIn(GXTevStageID stage, GXTevColorArg a, GXTevColorArg b, GXTev tevReg = gx->tevc[stage]; - FAST_FLAG_SET(tevReg, a, 12, 4); - FAST_FLAG_SET(tevReg, b, 8, 4); - FAST_FLAG_SET(tevReg, c, 4, 4); - FAST_FLAG_SET(tevReg, d, 0, 4); + SET_REG_FIELD(tevReg, 4, 12, 2); + SET_REG_FIELD(tevReg, 4, 8, 2); + SET_REG_FIELD(tevReg, 4, 4, 2); + SET_REG_FIELD(tevReg, 4, 0, 2); GFWriteBPCmd(tevReg); @@ -87,10 +87,10 @@ void GXSetTevAlphaIn(GXTevStageID stage, GXTevAlphaArg a, GXTevAlphaArg b, GXTev tevReg = gx->teva[stage]; - FAST_FLAG_SET(tevReg, a, 13, 3); - FAST_FLAG_SET(tevReg, b, 10, 3); - FAST_FLAG_SET(tevReg, c, 7, 3); - FAST_FLAG_SET(tevReg, d, 4, 3); + SET_REG_FIELD(tevReg, 3, 13, 2); + SET_REG_FIELD(tevReg, 3, 10, 2); + SET_REG_FIELD(tevReg, 3, 7, 2); + SET_REG_FIELD(tevReg, 3, 4, 2); GFWriteBPCmd(tevReg); @@ -103,18 +103,18 @@ void GXSetTevColorOp(GXTevStageID stage, GXTevOp op, GXTevBias bias, GXTevScale u32 tevReg; tevReg = gx->tevc[stage]; - FAST_FLAG_SET(tevReg, op & 1, 18, 1); + SET_REG_FIELD(tevReg, 1, 18, 2); if (op <= GX_TEV_SUB) { - FAST_FLAG_SET(tevReg, scale, 20, 2); - FAST_FLAG_SET(tevReg, bias, 16, 2); + SET_REG_FIELD(tevReg, 2, 20, 2); + SET_REG_FIELD(tevReg, 2, 16, 2); } else { - FAST_FLAG_SET(tevReg, (op >> 1) & 3, 20, 2); - FAST_FLAG_SET(tevReg, 3, 16, 2); + SET_REG_FIELD(tevReg, 2, 20, 2); + SET_REG_FIELD(tevReg, 2, 16, 2); } - FAST_FLAG_SET(tevReg, doClamp, 19, 1); - FAST_FLAG_SET(tevReg, outReg, 22, 2); + SET_REG_FIELD(tevReg, 1, 19, 2); + SET_REG_FIELD(tevReg, 2, 22, 2); GFWriteBPCmd(tevReg); gx->tevc[stage] = tevReg; @@ -126,18 +126,18 @@ void GXSetTevAlphaOp(GXTevStageID stage, GXTevOp op, GXTevBias bias, GXTevScale u32 tevReg; tevReg = gx->teva[stage]; - FAST_FLAG_SET(tevReg, op & 1, 18, 1); + SET_REG_FIELD(tevReg, 1, 18, 2); if (op <= GX_TEV_SUB) { - FAST_FLAG_SET(tevReg, scale, 20, 2); - FAST_FLAG_SET(tevReg, bias, 16, 2); + SET_REG_FIELD(tevReg, 2, 20, 2); + SET_REG_FIELD(tevReg, 2, 16, 2); } else { - FAST_FLAG_SET(tevReg, (op >> 1) & 3, 20, 2); - FAST_FLAG_SET(tevReg, 3, 16, 2); + SET_REG_FIELD(tevReg, 2, 20, 2); + SET_REG_FIELD(tevReg, 2, 16, 2); } - FAST_FLAG_SET(tevReg, doClamp, 19, 1); - FAST_FLAG_SET(tevReg, outReg, 22, 2); + SET_REG_FIELD(tevReg, 1, 19, 2); + SET_REG_FIELD(tevReg, 2, 22, 2); GFWriteBPCmd(tevReg); gx->teva[stage] = tevReg;