diff options
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom_interpreter.cpp | 218 | ||||
-rw-r--r-- | src/core/arm/interpreter/armemu.cpp | 93 | ||||
-rw-r--r-- | src/core/arm/interpreter/armsupp.cpp | 50 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/armdefs.h | 6 |
4 files changed, 319 insertions, 48 deletions
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 9b355fc6e..4cd8fe6ac 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -1427,6 +1427,13 @@ typedef struct _blx_1_thumb { unsigned int instr; }blx_1_thumb; +typedef struct _pkh_inst { + u32 Rm; + u32 Rn; + u32 Rd; + u8 imm; +} pkh_inst; + typedef arm_inst * ARM_INST_PTR; #define CACHE_BUFFER_SIZE (64 * 1024 * 2000) @@ -2376,8 +2383,30 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(orr)(unsigned int inst, int index) } return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(pkhbt)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("PKHBT"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(pkhtb)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("PKHTB"); } + +ARM_INST_PTR INTERPRETER_TRANSLATE(pkhbt)(unsigned int inst, int index) +{ + arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(pkh_inst)); + pkh_inst *inst_cream = (pkh_inst *)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->Rd = BITS(inst, 12, 15); + inst_cream->Rn = BITS(inst, 16, 19); + inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->imm = BITS(inst, 7, 11); + + return inst_base; +} + +ARM_INST_PTR INTERPRETER_TRANSLATE(pkhtb)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(pkhbt)(inst, index); +} + ARM_INST_PTR INTERPRETER_TRANSLATE(pld)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(pld_inst)); @@ -3249,14 +3278,66 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(blx_1_thumb)(unsigned int tinst, int index) return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(uqadd16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UQADD16"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(uqadd8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UQADD8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(uqaddsubx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UQADDSUBX"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(uqsub16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UQSUB16"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(uqsub8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UQSUB8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(uqsubaddx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UQSUBADDX"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(usad8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("USAD8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(usada8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("USADA8"); } +ARM_INST_PTR INTERPRETER_TRANSLATE(uqadd8)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->Rn = BITS(inst, 16, 19); + inst_cream->Rd = BITS(inst, 12, 15); + inst_cream->op1 = BITS(inst, 20, 21); + inst_cream->op2 = BITS(inst, 5, 7); + + return inst_base; +} +ARM_INST_PTR INTERPRETER_TRANSLATE(uqadd16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(uqadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(uqaddsubx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(uqadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(uqsub8)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(uqadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(uqsub16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(uqadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(uqsubaddx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(uqadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(usada8)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->op1 = BITS(inst, 20, 24); + inst_cream->op2 = BITS(inst, 5, 7); + inst_cream->Rm = BITS(inst, 8, 11); + inst_cream->Rn = BITS(inst, 0, 3); + inst_cream->Ra = BITS(inst, 12, 15); + + return inst_base; +} +ARM_INST_PTR INTERPRETER_TRANSLATE(usad8)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(usada8)(inst, index); +} ARM_INST_PTR INTERPRETER_TRANSLATE(usat)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("USAT"); } ARM_INST_PTR INTERPRETER_TRANSLATE(usat16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("USAT16"); } ARM_INST_PTR INTERPRETER_TRANSLATE(usub16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("USUB16"); } @@ -5607,8 +5688,34 @@ unsigned InterpreterMainLoop(ARMul_State* state) FETCH_INST; GOTO_NEXT_INST; } + PKHBT_INST: + { + INC_ICOUNTER; + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + pkh_inst *inst_cream = (pkh_inst *)inst_base->component; + RD = (RN & 0xFFFF) | ((RM << inst_cream->imm) & 0xFFFF0000); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(pkh_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + PKHTB_INST: + { + INC_ICOUNTER; + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + pkh_inst *inst_cream = (pkh_inst *)inst_base->component; + int shift_imm = inst_cream->imm ? inst_cream->imm : 31; + RD = ((static_cast<s32>(RM) >> shift_imm) & 0xFFFF) | (RN & 0xFFFF0000); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(pkh_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + PLD_INST: { INC_ICOUNTER; @@ -6876,14 +6983,101 @@ unsigned InterpreterMainLoop(ARMul_State* state) goto DISPATCH; } - UQADD16_INST: UQADD8_INST: + UQADD16_INST: UQADDSUBX_INST: - UQSUB16_INST: UQSUB8_INST: + UQSUB16_INST: UQSUBADDX_INST: + { + INC_ICOUNTER; + + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + const u8 op2 = inst_cream->op2; + const u32 rm_val = RM; + const u32 rn_val = RN; + + u16 lo_val = 0; + u16 hi_val = 0; + + // UQADD16 + if (op2 == 0x00) { + lo_val = ARMul_UnsignedSaturatedAdd16(rn_val & 0xFFFF, rm_val & 0xFFFF); + hi_val = ARMul_UnsignedSaturatedAdd16((rn_val >> 16) & 0xFFFF, (rm_val >> 16) & 0xFFFF); + } + // UQASX + else if (op2 == 0x01) { + lo_val = ARMul_UnsignedSaturatedSub16(rn_val & 0xFFFF, (rm_val >> 16) & 0xFFFF); + hi_val = ARMul_UnsignedSaturatedAdd16((rn_val >> 16) & 0xFFFF, rm_val & 0xFFFF); + } + // UQSAX + else if (op2 == 0x02) { + lo_val = ARMul_UnsignedSaturatedAdd16(rn_val & 0xFFFF, (rm_val >> 16) & 0xFFFF); + hi_val = ARMul_UnsignedSaturatedSub16((rn_val >> 16) & 0xFFFF, rm_val & 0xFFFF); + } + // UQSUB16 + else if (op2 == 0x03) { + lo_val = ARMul_UnsignedSaturatedSub16(rn_val & 0xFFFF, rm_val & 0xFFFF); + hi_val = ARMul_UnsignedSaturatedSub16((rn_val >> 16) & 0xFFFF, (rm_val >> 16) & 0xFFFF); + } + // UQADD8 + else if (op2 == 0x04) { + lo_val = ARMul_UnsignedSaturatedAdd8(rn_val, rm_val) | + ARMul_UnsignedSaturatedAdd8(rn_val >> 8, rm_val >> 8) << 8; + hi_val = ARMul_UnsignedSaturatedAdd8(rn_val >> 16, rm_val >> 16) | + ARMul_UnsignedSaturatedAdd8(rn_val >> 24, rm_val >> 24) << 8; + } + // UQSUB8 + else { + lo_val = ARMul_UnsignedSaturatedSub8(rn_val, rm_val) | + ARMul_UnsignedSaturatedSub8(rn_val >> 8, rm_val >> 8) << 8; + hi_val = ARMul_UnsignedSaturatedSub8(rn_val >> 16, rm_val >> 16) | + ARMul_UnsignedSaturatedSub8(rn_val >> 24, rm_val >> 24) << 8; + } + + RD = ((lo_val & 0xFFFF) | hi_val << 16); + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(generic_arm_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + USAD8_INST: USADA8_INST: + { + INC_ICOUNTER; + + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + generic_arm_inst* inst_cream = (generic_arm_inst*)inst_base->component; + + const u8 ra_idx = inst_cream->Ra; + const u32 rm_val = RM; + const u32 rn_val = RN; + + const u8 diff1 = ARMul_UnsignedAbsoluteDifference(rn_val & 0xFF, rm_val & 0xFF); + const u8 diff2 = ARMul_UnsignedAbsoluteDifference((rn_val >> 8) & 0xFF, (rm_val >> 8) & 0xFF); + const u8 diff3 = ARMul_UnsignedAbsoluteDifference((rn_val >> 16) & 0xFF, (rm_val >> 16) & 0xFF); + const u8 diff4 = ARMul_UnsignedAbsoluteDifference((rn_val >> 24) & 0xFF, (rm_val >> 24) & 0xFF); + + u32 finalDif = (diff1 + diff2 + diff3 + diff4); + + // Op is USADA8 if true. + if (ra_idx != 15) + finalDif += cpu->Reg[ra_idx]; + + RD = finalDif; + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(generic_arm_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + USAT_INST: USAT16_INST: USUB16_INST: diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp index 9b680c1e2..9c6602f09 100644 --- a/src/core/arm/interpreter/armemu.cpp +++ b/src/core/arm/interpreter/armemu.cpp @@ -3100,7 +3100,6 @@ mainswitch: break; case 0x68: /* Store Word, No WriteBack, Post Inc, Reg. */ - //ichfly PKHBT PKHTB todo check this if ((instr & 0x70) == 0x10) { //pkhbt u8 idest = BITS(12, 15); u8 rfis = BITS(16, 19); @@ -3109,18 +3108,11 @@ mainswitch: state->Reg[idest] = (state->Reg[rfis] & 0xFFFF) | ((state->Reg[rlast] << ishi) & 0xFFFF0000); break; } else if ((instr & 0x70) == 0x50) { //pkhtb - const u8 rd_idx = BITS(12, 15); - const u8 rn_idx = BITS(16, 19); - const u8 rm_idx = BITS(0, 3); - const u8 imm5 = BITS(7, 11); - - ARMword val; - if (imm5 >= 32) - val = (state->Reg[rm_idx] >> 31); - else - val = (state->Reg[rm_idx] >> imm5); - - state->Reg[rd_idx] = (val & 0xFFFF) | ((state->Reg[rn_idx]) & 0xFFFF0000); + u8 rd_idx = BITS(12, 15); + u8 rn_idx = BITS(16, 19); + u8 rm_idx = BITS(0, 3); + u8 imm5 = BITS(7, 11) ? BITS(7, 11) : 31; + state->Reg[rd_idx] = ((static_cast<s32>(state->Reg[rm_idx]) >> imm5) & 0xFFFF) | ((state->Reg[rn_idx]) & 0xFFFF0000); break; } else if (BIT (4)) { #ifdef MODE32 @@ -6117,26 +6109,55 @@ L_stm_s_takeabort: } printf("Unhandled v6 insn: uasx/usax\n"); break; - case 0x66: - if ((instr & 0x0FF00FF0) == 0x06600FF0) { //uqsub8 - u32 rd = (instr >> 12) & 0xF; - u32 rm = (instr >> 16) & 0xF; - u32 rn = (instr >> 0) & 0xF; - u32 subfrom = state->Reg[rm]; - u32 tosub = state->Reg[rn]; - - u8 b1 = (u8)((u8)(subfrom)-(u8)(tosub)); - if (b1 > (u8)(subfrom)) b1 = 0; - u8 b2 = (u8)((u8)(subfrom >> 8) - (u8)(tosub >> 8)); - if (b2 > (u8)(subfrom >> 8)) b2 = 0; - u8 b3 = (u8)((u8)(subfrom >> 16) - (u8)(tosub >> 16)); - if (b3 > (u8)(subfrom >> 16)) b3 = 0; - u8 b4 = (u8)((u8)(subfrom >> 24) - (u8)(tosub >> 24)); - if (b4 > (u8)(subfrom >> 24)) b4 = 0; - state->Reg[rd] = (u32)(b1 | b2 << 8 | b3 << 16 | b4 << 24); + case 0x66: // UQADD16, UQASX, UQSAX, UQSUB16, UQADD8, and UQSUB8 + { + const u8 rd_idx = BITS(12, 15); + const u8 rm_idx = BITS(0, 3); + const u8 rn_idx = BITS(16, 19); + const u8 op2 = BITS(5, 7); + const u32 rm_val = state->Reg[rm_idx]; + const u32 rn_val = state->Reg[rn_idx]; + + u16 lo_val = 0; + u16 hi_val = 0; + + // UQADD16 + if (op2 == 0x00) { + lo_val = ARMul_UnsignedSaturatedAdd16(rn_val & 0xFFFF, rm_val & 0xFFFF); + hi_val = ARMul_UnsignedSaturatedAdd16((rn_val >> 16) & 0xFFFF, (rm_val >> 16) & 0xFFFF); + } + // UQASX + else if (op2 == 0x01) { + lo_val = ARMul_UnsignedSaturatedSub16(rn_val & 0xFFFF, (rm_val >> 16) & 0xFFFF); + hi_val = ARMul_UnsignedSaturatedAdd16((rn_val >> 16) & 0xFFFF, rm_val & 0xFFFF); + } + // UQSAX + else if (op2 == 0x02) { + lo_val = ARMul_UnsignedSaturatedAdd16(rn_val & 0xFFFF, (rm_val >> 16) & 0xFFFF); + hi_val = ARMul_UnsignedSaturatedSub16((rn_val >> 16) & 0xFFFF, rm_val & 0xFFFF); + } + // UQSUB16 + else if (op2 == 0x03) { + lo_val = ARMul_UnsignedSaturatedSub16(rn_val & 0xFFFF, rm_val & 0xFFFF); + hi_val = ARMul_UnsignedSaturatedSub16((rn_val >> 16) & 0xFFFF, (rm_val >> 16) & 0xFFFF); + } + // UQADD8 + else if (op2 == 0x04) { + lo_val = ARMul_UnsignedSaturatedAdd8(rn_val, rm_val) | + ARMul_UnsignedSaturatedAdd8(rn_val >> 8, rm_val >> 8) << 8; + hi_val = ARMul_UnsignedSaturatedAdd8(rn_val >> 16, rm_val >> 16) | + ARMul_UnsignedSaturatedAdd8(rn_val >> 24, rm_val >> 24) << 8; + } + // UQSUB8 + else { + lo_val = ARMul_UnsignedSaturatedSub8(rn_val, rm_val) | + ARMul_UnsignedSaturatedSub8(rn_val >> 8, rm_val >> 8) << 8; + hi_val = ARMul_UnsignedSaturatedSub8(rn_val >> 16, rm_val >> 16) | + ARMul_UnsignedSaturatedSub8(rn_val >> 24, rm_val >> 24) << 8; + } + + state->Reg[rd_idx] = ((lo_val & 0xFFFF) | hi_val << 16); return 1; - } else { - printf ("Unhandled v6 insn: uqsub16\n"); } break; case 0x67: // UHADD16, UHASX, UHSAX, UHSUB16, UHADD8, and UHSUB8. @@ -6643,10 +6664,10 @@ L_stm_s_takeabort: const u32 rm_val = state->Reg[rm_idx]; const u32 rn_val = state->Reg[rn_idx]; - const u8 diff1 = (u8)std::labs((rn_val & 0xFF) - (rm_val & 0xFF)); - const u8 diff2 = (u8)std::labs(((rn_val >> 8) & 0xFF) - ((rm_val >> 8) & 0xFF)); - const u8 diff3 = (u8)std::labs(((rn_val >> 16) & 0xFF) - ((rm_val >> 16) & 0xFF)); - const u8 diff4 = (u8)std::labs(((rn_val >> 24) & 0xFF) - ((rm_val >> 24) & 0xFF)); + const u8 diff1 = ARMul_UnsignedAbsoluteDifference(rn_val & 0xFF, rm_val & 0xFF); + const u8 diff2 = ARMul_UnsignedAbsoluteDifference((rn_val >> 8) & 0xFF, (rm_val >> 8) & 0xFF); + const u8 diff3 = ARMul_UnsignedAbsoluteDifference((rn_val >> 16) & 0xFF, (rm_val >> 16) & 0xFF); + const u8 diff4 = ARMul_UnsignedAbsoluteDifference((rn_val >> 24) & 0xFF, (rm_val >> 24) & 0xFF); u32 finalDif = (diff1 + diff2 + diff3 + diff4); diff --git a/src/core/arm/interpreter/armsupp.cpp b/src/core/arm/interpreter/armsupp.cpp index 6774f8a74..8f158e2c8 100644 --- a/src/core/arm/interpreter/armsupp.cpp +++ b/src/core/arm/interpreter/armsupp.cpp @@ -392,6 +392,15 @@ ARMul_NthReg (ARMword instr, unsigned number) return (bit - 1); } +/* Unsigned sum of absolute difference */ +u8 ARMul_UnsignedAbsoluteDifference(u8 left, u8 right) +{ + if (left > right) + return left - right; + + return right - left; +} + /* Assigns the N and Z flags depending on the value of result. */ void @@ -469,6 +478,47 @@ ARMul_SubOverflow (ARMul_State * state, ARMword a, ARMword b, ARMword result) ASSIGNV (SubOverflow (a, b, result)); } +/* 8-bit unsigned saturated addition */ +u8 ARMul_UnsignedSaturatedAdd8(u8 left, u8 right) +{ + u8 result = left + right; + + if (result < left) + result = 0xFF; + + return result; +} + +/* 16-bit unsigned saturated addition */ +u16 ARMul_UnsignedSaturatedAdd16(u16 left, u16 right) +{ + u16 result = left + right; + + if (result < left) + result = 0xFFFF; + + return result; +} + +/* 8-bit unsigned saturated subtraction */ +u8 ARMul_UnsignedSaturatedSub8(u8 left, u8 right) +{ + if (left <= right) + return 0; + + return left - right; +} + +/* 16-bit unsigned saturated subtraction */ +u16 ARMul_UnsignedSaturatedSub16(u16 left, u16 right) +{ + if (left <= right) + return 0; + + return left - right; +} + + /* This function does the work of generating the addresses used in an LDC instruction. The code here is always post-indexed, it's up to the caller to get the input address correct and to handle base register diff --git a/src/core/arm/skyeye_common/armdefs.h b/src/core/arm/skyeye_common/armdefs.h index 34eb5aaf7..c7509fcb2 100644 --- a/src/core/arm/skyeye_common/armdefs.h +++ b/src/core/arm/skyeye_common/armdefs.h @@ -790,6 +790,12 @@ extern void ARMul_FixSPSR(ARMul_State*, ARMword, ARMword); extern void ARMul_ConsolePrint(ARMul_State*, const char*, ...); extern void ARMul_SelectProcessor(ARMul_State*, unsigned); +extern u8 ARMul_UnsignedSaturatedAdd8(u8, u8); +extern u16 ARMul_UnsignedSaturatedAdd16(u16, u16); +extern u8 ARMul_UnsignedSaturatedSub8(u8, u8); +extern u16 ARMul_UnsignedSaturatedSub16(u16, u16); +extern u8 ARMul_UnsignedAbsoluteDifference(u8, u8); + #define DIFF_LOG 0 #define SAVE_LOG 0 |