diff options
3 files changed, 28 insertions, 4 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 4fc7d2f2f..f110fd7f8 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -281,7 +281,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { header += "OPTION NV_shader_atomic_fp16_vector;"; } - if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote) { + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_fswzadd) { header += "OPTION NV_shader_thread_group;"; } if (info.uses_subgroup_shuffles) { @@ -416,12 +417,25 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi if (program.local_memory_size > 0) { header += fmt::format("lmem[{}],", program.local_memory_size); } + if (program.info.uses_fswzadd) { + header += "FSWZA[4],FSWZB[4],"; + } header += "RC;" "LONG TEMP "; for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { header += fmt::format("D{},", index); } header += "DC;"; + if (program.info.uses_fswzadd) { + header += "MOV.F FSWZA[0],-1;" + "MOV.F FSWZA[1],1;" + "MOV.F FSWZA[2],-1;" + "MOV.F FSWZA[3],0;" + "MOV.F FSWZB[0],-1;" + "MOV.F FSWZB[1],-1;" + "MOV.F FSWZB[2],1;" + "MOV.F FSWZB[3],-1;"; + } ctx.code.insert(0, header); ctx.code += "END"; return ctx.code; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 2eb1eb123..15380a955 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -616,7 +616,8 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU3 const IR::Value& clamp, const IR::Value& segmentation_mask); void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, const IR::Value& clamp, const IR::Value& segmentation_mask); -void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 swizzle); void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 0f987daeb..af0e13d43 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -95,8 +95,17 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, Sca Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); } -void EmitFSwizzleAdd(EmitContext&, ScalarF32, ScalarF32, ScalarU32) { - throw NotImplementedException("GLASM instruction"); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 swizzle) { + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("AND.U RC.z,{}.threadid,3;" + "SHL.U RC.z,RC.z,1;" + "SHR.U RC.z,{},RC.z;" + "AND.U RC.z,RC.z,3;" + "MUL.F RC.x,{},FSWZA[RC.z];" + "MUL.F RC.y,{},FSWZB[RC.z];" + "ADD.F {}.x,RC.x,RC.y;", + ctx.stage_name, swizzle, op_a, op_b, ret); } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { |