summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
authorbunnei <bunneidev@gmail.com>2018-10-28 18:06:21 +0100
committerGitHub <noreply@github.com>2018-10-28 18:06:21 +0100
commitaa1cf608ed20984c410fc215d9f73937abe76ddc (patch)
tree1167c5b03eb2526f2704195a27d50f11430f583c /src/video_core
parentMerge pull request #1606 from FearlessTobi/revert-1581-macosx-target-version (diff)
parentRefactor precise usage and add FMNMX, MUFU, FMUL32 and FADD32I (diff)
downloadyuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.tar
yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.tar.gz
yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.tar.bz2
yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.tar.lz
yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.tar.xz
yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.tar.zst
yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.zip
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp55
1 files changed, 35 insertions, 20 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index dec291a7d..dcf6941b0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -341,10 +341,10 @@ public:
*/
void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
u64 dest_num_components, u64 value_num_components,
- bool is_saturated = false, u64 dest_elem = 0) {
+ bool is_saturated = false, u64 dest_elem = 0, bool precise = false) {
SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
- dest_num_components, value_num_components, dest_elem);
+ dest_num_components, value_num_components, dest_elem, precise);
}
/**
@@ -368,7 +368,7 @@ public:
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
- dest_num_components, value_num_components, dest_elem);
+ dest_num_components, value_num_components, dest_elem, false);
if (sets_cc) {
const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
@@ -416,7 +416,7 @@ public:
}
}();
- SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem);
+ SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);
}
/**
@@ -757,7 +757,8 @@ private:
* @param dest_elem Optional, the destination element to use for the operation.
*/
void SetRegister(const Register& reg, u64 elem, const std::string& value,
- u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
+ u64 dest_num_components, u64 value_num_components, u64 dest_elem,
+ bool precise) {
if (reg == Register::ZeroIndex) {
LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex");
UNREACHABLE();
@@ -774,7 +775,18 @@ private:
src += GetSwizzle(elem);
}
- shader.AddLine(dest + " = " + src + ';');
+ if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
+ shader.AddLine('{');
+ ++shader.scope;
+ // This avoids optimizations of constant propagation and keeps the code as the original
+ // Sadly using the precise keyword causes "linking" errors on fragment shaders.
+ shader.AddLine("precise float tmp = " + src + ';');
+ shader.AddLine(dest + " = tmp;");
+ --shader.scope;
+ shader.AddLine('}');
+ } else {
+ shader.AddLine(dest + " = " + src + ';');
+ }
}
/// Build the GLSL register list.
@@ -1510,8 +1522,9 @@ private:
ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
+
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
}
case OpCode::Id::FADD_C:
@@ -1519,8 +1532,9 @@ private:
case OpCode::Id::FADD_IMM: {
op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
}
case OpCode::Id::MUFU: {
@@ -1528,31 +1542,31 @@ private:
switch (instr.sub_op) {
case SubOp::Cos:
regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Sin:
regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Ex2:
regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Lg2:
regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Rcp:
regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Rsq:
regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Sqrt:
regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -1573,7 +1587,7 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0,
'(' + condition + ") ? min(" + parameters + ") : max(" +
parameters + ')',
- 1, 1);
+ 1, 1, false, 0, true);
break;
}
case OpCode::Id::RRO_C:
@@ -1602,7 +1616,7 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0,
regs.GetRegisterAsFloat(instr.gpr8) + " * " +
GetImmediate32(instr),
- 1, 1, instr.fmul32.saturate);
+ 1, 1, instr.fmul32.saturate, 0, true);
break;
}
case OpCode::Id::FADD32I: {
@@ -1625,7 +1639,7 @@ private:
op_b = "-(" + op_b + ')';
}
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);
break;
}
}
@@ -2087,8 +2101,9 @@ private:
}
}
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
- instr.alu.saturate_d);
+ regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
+ 1, 1, instr.alu.saturate_d, 0, true);
+
break;
}
case OpCode::Type::Hfma2: {