diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 | ||||
-rw-r--r-- | src/video_core/morton.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 22 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 241 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 2 | ||||
-rw-r--r-- | src/video_core/surface.cpp | 7 | ||||
-rw-r--r-- | src/video_core/surface.h | 92 |
7 files changed, 173 insertions, 197 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index eb703bb5a..e53c77f2b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1049,7 +1049,7 @@ union Instruction { BitField<49, 1, u64> nodep_flag; BitField<50, 3, u64> component_mask_selector; BitField<53, 4, u64> texture_info; - BitField<60, 1, u64> fp32_flag; + BitField<59, 1, u64> fp32_flag; TextureType GetTextureType() const { // The TEXS instruction has a weird encoding for the texture type. diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 47e76d8fe..b68f4fb13 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -66,8 +66,6 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::BC6H_UF16>, MortonCopy<true, PixelFormat::BC6H_SF16>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::G8R8U>, - MortonCopy<true, PixelFormat::G8R8S>, MortonCopy<true, PixelFormat::BGRA8>, MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, @@ -138,8 +136,6 @@ static constexpr ConversionArray linear_to_morton_fns = { MortonCopy<false, PixelFormat::BC6H_SF16>, // TODO(Subv): Swizzling ASTC formats are not supported nullptr, - MortonCopy<false, PixelFormat::G8R8U>, - MortonCopy<false, PixelFormat::G8R8S>, MortonCopy<false, PixelFormat::BGRA8>, MortonCopy<false, PixelFormat::RGBA32F>, MortonCopy<false, PixelFormat::RG32F>, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 352c391a9..d3dcb9a46 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -299,8 +299,6 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, true}, // BC6H_SF16 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U - {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // G8R8S {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F @@ -626,18 +624,6 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo } } -static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { - constexpr auto bpp{GetBytesPerPixel(PixelFormat::G8R8U)}; - for (std::size_t y = 0; y < height; ++y) { - for (std::size_t x = 0; x < width; ++x) { - const std::size_t offset{bpp * (y * width + x)}; - const u8 temp{data[offset]}; - data[offset] = data[offset + 1]; - data[offset + 1] = temp; - } - } -} - /** * Helper function to perform software conversion (as needed) when loading a buffer from Switch * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with @@ -670,12 +656,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. ConvertS8Z24ToZ24S8(data, width, height, false); break; - - case PixelFormat::G8R8U: - case PixelFormat::G8R8S: - // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8. - ConvertG8R8ToR8G8(data, width, height); - break; } } @@ -687,8 +667,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, u32 width, u32 height) { switch (pixel_format) { - case PixelFormat::G8R8U: - case PixelFormat::G8R8S: case PixelFormat::ASTC_2D_4X4: case PixelFormat::ASTC_2D_8X8: case PixelFormat::ASTC_2D_4X4_SRGB: diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4e685fa2c..1bb09e61b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -347,6 +347,15 @@ public: BuildInputList(); } + void SetConditionalCodesFromExpression(const std::string& expresion) { + SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0"); + LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete."); + } + + void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) { + SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem))); + } + /** * Returns code that does an integer size conversion for the specified size. * @param value Value to perform integer size conversion on. @@ -401,14 +410,24 @@ public: * @param dest_num_components Number of components in the destination. * @param value_num_components Number of components in the value. * @param is_saturated Optional, when True, saturates the provided value. + * @param sets_cc Optional, when True, sets the corresponding values to the implemented + * condition flags. * @param dest_elem Optional, the destination element to use for the operation. */ void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, u64 dest_num_components, u64 value_num_components, - bool is_saturated = false, u64 dest_elem = 0, bool precise = false) { - - SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, - dest_num_components, value_num_components, dest_elem, precise); + bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0, + bool precise = false) { + const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value; + SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem, + precise); + if (sets_cc) { + if (reg == Register::ZeroIndex) { + SetConditionalCodesFromExpression(clamped_value); + } else { + SetConditionalCodesFromRegister(reg, dest_elem); + } + } } /** @@ -419,25 +438,29 @@ public: * @param dest_num_components Number of components in the destination. * @param value_num_components Number of components in the value. * @param is_saturated Optional, when True, saturates the provided value. + * @param sets_cc Optional, when True, sets the corresponding values to the implemented + * condition flags. * @param dest_elem Optional, the destination element to use for the operation. * @param size Register size to use for conversion instructions. */ void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, const std::string& value, u64 dest_num_components, u64 value_num_components, bool is_saturated = false, - u64 dest_elem = 0, Register::Size size = Register::Size::Word, - bool sets_cc = false) { + bool sets_cc = false, u64 dest_elem = 0, + Register::Size size = Register::Size::Word) { UNIMPLEMENTED_IF(is_saturated); - + const std::string final_value = ConvertIntegerSize(value, size); const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; - SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', - dest_num_components, value_num_components, dest_elem, false); + SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components, + value_num_components, dest_elem, false); if (sets_cc) { - const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; - SetInternalFlag(InternalFlag::ZeroFlag, zero_condition); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete."); + if (reg == Register::ZeroIndex) { + SetConditionalCodesFromExpression(final_value); + } else { + SetConditionalCodesFromRegister(reg, dest_elem); + } } } @@ -470,10 +493,10 @@ public: // pack. I couldn't test this on hardware but it shouldn't really matter since most // of the time when a Mrg_* flag is used both components will be mirrored. That // being said, it deserves a test. - return "((" + GetRegisterAsInteger(reg, 0, false) + + return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))"; case Tegra::Shader::HalfMerge::Mrg_H1: - return "((" + GetRegisterAsInteger(reg, 0, false) + + return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))"; default: UNREACHABLE(); @@ -1275,7 +1298,7 @@ private: void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, const std::string& op_b, Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate) { + Tegra::Shader::Pred predicate, const bool set_cc) { std::string result{}; switch (logic_op) { case LogicOperation::And: { @@ -1299,7 +1322,7 @@ private: } if (dest != Tegra::Shader::Register::ZeroIndex) { - regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); + regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); } using Tegra::Shader::PredicateResultMode; @@ -1319,7 +1342,8 @@ private: } void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b, - const std::string& op_c, const std::string& imm_lut) { + const std::string& op_c, const std::string& imm_lut, + const bool set_cc) { if (dest == Tegra::Shader::Register::ZeroIndex) { return; } @@ -1342,7 +1366,7 @@ private: result += ')'; - regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); + regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); } void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { @@ -1357,12 +1381,12 @@ private: if (written_components < 2) { // Write the first two swizzle components to gpr0 and gpr0+1 - regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, + regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false, written_components % 2); } else { ASSERT(instr.texs.HasTwoDestinations()); // Write the rest of the swizzle components to gpr28 and gpr28+1 - regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, + regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false, written_components % 2); } @@ -1755,7 +1779,7 @@ private: instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL; constexpr std::array<const char*, 4> coord_container{ - {"", "int coord = (", "ivec2 coord = ivec2(", "ivec3 coord = ivec3("}}; + {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}}; std::string coord = coord_container[total_coord_count]; @@ -1871,8 +1895,6 @@ private: instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", instr.fmul.tab5c68_0 .Value()); // SMO typical sends 1 here which seems to be the default - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FMUL is not implemented"); op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); @@ -1896,20 +1918,17 @@ private: } regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, instr.generates_cc, 0, true); break; } case OpCode::Id::FADD_C: case OpCode::Id::FADD_R: case OpCode::Id::FADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FADD is not implemented"); - op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, instr.generates_cc, 0, true); break; } case OpCode::Id::MUFU: { @@ -1917,31 +1936,31 @@ private: switch (instr.sub_op) { case SubOp::Cos: regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Sin: regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Ex2: regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Lg2: regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Rcp: regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Rsq: regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; case SubOp::Sqrt: regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, - instr.alu.saturate_d, 0, true); + instr.alu.saturate_d, false, 0, true); break; default: UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", @@ -1952,8 +1971,9 @@ private: case OpCode::Id::FMNMX_C: case OpCode::Id::FMNMX_R: case OpCode::Id::FMNMX_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FMNMX is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.generates_cc, + "Condition codes generation in FMNMX is partially implemented"); op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); @@ -1964,7 +1984,7 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, '(' + condition + ") ? min(" + parameters + ") : max(" + parameters + ')', - 1, 1, false, 0, true); + 1, 1, false, instr.generates_cc, 0, true); break; } case OpCode::Id::RRO_C: @@ -1989,18 +2009,16 @@ private: break; } case OpCode::Id::FMUL32_IMM: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in FMUL32 is not implemented"); - - regs.SetRegisterToFloat(instr.gpr0, 0, - regs.GetRegisterAsFloat(instr.gpr8) + " * " + - GetImmediate32(instr), - 1, 1, instr.fmul32.saturate, 0, true); + regs.SetRegisterToFloat( + instr.gpr0, 0, + regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1, + instr.fmul32.saturate, instr.op_32.generates_cc, 0, true); break; } case OpCode::Id::FADD32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in FADD32I is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.op_32.generates_cc, + "Condition codes generation in FADD32I is partially implemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); std::string op_b = GetImmediate32(instr); @@ -2021,7 +2039,8 @@ private: op_b = "-(" + op_b + ')'; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, + instr.op_32.generates_cc, 0, true); break; } } @@ -2035,16 +2054,14 @@ private: switch (opcode->get().GetId()) { case OpCode::Id::BFE_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in BFE is not implemented"); - std::string inner_shift = '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')'; std::string outer_shift = '(' + inner_shift + " >> " + std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')'; - regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false, + instr.generates_cc); break; } default: { @@ -2055,8 +2072,6 @@ private: break; } case OpCode::Type::Bfi: { - UNIMPLEMENTED_IF(instr.generates_cc); - const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> { switch (opcode->get().GetId()) { case OpCode::Id::BFI_IMM_R: @@ -2071,9 +2086,10 @@ private: const std::string offset = '(' + packed_shift + " & 0xff)"; const std::string bits = "((" + packed_shift + " >> 8) & 0xff)"; const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false); - regs.SetRegisterToInteger( - instr.gpr0, false, 0, - "bitfieldInsert(" + base + ", " + insert + ", " + offset + ", " + bits + ')', 1, 1); + regs.SetRegisterToInteger(instr.gpr0, false, 0, + "bitfieldInsert(" + base + ", " + insert + ", " + offset + + ", " + bits + ')', + 1, 1, false, instr.generates_cc); break; } case OpCode::Type::Shift: { @@ -2095,9 +2111,6 @@ private: case OpCode::Id::SHR_C: case OpCode::Id::SHR_R: case OpCode::Id::SHR_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in SHR is not implemented"); - if (!instr.shift.is_signed) { // Logical shift right op_a = "uint(" + op_a + ')'; @@ -2105,7 +2118,7 @@ private: // Cast to int is superfluous for arithmetic shift, it's only for a logical shift regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')', - 1, 1); + 1, 1, false, instr.generates_cc); break; } case OpCode::Id::SHL_C: @@ -2113,7 +2126,8 @@ private: case OpCode::Id::SHL_IMM: UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in SHL is not implemented"); - regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false, + instr.generates_cc); break; default: { UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); @@ -2127,18 +2141,17 @@ private: switch (opcode->get().GetId()) { case OpCode::Id::IADD32I: - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in IADD32I is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.op_32.generates_cc, + "Condition codes generation in IADD32I is partially implemented"); if (instr.iadd32i.negate_a) op_a = "-(" + op_a + ')'; regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, - instr.iadd32i.saturate != 0); + instr.iadd32i.saturate, instr.op_32.generates_cc); break; case OpCode::Id::LOP32I: { - UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc, - "Condition codes generation in LOP32I is not implemented"); if (instr.alu.lop32i.invert_a) op_a = "~(" + op_a + ')'; @@ -2148,7 +2161,7 @@ private: WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, Tegra::Shader::PredicateResultMode::None, - Tegra::Shader::Pred::UnusedIndex); + Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc); break; } default: { @@ -2177,7 +2190,7 @@ private: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IADD is not implemented"); + "Condition codes generation in IADD is partially implemented"); if (instr.alu_integer.negate_a) op_a = "-(" + op_a + ')'; @@ -2186,14 +2199,15 @@ private: op_b = "-(" + op_b + ')'; regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, - instr.alu.saturate_d); + instr.alu.saturate_d, instr.generates_cc); break; } case OpCode::Id::IADD3_C: case OpCode::Id::IADD3_R: case OpCode::Id::IADD3_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IADD3 is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.generates_cc, + "Condition codes generation in IADD3 is partially implemented"); std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); @@ -2249,14 +2263,16 @@ private: result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; } - regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false, + instr.generates_cc); break; } case OpCode::Id::ISCADD_C: case OpCode::Id::ISCADD_R: case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.generates_cc, + "Condition codes generation in ISCADD is partially implemented"); if (instr.alu_integer.negate_a) op_a = "-(" + op_a + ')'; @@ -2267,7 +2283,8 @@ private: const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); regs.SetRegisterToInteger(instr.gpr0, true, 0, - "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1); + "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1, + false, instr.generates_cc); break; } case OpCode::Id::POPC_C: @@ -2291,8 +2308,6 @@ private: case OpCode::Id::LOP_C: case OpCode::Id::LOP_R: case OpCode::Id::LOP_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in LOP is not implemented"); if (instr.alu.lop.invert_a) op_a = "~(" + op_a + ')'; @@ -2301,15 +2316,13 @@ private: op_b = "~(" + op_b + ')'; WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); + instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, + instr.generates_cc); break; } case OpCode::Id::LOP3_C: case OpCode::Id::LOP3_R: case OpCode::Id::LOP3_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in LOP3 is not implemented"); - const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); std::string lut; @@ -2319,15 +2332,16 @@ private: lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')'; } - WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut); + WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); break; } case OpCode::Id::IMNMX_C: case OpCode::Id::IMNMX_R: case OpCode::Id::IMNMX_IMM: { UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in IMNMX is not implemented"); + UNIMPLEMENTED_IF_MSG( + instr.generates_cc, + "Condition codes generation in IMNMX is partially implemented"); const std::string condition = GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0); @@ -2335,7 +2349,7 @@ private: regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0, '(' + condition + ") ? min(" + parameters + ") : max(" + parameters + ')', - 1, 1); + 1, 1, false, instr.generates_cc); break; } case OpCode::Id::LEA_R2: @@ -2396,7 +2410,8 @@ private: UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), "Unhandled LEA Predicate"); const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))"; - regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false, + instr.generates_cc); break; } @@ -2501,7 +2516,7 @@ private: UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in FFMA is not implemented"); + "Condition codes generation in FFMA is partially implemented"); switch (opcode->get().GetId()) { case OpCode::Id::FFMA_CR: { @@ -2532,7 +2547,7 @@ private: } regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', - 1, 1, instr.alu.saturate_d, 0, true); + 1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true); break; } case OpCode::Type::Hfma2: { @@ -2603,16 +2618,14 @@ private: } regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, - 1, instr.alu.saturate_d, 0, instr.conversion.dest_size, - instr.generates_cc.Value() != 0); + 1, instr.alu.saturate_d, instr.generates_cc, 0, + instr.conversion.dest_size); break; } case OpCode::Id::I2F_R: case OpCode::Id::I2F_C: { UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.conversion.selector); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); std::string op_a; if (instr.is_b_gpr) { @@ -2635,14 +2648,12 @@ private: op_a = "-(" + op_a + ')'; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc); break; } case OpCode::Id::F2F_R: { UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); if (instr.conversion.abs_a) { @@ -2674,14 +2685,13 @@ private: break; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d, + instr.generates_cc); break; } case OpCode::Id::F2I_R: case OpCode::Id::F2I_C: { UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); std::string op_a{}; if (instr.is_b_gpr) { @@ -2724,7 +2734,8 @@ private: } regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, - 1, false, 0, instr.conversion.dest_size); + 1, false, instr.generates_cc, 0, + instr.conversion.dest_size); break; } default: { @@ -2887,7 +2898,7 @@ private: shader.AddLine(coord); if (depth_compare) { - regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); + regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1); } else { shader.AddLine("vec4 texture_tmp = " + texture + ';'); std::size_t dest_elem{}; @@ -2896,7 +2907,7 @@ private: // Skip disabled components continue; } - regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, dest_elem); ++dest_elem; } @@ -2982,7 +2993,7 @@ private: // Skip disabled components continue; } - regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, dest_elem); ++dest_elem; } @@ -3231,7 +3242,7 @@ private: } case OpCode::Type::PredicateSetRegister: { UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); + "Condition codes generation in PSET is partially implemented"); const std::string op_a = GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); @@ -3248,10 +3259,11 @@ private: const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')'; if (instr.pset.bf == 0) { const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0"; - regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false, + instr.generates_cc); } else { const std::string value = '(' + result + ") ? 1.0 : 0.0"; - regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc); } break; } @@ -3368,14 +3380,11 @@ private: ") " + combiner + " (" + second_pred + "))"; if (instr.fset.bf) { - regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false, + instr.generates_cc); } else { regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, - 1); - } - if (instr.generates_cc.Value() != 0) { - regs.SetInternalFlag(InternalFlag::ZeroFlag, predicate); - LOG_WARNING(HW_GPU, "FSET Condition Code is incomplete"); + 1, false, instr.generates_cc); } break; } @@ -3462,7 +3471,7 @@ private: UNIMPLEMENTED_IF(instr.xmad.sign_a); UNIMPLEMENTED_IF(instr.xmad.sign_b); UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in XMAD is not implemented"); + "Condition codes generation in XMAD is partially implemented"); std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; std::string op_b; @@ -3548,7 +3557,8 @@ private: sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; } - regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1); + regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false, + instr.generates_cc); break; } default: { @@ -3752,8 +3762,7 @@ private: } regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, - instr.vmad.saturate == 1, 0, Register::Size::Word, - instr.vmad.cc); + instr.vmad.saturate, instr.vmad.cc); break; } case OpCode::Id::VSETP: { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 067fad81b..b85cc262f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -17,7 +17,7 @@ class EmuWindow; } namespace Layout { -class FramebufferLayout; +struct FramebufferLayout; } namespace OpenGL { diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index a97b1562b..1a344229f 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -196,11 +196,14 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::G8R8: + // TextureFormat::G8R8 is actually ordered red then green, as such we can use + // PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath + // of the Wild, which uses this format to render the hearts on the UI. switch (component_type) { case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::G8R8U; + return PixelFormat::RG8U; case Tegra::Texture::ComponentType::SNORM: - return PixelFormat::G8R8S; + return PixelFormat::RG8S; } LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); UNREACHABLE(); diff --git a/src/video_core/surface.h b/src/video_core/surface.h index e23cfecbc..c2259c3c2 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -38,57 +38,55 @@ enum class PixelFormat { BC6H_UF16 = 20, BC6H_SF16 = 21, ASTC_2D_4X4 = 22, - G8R8U = 23, - G8R8S = 24, - BGRA8 = 25, - RGBA32F = 26, - RG32F = 27, - R32F = 28, - R16F = 29, - R16U = 30, - R16S = 31, - R16UI = 32, - R16I = 33, - RG16 = 34, - RG16F = 35, - RG16UI = 36, - RG16I = 37, - RG16S = 38, - RGB32F = 39, - RGBA8_SRGB = 40, - RG8U = 41, - RG8S = 42, - RG32UI = 43, - R32UI = 44, - ASTC_2D_8X8 = 45, - ASTC_2D_8X5 = 46, - ASTC_2D_5X4 = 47, - BGRA8_SRGB = 48, - DXT1_SRGB = 49, - DXT23_SRGB = 50, - DXT45_SRGB = 51, - BC7U_SRGB = 52, - ASTC_2D_4X4_SRGB = 53, - ASTC_2D_8X8_SRGB = 54, - ASTC_2D_8X5_SRGB = 55, - ASTC_2D_5X4_SRGB = 56, - ASTC_2D_5X5 = 57, - ASTC_2D_5X5_SRGB = 58, - ASTC_2D_10X8 = 59, - ASTC_2D_10X8_SRGB = 60, + BGRA8 = 23, + RGBA32F = 24, + RG32F = 25, + R32F = 26, + R16F = 27, + R16U = 28, + R16S = 29, + R16UI = 30, + R16I = 31, + RG16 = 32, + RG16F = 33, + RG16UI = 34, + RG16I = 35, + RG16S = 36, + RGB32F = 37, + RGBA8_SRGB = 38, + RG8U = 39, + RG8S = 40, + RG32UI = 41, + R32UI = 42, + ASTC_2D_8X8 = 43, + ASTC_2D_8X5 = 44, + ASTC_2D_5X4 = 45, + BGRA8_SRGB = 46, + DXT1_SRGB = 47, + DXT23_SRGB = 48, + DXT45_SRGB = 49, + BC7U_SRGB = 50, + ASTC_2D_4X4_SRGB = 51, + ASTC_2D_8X8_SRGB = 52, + ASTC_2D_8X5_SRGB = 53, + ASTC_2D_5X4_SRGB = 54, + ASTC_2D_5X5 = 55, + ASTC_2D_5X5_SRGB = 56, + ASTC_2D_10X8 = 57, + ASTC_2D_10X8_SRGB = 58, MaxColorFormat, // Depth formats - Z32F = 61, - Z16 = 62, + Z32F = 59, + Z16 = 60, MaxDepthFormat, // DepthStencil formats - Z24S8 = 63, - S8Z24 = 64, - Z32FS8 = 65, + Z24S8 = 61, + S8Z24 = 62, + Z32FS8 = 63, MaxDepthStencilFormat, @@ -149,8 +147,6 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ 4, // BC6H_UF16 4, // BC6H_SF16 4, // ASTC_2D_4X4 - 1, // G8R8U - 1, // G8R8S 1, // BGRA8 1, // RGBA32F 1, // RG32F @@ -232,8 +228,6 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 4, // BC6H_UF16 4, // BC6H_SF16 4, // ASTC_2D_4X4 - 1, // G8R8U - 1, // G8R8S 1, // BGRA8 1, // RGBA32F 1, // RG32F @@ -309,8 +303,6 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 4, // BC6H_UF16 4, // BC6H_SF16 4, // ASTC_2D_4X4 - 1, // G8R8U - 1, // G8R8S 1, // BGRA8 1, // RGBA32F 1, // RG32F @@ -386,8 +378,6 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 128, // BC6H_UF16 128, // BC6H_SF16 128, // ASTC_2D_4X4 - 16, // G8R8U - 16, // G8R8S 32, // BGRA8 128, // RGBA32F 64, // RG32F |