diff options
Diffstat (limited to 'src/video_core')
23 files changed, 409 insertions, 168 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index eb9bf1878..669541b4b 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() { } bool DmaPusher::Step() { - if (dma_get != dma_put) { - // Push buffer non-empty, read a word - const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); - ASSERT_MSG(address, "Invalid GPU address"); + if (!ib_enable || dma_pushbuffer.empty()) { + // pushbuffer empty and IB empty or nonexistent - nothing to do + return false; + } - const CommandHeader command_header{Memory::Read32(*address)}; + const CommandList& command_list{dma_pushbuffer.front()}; + const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; + GPUVAddr dma_get = command_list_header.addr; + GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); + bool non_main = command_list_header.is_non_main; - dma_get += sizeof(u32); + if (dma_pushbuffer_subindex >= command_list.size()) { + // We've gone through the current list, remove it from the queue + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + } - if (!non_main) { - dma_mget = dma_get; - } + if (command_list_header.size == 0) { + return true; + } + + // Push buffer non-empty, read a word + const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); + ASSERT_MSG(address, "Invalid GPU address"); + + command_headers.resize(command_list_header.size); + + Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32)); + + for (const CommandHeader& command_header : command_headers) { // now, see if we're in the middle of a command if (dma_state.length_pending) { @@ -91,22 +109,11 @@ bool DmaPusher::Step() { break; } } - } else if (ib_enable && !dma_pushbuffer.empty()) { - // Current pushbuffer empty, but we have more IB entries to read - const CommandList& command_list{dma_pushbuffer.front()}; - const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; - dma_get = command_list_header.addr; - dma_put = dma_get + command_list_header.size * sizeof(u32); - non_main = command_list_header.is_non_main; - - if (dma_pushbuffer_subindex >= command_list.size()) { - // We've gone through the current list, remove it from the queue - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; - } - } else { - // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do - return {}; + } + + if (!non_main) { + // TODO (degasus): This is dead code, as dma_mget is never read. + dma_mget = dma_put; } return true; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1097e5c49..27a36348c 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -75,6 +75,8 @@ private: GPU& gpu; + std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once + std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer @@ -89,11 +91,8 @@ private: DmaState dma_state{}; bool dma_increment_once{}; - GPUVAddr dma_put{}; ///< pushbuffer current end address - GPUVAddr dma_get{}; ///< pushbuffer current read address GPUVAddr dma_mget{}; ///< main pushbuffer last read address bool ib_enable{true}; ///< IB mode enabled - bool non_main{}; ///< non-main pushbuffer active }; } // namespace Tegra diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index ec1a57226..540dcc52c 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -44,10 +44,10 @@ void Fermi2D::HandleSurfaceCopy() { const u32 src_blit_y2{ static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; - const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; - const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, - regs.blit_dst_x + regs.blit_dst_width, - regs.blit_dst_y + regs.blit_dst_height}; + const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; + const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, + regs.blit_dst_x + regs.blit_dst_width, + regs.blit_dst_y + regs.blit_dst_height}; if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { UNIMPLEMENTED(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2d2136067..144e7fa82 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { auto debug_context = system.GetGPUDebugContext(); + const u32 method = method_call.method; + // It is an error to write to a register other than the current macro's ARG register before it // has finished execution. if (executing_macro != 0) { - ASSERT(method_call.method == executing_macro + 1); + ASSERT(method == executing_macro + 1); } // Methods after 0xE00 are special, they're actually triggers for some microcode that was // uploaded to the GPU during initialization. - if (method_call.method >= MacroRegistersStart) { + if (method >= MacroRegistersStart) { // We're trying to execute a macro if (executing_macro == 0) { // A macro call must begin by writing the macro method's register, not its argument. - ASSERT_MSG((method_call.method % 2) == 0, + ASSERT_MSG((method % 2) == 0, "Can't start macro execution by writing to the ARGS register"); - executing_macro = method_call.method; + executing_macro = method; } macro_params.push_back(method_call.argument); @@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { return; } - ASSERT_MSG(method_call.method < Regs::NUM_REGS, + ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } - if (regs.reg_array[method_call.method] != method_call.argument) { - regs.reg_array[method_call.method] = method_call.argument; + if (regs.reg_array[method] != method_call.argument) { + regs.reg_array[method] = method_call.argument; // Color buffers constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); - if (method_call.method >= first_rt_reg && - method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { - const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; - dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); + if (method >= first_rt_reg && + method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { + const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; + dirty_flags.color_buffer.set(rt_index); } // Zeta buffer constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); - if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || - method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || - method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || - (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && - method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { + if (method == MAXWELL3D_REG_INDEX(zeta_enable) || + method == MAXWELL3D_REG_INDEX(zeta_width) || + method == MAXWELL3D_REG_INDEX(zeta_height) || + (method >= MAXWELL3D_REG_INDEX(zeta) && + method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { dirty_flags.zeta_buffer = true; } // Shader constexpr u32 shader_registers_count = sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); - if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && - method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { + if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && + method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { dirty_flags.shaders = true; } // Vertex format - if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && - method_call.method < - MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { + if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && + method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { dirty_flags.vertex_attrib_format = true; } // Vertex buffer - if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && - method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { - dirty_flags.vertex_array |= - 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); - } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && - method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { - dirty_flags.vertex_array |= - 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); - } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && - method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { - dirty_flags.vertex_array |= - 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays)); + if (method >= MAXWELL3D_REG_INDEX(vertex_array) && + method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { + dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); + } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && + method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { + dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); + } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && + method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { + dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); } } - switch (method_call.method) { + switch (method) { case MAXWELL3D_REG_INDEX(macros.data): { ProcessMacroUpload(method_call.argument); break; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0e3873ffd..7fbf1026e 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -5,8 +5,10 @@ #pragma once #include <array> +#include <bitset> #include <unordered_map> #include <vector> + #include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" @@ -503,7 +505,7 @@ public: f32 translate_z; INSERT_PADDING_WORDS(2); - MathUtil::Rectangle<s32> GetRect() const { + Common::Rectangle<s32> GetRect() const { return { GetX(), // left GetY() + GetHeight(), // top @@ -1094,19 +1096,18 @@ public: MemoryManager& memory_manager; struct DirtyFlags { - u8 color_buffer = 0xFF; - bool zeta_buffer = true; - - bool shaders = true; + std::bitset<8> color_buffer{0xFF}; + std::bitset<32> vertex_array{0xFFFFFFFF}; bool vertex_attrib_format = true; - u32 vertex_array = 0xFFFFFFFF; + bool zeta_buffer = true; + bool shaders = true; void OnMemoryWrite() { - color_buffer = 0xFF; zeta_buffer = true; shaders = true; - vertex_array = 0xFFFFFFFF; + color_buffer.set(); + vertex_array.set(); } }; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 1f425f90b..252592edd 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -376,9 +376,9 @@ enum class R2pMode : u64 { }; enum class IpaInterpMode : u64 { - Linear = 0, - Perspective = 1, - Flat = 2, + Pass = 0, + Multiply = 1, + Constant = 2, Sc = 3, }; diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index cf2b76ff6..e86a7f04a 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -16,6 +16,13 @@ enum class OutputTopology : u32 { TriangleStrip = 7, }; +enum class AttributeUse : u8 { + Unused = 0, + Constant = 1, + Perspective = 2, + ScreenLinear = 3, +}; + // Documentation in: // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture struct Header { @@ -84,9 +91,15 @@ struct Header { } vtg; struct { - INSERT_PADDING_BYTES(3); // ImapSystemValuesA - INSERT_PADDING_BYTES(1); // ImapSystemValuesB - INSERT_PADDING_BYTES(32); // ImapGenericVector[32] + INSERT_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_PADDING_BYTES(1); // ImapSystemValuesB + union { + BitField<0, 2, AttributeUse> x; + BitField<2, 2, AttributeUse> y; + BitField<4, 2, AttributeUse> w; + BitField<6, 2, AttributeUse> z; + u8 raw; + } imap_generic_vector[32]; INSERT_PADDING_BYTES(2); // ImapColor INSERT_PADDING_BYTES(2); // ImapSystemValuesC INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] @@ -103,6 +116,28 @@ struct Header { const u32 bit = render_target * 4 + component; return omap.target & (1 << bit); } + AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const { + return static_cast<AttributeUse>( + (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03); + } + AttributeUse GetAttributeUse(u32 attribute) const { + AttributeUse result = AttributeUse::Unused; + for (u32 i = 0; i < 4; i++) { + const auto index = GetAttributeIndexUse(attribute, i); + if (index == AttributeUse::Unused) { + continue; + } + if (result == AttributeUse::Unused || result == index) { + result = index; + continue; + } + LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode"); + if (index == AttributeUse::Perspective) { + result = index; + } + } + return result; + } } ps; }; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 0f5bfdcbf..6313702f2 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -100,7 +100,7 @@ struct FramebufferConfig { using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; TransformFlags transform_flags; - MathUtil::Rectangle<int> crop_rect; + Common::Rectangle<int> crop_rect; }; namespace Engines { diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index bcf0c15a4..a7bcf26fb 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -129,6 +129,15 @@ protected: return ++modified_ticks; } + /// Flushes the specified object, updating appropriate cache state as needed + void FlushObject(const T& object) { + if (!object->IsDirty()) { + return; + } + object->Flush(); + object->MarkAsModified(false, *this); + } + private: /// Returns a list of cached objects from the specified memory region, ordered by access time std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { @@ -154,15 +163,6 @@ private: return objects; } - /// Flushes the specified object, updating appropriate cache state as needed - void FlushObject(const T& object) { - if (!object->IsDirty()) { - return; - } - object->Flush(); - object->MarkAsModified(false, *this); - } - using ObjectSet = std::set<T>; using ObjectCache = std::unordered_map<VAddr, T>; using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index b2a223705..6a1dc9cf6 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,8 +47,8 @@ public: /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) { return false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 12d876120..c8c1d6911 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -102,8 +102,8 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { + : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window}, + screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { } // Rebinding the VAO invalidates the vertex buffer bindings. - gpu.dirty_flags.vertex_array = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array.set(); state.draw.vertex_array = vao_entry.handle; return vao_entry.handle; @@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; - if (!gpu.dirty_flags.vertex_array) + if (gpu.dirty_flags.vertex_array.none()) return; MICROPROFILE_SCOPE(OpenGL_VB); // Upload all guest vertex arrays sequentially to our buffer for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (~gpu.dirty_flags.vertex_array & (1u << index)) + if (!gpu.dirty_flags.vertex_array[index]) continue; const auto& vertex_array = regs.vertex_array[index]; @@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { } } - gpu.dirty_flags.vertex_array = 0; + gpu.dirty_flags.vertex_array.reset(); } DrawParameters RasterizerOpenGL::SetupDraw() { @@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, std::optional<std::size_t> single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, single_color_target}; - if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && - !gpu.dirty_flags.zeta_buffer) { + if (fb_config_state == current_framebuffer_config_state && + gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). This is done because the guest registers may not change but the // host framebuffer may contain different attachments @@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() { // Add space for at least 18 constant buffers buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); - bool invalidate = buffer_cache.Map(buffer_size); + const bool invalidate = buffer_cache.Map(buffer_size); if (invalidate) { // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty_flags.vertex_array = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array.set(); } const GLuint vao = SetupVertexFormat(); @@ -738,9 +738,13 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); + res_cache.SignalPreDrawCall(); + // Execute draw call params.DispatchDraw(); + res_cache.SignalPostDrawCall(); + // Disable scissor test state.viewports[0].scissor.enabled = false; @@ -779,8 +783,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); return true; @@ -1034,7 +1038,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { for (std::size_t i = 0; i < viewport_count; i++) { auto& viewport = current_state.viewports[i]; const auto& src = regs.viewports[i]; - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; + const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; viewport.x = viewport_rect.left; viewport.y = viewport_rect.bottom; viewport.width = viewport_rect.GetWidth(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 258d62259..2f0524f85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -62,8 +62,8 @@ public: void FlushAndInvalidateRegion(VAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) override; + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 74200914e..5fdf1164d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <optional> #include <glad/glad.h> #include "common/alignment.h" @@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return format; } -MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { +Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; if (IsPixelFormatASTC(pixel_format)) { // ASTC formats must stop at the ATSC block size boundary @@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params) // alternatives. This signals a bug on those functions. const auto width = static_cast<GLsizei>(params.MipWidth(0)); const auto height = static_cast<GLsizei>(params.MipHeight(0)); + memory_size = params.MemorySize(); + reinterpreted = false; const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); gl_internal_format = format_tuple.internal_format; @@ -962,30 +965,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; const auto& regs{gpu.regs}; - if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { + if (!gpu.dirty_flags.color_buffer[index]) { return last_color_buffers[index]; } - gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); + gpu.dirty_flags.color_buffer.reset(index); ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); if (index >= regs.rt_control.count) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; - return last_color_buffers[index] = GetSurface(color_params, preserve_contents); + return current_color_buffers[index] = GetSurface(color_params, preserve_contents); } void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { surface->LoadGLBuffer(); surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); surface->MarkAsModified(false, *this); + surface->MarkForReload(false); } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { @@ -997,18 +1001,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres Surface surface{TryGet(params.addr)}; if (surface) { if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { - // Use the cached surface as-is + // Use the cached surface as-is unless it's not synced with memory + if (surface->MustReload()) + LoadSurface(surface); return surface; } else if (preserve_contents) { // If surface parameters changed and we care about keeping the previous data, recreate // the surface from the old one Surface new_surface{RecreateSurface(surface, params)}; - Unregister(surface); + UnregisterSurface(surface); Register(new_surface); + if (new_surface->IsUploaded()) { + RegisterReinterpretSurface(new_surface); + } return new_surface; } else { // Delete the old surface before creating a new one to prevent collisions. - Unregister(surface); + UnregisterSurface(surface); } } @@ -1062,8 +1071,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, } static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle, + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { @@ -1193,7 +1202,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, void RasterizerCacheOpenGL::FermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) { const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); @@ -1257,7 +1266,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, case SurfaceTarget::TextureCubemap: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - FastLayeredCopySurface(old_surface, new_surface); + if (old_params.pixel_format == new_params.pixel_format) + FastLayeredCopySurface(old_surface, new_surface); + else { + AccurateCopySurface(old_surface, new_surface); + } break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", @@ -1286,4 +1299,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params return {}; } +static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params, + u32 height) { + for (u32 i = 0; i < params.max_mip_level; i++) { + if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { + return {i}; + } + } + return {}; +} + +static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { + const std::size_t size = params.LayerMemorySize(); + VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); + for (u32 i = 0; i < params.depth; i++) { + if (start == addr) { + return {i}; + } + start += size; + } + return {}; +} + +static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + const std::size_t src_memory_size = src_params.size_in_bytes; + const std::optional<u32> level = + TryFindBestMipMap(src_memory_size, dst_params, src_params.height); + if (level.has_value()) { + if (src_params.width == dst_params.MipWidthGobAligned(*level) && + src_params.height == dst_params.MipHeight(*level) && + src_params.block_height >= dst_params.MipBlockHeight(*level)) { + const std::optional<u32> slot = + TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); + if (slot.has_value()) { + glCopyImageSubData(render_surface->Texture().handle, + SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, + blitted_surface->Texture().handle, + SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, + dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); + blitted_surface->MarkAsModified(true, cache); + return true; + } + } + } + return false; +} + +static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { + const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); + const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); + if (bound2 > bound1) + return true; + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.component_type != src_params.component_type); +} + +static bool IsReinterpretInvalidSecond(const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.height > src_params.height && dst_params.width > src_params.width); +} + +bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, + Surface intersect) { + if (IsReinterpretInvalid(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { + if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + FlushObject(intersect); + FlushObject(triggering_surface); + intersect->MarkForReload(true); + } + return true; +} + +void RasterizerCacheOpenGL::SignalPreDrawCall() { + if (texception && GLAD_GL_ARB_texture_barrier) { + glTextureBarrier(); + } + texception = false; +} + +void RasterizerCacheOpenGL::SignalPostDrawCall() { + for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { + if (current_color_buffers[i] != nullptr) { + Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); + if (intersect != nullptr) { + PartialReinterpretSurface(current_color_buffers[i], intersect); + texception = true; + } + } + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 89d733c50..797bbdc9c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -28,12 +28,13 @@ namespace OpenGL { class CachedSurface; using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; +using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>; using SurfaceTarget = VideoCore::Surface::SurfaceTarget; using SurfaceType = VideoCore::Surface::SurfaceType; using PixelFormat = VideoCore::Surface::PixelFormat; using ComponentType = VideoCore::Surface::ComponentType; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct SurfaceParams { enum class SurfaceClass { @@ -71,7 +72,7 @@ struct SurfaceParams { } /// Returns the rectangle corresponding to this surface - MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; + Common::Rectangle<u32> GetRect(u32 mip_level = 0) const; /// Returns the total size of this surface in bytes, adjusted for compression std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { @@ -140,10 +141,18 @@ struct SurfaceParams { return offset; } + std::size_t GetMipmapSingleSize(u32 mip_level) const { + return InnerMipmapMemorySize(mip_level, false, is_layered); + } + u32 MipWidth(u32 mip_level) const { return std::max(1U, width >> mip_level); } + u32 MipWidthGobAligned(u32 mip_level) const { + return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); + } + u32 MipHeight(u32 mip_level) const { return std::max(1U, height >> mip_level); } @@ -346,6 +355,10 @@ public: return cached_size_in_bytes; } + std::size_t GetMemorySize() const { + return memory_size; + } + void Flush() override { FlushGLBuffer(); } @@ -395,6 +408,26 @@ public: Tegra::Texture::SwizzleSource swizzle_z, Tegra::Texture::SwizzleSource swizzle_w); + void MarkReinterpreted() { + reinterpreted = true; + } + + bool IsReinterpreted() const { + return reinterpreted; + } + + void MarkForReload(bool reload) { + must_reload = reload; + } + + bool MustReload() const { + return must_reload; + } + + bool IsUploaded() const { + return params.identity == SurfaceParams::SurfaceClass::Uploaded; + } + private: void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); @@ -408,6 +441,9 @@ private: GLenum gl_internal_format{}; std::size_t cached_size_in_bytes{}; std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; + std::size_t memory_size; + bool reinterpreted = false; + bool must_reload = false; }; class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { @@ -430,8 +466,11 @@ public: /// Copies the contents of one surface to another void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect); + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect); + + void SignalPreDrawCall(); + void SignalPostDrawCall(); private: void LoadSurface(const Surface& surface); @@ -449,6 +488,10 @@ private: /// Tries to get a reserved surface for the specified parameters Surface TryGetReservedSurface(const SurfaceParams& params); + // Partialy reinterpret a surface based on a triggering_surface that collides with it. + // returns true if the reinterpret was successful, false in case it was not. + bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); + /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); @@ -465,12 +508,50 @@ private: OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; + bool texception = false; + /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one /// using the new format. OGLBuffer copy_pbo; - std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; + std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers; + std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; Surface last_depth_buffer; + + using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>; + using SurfaceInterval = typename SurfaceIntervalCache::interval_type; + + static auto GetReinterpretInterval(const Surface& object) { + return SurfaceInterval::right_open(object->GetAddr() + 1, + object->GetAddr() + object->GetMemorySize() - 1); + } + + // Reinterpreted surfaces are very fragil as the game may keep rendering into them. + SurfaceIntervalCache reinterpreted_surfaces; + + void RegisterReinterpretSurface(Surface reinterpret_surface) { + auto interval = GetReinterpretInterval(reinterpret_surface); + reinterpreted_surfaces.insert({interval, reinterpret_surface}); + reinterpret_surface->MarkReinterpreted(); + } + + Surface CollideOnReinterpretedSurface(VAddr addr) const { + const SurfaceInterval interval{addr}; + for (auto& pair : + boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { + return pair.second; + } + return nullptr; + } + + /// Unregisters an object from the cache + void UnregisterSurface(const Surface& object) { + if (object->IsReinterpreted()) { + auto interval = GetReinterpretInterval(object); + reinterpreted_surfaces.erase(interval); + } + Unregister(object); + } }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index db18f4dbe..72ff6ac6a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -20,6 +20,7 @@ namespace OpenGL::GLShader { using Tegra::Shader::Attribute; +using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; @@ -288,34 +289,22 @@ private: code.AddNewLine(); } - std::string GetInputFlags(const IpaMode& input_mode) { - const IpaSampleMode sample_mode = input_mode.sampling_mode; - const IpaInterpMode interp_mode = input_mode.interpolation_mode; + std::string GetInputFlags(AttributeUse attribute) { std::string out; - switch (interp_mode) { - case IpaInterpMode::Flat: + switch (attribute) { + case AttributeUse::Constant: out += "flat "; break; - case IpaInterpMode::Linear: + case AttributeUse::ScreenLinear: out += "noperspective "; break; - case IpaInterpMode::Perspective: + case AttributeUse::Perspective: // Default, Smooth break; default: - UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); - } - switch (sample_mode) { - case IpaSampleMode::Centroid: - // It can be implemented with the "centroid " keyword in GLSL - UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); - break; - case IpaSampleMode::Default: - // Default, n/a - break; - default: - UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); + LOG_CRITICAL(HW_GPU, "Unused attribute being fetched"); + UNREACHABLE(); } return out; } @@ -324,16 +313,11 @@ private: const auto& attributes = ir.GetInputAttributes(); for (const auto element : attributes) { const Attribute::Index index = element.first; - const IpaMode& input_mode = *element.second.begin(); if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { // Skip when it's not a generic attribute continue; } - ASSERT(element.second.size() > 0); - UNIMPLEMENTED_IF_MSG(element.second.size() > 1, - "Multiple input flag modes are not supported in GLSL"); - // TODO(bunnei): Use proper number of elements for these u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); if (stage != ShaderStage::Vertex) { @@ -345,8 +329,14 @@ private: if (stage == ShaderStage::Geometry) { attr = "gs_" + attr + "[]"; } - code.AddLine("layout (location = " + std::to_string(idx) + ") " + - GetInputFlags(input_mode) + "in vec4 " + attr + ';'); + std::string suffix; + if (stage == ShaderStage::Fragment) { + const auto input_mode = + header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION); + suffix = GetInputFlags(input_mode); + } + code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " + + attr + ';'); } if (!attributes.empty()) code.AddNewLine(); @@ -1584,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st return {decompiler.GetResult(), decompiler.GetShaderEntries()}; } -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 81882822b..82fc4d44b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <cstring> #include <fmt/format.h> #include <lz4.h> diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 04e1db911..7d96649af 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (location = 0) in vec4 position; +layout (location = 0) in noperspective vec4 position; layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { vec4 viewport_flip; @@ -172,4 +172,4 @@ void main() { return {out, program.second}; } -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 272fc2e8e..e60b2eb44 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -257,6 +257,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer) { texture.width = framebuffer.width; texture.height = framebuffer.height; + texture.pixel_format = framebuffer.pixel_format; GLint internal_format; switch (framebuffer.pixel_format) { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 7e13e566b..c168fa89e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -39,7 +39,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture; - const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; + const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; @@ -102,7 +102,7 @@ private: /// Used for transforming the framebuffer orientation Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; - MathUtil::Rectangle<int> framebuffer_crop_rect; + Common::Rectangle<int> framebuffer_crop_rect; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 17ee93b91..0451babbf 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, u64 begin, u64 end) - : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {} + : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 55ec601ff..38f01ca50 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, "Unaligned attribute loads are not supported"); - Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, + Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass, Tegra::Shader::IpaSampleMode::Default}; u64 next_element = instr.attribute.fmt20.element; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index f9502e3d0..d750a2936 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { instr.ipa.sample_mode.Value()}; const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); - const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); + Node value = attr; + const Tegra::Shader::Attribute::Index index = attribute.index.Value(); + if (index >= Tegra::Shader::Attribute::Index::Attribute_0 && + index <= Tegra::Shader::Attribute::Index::Attribute_31) { + // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. + // In theory by setting them as perspective, OpenGL does the perspective correction. + // A way must figured to reverse the last step of it. + if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { + value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); + } + } + value = GetSaturatedFloat(value, instr.ipa.saturate); SetRegister(bb, instr.gpr0, value); break; @@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index be4635342..33b071747 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, return {node, cursor}; } if (const auto conditional = std::get_if<ConditionalNode>(node)) { - const auto& code = conditional->GetCode(); - const auto [found, internal_cursor] = - FindOperation(code, static_cast<s64>(code.size() - 1), operation_code); + const auto& conditional_code = conditional->GetCode(); + const auto [found, internal_cursor] = FindOperation( + conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); if (found) return {found, cursor}; } @@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { return nullptr; } if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { - const auto& code = conditional->GetCode(); - return TrackCbuf(tracked, code, static_cast<s64>(code.size())); + const auto& conditional_code = conditional->GetCode(); + return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); } return nullptr; } |