diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 2 | ||||
-rw-r--r-- | src/video_core/macro/macro_hle.cpp | 63 | ||||
-rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 62 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 2 | ||||
-rw-r--r-- | src/video_core/textures/astc.cpp | 2 |
5 files changed, 79 insertions, 52 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 3441a5fe5..d608678a3 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -1065,7 +1065,7 @@ TexelWeightParams DecodeBlockInfo() { void FillError(ivec3 coord) { for (uint j = 0; j < block_dims.y; j++) { for (uint i = 0; i < block_dims.x; i++) { - imageStore(dest_image, coord + ivec3(i, j, 0), vec4(1.0, 1.0, 0.0, 1.0)); + imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); } } } diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 58382755b..cabe8dcbf 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -3,6 +3,8 @@ #include <array> #include <vector> +#include "common/scope_exit.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" @@ -58,6 +60,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.index_array.first = parameters[3]; maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base? maxwell3d.regs.index_array.count = parameters[1]; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.regs.vb_element_base = element_base; maxwell3d.regs.vb_base_instance = base_instance; maxwell3d.mme_draw.instance_count = instance_count; @@ -80,10 +83,67 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; } -constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ +// Multidraw Indirect +void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { + SCOPE_EXIT({ + // Clean everything. + maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? + maxwell3d.regs.index_array.count = 0; + maxwell3d.regs.vb_element_base = 0x0; + maxwell3d.regs.vb_base_instance = 0x0; + maxwell3d.mme_draw.instance_count = 0; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, 0x0); + maxwell3d.CallMethodFromMME(0x8e5, 0x0); + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + }); + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + const auto topology = + static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); + maxwell3d.regs.draw.topology.Assign(topology); + const u32 padding = parameters[3]; + const std::size_t max_draws = parameters[4]; + + const u32 indirect_words = 5 + padding; + const std::size_t first_draw = start_indirect; + const std::size_t effective_draws = end_indirect - start_indirect; + const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws); + + for (std::size_t index = first_draw; index < last_draw; index++) { + const std::size_t base = index * indirect_words + 5; + const u32 num_vertices = parameters[base]; + const u32 instance_count = parameters[base + 1]; + const u32 first_index = parameters[base + 2]; + const u32 base_vertex = parameters[base + 3]; + const u32 base_instance = parameters[base + 4]; + maxwell3d.regs.index_array.first = first_index; + maxwell3d.regs.reg_array[0x446] = base_vertex; + maxwell3d.regs.index_array.count = num_vertices; + maxwell3d.regs.vb_element_base = base_vertex; + maxwell3d.regs.vb_base_instance = base_instance; + maxwell3d.mme_draw.instance_count = instance_count; + maxwell3d.CallMethodFromMME(0x8e3, 0x640); + maxwell3d.CallMethodFromMME(0x8e4, base_vertex); + maxwell3d.CallMethodFromMME(0x8e5, base_instance); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + if (maxwell3d.ShouldExecute()) { + maxwell3d.Rasterizer().Draw(true, true); + } + maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; + } +} + +constexpr std::array<std::pair<u64, HLEFunction>, 4> hle_funcs{{ {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, {0x0217920100488FF7, &HLE_0217920100488FF7}, + {0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164}, }}; class HLEMacroImpl final : public CachedMacro { @@ -99,6 +159,7 @@ private: Engines::Maxwell3D& maxwell3d; HLEFunction func; }; + } // Anonymous namespace HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index aca25d902..a302a9603 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -279,28 +279,13 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { auto dst = Compile_GetRegister(opcode.src_a, RESULT); auto src = Compile_GetRegister(opcode.src_b, eax); - if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) { - shr(src, opcode.bf_src_bit); - } else if (opcode.bf_src_bit == 31) { - xor_(src, src); - } - // Don't bother masking the whole register since we're using a 32 bit register - if (opcode.bf_size != 31 && opcode.bf_size != 0) { - and_(src, opcode.GetBitfieldMask()); - } else if (opcode.bf_size == 0) { - xor_(src, src); - } - if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) { - shl(src, opcode.bf_dst_bit); - } else if (opcode.bf_dst_bit == 31) { - xor_(src, src); - } - const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); - if (mask != 0xffffffff) { - and_(dst, mask); - } + and_(dst, mask); + shr(src, opcode.bf_src_bit); + and_(src, opcode.GetBitfieldMask()); + shl(src, opcode.bf_dst_bit); or_(dst, src); + Compile_ProcessResult(opcode.result_operation, opcode.dst); } @@ -309,17 +294,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { const auto src = Compile_GetRegister(opcode.src_b, RESULT); shr(src, dst.cvt8()); - if (opcode.bf_size != 0 && opcode.bf_size != 31) { - and_(src, opcode.GetBitfieldMask()); - } else if (opcode.bf_size == 0) { - xor_(src, src); - } + and_(src, opcode.GetBitfieldMask()); + shl(src, opcode.bf_dst_bit); - if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) { - shl(src, opcode.bf_dst_bit); - } else if (opcode.bf_dst_bit == 31) { - xor_(src, src); - } Compile_ProcessResult(opcode.result_operation, opcode.dst); } @@ -327,13 +304,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto src = Compile_GetRegister(opcode.src_b, RESULT); - if (opcode.bf_src_bit != 0) { - shr(src, opcode.bf_src_bit); - } - - if (opcode.bf_size != 31) { - and_(src, opcode.GetBitfieldMask()); - } + shr(src, opcode.bf_src_bit); + and_(src, opcode.GetBitfieldMask()); shl(src, dst.cvt8()); Compile_ProcessResult(opcode.result_operation, opcode.dst); @@ -429,17 +401,11 @@ void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { Xbyak::Label handle_post_exit{}; Xbyak::Label skip{}; jmp(skip, T_NEAR); - if (opcode.is_exit) { - L(handle_post_exit); - // Execute 1 instruction - mov(BRANCH_HOLDER, end_of_code); - // Jump to next instruction to skip delay slot check - jmp(labels[jump_address], T_NEAR); - } else { - L(handle_post_exit); - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - jmp(labels[jump_address], T_NEAR); - } + + L(handle_post_exit); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + L(skip); mov(BRANCH_HOLDER, handle_post_exit); jmp(delay_skip[pc], T_NEAR); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 32450ee1d..08f4d69ab 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -168,7 +168,7 @@ void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { if (has_unified_vertex_buffers) { buffer.MakeResident(GL_READ_ONLY); glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, - static_cast<GLsizeiptr>(size)); + static_cast<GLsizeiptr>(Common::AlignUp(size, 4))); } else { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); index_buffer_offset = offset; diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index b159494c5..e3742ddf5 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -1413,7 +1413,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 b static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { for (u32 j = 0; j < blockHeight; j++) { for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = 0xFFFF00FF; + outBuf[j * blockWidth + i] = 0x00000000; } } } |