summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp2
-rw-r--r--src/video_core/macro/macro_hle.cpp63
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp62
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp2
-rw-r--r--src/video_core/textures/astc.cpp2
5 files changed, 79 insertions, 52 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 3441a5fe5..d608678a3 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -1065,7 +1065,7 @@ TexelWeightParams DecodeBlockInfo() {
void FillError(ivec3 coord) {
for (uint j = 0; j < block_dims.y; j++) {
for (uint i = 0; i < block_dims.x; i++) {
- imageStore(dest_image, coord + ivec3(i, j, 0), vec4(1.0, 1.0, 0.0, 1.0));
+ imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0));
}
}
}
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 58382755b..cabe8dcbf 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -3,6 +3,8 @@
#include <array>
#include <vector>
+#include "common/scope_exit.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/macro/macro.h"
#include "video_core/macro/macro_hle.h"
@@ -58,6 +60,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.index_array.first = parameters[3];
maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
maxwell3d.regs.index_array.count = parameters[1];
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.regs.vb_element_base = element_base;
maxwell3d.regs.vb_base_instance = base_instance;
maxwell3d.mme_draw.instance_count = instance_count;
@@ -80,10 +83,67 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
-constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
+// Multidraw Indirect
+void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
+ SCOPE_EXIT({
+ // Clean everything.
+ maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
+ maxwell3d.regs.index_array.count = 0;
+ maxwell3d.regs.vb_element_base = 0x0;
+ maxwell3d.regs.vb_base_instance = 0x0;
+ maxwell3d.mme_draw.instance_count = 0;
+ maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+ maxwell3d.CallMethodFromMME(0x8e4, 0x0);
+ maxwell3d.CallMethodFromMME(0x8e5, 0x0);
+ maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ });
+ const u32 start_indirect = parameters[0];
+ const u32 end_indirect = parameters[1];
+ if (start_indirect >= end_indirect) {
+ // Nothing to do.
+ return;
+ }
+ const auto topology =
+ static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
+ maxwell3d.regs.draw.topology.Assign(topology);
+ const u32 padding = parameters[3];
+ const std::size_t max_draws = parameters[4];
+
+ const u32 indirect_words = 5 + padding;
+ const std::size_t first_draw = start_indirect;
+ const std::size_t effective_draws = end_indirect - start_indirect;
+ const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws);
+
+ for (std::size_t index = first_draw; index < last_draw; index++) {
+ const std::size_t base = index * indirect_words + 5;
+ const u32 num_vertices = parameters[base];
+ const u32 instance_count = parameters[base + 1];
+ const u32 first_index = parameters[base + 2];
+ const u32 base_vertex = parameters[base + 3];
+ const u32 base_instance = parameters[base + 4];
+ maxwell3d.regs.index_array.first = first_index;
+ maxwell3d.regs.reg_array[0x446] = base_vertex;
+ maxwell3d.regs.index_array.count = num_vertices;
+ maxwell3d.regs.vb_element_base = base_vertex;
+ maxwell3d.regs.vb_base_instance = base_instance;
+ maxwell3d.mme_draw.instance_count = instance_count;
+ maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+ maxwell3d.CallMethodFromMME(0x8e4, base_vertex);
+ maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+ maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ if (maxwell3d.ShouldExecute()) {
+ maxwell3d.Rasterizer().Draw(true, true);
+ }
+ maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+ }
+}
+
+constexpr std::array<std::pair<u64, HLEFunction>, 4> hle_funcs{{
{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
{0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD},
{0x0217920100488FF7, &HLE_0217920100488FF7},
+ {0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164},
}};
class HLEMacroImpl final : public CachedMacro {
@@ -99,6 +159,7 @@ private:
Engines::Maxwell3D& maxwell3d;
HLEFunction func;
};
+
} // Anonymous namespace
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index aca25d902..a302a9603 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -279,28 +279,13 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
auto dst = Compile_GetRegister(opcode.src_a, RESULT);
auto src = Compile_GetRegister(opcode.src_b, eax);
- if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) {
- shr(src, opcode.bf_src_bit);
- } else if (opcode.bf_src_bit == 31) {
- xor_(src, src);
- }
- // Don't bother masking the whole register since we're using a 32 bit register
- if (opcode.bf_size != 31 && opcode.bf_size != 0) {
- and_(src, opcode.GetBitfieldMask());
- } else if (opcode.bf_size == 0) {
- xor_(src, src);
- }
- if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) {
- shl(src, opcode.bf_dst_bit);
- } else if (opcode.bf_dst_bit == 31) {
- xor_(src, src);
- }
-
const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
- if (mask != 0xffffffff) {
- and_(dst, mask);
- }
+ and_(dst, mask);
+ shr(src, opcode.bf_src_bit);
+ and_(src, opcode.GetBitfieldMask());
+ shl(src, opcode.bf_dst_bit);
or_(dst, src);
+
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
@@ -309,17 +294,9 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
const auto src = Compile_GetRegister(opcode.src_b, RESULT);
shr(src, dst.cvt8());
- if (opcode.bf_size != 0 && opcode.bf_size != 31) {
- and_(src, opcode.GetBitfieldMask());
- } else if (opcode.bf_size == 0) {
- xor_(src, src);
- }
+ and_(src, opcode.GetBitfieldMask());
+ shl(src, opcode.bf_dst_bit);
- if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) {
- shl(src, opcode.bf_dst_bit);
- } else if (opcode.bf_dst_bit == 31) {
- xor_(src, src);
- }
Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
@@ -327,13 +304,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
const auto dst = Compile_GetRegister(opcode.src_a, ecx);
const auto src = Compile_GetRegister(opcode.src_b, RESULT);
- if (opcode.bf_src_bit != 0) {
- shr(src, opcode.bf_src_bit);
- }
-
- if (opcode.bf_size != 31) {
- and_(src, opcode.GetBitfieldMask());
- }
+ shr(src, opcode.bf_src_bit);
+ and_(src, opcode.GetBitfieldMask());
shl(src, dst.cvt8());
Compile_ProcessResult(opcode.result_operation, opcode.dst);
@@ -429,17 +401,11 @@ void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
Xbyak::Label handle_post_exit{};
Xbyak::Label skip{};
jmp(skip, T_NEAR);
- if (opcode.is_exit) {
- L(handle_post_exit);
- // Execute 1 instruction
- mov(BRANCH_HOLDER, end_of_code);
- // Jump to next instruction to skip delay slot check
- jmp(labels[jump_address], T_NEAR);
- } else {
- L(handle_post_exit);
- xor_(BRANCH_HOLDER, BRANCH_HOLDER);
- jmp(labels[jump_address], T_NEAR);
- }
+
+ L(handle_post_exit);
+ xor_(BRANCH_HOLDER, BRANCH_HOLDER);
+ jmp(labels[jump_address], T_NEAR);
+
L(skip);
mov(BRANCH_HOLDER, handle_post_exit);
jmp(delay_skip[pc], T_NEAR);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 32450ee1d..08f4d69ab 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -168,7 +168,7 @@ void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
- static_cast<GLsizeiptr>(size));
+ static_cast<GLsizeiptr>(Common::AlignUp(size, 4)));
} else {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
index_buffer_offset = offset;
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index b159494c5..e3742ddf5 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1413,7 +1413,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 b
static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
- outBuf[j * blockWidth + i] = 0xFFFF00FF;
+ outBuf[j * blockWidth + i] = 0x00000000;
}
}
}