diff options
-rw-r--r-- | CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.cpp | 4 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 3 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/rescaling_pass.cpp | 29 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 42 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 35 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 3 | ||||
-rw-r--r-- | src/yuzu_cmd/default_ini.h | 6 |
8 files changed, 118 insertions, 10 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 4bf55d664..11a459813 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -363,7 +363,11 @@ if(ENABLE_QT) set(YUZU_QT_NO_CMAKE_SYSTEM_PATH "NO_CMAKE_SYSTEM_PATH") endif() - find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH}) + if ((${CMAKE_SYSTEM_NAME} STREQUAL "Linux") AND YUZU_USE_BUNDLED_QT) + find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets DBus ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH}) + else() + find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH}) + endif() if (YUZU_USE_QT_WEB_ENGINE) find_package(Qt5 COMPONENTS WebEngineCore WebEngineWidgets) endif() diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 248ad3ced..b22725584 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo } Optimization::SsaRewritePass(program); + Optimization::ConstantPropagationPass(program); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program); - Optimization::ConstantPropagationPass(program); - if (Settings::values.resolution_info.active) { Optimization::RescalingPass(program); } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 38592afd0..ddf497e32 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { /// Tries to track the storage buffer address used by a global memory instruction std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32 && + inst->GetOpcode() != IR::Opcode::GetCbufU32x2) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index c28500dd1..496d4667e 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s } } +void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, + size_t index) { + const IR::Value composite{inst.Arg(index)}; + if (composite.IsEmpty()) { + return; + } + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; + switch (info.type) { + case TextureType::ColorArray2D: + case TextureType::Color2D: + inst.SetArg(index, ir.CompositeConstruct(x, y)); + break; + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const IR::Value coord{inst.Arg(1)}; @@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; SubScaleCoord(ir, inst, is_scaled); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { @@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; ScaleIntegerComposite(ir, inst, is_scaled, 1); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void PatchImageRead(IR::Block& block, IR::Inst& inst) { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ba19d1ca2..54a902f56 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return ProcessCBBind(4); case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): return DrawArrays(); + case MAXWELL3D_REG_INDEX(small_index): + regs.index_array.count = regs.small_index.count; + regs.index_array.first = regs.small_index.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + return DrawArrays(); + case MAXWELL3D_REG_INDEX(topology_override): + use_topology_override = true; + return; case MAXWELL3D_REG_INDEX(clear_buffers): return ProcessClearBuffers(); case MAXWELL3D_REG_INDEX(query.query_get): @@ -352,6 +361,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { } } +void Maxwell3D::ProcessTopologyOverride() { + using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; + using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; + + PrimitiveTopology topology{}; + + switch (regs.topology_override) { + case PrimitiveTopologyOverride::None: + topology = regs.draw.topology; + break; + case PrimitiveTopologyOverride::Points: + topology = PrimitiveTopology::Points; + break; + case PrimitiveTopologyOverride::Lines: + topology = PrimitiveTopology::Lines; + break; + case PrimitiveTopologyOverride::LineStrip: + topology = PrimitiveTopology::LineStrip; + break; + default: + topology = static_cast<PrimitiveTopology>(regs.topology_override); + break; + } + + if (use_topology_override) { + regs.draw.topology.Assign(topology); + } +} + void Maxwell3D::FlushMMEInlineDraw() { LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); @@ -362,6 +400,8 @@ void Maxwell3D::FlushMMEInlineDraw() { ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); + ProcessTopologyOverride(); + const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { rasterizer->Draw(is_indexed, true); @@ -521,6 +561,8 @@ void Maxwell3D::DrawArrays() { ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); + ProcessTopologyOverride(); + if (regs.draw.instance_next) { // Increment the current instance *before* drawing. state.current_instance += 1; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 38d9b6660..357a74c70 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -367,6 +367,22 @@ public: Patches = 0xe, }; + // Constants as from NVC0_3D_UNK1970_D3D + // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598 + enum class PrimitiveTopologyOverride : u32 { + None = 0x0, + Points = 0x1, + Lines = 0x2, + LineStrip = 0x3, + Triangles = 0x4, + TriangleStrip = 0x5, + LinesAdjacency = 0xa, + LineStripAdjacency = 0xb, + TrianglesAdjacency = 0xc, + TriangleStripAdjacency = 0xd, + Patches = 0xe, + }; + enum class IndexFormat : u32 { UnsignedByte = 0x0, UnsignedShort = 0x1, @@ -1200,7 +1216,12 @@ public: } } index_array; - INSERT_PADDING_WORDS_NOINIT(0x7); + union { + BitField<0, 16, u32> first; + BitField<16, 16, u32> count; + } small_index; + + INSERT_PADDING_WORDS_NOINIT(0x6); INSERT_PADDING_WORDS_NOINIT(0x1F); @@ -1244,7 +1265,11 @@ public: BitField<11, 1, u32> depth_clamp_disabled; } view_volume_clip_control; - INSERT_PADDING_WORDS_NOINIT(0x1F); + INSERT_PADDING_WORDS_NOINIT(0xC); + + PrimitiveTopologyOverride topology_override; + + INSERT_PADDING_WORDS_NOINIT(0x12); u32 depth_bounds_enable; @@ -1529,6 +1554,9 @@ private: /// Handles a write to the VERTEX_END_GL register, triggering a draw. void DrawArrays(); + /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) + void ProcessTopologyOverride(); + // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); @@ -1556,6 +1584,7 @@ private: Upload::State upload_state; bool execute_on{true}; + bool use_topology_override{false}; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -1672,6 +1701,7 @@ ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); +ASSERT_REG_POSITION(small_index, 0x5F9); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(vp_point_size, 0x644); @@ -1681,6 +1711,7 @@ ASSERT_REG_POSITION(cull_face, 0x648); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); +ASSERT_REG_POSITION(topology_override, 0x65C); ASSERT_REG_POSITION(depth_bounds_enable, 0x66F); ASSERT_REG_POSITION(logic_op, 0x671); ASSERT_REG_POSITION(clear_buffers, 0x674); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 0f62779de..ca6019a3a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::A8B8G8R8_UNORM: - if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM || + src_view.format == PixelFormat::D24_UNORM_S8_UINT) { return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); } break; diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 3ac1440c9..34782c378 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -124,7 +124,11 @@ keyboard_enabled = [Core] # Whether to use multi-core for CPU emulation # 0: Disabled, 1 (default): Enabled -use_multi_core= +use_multi_core = + +# Enable extended guest system memory layout (6GB DRAM) +# 0 (default): Disabled, 1: Enabled +use_extended_memory_layout = [Cpu] # Adjusts various optimizations. |