From 93ac5a6a6d316966c1d288f8b83610bb48143a04 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 21 Oct 2022 01:46:51 +0200 Subject: MacroHLE: Add Index Buffer size estimation. --- src/video_core/engines/maxwell_3d.cpp | 7 +++++ src/video_core/engines/maxwell_3d.h | 2 ++ src/video_core/macro/macro_hle.cpp | 20 ++++++++----- src/video_core/memory_manager.cpp | 53 +++++++++++++++++++++++++++++++++-- src/video_core/memory_manager.h | 2 ++ 5 files changed, 74 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b998a8e69..a0dd7400d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -179,6 +179,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() { return num_vertices; } +size_t Maxwell3D::EstimateIndexBufferSize() { + GPUVAddr start_address = regs.index_buffer.StartAddress(); + GPUVAddr end_address = regs.index_buffer.EndAddress(); + return std::min(memory_manager.GetMemoryLayoutSize(start_address), + static_cast<size_t>(end_address - start_address)); +} + u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { // Keep track of the register value in shadow_state when requested. const auto control = shadow_state.shadow_ram_control; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e2256594d..cfe1e4883 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3077,6 +3077,8 @@ public: u32 GetMaxCurrentVertices(); + size_t EstimateIndexBufferSize(); + /// Handles a write to the CLEAR_BUFFERS register. 
void ProcessClearBuffers(u32 layer_count); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 79fab96e1..93b6d42a4 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -163,12 +163,16 @@ public: maxwell3d.RefreshParameters(); minimum_limit = std::max(parameters[3], minimum_limit); } - - const u32 base_vertex = parameters[8]; - const u32 base_instance = parameters[9]; - maxwell3d.regs.vertex_id_base = base_vertex; + const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); + const u32 base_size = std::max(minimum_limit, estimate); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.index_buffer.first = 0; + maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping + maxwell3d.regs.draw.topology.Assign(topology); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.CallMethod(0x8e3, 0x640, true); - maxwell3d.CallMethod(0x8e4, base_vertex, true); + maxwell3d.CallMethod(0x8e4, element_base, true); maxwell3d.CallMethod(0x8e5, base_instance, true); auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; @@ -179,7 +183,7 @@ public: params.max_draw_counts = 1; params.stride = 0; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, minimum_limit); + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); maxwell3d.CallMethod(0x8e3, 0x640, true); maxwell3d.CallMethod(0x8e4, 0x0, true); maxwell3d.CallMethod(0x8e5, 0x0, true); @@ -271,9 +275,11 @@ public: if (check_limit) { minimum_limit = std::max(highest_limit, minimum_limit); } + const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); + const u32 base_size = std::max(minimum_limit, estimate); maxwell3d.regs.index_buffer.first = 0; - maxwell3d.regs.index_buffer.count = std::max(highest_limit, minimum_limit); + 
maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_indexed = true; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8c8dfcca6..8f6c51045 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -325,9 +325,15 @@ template ; - static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v; - static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v; + using FuncMappedReturn = + typename std::invoke_result::type; + using FuncReservedReturn = + typename std::invoke_result::type; + using FuncUnmappedReturn = + typename std::invoke_result::type; + static constexpr bool BOOL_BREAK_MAPPED = std::is_same_v; + static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v; + static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v; u64 used_page_size; u64 used_page_mask; u64 used_page_bits; @@ -571,6 +577,47 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { return range_so_far; } +size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { + PTEKind base_kind = GetPageKind(gpu_addr); + if (base_kind == PTEKind::INVALID) { + return 0; + } + size_t range_so_far = 0; + bool result{false}; + auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, + std::size_t copy_amount) { + result = true; + return true; + }; + auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + PTEKind base_kind_other = GetKind((page_index << page_bits) + offset); + if (base_kind != base_kind_other) { + result = true; + return true; + } + range_so_far += copy_amount; + return false; + }; + auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { + PTEKind base_kind_other = GetKind((page_index << big_page_bits) + offset); + if 
(base_kind != base_kind_other) { + result = true; + return true; + } + range_so_far += copy_amount; + return false; + }; + auto check_short_pages = [&](std::size_t page_index, std::size_t offset, + std::size_t copy_amount) { + GPUVAddr base = (page_index << big_page_bits) + offset; + MemoryOperation(base, copy_amount, short_check, fail, fail); + return result; + }; + MemoryOperation(gpu_addr, address_space_size - gpu_addr, big_check, fail, + check_short_pages); + return range_so_far; +} + void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index ab4bc9ec6..65f6e8134 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -118,6 +118,8 @@ public: PTEKind GetPageKind(GPUVAddr gpu_addr) const; + size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const; + private: template inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, -- cgit v1.2.3