From 99507d0188a1f7f7d8e11741f935918c7643ce76 Mon Sep 17 00:00:00 2001 From: FengChen Date: Sun, 16 Oct 2022 23:49:32 +0800 Subject: video_core: Implement memory manager page kind --- src/video_core/CMakeLists.txt | 1 + src/video_core/memory_manager.cpp | 61 +++++++-- src/video_core/memory_manager.h | 21 ++- src/video_core/pte_kind.h | 264 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 334 insertions(+), 13 deletions(-) create mode 100644 src/video_core/pte_kind.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 40e6d1ec4..cb8b46edf 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -82,6 +82,7 @@ add_library(video_core STATIC gpu_thread.h memory_manager.cpp memory_manager.h + pte_kind.h query_cache.h rasterizer_accelerated.cpp rasterizer_accelerated.h diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index cca401c74..d07b21bd6 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -41,7 +41,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_entries.resize(big_page_table_size / 32, 0); big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); + std::array kind_valus; + kind_valus.fill(PTEKind::INVALID); + big_kinds.resize(big_page_table_size / 32, kind_valus); entries.resize(page_table_size / 32, 0); + kinds.resize(big_page_table_size / 32, kind_valus); } MemoryManager::~MemoryManager() = default; @@ -78,6 +82,41 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) { } } +PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const { + auto entry = GetEntry(gpu_addr); + if (entry == EntryType::Mapped || entry == EntryType::Reserved) [[likely]] { + return GetKind(gpu_addr); + } else { + return GetKind(gpu_addr); + } +} + +template +PTEKind MemoryManager::GetKind(size_t position) const { + if constexpr (is_big_page) { + position = position >> big_page_bits; + const size_t sub_index = position % 32; + return big_kinds[position / 32][sub_index]; + } else { + position = position >> page_bits; + const size_t sub_index = position % 32; + return kinds[position / 32][sub_index]; + } +} + +template +void MemoryManager::SetKind(size_t position, PTEKind kind) { + if constexpr (is_big_page) { + position = position >> big_page_bits; + const size_t sub_index = position % 32; + big_kinds[position / 32][sub_index] = kind; + } else { + position = position >> page_bits; + const size_t sub_index = position % 32; + kinds[position / 32][sub_index] = kind; + } +} + inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const { const u64 entry_mask = big_page_continous[big_page_index / continous_bits]; const size_t sub_index = big_page_index % continous_bits; @@ -92,8 +131,8 @@ inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value } template -GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, - size_t size) { +GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, + PTEKind kind) { u64 remaining_size{size}; if constexpr (entry_type == EntryType::Mapped) { page_table.ReserveRange(gpu_addr, size); @@ -102,6 +141,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp const GPUVAddr current_gpu_addr = gpu_addr + offset; [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); SetEntry(current_gpu_addr, entry_type); + SetKind(current_gpu_addr, kind); if (current_entry_type != entry_type) { rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); } @@ -118,12 +158,13 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp template GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, - size_t size) { + size_t size, PTEKind kind) { u64 remaining_size{size}; for (u64 offset{}; offset < size; offset += big_page_size) { const GPUVAddr current_gpu_addr = gpu_addr + offset; [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); SetEntry(current_gpu_addr, entry_type); + SetKind(current_gpu_addr, kind); if (current_entry_type != entry_type) { rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); } @@ -159,19 +200,19 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) rasterizer = rasterizer_; } -GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, +GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind, bool is_big_pages) { if (is_big_pages) [[likely]] { - return BigPageTableOp(gpu_addr, cpu_addr, size); + return BigPageTableOp(gpu_addr, cpu_addr, size, kind); } - return PageTableOp(gpu_addr, cpu_addr, size); + return PageTableOp(gpu_addr, cpu_addr, size, kind); } GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { if (is_big_pages) [[likely]] { - return BigPageTableOp(gpu_addr, 0, size); + return BigPageTableOp(gpu_addr, 0, size, PTEKind::INVALID); } - return PageTableOp(gpu_addr, 0, size); + return PageTableOp(gpu_addr, 0, size, PTEKind::INVALID); } void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { @@ -188,8 +229,8 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { rasterizer->UnmapMemory(*cpu_addr, map_size); } - BigPageTableOp(gpu_addr, 0, size); - PageTableOp(gpu_addr, 0, size); + BigPageTableOp(gpu_addr, 0, size, PTEKind::INVALID); + PageTableOp(gpu_addr, 0, size, PTEKind::INVALID); } std::optional MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index f992e29f3..ab4bc9ec6 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "common/multi_level_page_table.h" #include "common/virtual_buffer.h" +#include "video_core/pte_kind.h" namespace VideoCore { class RasterizerInterface; @@ -98,7 +99,8 @@ public: std::vector> GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const; - GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true); + GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, + PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); void Unmap(GPUVAddr gpu_addr, std::size_t size); @@ -114,6 +116,8 @@ public: return gpu_addr < address_space_size; } + PTEKind GetPageKind(GPUVAddr gpu_addr) const; + private: template inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, @@ -166,10 +170,12 @@ private: std::vector big_entries; template - GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size); + GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, + PTEKind kind); template - GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size); + GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, + PTEKind kind); template inline EntryType GetEntry(size_t position) const; @@ -177,6 +183,15 @@ private: template inline void SetEntry(size_t position, EntryType entry); + std::vector> kinds; + std::vector> big_kinds; + + template + inline PTEKind GetKind(size_t position) const; + + template + inline void SetKind(size_t position, PTEKind kind); + Common::MultiLevelPageTable page_table; Common::VirtualBuffer big_page_table_cpu; diff --git a/src/video_core/pte_kind.h b/src/video_core/pte_kind.h new file mode 100644 index 000000000..591d7214b --- /dev/null +++ b/src/video_core/pte_kind.h @@ -0,0 +1,264 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/common_types.h" + +namespace Tegra { + +// https://github.com/NVIDIA/open-gpu-doc/blob/master/manuals/volta/gv100/dev_mmu.ref.txt +enum class PTEKind : u8 { + INVALID = 0xff, + PITCH = 0x00, + Z16 = 0x01, + Z16_2C = 0x02, + Z16_MS2_2C = 0x03, + Z16_MS4_2C = 0x04, + Z16_MS8_2C = 0x05, + Z16_MS16_2C = 0x06, + Z16_2Z = 0x07, + Z16_MS2_2Z = 0x08, + Z16_MS4_2Z = 0x09, + Z16_MS8_2Z = 0x0a, + Z16_MS16_2Z = 0x0b, + Z16_2CZ = 0x36, + Z16_MS2_2CZ = 0x37, + Z16_MS4_2CZ = 0x38, + Z16_MS8_2CZ = 0x39, + Z16_MS16_2CZ = 0x5f, + Z16_4CZ = 0x0c, + Z16_MS2_4CZ = 0x0d, + Z16_MS4_4CZ = 0x0e, + Z16_MS8_4CZ = 0x0f, + Z16_MS16_4CZ = 0x10, + S8Z24 = 0x11, + S8Z24_1Z = 0x12, + S8Z24_MS2_1Z = 0x13, + S8Z24_MS4_1Z = 0x14, + S8Z24_MS8_1Z = 0x15, + S8Z24_MS16_1Z = 0x16, + S8Z24_2CZ = 0x17, + S8Z24_MS2_2CZ = 0x18, + S8Z24_MS4_2CZ = 0x19, + S8Z24_MS8_2CZ = 0x1a, + S8Z24_MS16_2CZ = 0x1b, + S8Z24_2CS = 0x1c, + S8Z24_MS2_2CS = 0x1d, + S8Z24_MS4_2CS = 0x1e, + S8Z24_MS8_2CS = 0x1f, + S8Z24_MS16_2CS = 0x20, + S8Z24_4CSZV = 0x21, + S8Z24_MS2_4CSZV = 0x22, + S8Z24_MS4_4CSZV = 0x23, + S8Z24_MS8_4CSZV = 0x24, + S8Z24_MS16_4CSZV = 0x25, + V8Z24_MS4_VC12 = 0x26, + V8Z24_MS4_VC4 = 0x27, + V8Z24_MS8_VC8 = 0x28, + V8Z24_MS8_VC24 = 0x29, + V8Z24_MS4_VC12_1ZV = 0x2e, + V8Z24_MS4_VC4_1ZV = 0x2f, + V8Z24_MS8_VC8_1ZV = 0x30, + V8Z24_MS8_VC24_1ZV = 0x31, + V8Z24_MS4_VC12_2CS = 0x32, + V8Z24_MS4_VC4_2CS = 0x33, + V8Z24_MS8_VC8_2CS = 0x34, + V8Z24_MS8_VC24_2CS = 0x35, + V8Z24_MS4_VC12_2CZV = 0x3a, + V8Z24_MS4_VC4_2CZV = 0x3b, + V8Z24_MS8_VC8_2CZV = 0x3c, + V8Z24_MS8_VC24_2CZV = 0x3d, + V8Z24_MS4_VC12_2ZV = 0x3e, + V8Z24_MS4_VC4_2ZV = 0x3f, + V8Z24_MS8_VC8_2ZV = 0x40, + V8Z24_MS8_VC24_2ZV = 0x41, + V8Z24_MS4_VC12_4CSZV = 0x42, + V8Z24_MS4_VC4_4CSZV = 0x43, + V8Z24_MS8_VC8_4CSZV = 0x44, + V8Z24_MS8_VC24_4CSZV = 0x45, + Z24S8 = 0x46, + Z24S8_1Z = 0x47, + Z24S8_MS2_1Z = 0x48, + Z24S8_MS4_1Z = 0x49, + Z24S8_MS8_1Z = 0x4a, + Z24S8_MS16_1Z = 0x4b, + Z24S8_2CS = 0x4c, + Z24S8_MS2_2CS = 0x4d, + Z24S8_MS4_2CS = 0x4e, + Z24S8_MS8_2CS = 0x4f, + Z24S8_MS16_2CS = 0x50, + Z24S8_2CZ = 0x51, + Z24S8_MS2_2CZ = 0x52, + Z24S8_MS4_2CZ = 0x53, + Z24S8_MS8_2CZ = 0x54, + Z24S8_MS16_2CZ = 0x55, + Z24S8_4CSZV = 0x56, + Z24S8_MS2_4CSZV = 0x57, + Z24S8_MS4_4CSZV = 0x58, + Z24S8_MS8_4CSZV = 0x59, + Z24S8_MS16_4CSZV = 0x5a, + Z24V8_MS4_VC12 = 0x5b, + Z24V8_MS4_VC4 = 0x5c, + Z24V8_MS8_VC8 = 0x5d, + Z24V8_MS8_VC24 = 0x5e, + YUV_B8C1_2Y = 0x60, + YUV_B8C2_2Y = 0x61, + YUV_B10C1_2Y = 0x62, + YUV_B10C2_2Y = 0x6b, + YUV_B12C1_2Y = 0x6c, + YUV_B12C2_2Y = 0x6d, + Z24V8_MS4_VC12_1ZV = 0x63, + Z24V8_MS4_VC4_1ZV = 0x64, + Z24V8_MS8_VC8_1ZV = 0x65, + Z24V8_MS8_VC24_1ZV = 0x66, + Z24V8_MS4_VC12_2CS = 0x67, + Z24V8_MS4_VC4_2CS = 0x68, + Z24V8_MS8_VC8_2CS = 0x69, + Z24V8_MS8_VC24_2CS = 0x6a, + Z24V8_MS4_VC12_2CZV = 0x6f, + Z24V8_MS4_VC4_2CZV = 0x70, + Z24V8_MS8_VC8_2CZV = 0x71, + Z24V8_MS8_VC24_2CZV = 0x72, + Z24V8_MS4_VC12_2ZV = 0x73, + Z24V8_MS4_VC4_2ZV = 0x74, + Z24V8_MS8_VC8_2ZV = 0x75, + Z24V8_MS8_VC24_2ZV = 0x76, + Z24V8_MS4_VC12_4CSZV = 0x77, + Z24V8_MS4_VC4_4CSZV = 0x78, + Z24V8_MS8_VC8_4CSZV = 0x79, + Z24V8_MS8_VC24_4CSZV = 0x7a, + ZF32 = 0x7b, + ZF32_1Z = 0x7c, + ZF32_MS2_1Z = 0x7d, + ZF32_MS4_1Z = 0x7e, + ZF32_MS8_1Z = 0x7f, + ZF32_MS16_1Z = 0x80, + ZF32_2CS = 0x81, + ZF32_MS2_2CS = 0x82, + ZF32_MS4_2CS = 0x83, + ZF32_MS8_2CS = 0x84, + ZF32_MS16_2CS = 0x85, + ZF32_2CZ = 0x86, + ZF32_MS2_2CZ = 0x87, + ZF32_MS4_2CZ = 0x88, + ZF32_MS8_2CZ = 0x89, + ZF32_MS16_2CZ = 0x8a, + X8Z24_X16V8S8_MS4_VC12 = 0x8b, + X8Z24_X16V8S8_MS4_VC4 = 0x8c, + X8Z24_X16V8S8_MS8_VC8 = 0x8d, + X8Z24_X16V8S8_MS8_VC24 = 0x8e, + X8Z24_X16V8S8_MS4_VC12_1CS = 0x8f, + X8Z24_X16V8S8_MS4_VC4_1CS = 0x90, + X8Z24_X16V8S8_MS8_VC8_1CS = 0x91, + X8Z24_X16V8S8_MS8_VC24_1CS = 0x92, + X8Z24_X16V8S8_MS4_VC12_1ZV = 0x97, + X8Z24_X16V8S8_MS4_VC4_1ZV = 0x98, + X8Z24_X16V8S8_MS8_VC8_1ZV = 0x99, + X8Z24_X16V8S8_MS8_VC24_1ZV = 0x9a, + X8Z24_X16V8S8_MS4_VC12_1CZV = 0x9b, + X8Z24_X16V8S8_MS4_VC4_1CZV = 0x9c, + X8Z24_X16V8S8_MS8_VC8_1CZV = 0x9d, + X8Z24_X16V8S8_MS8_VC24_1CZV = 0x9e, + X8Z24_X16V8S8_MS4_VC12_2CS = 0x9f, + X8Z24_X16V8S8_MS4_VC4_2CS = 0xa0, + X8Z24_X16V8S8_MS8_VC8_2CS = 0xa1, + X8Z24_X16V8S8_MS8_VC24_2CS = 0xa2, + X8Z24_X16V8S8_MS4_VC12_2CSZV = 0xa3, + X8Z24_X16V8S8_MS4_VC4_2CSZV = 0xa4, + X8Z24_X16V8S8_MS8_VC8_2CSZV = 0xa5, + X8Z24_X16V8S8_MS8_VC24_2CSZV = 0xa6, + ZF32_X16V8S8_MS4_VC12 = 0xa7, + ZF32_X16V8S8_MS4_VC4 = 0xa8, + ZF32_X16V8S8_MS8_VC8 = 0xa9, + ZF32_X16V8S8_MS8_VC24 = 0xaa, + ZF32_X16V8S8_MS4_VC12_1CS = 0xab, + ZF32_X16V8S8_MS4_VC4_1CS = 0xac, + ZF32_X16V8S8_MS8_VC8_1CS = 0xad, + ZF32_X16V8S8_MS8_VC24_1CS = 0xae, + ZF32_X16V8S8_MS4_VC12_1ZV = 0xb3, + ZF32_X16V8S8_MS4_VC4_1ZV = 0xb4, + ZF32_X16V8S8_MS8_VC8_1ZV = 0xb5, + ZF32_X16V8S8_MS8_VC24_1ZV = 0xb6, + ZF32_X16V8S8_MS4_VC12_1CZV = 0xb7, + ZF32_X16V8S8_MS4_VC4_1CZV = 0xb8, + ZF32_X16V8S8_MS8_VC8_1CZV = 0xb9, + ZF32_X16V8S8_MS8_VC24_1CZV = 0xba, + ZF32_X16V8S8_MS4_VC12_2CS = 0xbb, + ZF32_X16V8S8_MS4_VC4_2CS = 0xbc, + ZF32_X16V8S8_MS8_VC8_2CS = 0xbd, + ZF32_X16V8S8_MS8_VC24_2CS = 0xbe, + ZF32_X16V8S8_MS4_VC12_2CSZV = 0xbf, + ZF32_X16V8S8_MS4_VC4_2CSZV = 0xc0, + ZF32_X16V8S8_MS8_VC8_2CSZV = 0xc1, + ZF32_X16V8S8_MS8_VC24_2CSZV = 0xc2, + ZF32_X24S8 = 0xc3, + ZF32_X24S8_1CS = 0xc4, + ZF32_X24S8_MS2_1CS = 0xc5, + ZF32_X24S8_MS4_1CS = 0xc6, + ZF32_X24S8_MS8_1CS = 0xc7, + ZF32_X24S8_MS16_1CS = 0xc8, + ZF32_X24S8_2CSZV = 0xce, + ZF32_X24S8_MS2_2CSZV = 0xcf, + ZF32_X24S8_MS4_2CSZV = 0xd0, + ZF32_X24S8_MS8_2CSZV = 0xd1, + ZF32_X24S8_MS16_2CSZV = 0xd2, + ZF32_X24S8_2CS = 0xd3, + ZF32_X24S8_MS2_2CS = 0xd4, + ZF32_X24S8_MS4_2CS = 0xd5, + ZF32_X24S8_MS8_2CS = 0xd6, + ZF32_X24S8_MS16_2CS = 0xd7, + S8 = 0x2a, + S8_2S = 0x2b, + GENERIC_16BX2 = 0xfe, + C32_2C = 0xd8, + C32_2CBR = 0xd9, + C32_2CBA = 0xda, + C32_2CRA = 0xdb, + C32_2BRA = 0xdc, + C32_MS2_2C = 0xdd, + C32_MS2_2CBR = 0xde, + C32_MS2_4CBRA = 0xcc, + C32_MS4_2C = 0xdf, + C32_MS4_2CBR = 0xe0, + C32_MS4_2CBA = 0xe1, + C32_MS4_2CRA = 0xe2, + C32_MS4_2BRA = 0xe3, + C32_MS4_4CBRA = 0x2c, + C32_MS8_MS16_2C = 0xe4, + C32_MS8_MS16_2CRA = 0xe5, + C64_2C = 0xe6, + C64_2CBR = 0xe7, + C64_2CBA = 0xe8, + C64_2CRA = 0xe9, + C64_2BRA = 0xea, + C64_MS2_2C = 0xeb, + C64_MS2_2CBR = 0xec, + C64_MS2_4CBRA = 0xcd, + C64_MS4_2C = 0xed, + C64_MS4_2CBR = 0xee, + C64_MS4_2CBA = 0xef, + C64_MS4_2CRA = 0xf0, + C64_MS4_2BRA = 0xf1, + C64_MS4_4CBRA = 0x2d, + C64_MS8_MS16_2C = 0xf2, + C64_MS8_MS16_2CRA = 0xf3, + C128_2C = 0xf4, + C128_2CR = 0xf5, + C128_MS2_2C = 0xf6, + C128_MS2_2CR = 0xf7, + C128_MS4_2C = 0xf8, + C128_MS4_2CR = 0xf9, + C128_MS8_MS16_2C = 0xfa, + C128_MS8_MS16_2CR = 0xfb, + X8C24 = 0xfc, + PITCH_NO_SWIZZLE = 0xfd, + SMSKED_MESSAGE = 0xca, + SMHOST_MESSAGE = 0xcb, +}; + +constexpr bool IsPitchKind(PTEKind kind) { + return kind == PTEKind::PITCH || kind == PTEKind::PITCH_NO_SWIZZLE; +} + +} // namespace Tegra -- cgit v1.2.3 From 23b6569fc26cd1675737a0ef02d0a82021b4d998 Mon Sep 17 00:00:00 2001 From: FengChen Date: Fri, 14 Oct 2022 22:20:45 +0800 Subject: video_core: implement 1D copies based on VMM 'kind' --- src/video_core/engines/maxwell_dma.cpp | 127 +++++++++++++++++++-------------- src/video_core/engines/maxwell_dma.h | 2 - 2 files changed, 73 insertions(+), 56 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3909d36c1..4eb7a100d 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -56,66 +56,85 @@ void MaxwellDMA::Launch() { ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); - const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; - const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; - - if (!is_src_pitch && !is_dst_pitch) { - // If both the source and the destination are in block layout, assert. - UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); - return; - } + if (launch.multi_line_enable) { + const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; + const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; + + if (!is_src_pitch && !is_dst_pitch) { + // If both the source and the destination are in block layout, assert. + UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); + return; + } - if (is_src_pitch && is_dst_pitch) { - CopyPitchToPitch(); + if (is_src_pitch && is_dst_pitch) { + for (u32 line = 0; line < regs.line_count; ++line) { + const GPUVAddr source_line = + regs.offset_in + static_cast(line) * regs.pitch_in; + const GPUVAddr dest_line = + regs.offset_out + static_cast(line) * regs.pitch_out; + memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); + } + } else { + if (!is_src_pitch && is_dst_pitch) { + CopyBlockLinearToPitch(); + } else { + CopyPitchToBlockLinear(); + } + } } else { - ASSERT(launch.multi_line_enable == 1); - - if (!is_src_pitch && is_dst_pitch) { - CopyBlockLinearToPitch(); + // TODO: allow multisized components. + auto& accelerate = rasterizer->AccessAccelerateDMA(); + const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; + if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { + ASSERT(regs.remap_const.component_size_minus_one == 3); + accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); + std::vector tmp_buffer(regs.line_length_in, regs.remap_consta_value); + memory_manager.WriteBlockUnsafe(regs.offset_out, + reinterpret_cast(tmp_buffer.data()), + regs.line_length_in * sizeof(u32)); } else { - CopyPitchToBlockLinear(); + auto convert_linear_2_blocklinear_addr = [](u64 address) { + return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | + ((address & 0x180) >> 1) | ((address & 0x20) << 3); + }; + auto src_kind = memory_manager.GetPageKind(regs.offset_in); + auto dst_kind = memory_manager.GetPageKind(regs.offset_out); + const bool is_src_pitch = IsPitchKind(static_cast(src_kind)); + const bool is_dst_pitch = IsPitchKind(static_cast(dst_kind)); + if (!is_src_pitch && is_dst_pitch) { + std::vector tmp_buffer(regs.line_length_in); + std::vector dst_buffer(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + regs.line_length_in); + for (u32 offset = 0; offset < regs.line_length_in; ++offset) { + dst_buffer[offset] = + tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) - + regs.offset_in]; + } + memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in); + } else if (is_src_pitch && !is_dst_pitch) { + std::vector tmp_buffer(regs.line_length_in); + std::vector dst_buffer(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + regs.line_length_in); + for (u32 offset = 0; offset < regs.line_length_in; ++offset) { + dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) - + regs.offset_out] = tmp_buffer[offset]; + } + memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in); + } else { + if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { + std::vector tmp_buffer(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + regs.line_length_in); + memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), + regs.line_length_in); + } + } } } - ReleaseSemaphore(); -} -void MaxwellDMA::CopyPitchToPitch() { - // When `multi_line_enable` bit is enabled we copy a 2D image of dimensions - // (line_length_in, line_count). - // Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in. - const bool remap_enabled = regs.launch_dma.remap_enable != 0; - if (regs.launch_dma.multi_line_enable) { - UNIMPLEMENTED_IF(remap_enabled); - - // Perform a line-by-line copy. - // We're going to take a subrect of size (line_length_in, line_count) from the source - // rectangle. There is no need to manually flush/invalidate the regions because CopyBlock - // does that for us. - for (u32 line = 0; line < regs.line_count; ++line) { - const GPUVAddr source_line = regs.offset_in + static_cast(line) * regs.pitch_in; - const GPUVAddr dest_line = regs.offset_out + static_cast(line) * regs.pitch_out; - memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); - } - return; - } - // TODO: allow multisized components. - auto& accelerate = rasterizer->AccessAccelerateDMA(); - const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; - const bool is_buffer_clear = remap_enabled && is_const_a_dst; - if (is_buffer_clear) { - ASSERT(regs.remap_const.component_size_minus_one == 3); - accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); - std::vector tmp_buffer(regs.line_length_in, regs.remap_consta_value); - memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast(tmp_buffer.data()), - regs.line_length_in * sizeof(u32)); - return; - } - UNIMPLEMENTED_IF(remap_enabled); - if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - std::vector tmp_buffer(regs.line_length_in); - memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); - memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); - } + ReleaseSemaphore(); } void MaxwellDMA::CopyBlockLinearToPitch() { diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index bc48320ce..953e34adc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -219,8 +219,6 @@ private: /// registers. void Launch(); - void CopyPitchToPitch(); - void CopyBlockLinearToPitch(); void CopyPitchToBlockLinear(); -- cgit v1.2.3