diff options
author | bunnei <bunneidev@gmail.com> | 2018-04-25 05:22:24 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-25 05:22:24 +0200 |
commit | ea3151f475e170eaaec3ded306a0fe5c1e5944db (patch) | |
tree | 6f7e127c4f58de6071d9a7dbd2af464dbbd14b9b | |
parent | Merge pull request #393 from lioncash/loader (diff) | |
parent | renderer_opengl: Use correct byte order for framebuffer pixel format ABGR8. (diff) | |
download | yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.gz yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.bz2 yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.lz yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.xz yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.zst yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.zip |
Diffstat (limited to '')
-rw-r--r-- | src/core/memory.cpp | 64 | ||||
-rw-r--r-- | src/core/memory.h | 3 | ||||
-rw-r--r-- | src/video_core/command_processor.cpp | 8 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 22 | ||||
-rw-r--r-- | src/video_core/memory_manager.cpp | 94 | ||||
-rw-r--r-- | src/video_core/memory_manager.h | 30 | ||||
-rw-r--r-- | src/video_core/rasterizer_interface.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 28 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 147 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 84 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 6 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 1 | ||||
-rw-r--r-- | src/yuzu/debugger/graphics/graphics_surface.cpp | 8 |
14 files changed, 334 insertions, 175 deletions
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 291bf066f..ff0420c56 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -325,15 +325,29 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } -void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) { - if (start == 0) { +void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) { + if (gpu_addr == 0) { return; } - u64 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1; - VAddr vaddr = start; + // Iterate over a contiguous CPU address space, which corresponds to the specified GPU address + // space, marking the region as un/cached. The region is marked un/cached at a granularity of + // CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This + // assumes the specified GPU address region is contiguous as well. + + u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1; + for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) { + boost::optional<VAddr> maybe_vaddr = + Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress(gpu_addr); + // The GPU <-> CPU virtual memory mapping is not 1:1 + if (!maybe_vaddr) { + LOG_ERROR(HW_Memory, + "Trying to flush a cached region to an invalid physical address %08X", + gpu_addr); + continue; + } + VAddr vaddr = *maybe_vaddr; - for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; if (cached) { @@ -347,6 +361,10 @@ void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) { page_type = PageType::RasterizerCachedMemory; current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; break; + case PageType::RasterizerCachedMemory: + // There can be more than one GPU region mapped per CPU region, so it's common that + // this area is already marked as cached. + break; default: UNREACHABLE(); } @@ -357,6 +375,10 @@ void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) { // It is not necessary for a process to have this region mapped into its address // space, for example, a system module need not have a VRAM mapping. break; + case PageType::Memory: + // There can be more than one GPU region mapped per CPU region, so it's common that + // this area is already unmarked as cached. + break; case PageType::RasterizerCachedMemory: { u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); if (pointer == nullptr) { @@ -394,19 +416,29 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { VAddr overlap_start = std::max(start, region_start); VAddr overlap_end = std::min(end, region_end); + + std::vector<Tegra::GPUVAddr> gpu_addresses = + Core::System::GetInstance().GPU().memory_manager->CpuToGpuAddress(overlap_start); + + if (gpu_addresses.empty()) { + return; + } + u64 overlap_size = overlap_end - overlap_start; - auto* rasterizer = VideoCore::g_renderer->Rasterizer(); - switch (mode) { - case FlushMode::Flush: - rasterizer->FlushRegion(overlap_start, overlap_size); - break; - case FlushMode::Invalidate: - rasterizer->InvalidateRegion(overlap_start, overlap_size); - break; - case FlushMode::FlushAndInvalidate: - rasterizer->FlushAndInvalidateRegion(overlap_start, overlap_size); - break; + for (const auto& gpu_address : gpu_addresses) { + auto* rasterizer = VideoCore::g_renderer->Rasterizer(); + switch (mode) { + case FlushMode::Flush: + rasterizer->FlushRegion(gpu_address, overlap_size); + break; + case FlushMode::Invalidate: + rasterizer->InvalidateRegion(gpu_address, overlap_size); + break; + case FlushMode::FlushAndInvalidate: + rasterizer->FlushAndInvalidateRegion(gpu_address, overlap_size); + break; + } } }; diff --git a/src/core/memory.h b/src/core/memory.h index e9b8ca873..3f56a2c6a 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -14,6 +14,7 @@ #include <boost/optional.hpp> #include "common/common_types.h" #include "core/memory_hook.h" +#include "video_core/memory_manager.h" namespace Kernel { class Process; @@ -258,7 +259,7 @@ enum class FlushMode { /** * Mark each page touching the region as cached. */ -void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached); +void RasterizerMarkRegionCached(Tegra::GPUVAddr start, u64 size, bool cached); /** * Flushes and invalidates any externally cached rasterizer resources touching the given virtual diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d4cdb4ab2..2c04daba3 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -90,11 +90,9 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) } void GPU::ProcessCommandList(GPUVAddr address, u32 size) { - // TODO(Subv): PhysicalToVirtualAddress is a misnomer, it converts a GPU VAddr into an - // application VAddr. - const VAddr head_address = memory_manager->PhysicalToVirtualAddress(address); - VAddr current_addr = head_address; - while (current_addr < head_address + size * sizeof(CommandHeader)) { + const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); + VAddr current_addr = *head_address; + while (current_addr < *head_address + size * sizeof(CommandHeader)) { const CommandHeader header = {Memory::Read32(current_addr)}; current_addr += sizeof(u32); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 35773a695..4e9aed380 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -145,7 +145,7 @@ void Maxwell3D::ProcessQueryGet() { GPUVAddr sequence_address = regs.query.QueryAddress(); // Since the sequence address is given as a GPU VAddr, we have to convert it to an application // VAddr before writing. - VAddr address = memory_manager.PhysicalToVirtualAddress(sequence_address); + boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address); // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, @@ -153,7 +153,7 @@ void Maxwell3D::ProcessQueryGet() { ASSERT_MSG(regs.query.query_get.short_query, "Writing the entire query result structure is unimplemented"); - u32 value = Memory::Read32(address); + u32 value = Memory::Read32(*address); u32 result = 0; // TODO(Subv): Support the other query variables @@ -173,7 +173,7 @@ void Maxwell3D::ProcessQueryGet() { case Regs::QueryMode::Write2: { // Write the current query sequence to the sequence address. u32 sequence = regs.query.query_sequence; - Memory::Write32(address, sequence); + Memory::Write32(*address, sequence); // TODO(Subv): Write the proper query response structure to the address when not using short // mode. @@ -225,10 +225,10 @@ void Maxwell3D::ProcessCBData(u32 value) { // Don't allow writing past the end of the buffer. ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); - VAddr address = - memory_manager.PhysicalToVirtualAddress(buffer_address + regs.const_buffer.cb_pos); + boost::optional<VAddr> address = + memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); - Memory::Write32(address, value); + Memory::Write32(*address, value); // Increment the current buffer position. regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; @@ -238,10 +238,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { GPUVAddr tic_base_address = regs.tic.TICAddress(); GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); - VAddr tic_address_cpu = memory_manager.PhysicalToVirtualAddress(tic_address_gpu); + boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); Texture::TICEntry tic_entry; - Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); + Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || tic_entry.header_version == Texture::TICHeaderVersion::Pitch, @@ -268,10 +268,10 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); - VAddr tsc_address_cpu = memory_manager.PhysicalToVirtualAddress(tsc_address_gpu); + boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); Texture::TSCEntry tsc_entry; - Memory::ReadBlock(tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); + Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } @@ -293,7 +293,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { Texture::TextureHandle tex_handle{ - Memory::Read32(memory_manager.PhysicalToVirtualAddress(current_texture))}; + Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))}; Texture::FullTextureInfo tex_info{}; // TODO(Subv): Use the shader to determine which textures are actually accessed. diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 2e1edee03..25984439d 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -8,90 +8,112 @@ namespace Tegra { -PAddr MemoryManager::AllocateSpace(u64 size, u64 align) { - boost::optional<PAddr> paddr = FindFreeBlock(size, align); - ASSERT(paddr); +GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { + boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, align); + ASSERT(gpu_addr); for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - ASSERT(PageSlot(*paddr + offset) == static_cast<u64>(PageStatus::Unmapped)); - PageSlot(*paddr + offset) = static_cast<u64>(PageStatus::Allocated); + ASSERT(PageSlot(*gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped)); + PageSlot(*gpu_addr + offset) = static_cast<u64>(PageStatus::Allocated); } - return *paddr; + return *gpu_addr; } -PAddr MemoryManager::AllocateSpace(PAddr paddr, u64 size, u64 align) { +GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - ASSERT(PageSlot(paddr + offset) == static_cast<u64>(PageStatus::Unmapped)); - PageSlot(paddr + offset) = static_cast<u64>(PageStatus::Allocated); + ASSERT(PageSlot(gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped)); + PageSlot(gpu_addr + offset) = static_cast<u64>(PageStatus::Allocated); } - return paddr; + return gpu_addr; } -PAddr MemoryManager::MapBufferEx(VAddr vaddr, u64 size) { - boost::optional<PAddr> paddr = FindFreeBlock(size, PAGE_SIZE); - ASSERT(paddr); +GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { + boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, PAGE_SIZE); + ASSERT(gpu_addr); for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - ASSERT(PageSlot(*paddr + offset) == static_cast<u64>(PageStatus::Unmapped)); - PageSlot(*paddr + offset) = vaddr + offset; + ASSERT(PageSlot(*gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped)); + PageSlot(*gpu_addr + offset) = cpu_addr + offset; } - return *paddr; + MappedRegion region{cpu_addr, *gpu_addr, size}; + mapped_regions.push_back(region); + + return *gpu_addr; } -PAddr MemoryManager::MapBufferEx(VAddr vaddr, PAddr paddr, u64 size) { - ASSERT((paddr & PAGE_MASK) == 0); +GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) { + ASSERT((gpu_addr & PAGE_MASK) == 0); for (u64 offset = 0; offset < size; offset += PAGE_SIZE) { - ASSERT(PageSlot(paddr + offset) == static_cast<u64>(PageStatus::Allocated)); - PageSlot(paddr + offset) = vaddr + offset; + ASSERT(PageSlot(gpu_addr + offset) == static_cast<u64>(PageStatus::Allocated)); + PageSlot(gpu_addr + offset) = cpu_addr + offset; } - return paddr; + MappedRegion region{cpu_addr, gpu_addr, size}; + mapped_regions.push_back(region); + + return gpu_addr; } -boost::optional<PAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) { - PAddr paddr = 0; +boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) { + GPUVAddr gpu_addr = 0; u64 free_space = 0; align = (align + PAGE_MASK) & ~PAGE_MASK; - while (paddr + free_space < MAX_ADDRESS) { - if (!IsPageMapped(paddr + free_space)) { + while (gpu_addr + free_space < MAX_ADDRESS) { + if (!IsPageMapped(gpu_addr + free_space)) { free_space += PAGE_SIZE; if (free_space >= size) { - return paddr; + return gpu_addr; } } else { - paddr += free_space + PAGE_SIZE; + gpu_addr += free_space + PAGE_SIZE; free_space = 0; - paddr = Common::AlignUp(paddr, align); + gpu_addr = Common::AlignUp(gpu_addr, align); } } return {}; } -VAddr MemoryManager::PhysicalToVirtualAddress(PAddr paddr) { - VAddr base_addr = PageSlot(paddr); +boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) { + VAddr base_addr = PageSlot(gpu_addr); ASSERT(base_addr != static_cast<u64>(PageStatus::Unmapped)); - return base_addr + (paddr & PAGE_MASK); + + if (base_addr == static_cast<u64>(PageStatus::Allocated)) { + return {}; + } + + return base_addr + (gpu_addr & PAGE_MASK); +} + +std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { + std::vector<GPUVAddr> results; + for (const auto& region : mapped_regions) { + if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) { + u64 offset = cpu_addr - region.cpu_addr; + results.push_back(region.gpu_addr + offset); + } + } + return results; } -bool MemoryManager::IsPageMapped(PAddr paddr) { - return PageSlot(paddr) != static_cast<u64>(PageStatus::Unmapped); +bool MemoryManager::IsPageMapped(GPUVAddr gpu_addr) { + return PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Unmapped); } -VAddr& MemoryManager::PageSlot(PAddr paddr) { - auto& block = page_table[(paddr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]; +VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { + auto& block = page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]; if (!block) { block = std::make_unique<PageBlock>(); for (unsigned index = 0; index < PAGE_BLOCK_SIZE; index++) { (*block)[index] = static_cast<u64>(PageStatus::Unmapped); } } - return (*block)[(paddr >> PAGE_BITS) & PAGE_BLOCK_MASK]; + return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK]; } } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index b73e283f8..08140c83a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -6,8 +6,11 @@ #include <array> #include <memory> +#include <vector> + +#include <boost/optional.hpp> + #include "common/common_types.h" -#include "core/memory.h" namespace Tegra { @@ -18,20 +21,21 @@ class MemoryManager final { public: MemoryManager() = default; - PAddr AllocateSpace(u64 size, u64 align); - PAddr AllocateSpace(PAddr paddr, u64 size, u64 align); - PAddr MapBufferEx(VAddr vaddr, u64 size); - PAddr MapBufferEx(VAddr vaddr, PAddr paddr, u64 size); - VAddr PhysicalToVirtualAddress(PAddr paddr); + GPUVAddr AllocateSpace(u64 size, u64 align); + GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); + GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size); + GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size); + boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); + std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const; static constexpr u64 PAGE_BITS = 16; static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; private: - boost::optional<PAddr> FindFreeBlock(u64 size, u64 align = 1); - bool IsPageMapped(PAddr paddr); - VAddr& PageSlot(PAddr paddr); + boost::optional<GPUVAddr> FindFreeBlock(u64 size, u64 align = 1); + bool IsPageMapped(GPUVAddr gpu_addr); + VAddr& PageSlot(GPUVAddr gpu_addr); enum class PageStatus : u64 { Unmapped = 0xFFFFFFFFFFFFFFFFULL, @@ -48,6 +52,14 @@ private: using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>; std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{}; + + struct MappedRegion { + VAddr cpu_addr; + GPUVAddr gpu_addr; + u64 size; + }; + + std::vector<MappedRegion> mapped_regions; }; } // namespace Tegra diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 36629dd11..f0e48a802 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "video_core/gpu.h" +#include "video_core/memory_manager.h" struct ScreenInfo; @@ -25,14 +26,14 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(VAddr addr, u64 size) = 0; + virtual void FlushRegion(Tegra::GPUVAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(VAddr addr, u64 size) = 0; + virtual void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated - virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0; /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 virtual bool AccelerateDisplayTransfer(const void* config) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 82001e7b4..b457b1fbe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -150,9 +150,8 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, u64 size = end - start + 1; // Copy vertex array data - const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)}; - res_cache.FlushRegion(data_addr, size, nullptr); - Memory::ReadBlock(data_addr, array_ptr, size); + res_cache.FlushRegion(start, size, nullptr); + Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); // Bind the vertex array to the buffer at the current offset. glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); @@ -233,8 +232,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { // Fetch program code from memory GLShader::ProgramCode program_code; const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; - const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; - Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; + Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); GLShader::ShaderSetup setup{std::move(program_code)}; GLShader::ShaderEntries shader_resources; @@ -394,9 +393,9 @@ void RasterizerOpenGL::DrawArrays() { GLintptr index_buffer_offset = 0; if (is_indexed) { const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; - const VAddr index_data_addr{ - memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; - Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size); + const boost::optional<VAddr> index_data_addr{ + memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())}; + Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size); index_buffer_offset = buffer_offset; offseted_buffer += index_buffer_size; @@ -519,17 +518,17 @@ void RasterizerOpenGL::FlushAll() { res_cache.FlushAll(); } -void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size, nullptr); } -void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); res_cache.InvalidateRegion(addr, size, nullptr); @@ -560,7 +559,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu MICROPROFILE_SCOPE(OpenGL_CacheManagement); SurfaceParams src_params; - src_params.addr = framebuffer_addr; + src_params.cpu_addr = framebuffer_addr; + src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); src_params.width = std::min(framebuffer.width, pixel_stride); src_params.height = framebuffer.height; src_params.stride = pixel_stride; @@ -659,9 +659,9 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr buffer_draw_state.enabled = true; buffer_draw_state.bindpoint = current_bindpoint + bindpoint; - VAddr addr = gpu.memory_manager->PhysicalToVirtualAddress(buffer.address); + boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); - Memory::ReadBlock(addr, data.data(), data.size()); + Memory::ReadBlock(*addr, data.data(), data.size()); glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 544714b95..9709e595e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -11,6 +11,7 @@ #include <glad/glad.h> #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -29,9 +30,9 @@ public: void DrawArrays() override; void NotifyMaxwellRegisterChanged(u32 method) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(Tegra::GPUVAddr addr, u64 size) override; + void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; + void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; bool AccelerateDisplayTransfer(const void* config) override; bool AccelerateTextureCopy(const void* config) override; bool AccelerateFill(const void* config) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7410471cc..501d15e98 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -41,18 +41,15 @@ struct FormatTuple { GLenum format; GLenum type; bool compressed; - // How many pixels in the original texture are equivalent to one pixel in the compressed - // texture. - u32 compression_factor; }; static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8 - {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5 - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false, 1}, // A2B10G10R10 - {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5 + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10 + {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { @@ -83,26 +80,30 @@ static u16 GetResolutionScaleFactor() { } template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start, - VAddr end) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, + Tegra::GPUVAddr start, Tegra::GPUVAddr end) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); + const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { auto data = Tegra::Texture::UnswizzleTexture( - base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, - block_height); + *gpu.memory_manager->GpuToCpuAddress(base), + SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); std::memcpy(gl_buffer, data.data(), data.size()); } else { // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check // the configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(base), gl_buffer, morton_to_gl); + NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128( + stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, + morton_to_gl); } } -static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, + Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, @@ -110,7 +111,8 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, }; -static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, + Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { MortonCopy<false, PixelFormat::ABGR8>, @@ -219,9 +221,9 @@ SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; const u32 tiled_size = is_tiled ? 8 : 1; const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - VAddr aligned_start = + Tegra::GPUVAddr aligned_start = addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - VAddr aligned_end = + Tegra::GPUVAddr aligned_end = addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); if (aligned_end - aligned_start > stride_tiled_bytes) { @@ -342,6 +344,13 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); } +VAddr SurfaceParams::GetCpuAddr() const { + // When this function is used, only cpu_addr or (GPU) addr should be set, not both + ASSERT(!(cpu_addr && addr)); + const auto& gpu = Core::System::GetInstance().GPU(); + return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); +} + bool CachedSurface::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const { if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && @@ -349,9 +358,9 @@ bool CachedSurface::CanFill(const SurfaceParams& dest_surface, boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range dest_surface.FromInterval(fill_interval).GetInterval() == fill_interval) { // make sure interval is a rectangle in dest surface - if (fill_size * 8 != dest_surface.GetFormatBpp()) { + if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) { // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); + const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u); std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); for (u32 i = 0; i < dest_bytes_per_pixel; ++i) @@ -456,15 +465,15 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { +void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { ASSERT(type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(addr); + u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); if (texture_src_data == nullptr) return; if (gl_buffer == nullptr) { - gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); + gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format); gl_buffer.reset(new u8[gl_buffer_size]); } @@ -479,14 +488,15 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, bytes_per_pixel * width * height); } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)]( - stride, block_height, height, &gl_buffer[0], addr, load_start, load_end); + morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height, + GetActualHeight(), &gl_buffer[0], addr, + load_start, load_end); } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { - u8* const dst_buffer = Memory::GetPointer(addr); +void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { + u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); if (dst_buffer == nullptr) return; @@ -536,7 +546,8 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint MICROPROFILE_SCOPE(OpenGL_TextureUL); - ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); + ASSERT(gl_buffer_size == + GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); // Load data from memory to the surface GLint x0 = static_cast<GLint>(rect.left); @@ -571,11 +582,9 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint glActiveTexture(GL_TEXTURE0); if (tuple.compressed) { glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, - static_cast<GLsizei>(rect.GetWidth()), - static_cast<GLsizei>(rect.GetHeight()), 0, - rect.GetWidth() * rect.GetHeight() * - GetGLBytesPerPixel(pixel_format) / tuple.compression_factor, - &gl_buffer[buffer_offset]); + static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), + static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, + size, &gl_buffer[buffer_offset]); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, @@ -945,6 +954,33 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc return surface; } +boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress( + VAddr cpu_addr) const { + // Tries to find the GPU address of a framebuffer based on the CPU address. This is because + // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU + // addresses. We iterate through all cached framebuffers, and compare their starting CPU address + // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps + // surfaces. + + std::vector<Tegra::GPUVAddr> gpu_addresses; + for (const auto& pair : surface_cache) { + for (const auto& surface : pair.second) { + const VAddr surface_cpu_addr = surface->GetCpuAddr(); + if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) { + ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); + gpu_addresses.push_back(surface->addr); + } + } + } + + if (gpu_addresses.empty()) { + return {}; + } + + ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported"); + return gpu_addresses[0]; +} + SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create) { @@ -1028,11 +1064,11 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu auto& gpu = Core::System::GetInstance().GPU(); SurfaceParams params; - params.addr = gpu.memory_manager->PhysicalToVirtualAddress(config.tic.Address()); - params.width = config.tic.Width(); - params.height = config.tic.Height(); + params.addr = config.tic.Address(); params.is_tiled = config.tic.IsTiled(); params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); + params.width = config.tic.Width() / params.GetCompresssionFactor(); + params.height = config.tic.Height() / params.GetCompresssionFactor(); // TODO(Subv): Different types per component are not supported. ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && @@ -1045,7 +1081,7 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu params.block_height = config.tic.BlockHeight(); } else { // Use the texture-provided stride value if the texture isn't tiled. - params.stride = params.PixelsInBytes(config.tic.Pitch()); + params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); } params.UpdateParams(); @@ -1073,11 +1109,10 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; const auto& config = regs.rt[0]; // TODO(bunnei): This is hard corded to use just the first render buffer - LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); + NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); // update resolution_scale_factor and reset cache if changed // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We @@ -1106,7 +1141,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; SurfaceParams depth_params = color_params; - color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); + color_params.addr = config.Address(); color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); color_params.UpdateParams(); @@ -1122,8 +1157,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( // Make sure that framebuffers don't overlap if both color and depth are being used if (using_color_fb && using_depth_fb && boost::icl::length(color_vp_interval & depth_vp_interval)) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); + NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); using_depth_fb = false; } @@ -1222,7 +1257,8 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, } } -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) { +void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, + u64 size) { if (size == 0) return; @@ -1261,7 +1297,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, } } -void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) { +void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { if (size == 0) return; @@ -1297,7 +1333,8 @@ void RasterizerCacheOpenGL::FlushAll() { FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); } -void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) { +void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, + const Surface& region_owner) { if (size == 0) return; @@ -1390,10 +1427,10 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); } -void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { - const u64 num_pages = - ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; - const u64 page_start = addr >> Memory::PAGE_BITS; +void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { + const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - + (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; + const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; const u64 page_end = page_start + num_pages; // Interval maps will erase segments if count reaches 0, so if delta is negative we have to @@ -1406,8 +1443,10 @@ void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int del const auto interval = pair.first & pages_interval; const int count = pair.second; - const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) + << Tegra::MemoryManager::PAGE_BITS; + const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) + << Tegra::MemoryManager::PAGE_BITS; const u64 interval_size = interval_end_addr - interval_start_addr; if (delta > 0 && count == delta) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index bf0fabb29..55f1bdee8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -17,12 +17,14 @@ #ifdef __GNUC__ #pragma GCC diagnostic pop #endif +#include <boost/optional.hpp> #include <glad/glad.h> #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" #include "video_core/gpu.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/textures/texture.h" @@ -30,9 +32,9 @@ struct CachedSurface; using Surface = std::shared_ptr<CachedSurface>; using SurfaceSet = std::set<Surface>; -using SurfaceRegions = boost::icl::interval_set<VAddr>; -using SurfaceMap = boost::icl::interval_map<VAddr, Surface>; -using SurfaceCache = boost::icl::interval_map<VAddr, SurfaceSet>; +using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>; +using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>; +using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>; using SurfaceInterval = SurfaceCache::interval_type; static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && @@ -82,23 +84,49 @@ struct SurfaceParams { Invalid = 4, }; - static constexpr unsigned int GetFormatBpp(PixelFormat format) { + /** + * Gets the compression factor for the specified PixelFormat. This applies to just the + * "compressed width" and "compressed height", not the overall compression factor of a + * compressed image. This is used for maintaining proper surface sizes for compressed texture + * formats. + */ + static constexpr u32 GetCompresssionFactor(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; - constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = { + constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ + 1, // ABGR8 + 1, // B5G6R5 + 1, // A2B10G10R10 + 4, // DXT1 + 4, // DXT23 + 4, // DXT45 + }}; + + ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); + return compression_factor_table[static_cast<size_t>(format)]; + } + u32 GetCompresssionFactor() const { + return GetCompresssionFactor(pixel_format); + } + + static constexpr u32 GetFormatBpp(PixelFormat format) { + if (format == PixelFormat::Invalid) + return 0; + + constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 32, // ABGR8 16, // B5G6R5 32, // A2B10G10R10 64, // DXT1 128, // DXT23 128, // DXT45 - }; + }}; ASSERT(static_cast<size_t>(format) < bpp_table.size()); return bpp_table[static_cast<size_t>(format)]; } - unsigned int GetFormatBpp() const { + u32 GetFormatBpp() const { return GetFormatBpp(pixel_format); } @@ -253,6 +281,24 @@ struct SurfaceParams { // Returns the region of the biggest valid rectange within interval SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; + /** + * Gets the actual width (in pixels) of the surface. This is provided because `width` is used + * for tracking the surface region in memory, which may be compressed for certain formats. In + * this scenario, `width` is actually the compressed width. + */ + u32 GetActualWidth() const { + return width * GetCompresssionFactor(); + } + + /** + * Gets the actual height (in pixels) of the surface. This is provided because `height` is used + * for tracking the surface region in memory, which may be compressed for certain formats. In + * this scenario, `height` is actually the compressed height. + */ + u32 GetActualHeight() const { + return height * GetCompresssionFactor(); + } + u32 GetScaledWidth() const { return width * res_scale; } @@ -277,6 +323,8 @@ struct SurfaceParams { return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; } + VAddr GetCpuAddr() const; + bool ExactMatch(const SurfaceParams& other_surface) const; bool CanSubRect(const SurfaceParams& sub_surface) const; bool CanExpand(const SurfaceParams& expanded_surface) const; @@ -285,8 +333,9 @@ struct SurfaceParams { MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; - VAddr addr = 0; - VAddr end = 0; + Tegra::GPUVAddr addr = 0; + Tegra::GPUVAddr end = 0; + boost::optional<VAddr> cpu_addr; u64 size = 0; u32 width = 0; @@ -325,15 +374,15 @@ struct CachedSurface : SurfaceParams { if (format == PixelFormat::Invalid) return 0; - return SurfaceParams::GetFormatBpp(format) / 8; + return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; } std::unique_ptr<u8[]> gl_buffer; size_t gl_buffer_size = 0; // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(VAddr load_start, VAddr load_end); - void FlushGLBuffer(VAddr flush_start, VAddr flush_end); + void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); + void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); // Upload/Download data in gl_buffer in/to this surface's texture void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, @@ -362,6 +411,9 @@ public: Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, bool load_if_create); + /// Tries to find a framebuffer GPU address based on the provided CPU address + boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const; + /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from /// Switch memory to OpenGL and caches it (if not already cached) SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, @@ -381,10 +433,10 @@ public: SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(VAddr addr, u64 size, Surface flush_surface = nullptr); + void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); /// Mark region as being invalidated by region_owner (nullptr if Switch memory) - void InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner); + void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); /// Flush all cached resources tracked by this cache manager void FlushAll(); @@ -393,7 +445,7 @@ private: void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, VAddr addr, u64 size); + void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); /// Create a new surface Surface CreateSurface(const SurfaceParams& params); @@ -405,7 +457,7 @@ private: void UnregisterSurface(const Surface& surface); /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(VAddr addr, u64 size, int delta); + void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); SurfaceCache surface_cache; PageMap cached_pages; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index baff2c7af..5ca9821b7 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -152,7 +152,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf screen_info.display_texture = screen_info.texture.resource.handle; screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); - Rasterizer()->FlushRegion(framebuffer_addr, size_in_bytes); + Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, + Memory::FlushMode::Flush); VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, Memory::GetPointer(framebuffer_addr), @@ -269,10 +270,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, GLint internal_format; switch (framebuffer.pixel_format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - // Use RGBA8 and swap in the fragment shader internal_format = GL_RGBA; texture.gl_format = GL_RGBA; - texture.gl_type = GL_UNSIGNED_INT_8_8_8_8; + texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; gl_framebuffer_data.resize(texture.width * texture.height * 4); break; default: diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index e0509f0ce..9c3ae875c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -4,6 +4,7 @@ #include <cstring> #include "common/assert.h" +#include "core/memory.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp index 5fada74be..1fbca8ad0 100644 --- a/src/yuzu/debugger/graphics/graphics_surface.cpp +++ b/src/yuzu/debugger/graphics/graphics_surface.cpp @@ -378,10 +378,10 @@ void GraphicsSurfaceWidget::OnUpdate() { // TODO: Implement a good way to visualize alpha components! QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); - VAddr address = gpu.memory_manager->PhysicalToVirtualAddress(surface_address); + boost::optional<VAddr> address = gpu.memory_manager->GpuToCpuAddress(surface_address); auto unswizzled_data = - Tegra::Texture::UnswizzleTexture(address, surface_format, surface_width, surface_height); + Tegra::Texture::UnswizzleTexture(*address, surface_format, surface_width, surface_height); auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, surface_width, surface_height); @@ -437,9 +437,9 @@ void GraphicsSurfaceWidget::SaveSurface() { pixmap->save(&file, "PNG"); } else if (selectedFilter == bin_filter) { auto& gpu = Core::System::GetInstance().GPU(); - VAddr address = gpu.memory_manager->PhysicalToVirtualAddress(surface_address); + boost::optional<VAddr> address = gpu.memory_manager->GpuToCpuAddress(surface_address); - const u8* buffer = Memory::GetPointer(address); + const u8* buffer = Memory::GetPointer(*address); ASSERT_MSG(buffer != nullptr, "Memory not accessible"); QFile file(filename); |