From dd1aab5446cc043fe001f6cd118954a21edc9ad4 Mon Sep 17 00:00:00 2001
From: bunnei
Date: Sat, 15 Dec 2018 00:20:00 -0500
Subject: gl_rasterizer: Implement a more accurate fermi 2D copy.

- This is a blit, use the blit registers.
---
 src/video_core/engines/fermi_2d.cpp | 62 +++++++++----------------------------
 src/video_core/engines/fermi_2d.h   | 29 ++++++++++++++---
 2 files changed, 39 insertions(+), 52 deletions(-)

(limited to 'src/video_core/engines')

diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 9f1533263..ec1a57226 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -21,7 +21,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
     regs.reg_array[method_call.method] = method_call.argument;
 
     switch (method_call.method) {
-    case FERMI2D_REG_INDEX(trigger): {
+    // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
+    // so trigger on the second 32-bit write.
+    case FERMI2D_REG_INDEX(blit_src_y) + 1: {
         HandleSurfaceCopy();
         break;
     }
@@ -32,57 +34,23 @@ void Fermi2D::HandleSurfaceCopy() {
     LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
                 static_cast<u32>(regs.operation));
 
-    const GPUVAddr source = regs.src.Address();
-    const GPUVAddr dest = regs.dst.Address();
-
-    // TODO(Subv): Only same-format and same-size copies are allowed for now.
-    ASSERT(regs.src.format == regs.dst.format);
-    ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
-
     // TODO(Subv): Only raw copies are implemented.
     ASSERT(regs.operation == Regs::Operation::SrcCopy);
 
-    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
-    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
-    ASSERT_MSG(source_cpu, "Invalid source GPU address");
-    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
-
-    u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
-    u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
-
-    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
-        // All copies here update the main memory, so mark all rasterizer states as invalid.
-        Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
+    const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
+    const u32 src_blit_x2{
+        static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
+    const u32 src_blit_y2{
+        static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
 
-        rasterizer.FlushRegion(*source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
-        // We have to invalidate the destination region to evict any outdated surfaces from the
-        // cache. We do this before actually writing the new data because the destination address
-        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(*dest_cpu,
-                                    dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
+    const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+    const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+                                            regs.blit_dst_x + regs.blit_dst_width,
+                                            regs.blit_dst_y + regs.blit_dst_height};
 
-        if (regs.src.linear == regs.dst.linear) {
-            // If the input layout and the output layout are the same, just perform a raw copy.
-            ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
-            Memory::CopyBlock(*dest_cpu, *source_cpu,
-                              src_bytes_per_pixel * regs.dst.width * regs.dst.height);
-            return;
-        }
-        u8* src_buffer = Memory::GetPointer(*source_cpu);
-        u8* dst_buffer = Memory::GetPointer(*dest_cpu);
-        if (!regs.src.linear && regs.dst.linear) {
-            // If the input is tiled and the output is linear, deswizzle the input and copy it over.
-            Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
-                                      src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
-                                      dst_buffer, true, regs.src.BlockHeight(),
-                                      regs.src.BlockDepth(), 0);
-        } else {
-            // If the input is linear and the output is tiled, swizzle the input and copy it over.
-            Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
-                                      src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
-                                      src_buffer, false, regs.dst.BlockHeight(),
-                                      regs.dst.BlockDepth(), 0);
-        }
+    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
+        UNIMPLEMENTED();
     }
 }
 
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 50009bf75..c69f74cc5 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -94,12 +94,22 @@ public:
 
                 Operation operation;
 
-                INSERT_PADDING_WORDS(0x9);
+                INSERT_PADDING_WORDS(0x177);
 
-                // TODO(Subv): This is only a guess.
-                u32 trigger;
+                u32 blit_control;
 
-                INSERT_PADDING_WORDS(0x1A3);
+                INSERT_PADDING_WORDS(0x8);
+
+                u32 blit_dst_x;
+                u32 blit_dst_y;
+                u32 blit_dst_width;
+                u32 blit_dst_height;
+                u64 blit_du_dx;
+                u64 blit_dv_dy;
+                u64 blit_src_x;
+                u64 blit_src_y;
+
+                INSERT_PADDING_WORDS(0x21);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
@@ -122,7 +132,16 @@ private:
 ASSERT_REG_POSITION(dst, 0x80);
 ASSERT_REG_POSITION(src, 0x8C);
 ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(trigger, 0xB5);
+ASSERT_REG_POSITION(blit_control, 0x223);
+ASSERT_REG_POSITION(blit_dst_x, 0x22c);
+ASSERT_REG_POSITION(blit_dst_y, 0x22d);
+ASSERT_REG_POSITION(blit_dst_width, 0x22e);
+ASSERT_REG_POSITION(blit_dst_height, 0x22f);
+ASSERT_REG_POSITION(blit_du_dx, 0x230);
+ASSERT_REG_POSITION(blit_dv_dy, 0x232);
+ASSERT_REG_POSITION(blit_src_x, 0x234);
+ASSERT_REG_POSITION(blit_src_y, 0x236);
+
 #undef ASSERT_REG_POSITION
 
 } // namespace Tegra::Engines
-- 
cgit v1.2.3