diff options
author | ameerj <aj662@drexel.edu> | 2020-10-27 04:07:36 +0100 |
---|---|---|
committer | ameerj <aj662@drexel.edu> | 2020-10-27 04:07:36 +0100 |
commit | eb67a45ca82bc01ac843c853fd3c17f2a90e0250 (patch) | |
tree | 11e78a1b728ef0a608fae43d966b613eb4e6d58a /src/core | |
parent | Merge pull request #4827 from lioncash/trunc (diff) | |
download | yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.gz yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.bz2 yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.lz yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.xz yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.zst yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.zip |
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 100 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 71 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 234 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | 168 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 90 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_vic.h | 88 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvmap.h | 1 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/nvdrv.cpp | 4 | ||||
-rw-r--r-- | src/core/settings.cpp | 2 | ||||
-rw-r--r-- | src/core/settings.h | 1 | ||||
-rw-r--r-- | src/core/telemetry_session.cpp | 2 |
12 files changed, 475 insertions, 288 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index db1c9fdef..e0f207f3e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -439,6 +439,8 @@ add_library(core STATIC hle/service/nvdrv/devices/nvhost_gpu.h hle/service/nvdrv/devices/nvhost_nvdec.cpp hle/service/nvdrv/devices/nvhost_nvdec.h + hle/service/nvdrv/devices/nvhost_nvdec_common.cpp + hle/service/nvdrv/devices/nvhost_nvdec_common.h hle/service/nvdrv/devices/nvhost_nvjpg.cpp hle/service/nvdrv/devices/nvhost_nvjpg.h hle/service/nvdrv/devices/nvhost_vic.cpp diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index fcb612864..b6df48360 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -2,15 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cstring> - #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { -nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {} +nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) + : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} nvhost_nvdec::~nvhost_nvdec() = default; u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -21,7 +23,7 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std:: switch (static_cast<IoctlCommand>(command.raw)) { case IoctlCommand::IocSetNVMAPfdCommand: - return SetNVMAPfd(input, output); + return SetNVMAPfd(input); case IoctlCommand::IocSubmit: return Submit(input, output); case IoctlCommand::IocGetSyncpoint: @@ -29,79 +31,29 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std:: case IoctlCommand::IocGetWaitbase: return GetWaitbase(input, output); case IoctlCommand::IocMapBuffer: - return MapBuffer(input, output); + case IoctlCommand::IocMapBuffer2: + case IoctlCommand::IocMapBuffer3: case IoctlCommand::IocMapBufferEx: - return MapBufferEx(input, output); - case IoctlCommand::IocUnmapBufferEx: - return UnmapBufferEx(input, output); + return MapBuffer(input, output); + case IoctlCommand::IocUnmapBufferEx: { + // This command is sent when the video stream has ended, flush all video contexts + // This is usually sent in the folowing order: vic, nvdec, vic. + // Inform the GPU to clear any remaining nvdec buffers when this is detected. + LOG_INFO(Service_NVDRV, "NVDEC video stream ended"); + Tegra::ChCommandHeaderList cmdlist(1); + cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F}; + system.GPU().PushCommandBuffer(cmdlist); + [[fallthrough]]; // fallthrough to unmap buffers + }; + case IoctlCommand::IocUnmapBuffer: + case IoctlCommand::IocUnmapBuffer2: + case IoctlCommand::IocUnmapBuffer3: + return UnmapBuffer(input, output); + case IoctlCommand::IocSetSubmitTimeout: + return SetSubmitTimeout(input, output); } - UNIMPLEMENTED_MSG("Unimplemented ioctl"); - return 0; -} - -u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlSetNvmapFD params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSetNvmapFD)); - LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); - - nvmap_fd = params.nvmap_fd; - return 0; -} - -u32 nvhost_nvdec::Submit(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlSubmit params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSubmit)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); - return 0; -} - -u32 nvhost_nvdec::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlGetSyncpoint params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); - return 0; -} - -u32 nvhost_nvdec::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlGetWaitbase params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlGetWaitbase)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), ¶ms, sizeof(IoctlGetWaitbase)); - return 0; -} - -u32 nvhost_nvdec::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlMapBuffer params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer)); - return 0; -} - -u32 nvhost_nvdec::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlMapBufferEx params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBufferEx)); - return 0; -} - -u32 nvhost_nvdec::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlUnmapBufferEx params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlUnmapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), ¶ms, sizeof(IoctlUnmapBufferEx)); + UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw); return 0; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 4332db118..102777ddd 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -4,16 +4,14 @@ #pragma once -#include <vector> -#include "common/common_types.h" -#include "common/swap.h" -#include "core/hle/service/nvdrv/devices/nvdevice.h" +#include <memory> +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { -class nvhost_nvdec final : public nvdevice { +class nvhost_nvdec final : public nvhost_nvdec_common { public: - explicit nvhost_nvdec(Core::System& system); + explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); ~nvhost_nvdec() override; u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -27,62 +25,15 @@ private: IocGetSyncpoint = 0xC0080002, IocGetWaitbase = 0xC0080003, IocMapBuffer = 0xC01C0009, + IocMapBuffer2 = 0xC16C0009, + IocMapBuffer3 = 0xC15C0009, IocMapBufferEx = 0xC0A40009, - IocUnmapBufferEx = 0xC0A4000A, + IocUnmapBuffer = 0xC0A4000A, + IocUnmapBuffer2 = 0xC16C000A, + IocUnmapBufferEx = 0xC01C000A, + IocUnmapBuffer3 = 0xC15C000A, + IocSetSubmitTimeout = 0x40040007, }; - - struct IoctlSetNvmapFD { - u32_le nvmap_fd; - }; - static_assert(sizeof(IoctlSetNvmapFD) == 0x4, "IoctlSetNvmapFD is incorrect size"); - - struct IoctlSubmit { - INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit has incorrect size"); - - struct IoctlGetSyncpoint { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetSyncpoint) == 0x08, "IoctlGetSyncpoint has incorrect size"); - - struct IoctlGetWaitbase { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetWaitbase) == 0x08, "IoctlGetWaitbase has incorrect size"); - - struct IoctlMapBuffer { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size"); - - struct IoctlMapBufferEx { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x98); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBufferEx) == 0xA4, "IoctlMapBufferEx has incorrect size"); - - struct IoctlUnmapBufferEx { - INSERT_PADDING_BYTES(0xA4); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlUnmapBufferEx) == 0xA4, "IoctlUnmapBufferEx has incorrect size"); - - u32_le nvmap_fd{}; - - u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); - u32 Submit(const std::vector<u8>& input, std::vector<u8>& output); - u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output); - u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); - u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output); - u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); - u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp new file mode 100644 index 000000000..85792495f --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -0,0 +1,234 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <cstring> + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" +#include "core/hle/service/nvdrv/devices/nvmap.h" +#include "core/memory.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" + +namespace Service::Nvidia::Devices { + +namespace { +// Splice vectors will copy count amount of type T from the input vector into the dst vector. +template <typename T> +std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count, + std::size_t offset) { + std::memcpy(dst.data(), input.data() + offset, count * sizeof(T)); + offset += count * sizeof(T); + return offset; +} + +// Write vectors will write data to the output buffer +template <typename T> +std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { + std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T)); + offset += src.size() * sizeof(T); + return offset; +} +} // Anonymous namespace + +namespace NvErrCodes { +constexpr u32 Success{}; +constexpr u32 OutOfMemory{static_cast<u32>(-12)}; +constexpr u32 InvalidInput{static_cast<u32>(-22)}; +} // namespace NvErrCodes + +nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) + : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} +nvhost_nvdec_common::~nvhost_nvdec_common() = default; + +u32 nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { + IoctlSetNvmapFD params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlSetNvmapFD)); + LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); + + nvmap_fd = params.nvmap_fd; + return 0; +} + +u32 nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSubmit params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlSubmit)); + LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); + + // Instantiate param buffers + std::size_t offset = sizeof(IoctlSubmit); + std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count); + std::vector<Reloc> relocs(params.relocation_count); + std::vector<u32> reloc_shifts(params.relocation_count); + std::vector<SyncptIncr> syncpt_increments(params.syncpoint_count); + std::vector<SyncptIncr> wait_checks(params.syncpoint_count); + std::vector<Fence> fences(params.fence_count); + + // Splice input into their respective buffers + offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset); + offset = SpliceVectors(input, relocs, params.relocation_count, offset); + offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset); + offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset); + offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); + offset = SpliceVectors(input, fences, params.fence_count, offset); + + // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment + + auto& gpu = system.GPU(); + + for (const auto& cmd_buffer : command_buffers) { + auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); + ASSERT_OR_EXECUTE(object, return NvErrCodes::InvalidInput;); + const auto map = FindBufferMap(object->dma_map_addr); + if (!map) { + LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}", + object->addr, object->dma_map_addr); + return 0; + } + Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); + gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(), + cmdlist.size() * sizeof(u32)); + gpu.PushCommandBuffer(cmdlist); + } + + std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); + // Some games expect command_buffers to be written back + offset = sizeof(IoctlSubmit); + offset = WriteVectors(output, command_buffers, offset); + offset = WriteVectors(output, relocs, offset); + offset = WriteVectors(output, reloc_shifts, offset); + offset = WriteVectors(output, syncpt_increments, offset); + offset = WriteVectors(output, wait_checks, offset); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlGetSyncpoint params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); + LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); + + // We found that implementing this causes deadlocks with async gpu, along with degraded + // performance. TODO: RE the nvdec async implementation + params.value = 0; + std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlGetWaitbase params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlGetWaitbase)); + params.value = 0; // Seems to be hard coded at 0 + std::memcpy(output.data(), ¶ms, sizeof(IoctlGetWaitbase)); + return 0; +} + +u32 nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlMapBuffer params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); + std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); + + SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); + + auto& gpu = system.GPU(); + + for (auto& cmf_buff : cmd_buffer_handles) { + auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; + if (!object) { + LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); + std::memcpy(output.data(), ¶ms, output.size()); + return NvErrCodes::InvalidInput; + } + if (object->dma_map_addr == 0) { + // NVDEC and VIC memory is in the 32-bit address space + // MapAllocate32 will attempt to map a lower 32-bit value in the shared gpu memory space + const GPUVAddr low_addr = gpu.MemoryManager().MapAllocate32(object->addr, object->size); + object->dma_map_addr = static_cast<u32>(low_addr); + // Ensure that the dma_map_addr is indeed in the lower 32-bit address space. + ASSERT(object->dma_map_addr == low_addr); + } + if (!object->dma_map_addr) { + LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size); + } else { + cmf_buff.map_address = object->dma_map_addr; + AddBufferMap(object->dma_map_addr, object->size, object->addr, + object->status == nvmap::Object::Status::Allocated); + } + } + std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer)); + std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(), + cmd_buffer_handles.size() * sizeof(MapBufferEntry)); + + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlMapBuffer params{}; + std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); + std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); + SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); + + auto& gpu = system.GPU(); + + for (auto& cmf_buff : cmd_buffer_handles) { + const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; + if (!object) { + LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); + std::memcpy(output.data(), ¶ms, output.size()); + return NvErrCodes::InvalidInput; + } + if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) { + gpu.MemoryManager().Unmap(object->dma_map_addr, *size); + } else { + // This occurs quite frequently, however does not seem to impact functionality + LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr, + object->dma_map_addr); + } + object->dma_map_addr = 0; + } + std::memset(output.data(), 0, output.size()); + return NvErrCodes::Success; +} + +u32 nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output) { + std::memcpy(&submit_timeout, input.data(), input.size()); + LOG_WARNING(Service_NVDRV, "(STUBBED) called"); + return NvErrCodes::Success; +} + +std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap( + GPUVAddr gpu_addr) const { + const auto it = std::find_if( + buffer_mappings.begin(), buffer_mappings.upper_bound(gpu_addr), [&](const auto& entry) { + return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr()); + }); + + ASSERT(it != buffer_mappings.end()); + return it->second; +} + +void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, + bool is_allocated) { + buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated}); +} + +std::optional<std::size_t> nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) { + const auto iter{buffer_mappings.find(gpu_addr)}; + if (iter == buffer_mappings.end()) { + return std::nullopt; + } + std::size_t size = 0; + if (iter->second.IsAllocated()) { + size = iter->second.Size(); + } + buffer_mappings.erase(iter); + return size; +} + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h new file mode 100644 index 000000000..c249c5349 --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -0,0 +1,168 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <map> +#include <vector> +#include "common/common_types.h" +#include "common/swap.h" +#include "core/hle/service/nvdrv/devices/nvdevice.h" + +namespace Service::Nvidia::Devices { +class nvmap; + +class nvhost_nvdec_common : public nvdevice { +public: + explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + ~nvhost_nvdec_common() override; + + virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, + std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl, + IoctlVersion version) = 0; + +protected: + class BufferMap final { + public: + constexpr BufferMap() = default; + + constexpr BufferMap(GPUVAddr start_addr, std::size_t size) + : start_addr{start_addr}, end_addr{start_addr + size} {} + + constexpr BufferMap(GPUVAddr start_addr, std::size_t size, VAddr cpu_addr, + bool is_allocated) + : start_addr{start_addr}, end_addr{start_addr + size}, cpu_addr{cpu_addr}, + is_allocated{is_allocated} {} + + constexpr VAddr StartAddr() const { + return start_addr; + } + + constexpr VAddr EndAddr() const { + return end_addr; + } + + constexpr std::size_t Size() const { + return end_addr - start_addr; + } + + constexpr VAddr CpuAddr() const { + return cpu_addr; + } + + constexpr bool IsAllocated() const { + return is_allocated; + } + + private: + GPUVAddr start_addr{}; + GPUVAddr end_addr{}; + VAddr cpu_addr{}; + bool is_allocated{}; + }; + + struct IoctlSetNvmapFD { + u32_le nvmap_fd; + }; + static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); + + struct IoctlSubmitCommandBuffer { + u32_le id; + u32_le offset; + u32_le count; + }; + static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC, + "IoctlSubmitCommandBuffer is incorrect size"); + struct IoctlSubmit { + u32_le cmd_buffer_count; + u32_le relocation_count; + u32_le syncpoint_count; + u32_le fence_count; + }; + static_assert(sizeof(IoctlSubmit) == 0x10, "IoctlSubmit has incorrect size"); + + struct CommandBuffer { + s32 memory_id; + u32 offset; + s32 word_count; + }; + static_assert(sizeof(CommandBuffer) == 0xC, "CommandBuffer has incorrect size"); + + struct Reloc { + s32 cmdbuffer_memory; + s32 cmdbuffer_offset; + s32 target; + s32 target_offset; + }; + static_assert(sizeof(Reloc) == 0x10, "CommandBuffer has incorrect size"); + + struct SyncptIncr { + u32 id; + u32 increments; + }; + static_assert(sizeof(SyncptIncr) == 0x8, "CommandBuffer has incorrect size"); + + struct Fence { + u32 id; + u32 value; + }; + static_assert(sizeof(Fence) == 0x8, "CommandBuffer has incorrect size"); + + struct IoctlGetSyncpoint { + // Input + u32_le param; + // Output + u32_le value; + }; + static_assert(sizeof(IoctlGetSyncpoint) == 8, "IocGetIdParams has wrong size"); + + struct IoctlGetWaitbase { + u32_le unknown; // seems to be ignored? Nintendo added this + u32_le value; + }; + static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size"); + + struct IoctlMapBuffer { + u32_le num_entries; + u32_le data_address; // Ignored by the driver. + u32_le attach_host_ch_das; + }; + static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size"); + + struct IocGetIdParams { + // Input + u32_le param; + // Output + u32_le value; + }; + static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); + + // Used for mapping and unmapping command buffers + struct MapBufferEntry { + u32_le map_handle; + u32_le map_address; + }; + static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size"); + + /// Ioctl command implementations + u32 SetNVMAPfd(const std::vector<u8>& input); + u32 Submit(const std::vector<u8>& input, std::vector<u8>& output); + u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output); + u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); + u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output); + u32 UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output); + u32 SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output); + + std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const; + void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated); + std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr); + + u32_le nvmap_fd{}; + u32_le submit_timeout{}; + std::shared_ptr<nvmap> nvmap_dev; + + // This is expected to be ordered, therefore we must use a map, not unordered_map + std::map<GPUVAddr, BufferMap> buffer_mappings; +}; +}; // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 9da19ad56..60db54d00 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -2,15 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <cstring> - #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "core/hle/service/nvdrv/devices/nvhost_vic.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_base.h" namespace Service::Nvidia::Devices { +nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) + : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} -nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {} nvhost_vic::~nvhost_vic() = default; u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, @@ -21,7 +23,7 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve switch (static_cast<IoctlCommand>(command.raw)) { case IoctlCommand::IocSetNVMAPfdCommand: - return SetNVMAPfd(input, output); + return SetNVMAPfd(input); case IoctlCommand::IocSubmit: return Submit(input, output); case IoctlCommand::IocGetSyncpoint: @@ -29,83 +31,19 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve case IoctlCommand::IocGetWaitbase: return GetWaitbase(input, output); case IoctlCommand::IocMapBuffer: - return MapBuffer(input, output); + case IoctlCommand::IocMapBuffer2: + case IoctlCommand::IocMapBuffer3: + case IoctlCommand::IocMapBuffer4: case IoctlCommand::IocMapBufferEx: return MapBuffer(input, output); + case IoctlCommand::IocUnmapBuffer: + case IoctlCommand::IocUnmapBuffer2: + case IoctlCommand::IocUnmapBuffer3: case IoctlCommand::IocUnmapBufferEx: - return UnmapBufferEx(input, output); + return UnmapBuffer(input, output); } - UNIMPLEMENTED_MSG("Unimplemented ioctl"); - return 0; -} - -u32 nvhost_vic::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlSetNvmapFD params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSetNvmapFD)); - LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); - - nvmap_fd = params.nvmap_fd; - return 0; -} - -u32 nvhost_vic::Submit(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlSubmit params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSubmit)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - - // Workaround for Luigi's Mansion 3, as nvhost_vic is not implemented for asynch GPU - params.command_buffer = {}; - - std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); - return 0; -} - -u32 nvhost_vic::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlGetSyncpoint params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlGetSyncpoint)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), ¶ms, sizeof(IoctlGetSyncpoint)); - return 0; -} - -u32 nvhost_vic::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlGetWaitbase params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlGetWaitbase)); - LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); - params.value = 0; // Seems to be hard coded at 0 - std::memcpy(output.data(), ¶ms, sizeof(IoctlGetWaitbase)); - return 0; -} - -u32 nvhost_vic::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlMapBuffer params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer)); - return 0; -} - -u32 nvhost_vic::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlMapBufferEx params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2, - params.address_1); - params.address_1 = 0; - params.address_2 = 0; - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBufferEx)); - return 0; -} - -u32 nvhost_vic::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { - IoctlUnmapBufferEx params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlUnmapBufferEx)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called"); - std::memcpy(output.data(), ¶ms, sizeof(IoctlUnmapBufferEx)); + UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw); return 0; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index a7bb7bbd5..f975b190c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -4,19 +4,15 @@ #pragma once -#include <array> -#include <vector> -#include "common/common_types.h" -#include "common/swap.h" -#include "core/hle/service/nvdrv/devices/nvdevice.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" namespace Service::Nvidia::Devices { +class nvmap; -class nvhost_vic final : public nvdevice { +class nvhost_vic final : public nvhost_nvdec_common { public: - explicit nvhost_vic(Core::System& system); - ~nvhost_vic() override; - + explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); + ~nvhost_vic(); u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl, IoctlVersion version) override; @@ -28,74 +24,14 @@ private: IocGetSyncpoint = 0xC0080002, IocGetWaitbase = 0xC0080003, IocMapBuffer = 0xC01C0009, + IocMapBuffer2 = 0xC0340009, + IocMapBuffer3 = 0xC0140009, + IocMapBuffer4 = 0xC00C0009, IocMapBufferEx = 0xC03C0009, - IocUnmapBufferEx = 0xC03C000A, - }; - - struct IoctlSetNvmapFD { - u32_le nvmap_fd; - }; - static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); - - struct IoctlSubmitCommandBuffer { - u32 id; - u32 offset; - u32 count; - }; - static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC, - "IoctlSubmitCommandBuffer is incorrect size"); - - struct IoctlSubmit { - u32 command_buffer_count; - u32 relocations_count; - u32 syncpt_count; - u32 wait_count; - std::array<IoctlSubmitCommandBuffer, 4> command_buffer; - }; - static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit is incorrect size"); - - struct IoctlGetSyncpoint { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetSyncpoint) == 0x8, "IoctlGetSyncpoint is incorrect size"); - - struct IoctlGetWaitbase { - u32 unknown; // seems to be ignored? Nintendo added this - u32 value; - }; - static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size"); - - struct IoctlMapBuffer { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size"); - - struct IoctlMapBufferEx { - u32 unknown; - u32 address_1; - u32 address_2; - INSERT_PADDING_BYTES(0x30); // TODO(DarkLordZach): RE this structure + IocUnmapBuffer = 0xC03C000A, + IocUnmapBuffer2 = 0xC034000A, + IocUnmapBuffer3 = 0xC00C000A, + IocUnmapBufferEx = 0xC01C000A, }; - static_assert(sizeof(IoctlMapBufferEx) == 0x3C, "IoctlMapBufferEx is incorrect size"); - - struct IoctlUnmapBufferEx { - INSERT_PADDING_BYTES(0x3C); // TODO(DarkLordZach): RE this structure - }; - static_assert(sizeof(IoctlUnmapBufferEx) == 0x3C, "IoctlUnmapBufferEx is incorrect size"); - - u32_le nvmap_fd{}; - - u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); - u32 Submit(const std::vector<u8>& input, std::vector<u8>& output); - u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output); - u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output); - u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output); - u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); - u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output); }; - } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 84624be00..04b9ef540 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -37,6 +37,7 @@ public: VAddr addr; Status status; u32 refcount; + u32 dma_map_addr; }; std::shared_ptr<Object> GetObject(u32 handle) const { diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 197c77db0..803c1a984 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -51,9 +51,9 @@ Module::Module(Core::System& system) { devices["/dev/nvmap"] = nvmap_dev; devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); - devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system); + devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); - devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system); + devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); } Module::~Module() = default; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 28d3f9099..e14c02045 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -63,6 +63,7 @@ void LogSettings() { log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue()); log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); + log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); @@ -119,6 +120,7 @@ void RestoreGlobalState() { values.use_disk_shader_cache.SetGlobal(true); values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); + values.use_nvdec_emulation.SetGlobal(true); values.use_vsync.SetGlobal(true); values.use_assembly_shaders.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); diff --git a/src/core/settings.h b/src/core/settings.h index 9834f44bb..604805615 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -111,6 +111,7 @@ struct Values { Setting<bool> use_disk_shader_cache; Setting<GPUAccuracy> gpu_accuracy; Setting<bool> use_asynchronous_gpu_emulation; + Setting<bool> use_nvdec_emulation; Setting<bool> use_vsync; Setting<bool> use_assembly_shaders; Setting<bool> use_asynchronous_shaders; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index da09c0dbc..ebc19e18a 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -206,6 +206,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation.GetValue()); + AddField(field_type, "Renderer_UseNvdecEmulation", + Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders.GetValue()); |