diff options
Diffstat (limited to 'src')
102 files changed, 2825 insertions, 883 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 1a3647a67..d342cafe0 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -15,14 +15,14 @@ add_library(core STATIC constants.h core.cpp core.h - core_cpu.cpp - core_cpu.h + core_manager.cpp + core_manager.h core_timing.cpp core_timing.h core_timing_util.cpp core_timing_util.h - cpu_core_manager.cpp - cpu_core_manager.h + cpu_manager.cpp + cpu_manager.h crypto/aes_util.cpp crypto/aes_util.h crypto/encryption_layer.cpp @@ -158,6 +158,8 @@ add_library(core STATIC hle/kernel/mutex.h hle/kernel/object.cpp hle/kernel/object.h + hle/kernel/physical_core.cpp + hle/kernel/physical_core.h hle/kernel/process.cpp hle/kernel/process.h hle/kernel/process_capability.cpp diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index e825c0526..791640a3a 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -10,11 +10,12 @@ #include "common/microprofile.h" #include "core/arm/dynarmic/arm_dynarmic.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" @@ -87,7 +88,7 @@ public: if (GDBStub::IsServerEnabled()) { parent.jit->HaltExecution(); parent.SetPC(pc); - Kernel::Thread* thread = Kernel::GetCurrentThread(); + Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread(); parent.SaveContext(thread->GetContext()); GDBStub::Break(); GDBStub::SendTrap(thread, 5); diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp index abd59ff4b..94570e520 100644 --- a/src/core/arm/exclusive_monitor.cpp +++ b/src/core/arm/exclusive_monitor.cpp @@ -2,10 +2,24 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif #include "core/arm/exclusive_monitor.h" +#include "core/memory.h" namespace Core { ExclusiveMonitor::~ExclusiveMonitor() = default; +std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, + std::size_t num_cores) { +#ifdef ARCHITECTURE_x86_64 + return std::make_unique<Core::DynarmicExclusiveMonitor>(memory, num_cores); +#else + // TODO(merry): Passthrough exclusive monitor + return nullptr; +#endif +} + } // namespace Core diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index f59aca667..4ef418b90 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -4,8 +4,14 @@ #pragma once +#include <memory> + #include "common/common_types.h" +namespace Memory { +class Memory; +} + namespace Core { class ExclusiveMonitor { @@ -22,4 +28,7 @@ public: virtual bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) = 0; }; +std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, + std::size_t num_cores); + } // namespace Core diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 48182c99a..f99ad5802 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -9,6 +9,7 @@ #include "core/arm/unicorn/arm_unicorn.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/svc.h" namespace Core { @@ -177,7 +178,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) { uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); } - Kernel::Thread* thread = Kernel::GetCurrentThread(); + Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread(); SaveContext(thread->GetContext()); if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) { last_bkpt_hit = false; diff --git a/src/core/core.cpp b/src/core/core.cpp index d697b80ef..0eb0c0dca 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -11,9 +11,9 @@ #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" -#include "core/cpu_core_manager.h" +#include "core/cpu_manager.h" #include "core/file_sys/bis_factory.h" #include "core/file_sys/card_image.h" #include "core/file_sys/mode.h" @@ -28,6 +28,7 @@ #include "core/hardware_interrupt_manager.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" @@ -113,16 +114,25 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, struct System::Impl { explicit Impl(System& system) : kernel{system}, fs_controller{system}, memory{system}, - cpu_core_manager{system}, reporter{system}, applet_manager{system} {} + cpu_manager{system}, reporter{system}, applet_manager{system} {} - Cpu& CurrentCpuCore() { - return cpu_core_manager.GetCurrentCore(); + CoreManager& CurrentCoreManager() { + return cpu_manager.GetCurrentCoreManager(); + } + + Kernel::PhysicalCore& CurrentPhysicalCore() { + const auto index = cpu_manager.GetActiveCoreIndex(); + return kernel.PhysicalCore(index); + } + + Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) { + return kernel.PhysicalCore(index); } ResultStatus RunLoop(bool tight_loop) { status = ResultStatus::Success; - cpu_core_manager.RunLoop(tight_loop); + cpu_manager.RunLoop(tight_loop); return status; } @@ -131,8 +141,8 @@ struct System::Impl { LOG_DEBUG(HW_Memory, "initialized OK"); core_timing.Initialize(); - cpu_core_manager.Initialize(); kernel.Initialize(); + cpu_manager.Initialize(); const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch()); @@ -205,7 +215,6 @@ struct System::Impl { // Main process has been loaded and been made current. // Begin GPU and CPU execution. gpu_core->Start(); - cpu_core_manager.StartThreads(); // Initialize cheat engine if (cheat_engine) { @@ -259,7 +268,9 @@ struct System::Impl { is_powered_on = false; exit_lock = false; - gpu_core->WaitIdle(); + if (gpu_core) { + gpu_core->WaitIdle(); + } // Shutdown emulation session renderer.reset(); @@ -272,7 +283,7 @@ struct System::Impl { gpu_core.reset(); // Close all CPU/threading state - cpu_core_manager.Shutdown(); + cpu_manager.Shutdown(); // Shutdown kernel and core timing kernel.Shutdown(); @@ -342,7 +353,7 @@ struct System::Impl { std::unique_ptr<Tegra::GPU> gpu_core; std::unique_ptr<Hardware::InterruptManager> interrupt_manager; Memory::Memory memory; - CpuCoreManager cpu_core_manager; + CpuManager cpu_manager; bool is_powered_on = false; bool exit_lock = false; @@ -377,12 +388,12 @@ struct System::Impl { System::System() : impl{std::make_unique<Impl>(*this)} {} System::~System() = default; -Cpu& System::CurrentCpuCore() { - return impl->CurrentCpuCore(); +CoreManager& System::CurrentCoreManager() { + return impl->CurrentCoreManager(); } -const Cpu& System::CurrentCpuCore() const { - return impl->CurrentCpuCore(); +const CoreManager& System::CurrentCoreManager() const { + return impl->CurrentCoreManager(); } System::ResultStatus System::RunLoop(bool tight_loop) { @@ -394,7 +405,7 @@ System::ResultStatus System::SingleStep() { } void System::InvalidateCpuInstructionCaches() { - impl->cpu_core_manager.InvalidateAllInstructionCaches(); + impl->kernel.InvalidateAllInstructionCaches(); } System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::string& filepath) { @@ -406,13 +417,11 @@ bool System::IsPoweredOn() const { } void System::PrepareReschedule() { - CurrentCpuCore().PrepareReschedule(); + impl->CurrentPhysicalCore().Stop(); } void System::PrepareReschedule(const u32 core_index) { - if (core_index < GlobalScheduler().CpuCoresCount()) { - CpuCore(core_index).PrepareReschedule(); - } + impl->kernel.PrepareReschedule(core_index); } PerfStatsResults System::GetAndResetPerfStats() { @@ -428,31 +437,31 @@ const TelemetrySession& System::TelemetrySession() const { } ARM_Interface& System::CurrentArmInterface() { - return CurrentCpuCore().ArmInterface(); + return impl->CurrentPhysicalCore().ArmInterface(); } const ARM_Interface& System::CurrentArmInterface() const { - return CurrentCpuCore().ArmInterface(); + return impl->CurrentPhysicalCore().ArmInterface(); } std::size_t System::CurrentCoreIndex() const { - return CurrentCpuCore().CoreIndex(); + return impl->cpu_manager.GetActiveCoreIndex(); } Kernel::Scheduler& System::CurrentScheduler() { - return CurrentCpuCore().Scheduler(); + return impl->CurrentPhysicalCore().Scheduler(); } const Kernel::Scheduler& System::CurrentScheduler() const { - return CurrentCpuCore().Scheduler(); + return impl->CurrentPhysicalCore().Scheduler(); } Kernel::Scheduler& System::Scheduler(std::size_t core_index) { - return CpuCore(core_index).Scheduler(); + return impl->GetPhysicalCore(core_index).Scheduler(); } const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const { - return CpuCore(core_index).Scheduler(); + return impl->GetPhysicalCore(core_index).Scheduler(); } /// Gets the global scheduler @@ -474,28 +483,28 @@ const Kernel::Process* System::CurrentProcess() const { } ARM_Interface& System::ArmInterface(std::size_t core_index) { - return CpuCore(core_index).ArmInterface(); + return impl->GetPhysicalCore(core_index).ArmInterface(); } const ARM_Interface& System::ArmInterface(std::size_t core_index) const { - return CpuCore(core_index).ArmInterface(); + return impl->GetPhysicalCore(core_index).ArmInterface(); } -Cpu& System::CpuCore(std::size_t core_index) { - return impl->cpu_core_manager.GetCore(core_index); +CoreManager& System::GetCoreManager(std::size_t core_index) { + return impl->cpu_manager.GetCoreManager(core_index); } -const Cpu& System::CpuCore(std::size_t core_index) const { +const CoreManager& System::GetCoreManager(std::size_t core_index) const { ASSERT(core_index < NUM_CPU_CORES); - return impl->cpu_core_manager.GetCore(core_index); + return impl->cpu_manager.GetCoreManager(core_index); } ExclusiveMonitor& System::Monitor() { - return impl->cpu_core_manager.GetExclusiveMonitor(); + return impl->kernel.GetExclusiveMonitor(); } const ExclusiveMonitor& System::Monitor() const { - return impl->cpu_core_manager.GetExclusiveMonitor(); + return impl->kernel.GetExclusiveMonitor(); } Memory::Memory& System::Memory() { diff --git a/src/core/core.h b/src/core/core.h index e240c5c58..e69d68fcf 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -93,7 +93,7 @@ class Memory; namespace Core { class ARM_Interface; -class Cpu; +class CoreManager; class ExclusiveMonitor; class FrameLimiter; class PerfStats; @@ -218,10 +218,10 @@ public: const ARM_Interface& ArmInterface(std::size_t core_index) const; /// Gets a CPU interface to the CPU core with the specified index - Cpu& CpuCore(std::size_t core_index); + CoreManager& GetCoreManager(std::size_t core_index); /// Gets a CPU interface to the CPU core with the specified index - const Cpu& CpuCore(std::size_t core_index) const; + const CoreManager& GetCoreManager(std::size_t core_index) const; /// Gets a reference to the exclusive monitor ExclusiveMonitor& Monitor(); @@ -364,10 +364,10 @@ private: System(); /// Returns the currently running CPU core - Cpu& CurrentCpuCore(); + CoreManager& CurrentCoreManager(); /// Returns the currently running CPU core - const Cpu& CurrentCpuCore() const; + const CoreManager& CurrentCoreManager() const; /** * Initialize the emulated system. diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp deleted file mode 100644 index 630cd4feb..000000000 --- a/src/core/core_cpu.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <condition_variable> -#include <mutex> - -#include "common/logging/log.h" -#ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic.h" -#endif -#include "core/arm/exclusive_monitor.h" -#include "core/arm/unicorn/arm_unicorn.h" -#include "core/core.h" -#include "core/core_cpu.h" -#include "core/core_timing.h" -#include "core/hle/kernel/scheduler.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/lock.h" -#include "core/settings.h" - -namespace Core { - -void CpuBarrier::NotifyEnd() { - std::unique_lock lock{mutex}; - end = true; - condition.notify_all(); -} - -bool CpuBarrier::Rendezvous() { - if (!Settings::values.use_multi_core) { - // Meaningless when running in single-core mode - return true; - } - - if (!end) { - std::unique_lock lock{mutex}; - - --cores_waiting; - if (!cores_waiting) { - cores_waiting = NUM_CPU_CORES; - condition.notify_all(); - return true; - } - - condition.wait(lock); - return true; - } - - return false; -} - -Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, - std::size_t core_index) - : cpu_barrier{cpu_barrier}, global_scheduler{system.GlobalScheduler()}, - core_timing{system.CoreTiming()}, core_index{core_index} { -#ifdef ARCHITECTURE_x86_64 - arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); -#else - arm_interface = std::make_unique<ARM_Unicorn>(system); - LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); -#endif - - scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); -} - -Cpu::~Cpu() = default; - -std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor( - [[maybe_unused]] Memory::Memory& memory, [[maybe_unused]] std::size_t num_cores) { -#ifdef ARCHITECTURE_x86_64 - return std::make_unique<DynarmicExclusiveMonitor>(memory, num_cores); -#else - // TODO(merry): Passthrough exclusive monitor - return nullptr; -#endif -} - -void Cpu::RunLoop(bool tight_loop) { - // Wait for all other CPU cores to complete the previous slice, such that they run in lock-step - if (!cpu_barrier.Rendezvous()) { - // If rendezvous failed, session has been killed - return; - } - - Reschedule(); - - // If we don't have a currently active thread then don't execute instructions, - // instead advance to the next event and try to yield to the next thread - if (Kernel::GetCurrentThread() == nullptr) { - LOG_TRACE(Core, "Core-{} idling", core_index); - core_timing.Idle(); - } else { - if (tight_loop) { - arm_interface->Run(); - } else { - arm_interface->Step(); - } - // We are stopping a run, exclusive state must be cleared - arm_interface->ClearExclusiveState(); - } - core_timing.Advance(); - - Reschedule(); -} - -void Cpu::SingleStep() { - return RunLoop(false); -} - -void Cpu::PrepareReschedule() { - arm_interface->PrepareReschedule(); -} - -void Cpu::Reschedule() { - // Lock the global kernel mutex when we manipulate the HLE state - std::lock_guard lock(HLE::g_hle_lock); - - global_scheduler.SelectThread(core_index); - scheduler->TryDoContextSwitch(); -} - -void Cpu::Shutdown() { - scheduler->Shutdown(); -} - -} // namespace Core diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h deleted file mode 100644 index 78f5021a2..000000000 --- a/src/core/core_cpu.h +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <atomic> -#include <condition_variable> -#include <cstddef> -#include <memory> -#include <mutex> -#include "common/common_types.h" - -namespace Kernel { -class GlobalScheduler; -class Scheduler; -} // namespace Kernel - -namespace Core { -class System; -} - -namespace Core::Timing { -class CoreTiming; -} - -namespace Memory { -class Memory; -} - -namespace Core { - -class ARM_Interface; -class ExclusiveMonitor; - -constexpr unsigned NUM_CPU_CORES{4}; - -class CpuBarrier { -public: - bool IsAlive() const { - return !end; - } - - void NotifyEnd(); - - bool Rendezvous(); - -private: - unsigned cores_waiting{NUM_CPU_CORES}; - std::mutex mutex; - std::condition_variable condition; - std::atomic<bool> end{}; -}; - -class Cpu { -public: - Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, - std::size_t core_index); - ~Cpu(); - - void RunLoop(bool tight_loop = true); - - void SingleStep(); - - void PrepareReschedule(); - - ARM_Interface& ArmInterface() { - return *arm_interface; - } - - const ARM_Interface& ArmInterface() const { - return *arm_interface; - } - - Kernel::Scheduler& Scheduler() { - return *scheduler; - } - - const Kernel::Scheduler& Scheduler() const { - return *scheduler; - } - - bool IsMainCore() const { - return core_index == 0; - } - - std::size_t CoreIndex() const { - return core_index; - } - - void Shutdown(); - - /** - * Creates an exclusive monitor to handle exclusive reads/writes. - * - * @param memory The current memory subsystem that the monitor may wish - * to keep track of. - * - * @param num_cores The number of cores to assume about the CPU. - * - * @returns The constructed exclusive monitor instance, or nullptr if the current - * CPU backend is unable to use an exclusive monitor. - */ - static std::unique_ptr<ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory, - std::size_t num_cores); - -private: - void Reschedule(); - - std::unique_ptr<ARM_Interface> arm_interface; - CpuBarrier& cpu_barrier; - Kernel::GlobalScheduler& global_scheduler; - std::unique_ptr<Kernel::Scheduler> scheduler; - Timing::CoreTiming& core_timing; - - std::atomic<bool> reschedule_pending = false; - std::size_t core_index; -}; - -} // namespace Core diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp new file mode 100644 index 000000000..8eacf92dd --- /dev/null +++ b/src/core/core_manager.cpp @@ -0,0 +1,70 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <condition_variable> +#include <mutex> + +#include "common/logging/log.h" +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif +#include "core/arm/exclusive_monitor.h" +#include "core/arm/unicorn/arm_unicorn.h" +#include "core/core.h" +#include "core/core_manager.h" +#include "core/core_timing.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/lock.h" +#include "core/settings.h" + +namespace Core { + +CoreManager::CoreManager(System& system, std::size_t core_index) + : global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore( + core_index)}, + core_timing{system.CoreTiming()}, core_index{core_index} {} + +CoreManager::~CoreManager() = default; + +void CoreManager::RunLoop(bool tight_loop) { + Reschedule(); + + // If we don't have a currently active thread then don't execute instructions, + // instead advance to the next event and try to yield to the next thread + if (Kernel::GetCurrentThread() == nullptr) { + LOG_TRACE(Core, "Core-{} idling", core_index); + core_timing.Idle(); + } else { + if (tight_loop) { + physical_core.Run(); + } else { + physical_core.Step(); + } + } + core_timing.Advance(); + + Reschedule(); +} + +void CoreManager::SingleStep() { + return RunLoop(false); +} + +void CoreManager::PrepareReschedule() { + physical_core.Stop(); +} + +void CoreManager::Reschedule() { + // Lock the global kernel mutex when we manipulate the HLE state + std::lock_guard lock(HLE::g_hle_lock); + + global_scheduler.SelectThread(core_index); + + physical_core.Scheduler().TryDoContextSwitch(); +} + +} // namespace Core diff --git a/src/core/core_manager.h b/src/core/core_manager.h new file mode 100644 index 000000000..b14e723d7 --- /dev/null +++ b/src/core/core_manager.h @@ -0,0 +1,63 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> +#include <cstddef> +#include <memory> +#include "common/common_types.h" + +namespace Kernel { +class GlobalScheduler; +class PhysicalCore; +} // namespace Kernel + +namespace Core { +class System; +} + +namespace Core::Timing { +class CoreTiming; +} + +namespace Memory { +class Memory; +} + +namespace Core { + +constexpr unsigned NUM_CPU_CORES{4}; + +class CoreManager { +public: + CoreManager(System& system, std::size_t core_index); + ~CoreManager(); + + void RunLoop(bool tight_loop = true); + + void SingleStep(); + + void PrepareReschedule(); + + bool IsMainCore() const { + return core_index == 0; + } + + std::size_t CoreIndex() const { + return core_index; + } + +private: + void Reschedule(); + + Kernel::GlobalScheduler& global_scheduler; + Kernel::PhysicalCore& physical_core; + Timing::CoreTiming& core_timing; + + std::atomic<bool> reschedule_pending = false; + std::size_t core_index; +}; + +} // namespace Core diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp deleted file mode 100644 index f04a34133..000000000 --- a/src/core/cpu_core_manager.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "core/arm/exclusive_monitor.h" -#include "core/core.h" -#include "core/core_cpu.h" -#include "core/core_timing.h" -#include "core/cpu_core_manager.h" -#include "core/gdbstub/gdbstub.h" -#include "core/settings.h" - -namespace Core { -namespace { -void RunCpuCore(const System& system, Cpu& cpu_state) { - while (system.IsPoweredOn()) { - cpu_state.RunLoop(true); - } -} -} // Anonymous namespace - -CpuCoreManager::CpuCoreManager(System& system) : system{system} {} -CpuCoreManager::~CpuCoreManager() = default; - -void CpuCoreManager::Initialize() { - barrier = std::make_unique<CpuBarrier>(); - exclusive_monitor = Cpu::MakeExclusiveMonitor(system.Memory(), cores.size()); - - for (std::size_t index = 0; index < cores.size(); ++index) { - cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index); - } -} - -void CpuCoreManager::StartThreads() { - // Create threads for CPU cores 1-3, and build thread_to_cpu map - // CPU core 0 is run on the main thread - thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); - if (!Settings::values.use_multi_core) { - return; - } - - for (std::size_t index = 0; index < core_threads.size(); ++index) { - core_threads[index] = std::make_unique<std::thread>(RunCpuCore, std::cref(system), - std::ref(*cores[index + 1])); - thread_to_cpu[core_threads[index]->get_id()] = cores[index + 1].get(); - } -} - -void CpuCoreManager::Shutdown() { - barrier->NotifyEnd(); - if (Settings::values.use_multi_core) { - for (auto& thread : core_threads) { - thread->join(); - thread.reset(); - } - } - - thread_to_cpu.clear(); - for (auto& cpu_core : cores) { - cpu_core->Shutdown(); - cpu_core.reset(); - } - - exclusive_monitor.reset(); - barrier.reset(); -} - -Cpu& CpuCoreManager::GetCore(std::size_t index) { - return *cores.at(index); -} - -const Cpu& CpuCoreManager::GetCore(std::size_t index) const { - return *cores.at(index); -} - -ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() { - return *exclusive_monitor; -} - -const ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() const { - return *exclusive_monitor; -} - -Cpu& CpuCoreManager::GetCurrentCore() { - if (Settings::values.use_multi_core) { - const auto& search = thread_to_cpu.find(std::this_thread::get_id()); - ASSERT(search != thread_to_cpu.end()); - ASSERT(search->second); - return *search->second; - } - - // Otherwise, use single-threaded mode active_core variable - return *cores[active_core]; -} - -const Cpu& CpuCoreManager::GetCurrentCore() const { - if (Settings::values.use_multi_core) { - const auto& search = thread_to_cpu.find(std::this_thread::get_id()); - ASSERT(search != thread_to_cpu.end()); - ASSERT(search->second); - return *search->second; - } - - // Otherwise, use single-threaded mode active_core variable - return *cores[active_core]; -} - -void CpuCoreManager::RunLoop(bool tight_loop) { - // Update thread_to_cpu in case Core 0 is run from a different host thread - thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); - - if (GDBStub::IsServerEnabled()) { - GDBStub::HandlePacket(); - - // If the loop is halted and we want to step, use a tiny (1) number of instructions to - // execute. Otherwise, get out of the loop function. - if (GDBStub::GetCpuHaltFlag()) { - if (GDBStub::GetCpuStepFlag()) { - tight_loop = false; - } else { - return; - } - } - } - - auto& core_timing = system.CoreTiming(); - core_timing.ResetRun(); - bool keep_running{}; - do { - keep_running = false; - for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { - core_timing.SwitchContext(active_core); - if (core_timing.CanCurrentContextRun()) { - cores[active_core]->RunLoop(tight_loop); - } - keep_running |= core_timing.CanCurrentContextRun(); - } - } while (keep_running); - - if (GDBStub::IsServerEnabled()) { - GDBStub::SetCpuStepFlag(false); - } -} - -void CpuCoreManager::InvalidateAllInstructionCaches() { - for (auto& cpu : cores) { - cpu->ArmInterface().ClearInstructionCache(); - } -} - -} // namespace Core diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h deleted file mode 100644 index 2cbbf8216..000000000 --- a/src/core/cpu_core_manager.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <map> -#include <memory> -#include <thread> - -namespace Core { - -class Cpu; -class CpuBarrier; -class ExclusiveMonitor; -class System; - -class CpuCoreManager { -public: - explicit CpuCoreManager(System& system); - CpuCoreManager(const CpuCoreManager&) = delete; - CpuCoreManager(CpuCoreManager&&) = delete; - - ~CpuCoreManager(); - - CpuCoreManager& operator=(const CpuCoreManager&) = delete; - CpuCoreManager& operator=(CpuCoreManager&&) = delete; - - void Initialize(); - void StartThreads(); - void Shutdown(); - - Cpu& GetCore(std::size_t index); - const Cpu& GetCore(std::size_t index) const; - - Cpu& GetCurrentCore(); - const Cpu& GetCurrentCore() const; - - ExclusiveMonitor& GetExclusiveMonitor(); - const ExclusiveMonitor& GetExclusiveMonitor() const; - - void RunLoop(bool tight_loop); - - void InvalidateAllInstructionCaches(); - -private: - static constexpr std::size_t NUM_CPU_CORES = 4; - - std::unique_ptr<ExclusiveMonitor> exclusive_monitor; - std::unique_ptr<CpuBarrier> barrier; - std::array<std::unique_ptr<Cpu>, NUM_CPU_CORES> cores; - std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> core_threads; - std::size_t active_core{}; ///< Active core, only used in single thread mode - - /// Map of guest threads to CPU cores - std::map<std::thread::id, Cpu*> thread_to_cpu; - - System& system; -}; - -} // namespace Core diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp new file mode 100644 index 000000000..70ddbdcca --- /dev/null +++ b/src/core/cpu_manager.cpp @@ -0,0 +1,81 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/arm/exclusive_monitor.h" +#include "core/core.h" +#include "core/core_manager.h" +#include "core/core_timing.h" +#include "core/cpu_manager.h" +#include "core/gdbstub/gdbstub.h" + +namespace Core { + +CpuManager::CpuManager(System& system) : system{system} {} +CpuManager::~CpuManager() = default; + +void CpuManager::Initialize() { + for (std::size_t index = 0; index < core_managers.size(); ++index) { + core_managers[index] = std::make_unique<CoreManager>(system, index); + } +} + +void CpuManager::Shutdown() { + for (auto& cpu_core : core_managers) { + cpu_core.reset(); + } +} + +CoreManager& CpuManager::GetCoreManager(std::size_t index) { + return *core_managers.at(index); +} + +const CoreManager& CpuManager::GetCoreManager(std::size_t index) const { + return *core_managers.at(index); +} + +CoreManager& CpuManager::GetCurrentCoreManager() { + // Otherwise, use single-threaded mode active_core variable + return *core_managers[active_core]; +} + +const CoreManager& CpuManager::GetCurrentCoreManager() const { + // Otherwise, use single-threaded mode active_core variable + return *core_managers[active_core]; +} + +void CpuManager::RunLoop(bool tight_loop) { + if (GDBStub::IsServerEnabled()) { + GDBStub::HandlePacket(); + + // If the loop is halted and we want to step, use a tiny (1) number of instructions to + // execute. Otherwise, get out of the loop function. + if (GDBStub::GetCpuHaltFlag()) { + if (GDBStub::GetCpuStepFlag()) { + tight_loop = false; + } else { + return; + } + } + } + + auto& core_timing = system.CoreTiming(); + core_timing.ResetRun(); + bool keep_running{}; + do { + keep_running = false; + for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { + core_timing.SwitchContext(active_core); + if (core_timing.CanCurrentContextRun()) { + core_managers[active_core]->RunLoop(tight_loop); + } + keep_running |= core_timing.CanCurrentContextRun(); + } + } while (keep_running); + + if (GDBStub::IsServerEnabled()) { + GDBStub::SetCpuStepFlag(false); + } +} + +} // namespace Core diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h new file mode 100644 index 000000000..feb619e1b --- /dev/null +++ b/src/core/cpu_manager.h @@ -0,0 +1,50 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> + +namespace Core { + +class CoreManager; +class System; + +class CpuManager { +public: + explicit CpuManager(System& system); + CpuManager(const CpuManager&) = delete; + CpuManager(CpuManager&&) = delete; + + ~CpuManager(); + + CpuManager& operator=(const CpuManager&) = delete; + CpuManager& operator=(CpuManager&&) = delete; + + void Initialize(); + void Shutdown(); + + CoreManager& GetCoreManager(std::size_t index); + const CoreManager& GetCoreManager(std::size_t index) const; + + CoreManager& GetCurrentCoreManager(); + const CoreManager& GetCurrentCoreManager() const; + + std::size_t GetActiveCoreIndex() const { + return active_core; + } + + void RunLoop(bool tight_loop); + +private: + static constexpr std::size_t NUM_CPU_CORES = 4; + + std::array<std::unique_ptr<CoreManager>, NUM_CPU_CORES> core_managers; + std::size_t active_core{}; ///< Active core, only used in single thread mode + + System& system; +}; + +} // namespace Core diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 4a9912641..3376eedc5 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -75,6 +75,13 @@ public: return nullptr; } + /// Returns if window is shown (not minimized) + virtual bool IsShown() const = 0; + + /// Retrieves Vulkan specific handlers from the window + virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const = 0; + /** * Signal that a touch pressed event has occurred (e.g. mouse click pressed) * @param framebuffer_x Framebuffer x-coordinate that was pressed diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h index 7c11d7546..2b098b7c6 100644 --- a/src/core/frontend/input.h +++ b/src/core/frontend/input.h @@ -15,6 +15,13 @@ namespace Input { +enum class AnalogDirection : u8 { + RIGHT, + LEFT, + UP, + DOWN, +}; + /// An abstract class template for an input device (a button, an analog input, etc.). template <typename StatusType> class InputDevice { @@ -23,6 +30,9 @@ public: virtual StatusType GetStatus() const { return {}; } + virtual bool GetAnalogDirectionStatus(AnalogDirection direction) const { + return {}; + } }; /// An abstract class template for a factory that can create input devices. diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 37cb28848..67e95999d 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -35,7 +35,7 @@ #include "common/swap.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/scheduler.h" diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index db189c8e3..2ea3dcb61 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -8,7 +8,6 @@ #include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/errors.h" #include "core/hle/kernel/scheduler.h" diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 1d0783bd3..edd4c4259 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -3,13 +3,15 @@ // Refer to the license.txt file included. #include <atomic> +#include <functional> #include <memory> #include <mutex> #include <utility> #include "common/assert.h" #include "common/logging/log.h" - +#include "core/arm/arm_interface.h" +#include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" @@ -17,6 +19,7 @@ #include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/scheduler.h" @@ -98,6 +101,7 @@ struct KernelCore::Impl { void Initialize(KernelCore& kernel) { Shutdown(); + InitializePhysicalCores(); InitializeSystemResourceLimit(kernel); InitializeThreads(); InitializePreemption(); @@ -121,6 +125,21 @@ struct KernelCore::Impl { global_scheduler.Shutdown(); named_ports.clear(); + + for (auto& core : cores) { + core.Shutdown(); + } + cores.clear(); + + exclusive_monitor.reset(); + } + + void InitializePhysicalCores() { + exclusive_monitor = + Core::MakeExclusiveMonitor(system.Memory(), global_scheduler.CpuCoresCount()); + for (std::size_t i = 0; i < global_scheduler.CpuCoresCount(); i++) { + cores.emplace_back(system, i, *exclusive_monitor); + } } // Creates the default system resource limit @@ -186,6 +205,9 @@ struct KernelCore::Impl { /// the ConnectToPort SVC. NamedPortTable named_ports; + std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; + std::vector<Kernel::PhysicalCore> cores; + // System context Core::System& system; }; @@ -240,6 +262,34 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { return impl->global_scheduler; } +Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) { + return impl->cores[id]; +} + +const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const { + return impl->cores[id]; +} + +Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() { + return *impl->exclusive_monitor; +} + +const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const { + return *impl->exclusive_monitor; +} + +void KernelCore::InvalidateAllInstructionCaches() { + for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) { + PhysicalCore(i).ArmInterface().ClearInstructionCache(); + } +} + +void KernelCore::PrepareReschedule(std::size_t id) { + if (id < impl->global_scheduler.CpuCoresCount()) { + impl->cores[id].Stop(); + } +} + void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 3bf0068ed..fccffaf3a 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -11,8 +11,9 @@ #include "core/hle/kernel/object.h" namespace Core { +class ExclusiveMonitor; class System; -} +} // namespace Core namespace Core::Timing { class CoreTiming; @@ -25,6 +26,7 @@ class AddressArbiter; class ClientPort; class GlobalScheduler; class HandleTable; +class PhysicalCore; class Process; class ResourceLimit; class Thread; @@ -84,6 +86,21 @@ public: /// Gets the sole instance of the global scheduler const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Gets the an instance of the respective physical CPU core. + Kernel::PhysicalCore& PhysicalCore(std::size_t id); + + /// Gets the an instance of the respective physical CPU core. + const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; + + /// Stops execution of 'id' core, in order to reschedule a new thread. + void PrepareReschedule(std::size_t id); + + Core::ExclusiveMonitor& GetExclusiveMonitor(); + + const Core::ExclusiveMonitor& GetExclusiveMonitor() const; + + void InvalidateAllInstructionCaches(); + /// Adds a port to the named port table void AddNamedPort(std::string name, std::shared_ptr<ClientPort> port); diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp new file mode 100644 index 000000000..9303dd273 --- /dev/null +++ b/src/core/hle/kernel/physical_core.cpp @@ -0,0 +1,51 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "core/arm/arm_interface.h" +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic.h" +#endif +#include "core/arm/exclusive_monitor.h" +#include "core/arm/unicorn/arm_unicorn.h" +#include "core/core.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" + +namespace Kernel { + +PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, + Core::ExclusiveMonitor& exclusive_monitor) + : core_index{id} { +#ifdef ARCHITECTURE_x86_64 + arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index); +#else + arm_interface = std::make_shared<Core::ARM_Unicorn>(system); + LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); +#endif + + scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); +} + +PhysicalCore::~PhysicalCore() = default; + +void PhysicalCore::Run() { + arm_interface->Run(); + arm_interface->ClearExclusiveState(); +} + +void PhysicalCore::Step() { + arm_interface->Step(); +} + +void PhysicalCore::Stop() { + arm_interface->PrepareReschedule(); +} + +void PhysicalCore::Shutdown() { + scheduler->Shutdown(); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h new file mode 100644 index 000000000..4c32c0f1b --- /dev/null +++ b/src/core/hle/kernel/physical_core.h @@ -0,0 +1,77 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <memory> + +namespace Kernel { +class Scheduler; +} // namespace Kernel + +namespace Core { +class ARM_Interface; +class ExclusiveMonitor; +class System; +} // namespace Core + +namespace Kernel { + +class PhysicalCore { +public: + PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor); + ~PhysicalCore(); + + PhysicalCore(const PhysicalCore&) = delete; + PhysicalCore& operator=(const PhysicalCore&) = delete; + + PhysicalCore(PhysicalCore&&) = default; + PhysicalCore& operator=(PhysicalCore&&) = default; + + /// Execute current jit state + void Run(); + /// Execute a single instruction in current jit. + void Step(); + /// Stop JIT execution/exit + void Stop(); + + // Shutdown this physical core. + void Shutdown(); + + Core::ARM_Interface& ArmInterface() { + return *arm_interface; + } + + const Core::ARM_Interface& ArmInterface() const { + return *arm_interface; + } + + bool IsMainCore() const { + return core_index == 0; + } + + bool IsSystemCore() const { + return core_index == 3; + } + + std::size_t CoreIndex() const { + return core_index; + } + + Kernel::Scheduler& Scheduler() { + return *scheduler; + } + + const Kernel::Scheduler& Scheduler() const { + return *scheduler; + } + +private: + std::size_t core_index; + std::unique_ptr<Core::ARM_Interface> arm_interface; + std::unique_ptr<Kernel::Scheduler> scheduler; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index d36fcd7d9..eb196a690 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -14,7 +14,6 @@ #include "common/logging/log.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/core_timing.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index dbcdb0b88..1d99bf7a2 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -15,7 +15,7 @@ #include "common/string_util.h" #include "core/arm/exclusive_monitor.h" #include "core/core.h" -#include "core/core_cpu.h" +#include "core/core_manager.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hle/kernel/address_arbiter.h" diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index e84e5ce0d..e965b5b04 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -13,7 +13,6 @@ #include "common/thread_queue_list.h" #include "core/arm/arm_interface.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hle/kernel/errors.h" @@ -356,7 +355,7 @@ void Thread::SetActivity(ThreadActivity value) { // Set status if not waiting if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); - Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); + kernel.PrepareReschedule(processor_id); } } else if (status == ThreadStatus::Paused) { // Ready to reschedule diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 745f2c4e8..a0c806e8f 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -7,7 +7,6 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/core_cpu.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" @@ -96,7 +95,7 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { } if (resume) { thread->ResumeFromWait(); - Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); + kernel.PrepareReschedule(thread->GetProcessorID()); } } diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 4d952adc0..15c09f04c 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -250,6 +250,10 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { auto& rstick_entry = npad_pad_states[controller_idx].r_stick; const auto& button_state = buttons[controller_idx]; const auto& analog_state = sticks[controller_idx]; + const auto [stick_l_x_f, stick_l_y_f] = + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetStatus(); + const auto [stick_r_x_f, stick_r_y_f] = + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]->GetStatus(); using namespace Settings::NativeButton; pad_state.a.Assign(button_state[A - BUTTON_HID_BEGIN]->GetStatus()); @@ -270,23 +274,32 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { pad_state.d_right.Assign(button_state[DRight - BUTTON_HID_BEGIN]->GetStatus()); pad_state.d_down.Assign(button_state[DDown - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_left.Assign(button_state[LStick_Left - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_up.Assign(button_state[LStick_Up - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_right.Assign(button_state[LStick_Right - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.l_stick_down.Assign(button_state[LStick_Down - BUTTON_HID_BEGIN]->GetStatus()); - - pad_state.r_stick_left.Assign(button_state[RStick_Left - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_up.Assign(button_state[RStick_Up - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_right.Assign(button_state[RStick_Right - BUTTON_HID_BEGIN]->GetStatus()); - pad_state.r_stick_down.Assign(button_state[RStick_Down - BUTTON_HID_BEGIN]->GetStatus()); + pad_state.l_stick_right.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::RIGHT)); + pad_state.l_stick_left.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::LEFT)); + pad_state.l_stick_up.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::UP)); + pad_state.l_stick_down.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( + Input::AnalogDirection::DOWN)); + + pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT)); + pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT)); + pad_state.r_stick_right.Assign( + analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::UP)); + pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] + ->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN)); pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus()); pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus()); - const auto [stick_l_x_f, stick_l_y_f] = - analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetStatus(); - const auto [stick_r_x_f, stick_r_y_f] = - analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]->GetStatus(); lstick_entry.x = static_cast<s32>(stick_l_x_f * HID_JOYSTICK_MAX); lstick_entry.y = static_cast<s32>(stick_l_y_f * HID_JOYSTICK_MAX); rstick_entry.x = static_cast<s32>(stick_r_x_f * HID_JOYSTICK_MAX); diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index 884ad173b..f67fab2f9 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -42,6 +42,26 @@ void BSD::Socket(Kernel::HLERequestContext& ctx) { rb.Push<u32>(0); // bsd errno } +void BSD::Select(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // ret + rb.Push<u32>(0); // bsd errno +} + +void BSD::Bind(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // ret + rb.Push<u32>(0); // bsd errno +} + void BSD::Connect(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service, "(STUBBED) called"); @@ -52,6 +72,26 @@ void BSD::Connect(Kernel::HLERequestContext& ctx) { rb.Push<u32>(0); // bsd errno } +void BSD::Listen(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // ret + rb.Push<u32>(0); // bsd errno +} + +void BSD::SetSockOpt(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 4}; + + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(0); // ret + rb.Push<u32>(0); // bsd errno +} + void BSD::SendTo(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service, "(STUBBED) called"); @@ -80,7 +120,7 @@ BSD::BSD(const char* name) : ServiceFramework(name) { {2, &BSD::Socket, "Socket"}, {3, nullptr, "SocketExempt"}, {4, nullptr, "Open"}, - {5, nullptr, "Select"}, + {5, &BSD::Select, "Select"}, {6, nullptr, "Poll"}, {7, nullptr, "Sysctl"}, {8, nullptr, "Recv"}, @@ -88,15 +128,15 @@ BSD::BSD(const char* name) : ServiceFramework(name) { {10, nullptr, "Send"}, {11, &BSD::SendTo, "SendTo"}, {12, nullptr, "Accept"}, - {13, nullptr, "Bind"}, + {13, &BSD::Bind, "Bind"}, {14, &BSD::Connect, "Connect"}, {15, nullptr, "GetPeerName"}, {16, nullptr, "GetSockName"}, {17, nullptr, "GetSockOpt"}, - {18, nullptr, "Listen"}, + {18, &BSD::Listen, "Listen"}, {19, nullptr, "Ioctl"}, {20, nullptr, "Fcntl"}, - {21, nullptr, "SetSockOpt"}, + {21, &BSD::SetSockOpt, "SetSockOpt"}, {22, nullptr, "Shutdown"}, {23, nullptr, "ShutdownAllSockets"}, {24, nullptr, "Write"}, diff --git a/src/core/hle/service/sockets/bsd.h b/src/core/hle/service/sockets/bsd.h index 0fe0e65c6..3098e3baf 100644 --- a/src/core/hle/service/sockets/bsd.h +++ b/src/core/hle/service/sockets/bsd.h @@ -18,7 +18,11 @@ private: void RegisterClient(Kernel::HLERequestContext& ctx); void StartMonitoring(Kernel::HLERequestContext& ctx); void Socket(Kernel::HLERequestContext& ctx); + void Select(Kernel::HLERequestContext& ctx); + void Bind(Kernel::HLERequestContext& ctx); void Connect(Kernel::HLERequestContext& ctx); + void Listen(Kernel::HLERequestContext& ctx); + void SetSockOpt(Kernel::HLERequestContext& ctx); void SendTo(Kernel::HLERequestContext& ctx); void Close(Kernel::HLERequestContext& ctx); diff --git a/src/core/settings.h b/src/core/settings.h index 421e76f5f..e1a9a0ffa 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -371,6 +371,11 @@ enum class SDMCSize : u64 { S1TB = 0x10000000000ULL, }; +enum class RendererBackend { + OpenGL = 0, + Vulkan = 1, +}; + struct Values { // System bool use_docked_mode; @@ -419,6 +424,10 @@ struct Values { SDMCSize sdmc_size; // Renderer + RendererBackend renderer_backend; + bool renderer_debug; + int vulkan_device; + float resolution_factor; bool use_frame_limit; u16 frame_limit; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 320e8ad73..0e72d31cd 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -46,6 +46,16 @@ static u64 GenerateTelemetryId() { return telemetry_id; } +static const char* TranslateRenderer(Settings::RendererBackend backend) { + switch (backend) { + case Settings::RendererBackend::OpenGL: + return "OpenGL"; + case Settings::RendererBackend::Vulkan: + return "Vulkan"; + } + return "Unknown"; +} + u64 GetTelemetryId() { u64 telemetry_id{}; const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + @@ -169,7 +179,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { AddField(field_type, "Audio_SinkId", Settings::values.sink_id); AddField(field_type, "Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core); - AddField(field_type, "Renderer_Backend", "OpenGL"); + AddField(field_type, "Renderer_Backend", TranslateRenderer(Settings::values.renderer_backend)); AddField(field_type, "Renderer_ResolutionFactor", Settings::values.resolution_factor); AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp index 9e028da89..c98c848cf 100644 --- a/src/input_common/main.cpp +++ b/src/input_common/main.cpp @@ -41,6 +41,7 @@ void Shutdown() { Input::UnregisterFactory<Input::MotionDevice>("motion_emu"); motion_emu.reset(); sdl.reset(); + udp.reset(); } Keyboard* GetKeyboard() { diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp index d2e9d278f..a2e0c0bd2 100644 --- a/src/input_common/sdl/sdl_impl.cpp +++ b/src/input_common/sdl/sdl_impl.cpp @@ -342,6 +342,22 @@ public: return std::make_tuple<float, float>(0.0f, 0.0f); } + bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override { + const auto [x, y] = GetStatus(); + const float directional_deadzone = 0.4f; + switch (direction) { + case Input::AnalogDirection::RIGHT: + return x > directional_deadzone; + case Input::AnalogDirection::LEFT: + return x < -directional_deadzone; + case Input::AnalogDirection::UP: + return y > directional_deadzone; + case Input::AnalogDirection::DOWN: + return y < -directional_deadzone; + } + return false; + } + private: std::shared_ptr<SDLJoystick> joystick; const int axis_x; diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index 5f5a9989c..2228571a6 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp @@ -14,7 +14,6 @@ #include "input_common/udp/client.h" #include "input_common/udp/protocol.h" -using boost::asio::ip::address_v4; using boost::asio::ip::udp; namespace InputCommon::CemuhookUDP { @@ -31,10 +30,10 @@ public: explicit Socket(const std::string& host, u16 port, u8 pad_index, u32 client_id, SocketCallback callback) - : client_id(client_id), timer(io_service), - send_endpoint(udp::endpoint(address_v4::from_string(host), port)), - socket(io_service, udp::endpoint(udp::v4(), 0)), pad_index(pad_index), - callback(std::move(callback)) {} + : callback(std::move(callback)), timer(io_service), + socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), + pad_index(pad_index), + send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {} void Stop() { io_service.stop(); @@ -126,7 +125,7 @@ static void SocketLoop(Socket* socket) { Client::Client(std::shared_ptr<DeviceStatus> status, const std::string& host, u16 port, u8 pad_index, u32 client_id) - : status(status) { + : status(std::move(status)) { StartCommunication(host, port, pad_index, client_id); } @@ -207,7 +206,7 @@ void TestCommunication(const std::string& host, u16 port, u8 pad_index, u32 clie Common::Event success_event; SocketCallback callback{[](Response::Version version) {}, [](Response::PortInfo info) {}, [&](Response::PadData data) { success_event.Set(); }}; - Socket socket{host, port, pad_index, client_id, callback}; + Socket socket{host, port, pad_index, client_id, std::move(callback)}; std::thread worker_thread{SocketLoop, &socket}; bool result = success_event.WaitFor(std::chrono::seconds(8)); socket.Stop(); @@ -267,7 +266,7 @@ CalibrationConfigurationJob::CalibrationConfigurationJob( complete_event.Set(); } }}; - Socket socket{host, port, pad_index, client_id, callback}; + Socket socket{host, port, pad_index, client_id, std::move(callback)}; std::thread worker_thread{SocketLoop, &socket}; complete_event.Wait(); socket.Stop(); diff --git a/src/input_common/udp/client.h b/src/input_common/udp/client.h index 0b21f4da6..b8c654755 100644 --- a/src/input_common/udp/client.h +++ b/src/input_common/udp/client.h @@ -11,7 +11,6 @@ #include <string> #include <thread> #include <tuple> -#include <vector> #include "common/common_types.h" #include "common/thread.h" #include "common/vector_math.h" diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h index 1b521860a..3ba4d1fc8 100644 --- a/src/input_common/udp/protocol.h +++ b/src/input_common/udp/protocol.h @@ -7,7 +7,6 @@ #include <array> #include <optional> #include <type_traits> -#include <vector> #include <boost/crc.hpp> #include "common/bit_field.h" #include "common/swap.h" diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp index a80f38614..ca99cc22f 100644 --- a/src/input_common/udp/udp.cpp +++ b/src/input_common/udp/udp.cpp @@ -2,7 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/logging/log.h" +#include <mutex> +#include <tuple> + #include "common/param_package.h" #include "core/frontend/input.h" #include "core/settings.h" @@ -14,7 +16,7 @@ namespace InputCommon::CemuhookUDP { class UDPTouchDevice final : public Input::TouchDevice { public: explicit UDPTouchDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {} - std::tuple<float, float, bool> GetStatus() const { + std::tuple<float, float, bool> GetStatus() const override { std::lock_guard guard(status->update_mutex); return status->touch_status; } @@ -26,7 +28,7 @@ private: class UDPMotionDevice final : public Input::MotionDevice { public: explicit UDPMotionDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {} - std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const { + std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override { std::lock_guard guard(status->update_mutex); return status->motion_status; } diff --git a/src/input_common/udp/udp.h b/src/input_common/udp/udp.h index ea3de60bb..4f83f0441 100644 --- a/src/input_common/udp/udp.h +++ b/src/input_common/udp/udp.h @@ -2,15 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <memory> -#include <unordered_map> -#include "input_common/main.h" -#include "input_common/udp/client.h" namespace InputCommon::CemuhookUDP { -class UDPTouchDevice; -class UDPMotionDevice; +class Client; class State { public: diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index ccfed4f2e..db9332d00 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,6 +29,8 @@ add_library(video_core STATIC gpu_synch.h gpu_thread.cpp gpu_thread.h + guest_driver.cpp + guest_driver.h macro_interpreter.cpp macro_interpreter.h memory_manager.cpp @@ -154,6 +156,7 @@ if (ENABLE_VULKAN) renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h renderer_vulkan/renderer_vulkan.h + renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp renderer_vulkan/vk_blit_screen.h renderer_vulkan/vk_buffer_cache.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0510ed777..186aca61d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -101,7 +101,10 @@ public: void TickFrame() { ++epoch; while (!pending_destruction.empty()) { - if (pending_destruction.front()->GetEpoch() + 1 > epoch) { + // Delay at least 4 frames before destruction. + // This is due to triple buffering happening on some drivers. + static constexpr u64 epochs_to_destroy = 5; + if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) { break; } pending_destruction.pop_front(); diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index 44b8b8d22..d56a47710 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { @@ -106,6 +107,9 @@ public: virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const = 0; virtual u32 GetBoundBuffer() const = 0; + + virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; + virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 110406f2f..4b824aa4e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con return result; } +VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { + return rasterizer.AccessGuestDriverProfile(); +} + +const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { + return rasterizer.AccessGuestDriverProfile(); +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 4ef3e0613..eeb79c56f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -218,6 +218,10 @@ public: return regs.tex_cb_index; } + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + + const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; + private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 58dfa8033..7cea146f0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b return result; } +VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { + return rasterizer.AccessGuestDriverProfile(); +} + +const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { + return rasterizer.AccessGuestDriverProfile(); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ee79260fc..8808bbf76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1306,6 +1306,10 @@ public: return regs.tex_cb_index; } + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + + const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array<u32, 0x40000>; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index ab5c8e622..81b6d9eff 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -227,6 +227,28 @@ enum class AtomicOp : u64 { Exch = 8, }; +enum class GlobalAtomicOp : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, + SafeAdd = 10, +}; + +enum class GlobalAtomicType : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + F32_FTZ_RN = 3, + F16x2_FTZ_RN = 4, + S64 = 5, +}; + enum class UniformType : u64 { UnsignedByte = 0, SignedByte = 1, @@ -958,6 +980,12 @@ union Instruction { } stg; union { + BitField<52, 4, GlobalAtomicOp> operation; + BitField<49, 3, GlobalAtomicType> type; + BitField<28, 20, s64> offset; + } atom; + + union { BitField<52, 4, AtomicOp> operation; BitField<28, 2, AtomicType> type; BitField<30, 22, s64> offset; @@ -1096,6 +1124,11 @@ union Instruction { } fset; union { + BitField<47, 1, u64> ftz; + BitField<48, 4, PredCondition> cond; + } fcmp; + + union { BitField<49, 1, u64> bf; BitField<35, 3, PredCondition> cond; BitField<50, 1, u64> ftz; @@ -1691,6 +1724,7 @@ public: ST_S, ST, // Store in generic memory STG, // Store in global memory + ATOM, // Atomic operation on global memory ATOMS, // Atomic operation on shared memory AL2P, // Transforms attribute memory into physical memory TEX, @@ -1772,6 +1806,7 @@ public: ICMP_R, ICMP_CR, ICMP_IMM, + FCMP_R, MUFU, // Multi-Function Operator RRO_C, // Range Reduction Operator RRO_R, @@ -1995,6 +2030,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), @@ -2075,6 +2111,7 @@ private: INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), + INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp new file mode 100644 index 000000000..6adef459e --- /dev/null +++ b/src/video_core/guest_driver.cpp @@ -0,0 +1,36 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <limits> + +#include "video_core/guest_driver.h" + +namespace VideoCore { + +void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { + if (texture_handler_size_deduced) { + return; + } + const std::size_t size = bound_offsets.size(); + if (size < 2) { + return; + } + std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); + u32 min_val = std::numeric_limits<u32>::max(); + for (std::size_t i = 1; i < size; ++i) { + if (bound_offsets[i] == bound_offsets[i - 1]) { + continue; + } + const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; + min_val = std::min(min_val, new_min); + } + if (min_val > 2) { + return; + } + texture_handler_size_deduced = true; + texture_handler_size = min_texture_handler_size * min_val; +} + +} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h new file mode 100644 index 000000000..fc1917347 --- /dev/null +++ b/src/video_core/guest_driver.h @@ -0,0 +1,41 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "common/common_types.h" + +namespace VideoCore { + +/** + * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect + * information necessary for impossible to avoid HLE methods like shader tracks as they are + * Entscheidungsproblems. + */ +class GuestDriverProfile { +public: + void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); + + u32 GetTextureHandlerSize() const { + return texture_handler_size; + } + + bool TextureHandlerSizeKnown() const { + return texture_handler_size_deduced; + } + +private: + // Minimum size of texture handler any driver can use. + static constexpr u32 min_texture_handler_size = 4; + // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily + // use 4 bytes instead. Thus, certain drivers may squish the size. + static constexpr u32 default_texture_handler_size = 8; + + u32 texture_handler_size = default_texture_handler_size; + bool texture_handler_size_deduced = false; +}; + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5b0eca9e2..c586cd6fe 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" +#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -78,5 +79,18 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, const DiskResourceLoadCallback& callback = {}) {} + + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. + GuestDriverProfile& AccessGuestDriverProfile() { + return guest_driver_profile; + } + + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. + const GuestDriverProfile& AccessGuestDriverProfile() const { + return guest_driver_profile; + } + +private: + GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c428f06e4..362942e09 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -55,16 +55,20 @@ namespace { template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - Tegra::Engines::ShaderType shader_type) { + Tegra::Engines::ShaderType shader_type, + std::size_t index = 0) { if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); return engine.GetTextureInfo(tex_handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); + const u32 offset = + entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { - return engine.GetStageTexture(shader_type, entry.GetOffset()); + return engine.GetStageTexture(shader_type, offset); } else { - return engine.GetTexture(entry.GetOffset()); + return engine.GetTexture(offset); } } @@ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetShaderEntries().samplers) { const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); + SetupTexture(binding++, texture, entry); + } + } } } @@ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().samplers) { - const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); + SetupTexture(binding++, texture, entry); + } + } } } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3c5bdd377..489eb143c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s } void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { + locker.SetBoundBuffer(usage.bound_buffer); for (const auto& key : usage.keys) { const auto [buffer, offset] = key.first; locker.InsertKey(buffer, offset, key.second); @@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() { ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, const ConstBufferLocker& locker) const { - return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), + return ShaderDiskCacheUsage{unique_identifier, variant, + locker.GetBoundBuffer(), locker.GetKeys(), locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2996aaf08..4735000b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -391,6 +391,7 @@ public: DeclareVertex(); DeclareGeometry(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareInternalFlags(); @@ -503,6 +504,16 @@ private: } } + void DeclareCustomVariables() { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { + code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); + } + if (num_custom_variables > 0) { + code.AddNewLine(); + } + } + void DeclarePredicates() { const auto& predicates = ir.GetPredicates(); for (const auto pred : predicates) { @@ -655,7 +666,8 @@ private: u32 binding = device.GetBaseBindings(stage).sampler; for (const auto& sampler : ir.GetSamplers()) { const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding++); + const std::string description = fmt::format("layout (binding = {}) uniform", binding); + binding += sampler.IsIndexed() ? sampler.Size() : 1; std::string sampler_type = [&]() { if (sampler.IsBuffer()) { @@ -682,7 +694,11 @@ private: sampler_type += "Shadow"; } - code.AddLine("{} {} {};", description, sampler_type, name); + if (!sampler.IsIndexed()) { + code.AddLine("{} {} {};", description, sampler_type, name); + } else { + code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size()); + } } if (!ir.GetSamplers().empty()) { code.AddNewLine(); @@ -775,6 +791,11 @@ private: return {GetRegister(index), Type::Float}; } + if (const auto cv = std::get_if<CustomVarNode>(&*node)) { + const u32 index = cv->GetIndex(); + return {GetCustomVariable(index), Type::Float}; + } + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { @@ -1019,7 +1040,6 @@ private: } return {{"gl_ViewportIndex", Type::Int}}; case 3: - UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); return {{"gl_PointSize", Type::Float}}; } return {}; @@ -1099,7 +1119,11 @@ private: } else if (!meta->ptp.empty()) { expr += "Offsets"; } - expr += '(' + GetSampler(meta->sampler) + ", "; + if (!meta->sampler.IsIndexed()) { + expr += '(' + GetSampler(meta->sampler) + ", "; + } else { + expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; + } expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow && !separate_dc ? 1 : 0) - 1); expr += '('; @@ -1311,6 +1335,8 @@ private: const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), Type::Uint}; + } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { + target = {GetCustomVariable(cv->GetIndex()), Type::Float}; } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -1858,10 +1884,7 @@ private: template <const std::string_view& opname, Type type> Expression Atomic(Operation operation) { - ASSERT(stage == ShaderType::Compute); - auto& smem = std::get<SmemNode>(*operation[0]); - - return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), + return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), Visit(operation[1]).As(type)), type}; } @@ -2241,6 +2264,10 @@ private: return GetDeclarationWithSuffix(index, "gpr"); } + std::string GetCustomVariable(u32 index) const { + return GetDeclarationWithSuffix(index, "custom_var"); + } + std::string GetPredicate(Tegra::Shader::Pred pred) const { return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index cf874a09a..1fc204f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -53,7 +53,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 11; +constexpr u32 NativeVersion = 12; // Making sure sizes doesn't change by accident static_assert(sizeof(ProgramVariant) == 20); @@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() { u32 num_bound_samplers{}; u32 num_bindless_samplers{}; if (file.ReadArray(&usage.unique_identifier, 1) != 1 || - file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || + file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { LOG_ERROR(Render_OpenGL, error_loading); @@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { u32 num_bindless_samplers{}; ShaderDiskCacheUsage usage; if (!LoadObjectFromPrecompiled(usage.unique_identifier) || - !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(usage.variant) || + !LoadObjectFromPrecompiled(usage.bound_buffer) || + !LoadObjectFromPrecompiled(num_keys) || !LoadObjectFromPrecompiled(num_bound_samplers) || !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; @@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(usage.bound_buffer) != 1 || file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { @@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p }; if (!SaveObjectToPrecompiled(usage.unique_identifier) || - !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 69a2fbdda..ef2371f6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>); struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + u32 bound_buffer{}; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e95eb069e..d4b81cd87 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -176,6 +176,19 @@ GLint GetSwizzleSource(SwizzleSource source) { return GL_NONE; } +GLenum GetComponent(PixelFormat format, bool is_first) { + switch (format) { + case PixelFormat::Z24S8: + case PixelFormat::Z32FS8: + return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + case PixelFormat::S8Z24: + return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + default: + UNREACHABLE(); + return GL_DEPTH_COMPONENT; + } +} + void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { if (params.IsBuffer()) { return; @@ -184,7 +197,7 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); if (params.num_levels == 1) { glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); } @@ -416,11 +429,21 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou if (new_swizzle == swizzle) return; swizzle = new_swizzle; - const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), - GetSwizzleSource(z_source), - GetSwizzleSource(w_source)}; + const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), + GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; const GLuint handle = GetTexture(); - glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + const PixelFormat format = surface.GetSurfaceParams().pixel_format; + switch (format) { + case PixelFormat::Z24S8: + case PixelFormat::Z32FS8: + case PixelFormat::S8Z24: + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + GetComponent(format, x_source == SwizzleSource::R)); + break; + default: + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + break; + } } OGLTextureView CachedSurfaceView::CreateTextureView() const { @@ -529,8 +552,11 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top), + static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom), + static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top), + static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom), + buffers, is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp new file mode 100644 index 000000000..d5032b432 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -0,0 +1,265 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <optional> +#include <vector> + +#include <fmt/format.h> + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/telemetry.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/frontend/emu_window.h" +#include "core/memory.h" +#include "core/perf_stats.h" +#include "core/settings.h" +#include "core/telemetry_session.h" +#include "video_core/gpu.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +namespace Vulkan { + +namespace { + +VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, + VkDebugUtilsMessageTypeFlagsEXT type, + const VkDebugUtilsMessengerCallbackDataEXT* data, + [[maybe_unused]] void* user_data) { + const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_}; + const char* message{data->pMessage}; + + if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) { + LOG_CRITICAL(Render_Vulkan, "{}", message); + } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) { + LOG_WARNING(Render_Vulkan, "{}", message); + } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) { + LOG_INFO(Render_Vulkan, "{}", message); + } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) { + LOG_DEBUG(Render_Vulkan, "{}", message); + } + return VK_FALSE; +} + +std::string GetReadableVersion(u32 version) { + return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), + VK_VERSION_PATCH(version)); +} + +std::string GetDriverVersion(const VKDevice& device) { + // Extracted from + // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 + const u32 version = device.GetDriverVersion(); + + if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + const u32 major = (version >> 22) & 0x3ff; + const u32 minor = (version >> 14) & 0x0ff; + const u32 secondary = (version >> 6) & 0x0ff; + const u32 tertiary = version & 0x003f; + return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary); + } + if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) { + const u32 major = version >> 14; + const u32 minor = version & 0x3fff; + return fmt::format("{}.{}", major, minor); + } + + return GetReadableVersion(version); +} + +std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) { + std::sort(std::begin(available_extensions), std::end(available_extensions)); + + static constexpr std::size_t AverageExtensionSize = 64; + std::string separated_extensions; + separated_extensions.reserve(available_extensions.size() * AverageExtensionSize); + + const auto end = std::end(available_extensions); + for (auto extension = std::begin(available_extensions); extension != end; ++extension) { + if (const bool is_last = extension + 1 == end; is_last) { + separated_extensions += *extension; + } else { + separated_extensions += fmt::format("{},", *extension); + } + } + return separated_extensions; +} + +} // Anonymous namespace + +RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system) + : RendererBase(window), system{system} {} + +RendererVulkan::~RendererVulkan() { + ShutDown(); +} + +void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { + const auto& layout = render_window.GetFramebufferLayout(); + if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) { + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; + const bool use_accelerated = + rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + const bool is_srgb = use_accelerated && screen_info.is_srgb; + if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { + swapchain->Create(layout.width, layout.height, is_srgb); + blit_screen->Recreate(); + } + + scheduler->WaitWorker(); + + swapchain->AcquireNextImage(); + const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated); + + scheduler->Flush(false, render_semaphore); + + if (swapchain->Present(render_semaphore, fence)) { + blit_screen->Recreate(); + } + + render_window.SwapBuffers(); + rasterizer->TickFrame(); + } + + render_window.PollEvents(); +} + +bool RendererVulkan::Init() { + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; + render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); + const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr); + + std::optional<vk::DebugUtilsMessengerEXT> callback; + if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) { + callback = CreateDebugCallback(dldi); + if (!callback) { + return false; + } + } + + if (!PickDevices(dldi)) { + if (callback) { + instance.destroy(*callback, nullptr, dldi); + } + return false; + } + debug_callback = UniqueDebugUtilsMessengerEXT( + *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>( + instance, nullptr, device->GetDispatchLoader())); + + Report(); + + memory_manager = std::make_unique<VKMemoryManager>(*device); + + resource_manager = std::make_unique<VKResourceManager>(*device); + + const auto& framebuffer = render_window.GetFramebufferLayout(); + swapchain = std::make_unique<VKSwapchain>(surface, *device); + swapchain->Create(framebuffer.width, framebuffer.height, false); + + scheduler = std::make_unique<VKScheduler>(*device, *resource_manager); + + rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, + *resource_manager, *memory_manager, *scheduler); + + blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, + *resource_manager, *memory_manager, *swapchain, + *scheduler, screen_info); + + return true; +} + +void RendererVulkan::ShutDown() { + if (!device) { + return; + } + const auto dev = device->GetLogical(); + const auto& dld = device->GetDispatchLoader(); + if (dev && dld.vkDeviceWaitIdle) { + dev.waitIdle(dld); + } + + rasterizer.reset(); + blit_screen.reset(); + scheduler.reset(); + swapchain.reset(); + memory_manager.reset(); + resource_manager.reset(); + device.reset(); +} + +std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( + const vk::DispatchLoaderDynamic& dldi) { + const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( + {}, + vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | + vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | + vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | + vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose, + vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | + vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | + vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, + &DebugCallback, nullptr); + vk::DebugUtilsMessengerEXT callback; + if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) != + vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); + return {}; + } + return callback; +} + +bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) { + const auto devices = instance.enumeratePhysicalDevices(dldi); + + // TODO(Rodrigo): Choose device from config file + const s32 device_index = Settings::values.vulkan_device; + if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { + LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); + return false; + } + const vk::PhysicalDevice physical_device = devices[device_index]; + + if (!VKDevice::IsSuitable(dldi, physical_device, surface)) { + return false; + } + + device = std::make_unique<VKDevice>(dldi, physical_device, surface); + return device->Create(dldi, instance); +} + +void RendererVulkan::Report() const { + const std::string vendor_name{device->GetVendorName()}; + const std::string model_name{device->GetModelName()}; + const std::string driver_version = GetDriverVersion(*device); + const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); + + const std::string api_version = GetReadableVersion(device->GetApiVersion()); + + const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); + + LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); + LOG_INFO(Render_Vulkan, "Device: {}", model_name); + LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version); + + auto& telemetry_session = system.TelemetrySession(); + constexpr auto field = Telemetry::FieldType::UserSystem; + telemetry_session.AddField(field, "GPU_Vendor", vendor_name); + telemetry_session.AddField(field, "GPU_Model", model_name); + telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); + telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version); + telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); +} + +} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 939eebe83..9840f26e5 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -400,8 +400,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); - Test(extension, nv_device_diagnostic_checkpoints, - VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true); + if (Settings::values.renderer_debug) { + Test(extension, nv_device_diagnostic_checkpoints, + VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true); + } } if (khr_shader_float16_int8) { diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b53078721..24a658dce 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -353,6 +353,7 @@ private: DeclareFragment(); DeclareCompute(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareSharedMemory(); @@ -586,6 +587,15 @@ private: } } + void DeclareCustomVariables() { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { + const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); + Name(id, fmt::format("custom_var_{}", i)); + custom_variables.emplace(i, AddGlobalVariable(id)); + } + } + void DeclarePredicates() { for (const auto pred : ir.GetPredicates()) { const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); @@ -982,6 +992,11 @@ private: return {OpLoad(t_float, registers.at(index)), Type::Float}; } + if (const auto cv = std::get_if<CustomVarNode>(&*node)) { + const u32 index = cv->GetIndex(); + return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; + } + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { return {Constant(t_uint, immediate->GetValue()), Type::Uint}; } @@ -1123,15 +1138,7 @@ private: } if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - - Id offset = OpISub(t_uint, real, base); - offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); - return {OpLoad(t_float, - OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), - Type::Float}; + return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; } if (const auto lmem = std::get_if<LmemNode>(&*node)) { @@ -1142,10 +1149,7 @@ private: } if (const auto smem = std::get_if<SmemNode>(&*node)) { - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); - return {OpLoad(t_uint, pointer), Type::Uint}; + return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; } if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { @@ -1339,20 +1343,13 @@ private: target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; + target = {GetSharedMemoryPointer(*smem), Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); + target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), - Type::Float}; + } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { + target = {custom_variables.at(cv->GetIndex()), Type::Float}; } else { UNIMPLEMENTED(); @@ -1804,11 +1801,16 @@ private: return {}; } - Expression UAtomicAdd(Operation operation) { - const auto& smem = std::get<SmemNode>(*operation[0]); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); + Expression AtomicAdd(Operation operation) { + Id pointer; + if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { + pointer = GetSharedMemoryPointer(*smem); + } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { + pointer = GetGlobalMemoryPointer(*gmem); + } else { + UNREACHABLE(); + return {Constant(t_uint, 0), Type::Uint}; + } const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); const Id semantics = Constant(t_uint, 0U); @@ -2243,6 +2245,22 @@ private: return {}; } + Id GetGlobalMemoryPointer(const GmemNode& gmem) { + const Id real = AsUint(Visit(gmem.GetRealAddress())); + const Id base = AsUint(Visit(gmem.GetBaseAddress())); + const Id diff = OpISub(t_uint, real, base); + const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); + const Id buffer = global_buffers.at(gmem.GetDescriptor()); + return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); + } + + Id GetSharedMemoryPointer(const SmemNode& smem) { + ASSERT(stage == ShaderType::Compute); + Id address = AsUint(Visit(smem.GetAddress())); + address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); + return OpAccessChain(t_smem_uint, shared_memory, address); + } + static constexpr std::array operation_decompilers = { &SPIRVDecompiler::Assign, @@ -2389,7 +2407,7 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::UAtomicAdd, + &SPIRVDecompiler::AtomicAdd, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, @@ -2485,9 +2503,9 @@ private: Id t_smem_uint{}; - const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); + const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); + Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); const Id t_gmem_struct = MemberDecorate( Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); @@ -2508,6 +2526,7 @@ private: Id out_vertex{}; Id in_vertex{}; std::map<u32, Id> registers; + std::map<u32, Id> custom_variables; std::map<Tegra::Shader::Pred, Id> predicates; std::map<u32, Id> flow_variables; Id local_memory{}; diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index a2f0044ba..cca13bcde 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -65,8 +65,8 @@ public: void DetachSegment(ASTNode start, ASTNode end); void Remove(ASTNode node); - ASTNode first{}; - ASTNode last{}; + ASTNode first; + ASTNode last; }; class ASTProgram { @@ -299,9 +299,9 @@ private: friend class ASTZipper; ASTData data; - ASTNode parent{}; - ASTNode next{}; - ASTNode previous{}; + ASTNode parent; + ASTNode next; + ASTNode previous; ASTZipper* manager{}; }; diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index a4a0319eb..0638be8cb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle return value; } +std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { + if (bound_buffer_saved) { + return bound_buffer; + } + if (!engine) { + return std::nullopt; + } + bound_buffer_saved = true; + bound_buffer = engine->GetBoundBuffer(); + return bound_buffer; +} + void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { keys.insert_or_assign({buffer, offset}, value); } @@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes bindless_samplers.insert_or_assign({buffer, offset}, sampler); } +void ConstBufferLocker::SetBoundBuffer(u32 buffer) { + bound_buffer_saved = true; + bound_buffer = buffer; +} + bool ConstBufferLocker::IsConsistent() const { if (!engine) { return false; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..d3ea11087 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -10,6 +10,7 @@ #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" namespace VideoCommon::Shader { @@ -40,6 +41,8 @@ public: std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); + std::optional<u32> ObtainBoundBuffer(); + /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); @@ -49,6 +52,9 @@ public: /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + /// Set the bound buffer for this locker. + void SetBoundBuffer(u32 buffer); + /// Checks keys and samplers against engine's current const buffers. Returns true if they are /// the same value, false otherwise; bool IsConsistent() const; @@ -71,12 +77,27 @@ public: return bindless_samplers; } + /// Gets bound buffer used on this shader + u32 GetBoundBuffer() const { + return bound_buffer; + } + + /// Obtains access to the guest driver's profile. + VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { + if (engine) { + return &engine->AccessGuestDriverProfile(); + } + return nullptr; + } + private: const Tegra::Engines::ShaderType stage; Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; BindlessSamplerMap bindless_samplers; + bool bound_buffer_saved{}; + u32 bound_buffer{}; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..6b697ed5d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <cstring> +#include <limits> #include <set> #include <fmt/format.h> @@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, + const std::list<Sampler>& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); + return; + } + if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + return; + } + u32 count{}; + std::vector<u32> bound_offsets; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + ++count; + bound_offsets.emplace_back(sampler.GetOffset()); + } + if (count > 1) { + gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + } +} + +std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, + VideoCore::GuestDriverProfile* gpu_driver, + const std::list<Sampler>& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); + return std::nullopt; + } + const u32 base_offset = sampler_to_deduce.GetOffset(); + u32 max_offset{std::numeric_limits<u32>::max()}; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + if (sampler.GetOffset() > base_offset) { + max_offset = std::min(sampler.GetOffset(), max_offset); + } + } + if (max_offset == std::numeric_limits<u32>::max()) { + return std::nullopt; + } + return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); +} + } // Anonymous namespace class ASTDecoder { @@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { return pc + 1; } +void ShaderIR::PostDecode() { + // Deduce texture handler size if needed + auto gpu_driver = locker.AccessGuestDriverProfile(); + DeduceTextureHandlerSize(gpu_driver, used_samplers); + // Deduce Indexed Samplers + if (!uses_indexed_samplers) { + return; + } + for (auto& sampler : used_samplers) { + if (!sampler.IsIndexed()) { + continue; + } + if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { + sampler.SetSize(*size); + } else { + LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); + sampler.SetSize(1); + } + } +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index fcedd2af6..90240c765 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -21,7 +21,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() -> Node { + Node op_b = [&] { if (instr.is_b_imm) { return GetImmediate19(instr); } else if (instr.is_b_gpr) { @@ -141,6 +141,15 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::FCMP_R: { + UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); + Node op_c = GetRegister(instr.gpr39); + Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); + SetRegister( + bb, instr.gpr0, + Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); + break; + } case OpCode::Id::RRO_C: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 371fae127..e60875cc4 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -297,7 +297,7 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod const Node one = Immediate(1); const Node two = Immediate(2); - Node value{}; + Node value; for (u32 i = 0; i < lop_iterations; ++i) { const Node shift_amount = Immediate(i); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7591a715f..b5fbc4d58 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -19,9 +19,12 @@ namespace VideoCommon::Shader { using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; +using Tegra::Shader::GlobalAtomicOp; +using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::StoreType; namespace { @@ -61,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { } } +Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), + Immediate(size)); +} + +Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), + std::move(offset), Immediate(size)); +} + +Node Sign16Extend(Node value) { + Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); + return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); +} + } // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -136,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); [[fallthrough]]; case OpCode::Id::LD_S: { - const auto GetMemory = [&](s32 offset) { + const auto GetAddress = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); - const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), - immediate_offset); - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) - : GetLocalMemory(address); + return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); + }; + const auto GetMemory = [&](s32 offset) { + return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) + : GetLocalMemory(GetAddress(offset)); }; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: - case Tegra::Shader::StoreType::Bits64: - case Tegra::Shader::StoreType::Bits128: { - const u32 count = [&]() { + case StoreType::Signed16: + SetRegister(bb, instr.gpr0, + Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); + break; + case StoreType::Bits32: + case StoreType::Bits64: + case StoreType::Bits128: { + const u32 count = [&] { switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: return 1; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: return 2; - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: return 4; default: UNREACHABLE(); @@ -212,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { // To handle unaligned loads get the bytes used to dereference global memory and extract // those bytes from the loaded u32. if (IsUnaligned(type)) { - Node mask = Immediate(GetUnalignedMask(type)); - Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - - gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), - std::move(offset), Immediate(size)); + gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); } SetTemporary(bb, i, gmem); @@ -269,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); }; - const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L - ? &ShaderIR::SetLocalMemory - : &ShaderIR::SetSharedMemory; + const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; + const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; + const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Signed16: { + Node address = GetAddress(0); + Node memory = (this->*get_memory)(address); + (this->*set_memory)( + bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); + break; + } default: UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); @@ -323,18 +354,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Node value = GetRegister(instr.gpr0.Value() + i); if (IsUnaligned(type)) { - Node mask = Immediate(GetUnalignedMask(type)); - Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); - - value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, - Immediate(size)); + const u32 mask = GetUnalignedMask(type); + value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); } bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::ATOM: { + UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", + static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", + static_cast<int>(instr.atom.type.Value())); + + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. + break; + } + + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::ATOMS: { UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", static_cast<int>(instr.atoms.operation.Value())); @@ -348,7 +393,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { Node memory = GetSharedMemory(std::move(address)); Node data = GetRegister(instr.gpr20); - Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); + Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 7321698b2..4944e9d69 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -69,13 +69,16 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { case OpCode::Id::MOV_SYS: { const Node value = [this, instr] { switch (instr.sys20) { + case SystemVariable::LaneId: + LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); + return Immediate(0U); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); - return Immediate(0u); + return Immediate(0U); case SystemVariable::Tid: { Node value = Immediate(0); value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9); @@ -188,7 +191,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", static_cast<u32>(cc)); - if (disable_flow_stack) { + if (decompiled) { break; } @@ -200,7 +203,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", static_cast<u32>(cc)); - if (disable_flow_stack) { + if (decompiled) { break; } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b567e39d..351c8c2f1 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, + {}, {}, component, element, {}}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto derivate_reg = instr.gpr20.Value(); const auto texture_type = instr.txd.texture_type.Value(); const auto coord_count = GetCoordCount(texture_type); - + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) + is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) : GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; if (sampler == nullptr) { @@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, + {}, {}, {}, element, index_var}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { continue; } auto params = coords; - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporary(bb, indexer++, value); } @@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, // Otherwise create a new mapping for this sampler const auto next_index = static_cast<u32>(used_samplers.size()); return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, - info.is_buffer); + info.is_buffer, false); } -const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, +const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); - const auto [base_sampler, buffer, offset] = - TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_sampler != nullptr); - if (base_sampler == nullptr) { + const auto [base_node, tracked_sampler_info] = + TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); + ASSERT(base_node != nullptr); + if (base_node == nullptr) { return nullptr; } - const auto info = GetSamplerInfo(sampler_info, offset, buffer); + if (const auto bindless_sampler_info = + std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { + const u32 buffer = bindless_sampler_info->GetIndex(); + const u32 offset = bindless_sampler_info->GetOffset(); + const auto info = GetSamplerInfo(sampler_info, offset, buffer); + + // If this sampler has already been used, return the existing mapping. + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [buffer = buffer, offset = offset](const Sampler& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != used_samplers.end()) { + ASSERT(it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); + return &*it; + } - // If this sampler has already been used, return the existing mapping. - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer = buffer, offset = offset](const Sampler& entry) { - return entry.GetBuffer() == buffer && entry.GetOffset() == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && - it->IsShadow() == info.is_shadow); - return &*it; - } + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast<u32>(used_samplers.size()); + return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, + info.is_shadow, info.is_buffer, false); + } else if (const auto array_sampler_info = + std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { + const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; + index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); + const auto info = GetSamplerInfo(sampler_info, base_offset); + + // If this sampler has already been used, return the existing mapping. + const auto it = std::find_if( + used_samplers.begin(), used_samplers.end(), + [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); + if (it != used_samplers.end()) { + ASSERT(!it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && + it->IsBuffer() == info.is_buffer && it->IsIndexed()); + return &*it; + } - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, - info.is_shadow, info.is_buffer); + uses_indexed_samplers = true; + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast<u32>(used_samplers.size()); + return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, + info.is_shadow, info.is_buffer, true); + } + return nullptr; } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, "This method is not supported."); const SamplerInfo info{texture_type, is_array, is_shadow, false}; - const Sampler* sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); + Node index_var{}; + const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) + : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { for (u32 element = 0; element < values.size(); ++element) { @@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; + MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, + lod, {}, element, index_var}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -596,7 +630,7 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); } - Node dc{}; + Node dc; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used @@ -632,7 +666,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, const Node array = is_array ? GetRegister(array_register) : nullptr; - Node dc{}; + Node dc; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used @@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de u64 parameter_register = instr.gpr20.Value(); const SamplerInfo info{texture_type, is_array, depth_compare, false}; - const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) + Node index_var{}; + const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info) : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { @@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; + *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, + index_var}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } @@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 075c7d07c..a0a7b9111 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,7 +162,7 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - UAtomicAdd, /// (smem, uint) -> uint + AtomicAdd, /// (memory, {u}int) -> {u}int Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void @@ -212,6 +212,7 @@ enum class MetaStackClass { class OperationNode; class ConditionalNode; class GprNode; +class CustomVarNode; class ImmediateNode; class InternalFlagNode; class PredicateNode; @@ -223,26 +224,32 @@ class SmemNode; class GmemNode; class CommentNode; -using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, +using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>; using Node = std::shared_ptr<NodeData>; using Node4 = std::array<Node, 4>; using NodeBlock = std::vector<Node>; +class BindlessSamplerNode; +class ArraySamplerNode; + +using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; +using TrackSampler = std::shared_ptr<TrackSamplerData>; + class Sampler { public: /// This constructor is for bound samplers constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_buffer{is_buffer} {} + is_buffer{is_buffer}, is_indexed{is_indexed} {} /// This constructor is for bindless samplers constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} + is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} constexpr u32 GetIndex() const { return index; @@ -276,16 +283,72 @@ public: return is_bindless; } + constexpr bool IsIndexed() const { + return is_indexed; + } + + constexpr u32 Size() const { + return size; + } + + constexpr void SetSize(u32 new_size) { + size = new_size; + } + private: u32 index{}; ///< Emulated index given for the this sampler. u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). + u32 size{}; ///< Size of the sampler if indexed. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. + bool is_indexed{}; ///< Whether this sampler is an indexed array of textures. +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class ArraySamplerNode final { +public: + explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) + : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetBaseOffset() const { + return base_offset; + } + + constexpr u32 GetIndexVar() const { + return bindless_var; + } + +private: + u32 index; + u32 base_offset; + u32 bindless_var; +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class BindlessSamplerNode final { +public: + explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetOffset() const { + return offset; + } + +private: + u32 index; + u32 offset; }; class Image final { @@ -380,8 +443,9 @@ struct MetaTexture { std::vector<Node> derivates; Node bias; Node lod; - Node component{}; + Node component; u32 element{}; + Node index; }; struct MetaImage { @@ -488,6 +552,19 @@ private: Tegra::Shader::Register index{}; }; +/// A custom variable +class CustomVarNode final { +public: + explicit constexpr CustomVarNode(u32 index) : index{index} {} + + constexpr u32 GetIndex() const { + return index; + } + +private: + u32 index{}; +}; + /// A 32-bits value that represents an immediate value class ImmediateNode final { public: diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 0c2aa749b..11231bbea 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) { return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); } +template <typename T, typename... Args> +TrackSampler MakeTrackSampler(Args&&... args) { + static_assert(std::is_convertible_v<T, TrackSamplerData>); + return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); +} + template <typename... Args> Node Operation(OperationCode code, Args&&... args) { if constexpr (sizeof...(args) == 0) { diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..3a5d280a9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet ConstBufferLocker& locker) : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); + PostDecode(); } ShaderIR::~ShaderIR() = default; @@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) { return MakeNode<GprNode>(reg); } +Node ShaderIR::GetCustomVariable(u32 id) { + return MakeNode<CustomVarNode>(id); +} + Node ShaderIR::GetImmediate19(Instruction instr) { return Immediate(instr.alu.GetImm20_19()); } @@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { return id; } +u32 ShaderIR::NewCustomVariable() { + return num_custom_variables++; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..b0851c3be 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -180,6 +180,10 @@ public: return amend_code[index]; } + u32 GetNumCustomVariables() const { + return num_custom_variables; + } + private: friend class ASTDecoder; @@ -191,6 +195,7 @@ private: }; void Decode(); + void PostDecode(); NodeBlock DecodeRange(u32 begin, u32 end); void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); @@ -235,6 +240,8 @@ private: /// Generates a node for a passed register. Node GetRegister(Tegra::Shader::Register reg); + /// Generates a node for a custom variable + Node GetCustomVariable(u32 id); /// Generates a node representing a 19-bit immediate value Node GetImmediate19(Tegra::Shader::Instruction instr); /// Generates a node representing a 32-bit immediate value @@ -321,7 +328,7 @@ private: std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses a texture sampler for a bindless texture. - const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, + const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional<SamplerInfo> sampler_info = std::nullopt); /// Accesses an image. @@ -387,6 +394,9 @@ private: std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; + std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, @@ -399,6 +409,8 @@ private: /// Register new amending code and obtain the reference id. std::size_t DeclareAmend(Node new_amend); + u32 NewCustomVariable(); + const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; @@ -414,6 +426,7 @@ private: NodeBlock global_code; ASTManager program_manager{true, true}; std::vector<Node> amend_code; + u32 num_custom_variables{}; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; @@ -431,6 +444,7 @@ private: bool uses_instance_id{}; bool uses_vertex_id{}; bool uses_warps{}; + bool uses_indexed_samplers{}; Tegra::Shader::Header header; }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 165c79330..face8c943 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "video_core/shader/node.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, } return {}; } + +std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { + if (operation.GetCode() != OperationCode::UAdd) { + return std::nullopt; + } + Node gpr; + Node offset; + ASSERT(operation.GetOperandsCount() == 2); + for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { + Node operand = operation[i]; + if (std::holds_alternative<ImmediateNode>(*operand)) { + offset = operation[i]; + } else if (std::holds_alternative<GprNode>(*operand)) { + gpr = operation[i]; + } + } + if (offset && gpr) { + return std::make_pair(gpr, offset); + } + return std::nullopt; +} + +bool AmendNodeCv(std::size_t amend_index, Node node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { + operation->SetAmendIndex(amend_index); + return true; + } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + conditional->SetAmendIndex(amend_index); + return true; + } + return false; +} + } // Anonymous namespace +std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor) { + if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { + // Constant buffer found, test if it's an immediate + const auto offset = cbuf->GetOffset(); + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { + auto track = + MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); + return {tracked, track}; + } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { + auto bound_buffer = locker.ObtainBoundBuffer(); + if (!bound_buffer) { + return {}; + } + if (*bound_buffer != cbuf->GetIndex()) { + return {}; + } + auto pair = DecoupleIndirectRead(*operation); + if (!pair) { + return {}; + } + auto [gpr, base_offset] = *pair; + const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); + auto gpu_driver = locker.AccessGuestDriverProfile(); + if (gpu_driver == nullptr) { + return {}; + } + const u32 bindless_cv = NewCustomVariable(); + const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, + Immediate(gpu_driver->GetTextureHandlerSize())); + + const Node cv_node = GetCustomVariable(bindless_cv); + Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); + const std::size_t amend_index = DeclareAmend(amend_op); + AmendNodeCv(amend_index, code[cursor]); + // TODO Implement Bindless Index custom variable + auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), + offset_inm->GetValue(), bindless_cv); + return {tracked, track}; + } + return {}; + } + if (const auto gpr = std::get_if<GprNode>(&*tracked)) { + if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { + return {}; + } + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); + if (!source) { + return {}; + } + return TrackBindlessSampler(source, code, new_cursor); + } + if (const auto operation = std::get_if<OperationNode>(&*tracked)) { + for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { + if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); + std::get<0>(found)) { + // Cbuf found in operand. + return found; + } + } + return {}; + } + if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { + const auto& conditional_code = conditional->GetCode(); + return TrackBindlessSampler(tracked, conditional_code, + static_cast<s64>(conditional_code.size())); + } + return {}; +} + std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 829268b4c..84469b7ba 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -135,7 +135,7 @@ std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& i for (u32 level = 0; level < mipmaps; level++) { const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(width, height, layer, level); + result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); } } return result; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 8e947394c..a5f81a8a0 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -3,19 +3,32 @@ // Refer to the license.txt file included. #include <memory> +#include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" #include "video_core/gpu_asynch.h" #include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#ifdef HAS_VULKAN +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#endif #include "video_core/video_core.h" namespace VideoCore { std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); + switch (Settings::values.renderer_backend) { + case Settings::RendererBackend::OpenGL: + return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); +#ifdef HAS_VULKAN + case Settings::RendererBackend::Vulkan: + return std::make_unique<Vulkan::RendererVulkan>(emu_window, system); +#endif + default: + return nullptr; + } } std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { diff --git a/src/web_service/telemetry_json.cpp b/src/web_service/telemetry_json.cpp index 9156ce802..7538389bf 100644 --- a/src/web_service/telemetry_json.cpp +++ b/src/web_service/telemetry_json.cpp @@ -117,6 +117,7 @@ bool TelemetryJson::SubmitTestcase() { impl->SerializeSection(Telemetry::FieldType::Session, "Session"); impl->SerializeSection(Telemetry::FieldType::UserFeedback, "UserFeedback"); impl->SerializeSection(Telemetry::FieldType::UserSystem, "UserSystem"); + impl->SerializeSection(Telemetry::FieldType::UserConfig, "UserConfig"); auto content = impl->TopSection().dump(); Client client(impl->host, impl->username, impl->token); diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index a3fb91d29..b841e63fa 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -200,3 +200,8 @@ if (MSVC) copy_yuzu_SDL_deps(yuzu) copy_yuzu_unicorn_deps(yuzu) endif() + +if (ENABLE_VULKAN) + target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include) + target_compile_definitions(yuzu PRIVATE HAS_VULKAN) +endif() diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 7490fb718..55a37fffa 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -2,19 +2,30 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <glad/glad.h> + #include <QApplication> #include <QHBoxLayout> #include <QKeyEvent> +#include <QMessageBox> #include <QOffscreenSurface> #include <QOpenGLWindow> #include <QPainter> #include <QScreen> +#include <QStringList> #include <QWindow> +#ifdef HAS_VULKAN +#include <QVulkanWindow> +#endif + #include <fmt/format.h> + +#include "common/assert.h" #include "common/microprofile.h" #include "common/scm_rev.h" #include "core/core.h" #include "core/frontend/framebuffer_layout.h" +#include "core/frontend/scope_acquire_window_context.h" #include "core/settings.h" #include "input_common/keyboard.h" #include "input_common/main.h" @@ -114,19 +125,10 @@ private: QOpenGLContext context; }; -// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL -// context. -// The corresponding functionality is handled in EmuThread instead -class GGLWidgetInternal : public QOpenGLWindow { +class GWidgetInternal : public QWindow { public: - GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context) - : QOpenGLWindow(shared_context), parent(parent) {} - - void paintEvent(QPaintEvent* ev) override { - if (do_painting) { - QPainter painter(this); - } - } + GWidgetInternal(GRenderWindow* parent) : parent(parent) {} + virtual ~GWidgetInternal() = default; void resizeEvent(QResizeEvent* ev) override { parent->OnClientAreaResized(ev->size().width(), ev->size().height()); @@ -182,11 +184,47 @@ public: do_painting = true; } + std::pair<unsigned, unsigned> GetSize() const { + return std::make_pair(width(), height()); + } + +protected: + bool IsPaintingEnabled() const { + return do_painting; + } + private: GRenderWindow* parent; - bool do_painting; + bool do_painting = false; +}; + +// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL +// context. +// The corresponding functionality is handled in EmuThread instead +class GGLWidgetInternal final : public GWidgetInternal, public QOpenGLWindow { +public: + GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context) + : GWidgetInternal(parent), QOpenGLWindow(shared_context) {} + ~GGLWidgetInternal() override = default; + + void paintEvent(QPaintEvent* ev) override { + if (IsPaintingEnabled()) { + QPainter painter(this); + } + } }; +#ifdef HAS_VULKAN +class GVKWidgetInternal final : public GWidgetInternal { +public: + GVKWidgetInternal(GRenderWindow* parent, QVulkanInstance* instance) : GWidgetInternal(parent) { + setSurfaceType(QSurface::SurfaceType::VulkanSurface); + setVulkanInstance(instance); + } + ~GVKWidgetInternal() override = default; +}; +#endif + GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread) : QWidget(parent), emu_thread(emu_thread) { setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") @@ -201,9 +239,15 @@ GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread) GRenderWindow::~GRenderWindow() { InputCommon::Shutdown(); + + // Avoid an unordered destruction that generates a segfault + delete child; } void GRenderWindow::moveContext() { + if (!context) { + return; + } DoneCurrent(); // If the thread started running, move the GL Context to the new thread. Otherwise, move it @@ -215,8 +259,9 @@ void GRenderWindow::moveContext() { } void GRenderWindow::SwapBuffers() { - context->swapBuffers(child); - + if (context) { + context->swapBuffers(child); + } if (!first_frame) { first_frame = true; emit FirstFrameDisplayed(); @@ -224,15 +269,38 @@ void GRenderWindow::SwapBuffers() { } void GRenderWindow::MakeCurrent() { - context->makeCurrent(child); + if (context) { + context->makeCurrent(child); + } } void GRenderWindow::DoneCurrent() { - context->doneCurrent(); + if (context) { + context->doneCurrent(); + } } void GRenderWindow::PollEvents() {} +bool GRenderWindow::IsShown() const { + return !isMinimized(); +} + +void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const { +#ifdef HAS_VULKAN + const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); + const VkInstance instance_copy = vk_instance->vkInstance(); + const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child); + + std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); + std::memcpy(instance, &instance_copy, sizeof(instance_copy)); + std::memcpy(surface, &surface_copy, sizeof(surface_copy)); +#else + UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan"); +#endif +} + // On Qt 5.0+, this correctly gets the size of the framebuffer (pixels). // // Older versions get the window size (density independent pixels), @@ -241,10 +309,9 @@ void GRenderWindow::PollEvents() {} void GRenderWindow::OnFramebufferSizeChanged() { // Screen changes potentially incur a change in screen DPI, hence we should update the // framebuffer size - const qreal pixel_ratio = GetWindowPixelRatio(); - const u32 width = child->QPaintDevice::width() * pixel_ratio; - const u32 height = child->QPaintDevice::height() * pixel_ratio; - UpdateCurrentFramebufferLayout(width, height); + const qreal pixelRatio{GetWindowPixelRatio()}; + const auto size{child->GetSize()}; + UpdateCurrentFramebufferLayout(size.first * pixelRatio, size.second * pixelRatio); } void GRenderWindow::ForwardKeyPressEvent(QKeyEvent* event) { @@ -290,7 +357,7 @@ qreal GRenderWindow::GetWindowPixelRatio() const { } std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF pos) const { - const qreal pixel_ratio = GetWindowPixelRatio(); + const qreal pixel_ratio{GetWindowPixelRatio()}; return {static_cast<u32>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})), static_cast<u32>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))}; } @@ -356,50 +423,46 @@ std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedCont return std::make_unique<GGLContext>(context.get()); } -void GRenderWindow::InitRenderTarget() { +bool GRenderWindow::InitRenderTarget() { shared_context.reset(); context.reset(); - - delete child; - child = nullptr; - - delete container; - container = nullptr; - - delete layout(); + if (child) { + delete child; + } + if (container) { + delete container; + } + if (layout()) { + delete layout(); + } first_frame = false; - // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, - // WA_DontShowOnScreen, WA_DeleteOnClose - QSurfaceFormat fmt; - fmt.setVersion(4, 3); - fmt.setProfile(QSurfaceFormat::CompatibilityProfile); - fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); - // TODO: expose a setting for buffer value (ie default/single/double/triple) - fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); - shared_context = std::make_unique<QOpenGLContext>(); - shared_context->setFormat(fmt); - shared_context->create(); - context = std::make_unique<QOpenGLContext>(); - context->setShareContext(shared_context.get()); - context->setFormat(fmt); - context->create(); - fmt.setSwapInterval(0); + switch (Settings::values.renderer_backend) { + case Settings::RendererBackend::OpenGL: + if (!InitializeOpenGL()) { + return false; + } + break; + case Settings::RendererBackend::Vulkan: + if (!InitializeVulkan()) { + return false; + } + break; + } - child = new GGLWidgetInternal(this, shared_context.get()); container = QWidget::createWindowContainer(child, this); - QBoxLayout* layout = new QHBoxLayout(this); + layout->addWidget(container); layout->setMargin(0); setLayout(layout); - // Reset minimum size to avoid unwanted resizes when this function is called for a second time. + // Reset minimum required size to avoid resizing issues on the main window after restarting. setMinimumSize(1, 1); - // Show causes the window to actually be created and the OpenGL context as well, but we don't - // want the widget to be shown yet, so immediately hide it. + // Show causes the window to actually be created and the gl context as well, but we don't want + // the widget to be shown yet, so immediately hide it. show(); hide(); @@ -410,9 +473,17 @@ void GRenderWindow::InitRenderTarget() { OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); OnFramebufferSizeChanged(); - NotifyClientAreaSizeChanged(std::pair<unsigned, unsigned>(child->width(), child->height())); + NotifyClientAreaSizeChanged(child->GetSize()); BackupGeometry(); + + if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { + if (!LoadOpenGL()) { + return false; + } + } + + return true; } void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { @@ -441,6 +512,113 @@ void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal setMinimumSize(minimal_size.first, minimal_size.second); } +bool GRenderWindow::InitializeOpenGL() { + // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, + // WA_DontShowOnScreen, WA_DeleteOnClose + QSurfaceFormat fmt; + fmt.setVersion(4, 3); + fmt.setProfile(QSurfaceFormat::CompatibilityProfile); + fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); + // TODO: expose a setting for buffer value (ie default/single/double/triple) + fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); + shared_context = std::make_unique<QOpenGLContext>(); + shared_context->setFormat(fmt); + shared_context->create(); + context = std::make_unique<QOpenGLContext>(); + context->setShareContext(shared_context.get()); + context->setFormat(fmt); + context->create(); + fmt.setSwapInterval(false); + + child = new GGLWidgetInternal(this, shared_context.get()); + return true; +} + +bool GRenderWindow::InitializeVulkan() { +#ifdef HAS_VULKAN + vk_instance = std::make_unique<QVulkanInstance>(); + vk_instance->setApiVersion(QVersionNumber(1, 1, 0)); + vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect); + if (Settings::values.renderer_debug) { + const auto supported_layers{vk_instance->supportedLayers()}; + const bool found = + std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) { + constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation"; + return layer.name == searched_layer; + }); + if (found) { + vk_instance->setLayers(QByteArrayList() << "VK_LAYER_LUNARG_standard_validation"); + vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + } + if (!vk_instance->create()) { + QMessageBox::critical( + this, tr("Error while initializing Vulkan 1.1!"), + tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the " + "latest graphics drivers.")); + return false; + } + + child = new GVKWidgetInternal(this, vk_instance.get()); + return true; +#else + QMessageBox::critical(this, tr("Vulkan not available!"), + tr("yuzu has not been compiled with Vulkan support.")); + return false; +#endif +} + +bool GRenderWindow::LoadOpenGL() { + Core::Frontend::ScopeAcquireWindowContext acquire_context{*this}; + if (!gladLoadGL()) { + QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3!"), + tr("Your GPU may not support OpenGL 4.3, or you do not have the " + "latest graphics driver.")); + return false; + } + + QStringList unsupported_gl_extensions = GetUnsupportedGLExtensions(); + if (!unsupported_gl_extensions.empty()) { + QMessageBox::critical( + this, tr("Error while initializing OpenGL!"), + tr("Your GPU may not support one or more required OpenGL extensions. Please ensure you " + "have the latest graphics driver.<br><br>Unsupported extensions:<br>") + + unsupported_gl_extensions.join(QStringLiteral("<br>"))); + return false; + } + return true; +} + +QStringList GRenderWindow::GetUnsupportedGLExtensions() const { + QStringList unsupported_ext; + + if (!GLAD_GL_ARB_buffer_storage) + unsupported_ext.append(QStringLiteral("ARB_buffer_storage")); + if (!GLAD_GL_ARB_direct_state_access) + unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); + if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) + unsupported_ext.append(QStringLiteral("ARB_vertex_type_10f_11f_11f_rev")); + if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) + unsupported_ext.append(QStringLiteral("ARB_texture_mirror_clamp_to_edge")); + if (!GLAD_GL_ARB_multi_bind) + unsupported_ext.append(QStringLiteral("ARB_multi_bind")); + if (!GLAD_GL_ARB_clip_control) + unsupported_ext.append(QStringLiteral("ARB_clip_control")); + + // Extensions required to support some texture formats. + if (!GLAD_GL_EXT_texture_compression_s3tc) + unsupported_ext.append(QStringLiteral("EXT_texture_compression_s3tc")); + if (!GLAD_GL_ARB_texture_compression_rgtc) + unsupported_ext.append(QStringLiteral("ARB_texture_compression_rgtc")); + if (!GLAD_GL_ARB_depth_buffer_float) + unsupported_ext.append(QStringLiteral("ARB_depth_buffer_float")); + + for (const QString& ext : unsupported_ext) + LOG_CRITICAL(Frontend, "Unsupported GL extension: {}", ext.toStdString()); + + return unsupported_ext; +} + void GRenderWindow::OnEmulationStarting(EmuThread* emu_thread) { this->emu_thread = emu_thread; child->DisablePainting(); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 2fc64895f..71a2fa321 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -7,17 +7,28 @@ #include <atomic> #include <condition_variable> #include <mutex> + #include <QImage> #include <QThread> #include <QWidget> + +#include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" class QKeyEvent; class QScreen; class QTouchEvent; +class QStringList; +class QSurface; +class QOpenGLContext; +#ifdef HAS_VULKAN +class QVulkanInstance; +#endif +class GWidgetInternal; class GGLWidgetInternal; +class GVKWidgetInternal; class GMainWindow; class GRenderWindow; class QSurface; @@ -123,6 +134,9 @@ public: void MakeCurrent() override; void DoneCurrent() override; void PollEvents() override; + bool IsShown() const override; + void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const override; std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; void ForwardKeyPressEvent(QKeyEvent* event); @@ -142,7 +156,7 @@ public: void OnClientAreaResized(u32 width, u32 height); - void InitRenderTarget(); + bool InitRenderTarget(); void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); @@ -165,10 +179,13 @@ private: void OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal_size) override; - QWidget* container = nullptr; - GGLWidgetInternal* child = nullptr; + bool InitializeOpenGL(); + bool InitializeVulkan(); + bool LoadOpenGL(); + QStringList GetUnsupportedGLExtensions() const; - QByteArray geometry; + QWidget* container = nullptr; + GWidgetInternal* child = nullptr; EmuThread* emu_thread; // Context that backs the GGLWidgetInternal (and will be used by core to render) @@ -177,9 +194,14 @@ private: // current std::unique_ptr<QOpenGLContext> shared_context; +#ifdef HAS_VULKAN + std::unique_ptr<QVulkanInstance> vk_instance; +#endif + /// Temporary storage of the screenshot taken QImage screenshot_image; + QByteArray geometry; bool first_frame = false; protected: diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 59918847a..280d81ba9 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -624,6 +624,10 @@ void Config::ReadPathValues() { void Config::ReadRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); + Settings::values.renderer_backend = + static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); + Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); + Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); Settings::values.resolution_factor = ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); Settings::values.use_frame_limit = @@ -1056,6 +1060,9 @@ void Config::SavePathValues() { void Config::SaveRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); + WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); + WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); + WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); WriteSetting(QStringLiteral("resolution_factor"), static_cast<double>(Settings::values.resolution_factor), 1.0); WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 90c1f9459..9631059c7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -36,6 +36,8 @@ void ConfigureDebug::SetConfiguration() { ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args)); ui->reporting_services->setChecked(Settings::values.reporting_services); ui->quest_flag->setChecked(Settings::values.quest_flag); + ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); } void ConfigureDebug::ApplyConfiguration() { @@ -46,6 +48,7 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); Settings::values.reporting_services = ui->reporting_services->isChecked(); Settings::values.quest_flag = ui->quest_flag->isChecked(); + Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Debugger::ToggleConsole(); Log::Filter filter; filter.ParseFilterString(Settings::values.log_filter); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index ce49569bb..e028c4c80 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -7,7 +7,7 @@ <x>0</x> <y>0</y> <width>400</width> - <height>474</height> + <height>467</height> </rect> </property> <property name="windowTitle"> @@ -103,6 +103,80 @@ </item> </layout> </item> + </layout> + </widget> + </item> + <item> + <widget class="QGroupBox" name="groupBox_3"> + <property name="title"> + <string>Homebrew</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_5"> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_4"> + <item> + <widget class="QLabel" name="label_3"> + <property name="text"> + <string>Arguments String</string> + </property> + </widget> + </item> + <item> + <widget class="QLineEdit" name="homebrew_args_edit"/> + </item> + </layout> + </item> + </layout> + </widget> + </item> + <item> + <widget class="QGroupBox" name="groupBox_4"> + <property name="title"> + <string>Graphics</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_6"> + <item> + <widget class="QCheckBox" name="enable_graphics_debugging"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="whatsThis"> + <string>When checked, the graphics API enters in a slower debugging mode</string> + </property> + <property name="text"> + <string>Enable Graphics Debugging</string> + </property> + </widget> + </item> + </layout> + </widget> + </item> + <item> + <widget class="QGroupBox" name="groupBox_5"> + <property name="title"> + <string>Dump</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_6"> + <item> + <widget class="QCheckBox" name="dump_decompressed_nso"> + <property name="whatsThis"> + <string>When checked, any NSO yuzu tries to load or patch will be copied decompressed to the yuzu/dump directory.</string> + </property> + <property name="text"> + <string>Dump Decompressed NSOs</string> + </property> + </widget> + </item> + <item> + <widget class="QCheckBox" name="dump_exefs"> + <property name="whatsThis"> + <string>When checked, any game that yuzu loads will have its ExeFS dumped to the yuzu/dump directory.</string> + </property> + <property name="text"> + <string>Dump ExeFS</string> + </property> + </widget> + </item> <item> <widget class="QCheckBox" name="reporting_services"> <property name="text"> @@ -129,11 +203,11 @@ </widget> </item> <item> - <widget class="QGroupBox" name="groupBox_5"> + <widget class="QGroupBox" name="groupBox_6"> <property name="title"> <string>Advanced</string> </property> - <layout class="QVBoxLayout" name="verticalLayout"> + <layout class="QVBoxLayout" name="verticalLayout_7"> <item> <widget class="QCheckBox" name="quest_flag"> <property name="text"> @@ -145,29 +219,6 @@ </widget> </item> <item> - <widget class="QGroupBox" name="groupBox_3"> - <property name="title"> - <string>Homebrew</string> - </property> - <layout class="QVBoxLayout" name="verticalLayout_5"> - <item> - <layout class="QHBoxLayout" name="horizontalLayout_4"> - <item> - <widget class="QLabel" name="label_3"> - <property name="text"> - <string>Arguments String</string> - </property> - </widget> - </item> - <item> - <widget class="QLineEdit" name="homebrew_args_edit"/> - </item> - </layout> - </item> - </layout> - </widget> - </item> - <item> <spacer name="verticalSpacer"> <property name="orientation"> <enum>Qt::Vertical</enum> @@ -185,6 +236,19 @@ </item> </layout> </widget> + <tabstops> + <tabstop>toggle_gdbstub</tabstop> + <tabstop>gdbport_spinbox</tabstop> + <tabstop>log_filter_edit</tabstop> + <tabstop>toggle_console</tabstop> + <tabstop>open_log_button</tabstop> + <tabstop>homebrew_args_edit</tabstop> + <tabstop>enable_graphics_debugging</tabstop> + <tabstop>dump_decompressed_nso</tabstop> + <tabstop>dump_exefs</tabstop> + <tabstop>reporting_services</tabstop> + <tabstop>quest_flag</tabstop> + </tabstops> <resources/> <connections> <connection> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 2c9e322c9..f57a24e36 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -3,6 +3,13 @@ // Refer to the license.txt file included. #include <QColorDialog> +#include <QComboBox> +#ifdef HAS_VULKAN +#include <QVulkanInstance> +#endif + +#include "common/common_types.h" +#include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" #include "ui_configure_graphics.h" @@ -51,10 +58,18 @@ Resolution FromResolutionFactor(float factor) { ConfigureGraphics::ConfigureGraphics(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureGraphics) { + vulkan_device = Settings::values.vulkan_device; + RetrieveVulkanDevices(); + ui->setupUi(this); SetConfiguration(); + connect(ui->api, static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this, + [this] { UpdateDeviceComboBox(); }); + connect(ui->device, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), this, + [this](int device) { UpdateDeviceSelection(device); }); + connect(ui->bg_button, &QPushButton::clicked, this, [this] { const QColor new_bg_color = QColorDialog::getColor(bg_color); if (!new_bg_color.isValid()) { @@ -64,11 +79,22 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) }); } +void ConfigureGraphics::UpdateDeviceSelection(int device) { + if (device == -1) { + return; + } + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + vulkan_device = device; + } +} + ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); + ui->api->setEnabled(runtime_lock); + ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); ui->resolution_factor_combobox->setCurrentIndex( static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); ui->use_disk_shader_cache->setEnabled(runtime_lock); @@ -80,9 +106,12 @@ void ConfigureGraphics::SetConfiguration() { ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue)); + UpdateDeviceComboBox(); } void ConfigureGraphics::ApplyConfiguration() { + Settings::values.renderer_backend = GetCurrentGraphicsBackend(); + Settings::values.vulkan_device = vulkan_device; Settings::values.resolution_factor = ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); @@ -116,3 +145,68 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) { const QIcon color_icon(pixmap); ui->bg_button->setIcon(color_icon); } + +void ConfigureGraphics::UpdateDeviceComboBox() { + ui->device->clear(); + + bool enabled = false; + switch (GetCurrentGraphicsBackend()) { + case Settings::RendererBackend::OpenGL: + ui->device->addItem(tr("OpenGL Graphics Device")); + enabled = false; + break; + case Settings::RendererBackend::Vulkan: + for (const auto device : vulkan_devices) { + ui->device->addItem(device); + } + ui->device->setCurrentIndex(vulkan_device); + enabled = !vulkan_devices.empty(); + break; + } + ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); +} + +void ConfigureGraphics::RetrieveVulkanDevices() { +#ifdef HAS_VULKAN + QVulkanInstance instance; + instance.setApiVersion(QVersionNumber(1, 1, 0)); + if (!instance.create()) { + LOG_INFO(Frontend, "Vulkan 1.1 not available"); + return; + } + const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( + instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))}; + if (vkEnumeratePhysicalDevices == nullptr) { + LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices"); + return; + } + u32 physical_device_count; + if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) != + VK_SUCCESS) { + LOG_INFO(Frontend, "Failed to get physical devices count"); + return; + } + std::vector<VkPhysicalDevice> physical_devices(physical_device_count); + if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, + physical_devices.data()) != VK_SUCCESS) { + LOG_INFO(Frontend, "Failed to get physical devices"); + return; + } + + const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>( + instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))}; + if (vkGetPhysicalDeviceProperties == nullptr) { + LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties"); + return; + } + for (const auto physical_device : physical_devices) { + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(physical_device, &properties); + vulkan_devices.push_back(QString::fromUtf8(properties.deviceName)); + } +#endif +} + +Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { + return static_cast<Settings::RendererBackend>(ui->api->currentIndex()); +} diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index fae28d98e..7e0596d9c 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -5,7 +5,10 @@ #pragma once #include <memory> +#include <vector> +#include <QString> #include <QWidget> +#include "core/settings.h" namespace Ui { class ConfigureGraphics; @@ -27,7 +30,16 @@ private: void SetConfiguration(); void UpdateBackgroundColorButton(QColor color); + void UpdateDeviceComboBox(); + void UpdateDeviceSelection(int device); + + void RetrieveVulkanDevices(); + + Settings::RendererBackend GetCurrentGraphicsBackend() const; std::unique_ptr<Ui::ConfigureGraphics> ui; QColor bg_color; + + std::vector<QString> vulkan_devices; + u32 vulkan_device{}; }; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 0309ee300..e24372204 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -7,21 +7,69 @@ <x>0</x> <y>0</y> <width>400</width> - <height>300</height> + <height>321</height> </rect> </property> <property name="windowTitle"> <string>Form</string> </property> - <layout class="QVBoxLayout" name="verticalLayout"> + <layout class="QVBoxLayout" name="verticalLayout_1"> <item> - <layout class="QVBoxLayout" name="verticalLayout_3"> + <layout class="QVBoxLayout" name="verticalLayout_2"> + <item> + <widget class="QGroupBox" name="groupBox_2"> + <property name="title"> + <string>API Settings</string> + </property> + <layout class="QVBoxLayout" name="verticalLayout_3"> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_4"> + <item> + <widget class="QLabel" name="label_2"> + <property name="text"> + <string>API:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="api"> + <item> + <property name="text"> + <string notr="true">OpenGL</string> + </property> + </item> + <item> + <property name="text"> + <string notr="true">Vulkan</string> + </property> + </item> + </widget> + </item> + </layout> + </item> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_5"> + <item> + <widget class="QLabel" name="label_3"> + <property name="text"> + <string>Device:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="device"/> + </item> + </layout> + </item> + </layout> + </widget> + </item> <item> <widget class="QGroupBox" name="groupBox"> <property name="title"> - <string>Graphics</string> + <string>Graphics Settings</string> </property> - <layout class="QVBoxLayout" name="verticalLayout_2"> + <layout class="QVBoxLayout" name="verticalLayout_4"> <item> <widget class="QCheckBox" name="use_disk_shader_cache"> <property name="text"> @@ -30,16 +78,16 @@ </widget> </item> <item> - <widget class="QCheckBox" name="use_accurate_gpu_emulation"> + <widget class="QCheckBox" name="use_asynchronous_gpu_emulation"> <property name="text"> - <string>Use accurate GPU emulation (slow)</string> + <string>Use asynchronous GPU emulation</string> </property> </widget> </item> <item> - <widget class="QCheckBox" name="use_asynchronous_gpu_emulation"> + <widget class="QCheckBox" name="use_accurate_gpu_emulation"> <property name="text"> - <string>Use asynchronous GPU emulation</string> + <string>Use accurate GPU emulation (slow)</string> </property> </widget> </item> @@ -51,11 +99,11 @@ </widget> </item> <item> - <layout class="QHBoxLayout" name="horizontalLayout"> + <layout class="QHBoxLayout" name="horizontalLayout_2"> <item> <widget class="QLabel" name="label"> <property name="text"> - <string>Internal Resolution</string> + <string>Internal Resolution:</string> </property> </widget> </item> @@ -91,7 +139,7 @@ </layout> </item> <item> - <layout class="QHBoxLayout" name="horizontalLayout_6"> + <layout class="QHBoxLayout" name="horizontalLayout_3"> <item> <widget class="QLabel" name="bg_label"> <property name="text"> diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 67c9a7c6d..96dec50e2 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -236,6 +236,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i widget->setVisible(false); analog_map_stick = {ui->buttonLStickAnalog, ui->buttonRStickAnalog}; + analog_map_deadzone = {ui->sliderLStickDeadzone, ui->sliderRStickDeadzone}; + analog_map_deadzone_label = {ui->labelLStickDeadzone, ui->labelRStickDeadzone}; for (int button_id = 0; button_id < Settings::NativeButton::NumButtons; button_id++) { auto* const button = button_map[button_id]; @@ -326,6 +328,11 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i InputCommon::Polling::DeviceType::Analog); } }); + connect(analog_map_deadzone[analog_id], &QSlider::valueChanged, [=] { + const float deadzone = analog_map_deadzone[analog_id]->value() / 100.0f; + analog_map_deadzone_label[analog_id]->setText(tr("Deadzone: %1").arg(deadzone)); + analogs_param[analog_id].Set("deadzone", deadzone); + }); } connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); }); @@ -484,7 +491,7 @@ void ConfigureInputPlayer::ClearAll() { continue; } - analogs_param[analog_id].Erase(analog_sub_buttons[sub_button_id]); + analogs_param[analog_id].Clear(); } } @@ -508,6 +515,23 @@ void ConfigureInputPlayer::UpdateButtonLabels() { AnalogToText(analogs_param[analog_id], analog_sub_buttons[sub_button_id])); } analog_map_stick[analog_id]->setText(tr("Set Analog Stick")); + + auto& param = analogs_param[analog_id]; + auto* const analog_deadzone_slider = analog_map_deadzone[analog_id]; + auto* const analog_deadzone_label = analog_map_deadzone_label[analog_id]; + + if (param.Has("engine") && param.Get("engine", "") == "sdl") { + if (!param.Has("deadzone")) { + param.Set("deadzone", 0.1f); + } + + analog_deadzone_slider->setValue(static_cast<int>(param.Get("deadzone", 0.1f) * 100)); + analog_deadzone_slider->setVisible(true); + analog_deadzone_label->setVisible(true); + } else { + analog_deadzone_slider->setVisible(false); + analog_deadzone_label->setVisible(false); + } } } diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index c66027651..045704e47 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h @@ -97,6 +97,8 @@ private: /// Analog inputs are also represented each with a single button, used to configure with an /// actual analog stick std::array<QPushButton*, Settings::NativeAnalog::NumAnalogs> analog_map_stick; + std::array<QSlider*, Settings::NativeAnalog::NumAnalogs> analog_map_deadzone; + std::array<QLabel*, Settings::NativeAnalog::NumAnalogs> analog_map_deadzone_label; static const std::array<std::string, ANALOG_SUB_BUTTONS_NUM> analog_sub_buttons; diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui index 42db020be..1556481d0 100644 --- a/src/yuzu/configuration/configure_input_player.ui +++ b/src/yuzu/configuration/configure_input_player.ui @@ -170,6 +170,44 @@ </item> </layout> </item> + <item row="4" column="0" colspan="2"> + <layout class="QVBoxLayout" name="sliderRStickDeadzoneVerticalLayout"> + <item> + <layout class="QHBoxLayout" name="sliderRStickDeadzoneHorizontalLayout"> + <item> + <widget class="QLabel" name="labelRStickDeadzone"> + <property name="text"> + <string>Deadzone: 0</string> + </property> + <property name="alignment"> + <enum>Qt::AlignHCenter</enum> + </property> + </widget> + </item> + </layout> + </item> + <item> + <widget class="QSlider" name="sliderRStickDeadzone"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + </widget> + </item> + </layout> + </item> + <item row="5" column="0"> + <spacer name="RStick_verticalSpacer"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>0</width> + <height>0</height> + </size> + </property> + </spacer> + </item> </layout> </widget> </item> @@ -745,6 +783,47 @@ </item> </layout> </item> + <item row="5" column="1" colspan="2"> + <layout class="QVBoxLayout" name="sliderLStickDeadzoneVerticalLayout"> + <property name="sizeConstraint"> + <enum>QLayout::SetDefaultConstraint</enum> + </property> + <item> + <layout class="QHBoxLayout" name="sliderLStickDeadzoneHorizontalLayout"> + <item> + <widget class="QLabel" name="labelLStickDeadzone"> + <property name="text"> + <string>Deadzone: 0</string> + </property> + <property name="alignment"> + <enum>Qt::AlignHCenter</enum> + </property> + </widget> + </item> + </layout> + </item> + <item> + <widget class="QSlider" name="sliderLStickDeadzone"> + <property name="orientation"> + <enum>Qt::Horizontal</enum> + </property> + </widget> + </item> + </layout> + </item> + <item row="6" column="1"> + <spacer name="LStick_verticalSpacer"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>0</width> + <height>0</height> + </size> + </property> + </spacer> + </item> </layout> </widget> </item> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index b5dd3e0d6..54ca2dc1d 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -454,7 +454,6 @@ void GMainWindow::InitializeWidgets() { // Create status bar message_label = new QLabel(); // Configured separately for left alignment - message_label->setVisible(false); message_label->setFrameStyle(QFrame::NoFrame); message_label->setContentsMargins(4, 0, 4, 0); message_label->setAlignment(Qt::AlignLeft); @@ -476,8 +475,73 @@ void GMainWindow::InitializeWidgets() { label->setVisible(false); label->setFrameStyle(QFrame::NoFrame); label->setContentsMargins(4, 0, 4, 0); - statusBar()->addPermanentWidget(label, 0); + statusBar()->addPermanentWidget(label); } + + // Setup Dock button + dock_status_button = new QPushButton(); + dock_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + dock_status_button->setFocusPolicy(Qt::NoFocus); + connect(dock_status_button, &QPushButton::clicked, [&] { + Settings::values.use_docked_mode = !Settings::values.use_docked_mode; + dock_status_button->setChecked(Settings::values.use_docked_mode); + OnDockedModeChanged(!Settings::values.use_docked_mode, Settings::values.use_docked_mode); + }); + dock_status_button->setText(tr("DOCK")); + dock_status_button->setCheckable(true); + dock_status_button->setChecked(Settings::values.use_docked_mode); + statusBar()->insertPermanentWidget(0, dock_status_button); + + // Setup ASync button + async_status_button = new QPushButton(); + async_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + async_status_button->setFocusPolicy(Qt::NoFocus); + connect(async_status_button, &QPushButton::clicked, [&] { + if (emulation_running) { + return; + } + Settings::values.use_asynchronous_gpu_emulation = + !Settings::values.use_asynchronous_gpu_emulation; + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); + Settings::Apply(); + }); + async_status_button->setText(tr("ASYNC")); + async_status_button->setCheckable(true); + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); + statusBar()->insertPermanentWidget(0, async_status_button); + + // Setup Renderer API button + renderer_status_button = new QPushButton(); + renderer_status_button->setObjectName(QStringLiteral("RendererStatusBarButton")); + renderer_status_button->setCheckable(true); + renderer_status_button->setFocusPolicy(Qt::NoFocus); + connect(renderer_status_button, &QPushButton::toggled, [=](bool checked) { + renderer_status_button->setText(checked ? tr("VULKAN") : tr("OPENGL")); + }); + renderer_status_button->toggle(); + +#ifndef HAS_VULKAN + renderer_status_button->setChecked(false); + renderer_status_button->setCheckable(false); + renderer_status_button->setDisabled(true); +#else + renderer_status_button->setChecked(Settings::values.renderer_backend == + Settings::RendererBackend::Vulkan); + connect(renderer_status_button, &QPushButton::clicked, [=] { + if (emulation_running) { + return; + } + if (renderer_status_button->isChecked()) { + Settings::values.renderer_backend = Settings::RendererBackend::Vulkan; + } else { + Settings::values.renderer_backend = Settings::RendererBackend::OpenGL; + } + + Settings::Apply(); + }); +#endif // HAS_VULKAN + statusBar()->insertPermanentWidget(0, renderer_status_button); + statusBar()->setVisible(true); setStyleSheet(QStringLiteral("QStatusBar::item{border: none;}")); } @@ -640,6 +704,7 @@ void GMainWindow::InitializeHotkeys() { Settings::values.use_docked_mode = !Settings::values.use_docked_mode; OnDockedModeChanged(!Settings::values.use_docked_mode, Settings::values.use_docked_mode); + dock_status_button->setChecked(Settings::values.use_docked_mode); }); } @@ -806,70 +871,12 @@ void GMainWindow::AllowOSSleep() { #endif } -QStringList GMainWindow::GetUnsupportedGLExtensions() { - QStringList unsupported_ext; - - if (!GLAD_GL_ARB_buffer_storage) { - unsupported_ext.append(QStringLiteral("ARB_buffer_storage")); - } - if (!GLAD_GL_ARB_direct_state_access) { - unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); - } - if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) { - unsupported_ext.append(QStringLiteral("ARB_vertex_type_10f_11f_11f_rev")); - } - if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) { - unsupported_ext.append(QStringLiteral("ARB_texture_mirror_clamp_to_edge")); - } - if (!GLAD_GL_ARB_multi_bind) { - unsupported_ext.append(QStringLiteral("ARB_multi_bind")); - } - if (!GLAD_GL_ARB_clip_control) { - unsupported_ext.append(QStringLiteral("ARB_clip_control")); - } - - // Extensions required to support some texture formats. - if (!GLAD_GL_EXT_texture_compression_s3tc) { - unsupported_ext.append(QStringLiteral("EXT_texture_compression_s3tc")); - } - if (!GLAD_GL_ARB_texture_compression_rgtc) { - unsupported_ext.append(QStringLiteral("ARB_texture_compression_rgtc")); - } - if (!GLAD_GL_ARB_depth_buffer_float) { - unsupported_ext.append(QStringLiteral("ARB_depth_buffer_float")); - } - - for (const QString& ext : unsupported_ext) { - LOG_CRITICAL(Frontend, "Unsupported GL extension: {}", ext.toStdString()); - } - - return unsupported_ext; -} - bool GMainWindow::LoadROM(const QString& filename) { // Shutdown previous session if the emu thread is still active... if (emu_thread != nullptr) ShutdownGame(); - render_window->InitRenderTarget(); - - { - Core::Frontend::ScopeAcquireWindowContext acquire_context{*render_window}; - if (!gladLoadGL()) { - QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"), - tr("Your GPU may not support OpenGL 4.3, or you do not " - "have the latest graphics driver.")); - return false; - } - } - - const QStringList unsupported_gl_extensions = GetUnsupportedGLExtensions(); - if (!unsupported_gl_extensions.empty()) { - QMessageBox::critical(this, tr("Error while initializing OpenGL Core!"), - tr("Your GPU may not support one or more required OpenGL" - "extensions. Please ensure you have the latest graphics " - "driver.<br><br>Unsupported extensions:<br>") + - unsupported_gl_extensions.join(QStringLiteral("<br>"))); + if (!render_window->InitRenderTarget()) { return false; } @@ -980,7 +987,9 @@ void GMainWindow::BootGame(const QString& filename) { // Create and start the emulation thread emu_thread = std::make_unique<EmuThread>(render_window); emit EmulationStarting(emu_thread.get()); - render_window->moveContext(); + if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { + render_window->moveContext(); + } emu_thread->start(); connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); @@ -1000,6 +1009,8 @@ void GMainWindow::BootGame(const QString& filename) { game_list_placeholder->hide(); } status_bar_update_timer.start(2000); + async_status_button->setDisabled(true); + renderer_status_button->setDisabled(true); const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); @@ -1065,10 +1076,13 @@ void GMainWindow::ShutdownGame() { // Disable status bar updates status_bar_update_timer.stop(); - message_label->setVisible(false); emu_speed_label->setVisible(false); game_fps_label->setVisible(false); emu_frametime_label->setVisible(false); + async_status_button->setEnabled(true); +#ifdef HAS_VULKAN + renderer_status_button->setEnabled(true); +#endif emulation_running = false; @@ -1836,6 +1850,13 @@ void GMainWindow::OnConfigure() { } config->Save(); + + dock_status_button->setChecked(Settings::values.use_docked_mode); + async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); +#ifdef HAS_VULKAN + renderer_status_button->setChecked(Settings::values.renderer_backend == + Settings::RendererBackend::Vulkan); +#endif } void GMainWindow::OnLoadAmiibo() { @@ -2028,7 +2049,6 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det if (emu_thread) { emu_thread->SetRunning(true); message_label->setText(status_message); - message_label->setVisible(true); } } } @@ -2195,6 +2215,18 @@ void GMainWindow::closeEvent(QCloseEvent* event) { QWidget::closeEvent(event); } +void GMainWindow::keyPressEvent(QKeyEvent* event) { + if (render_window) { + render_window->ForwardKeyPressEvent(event); + } +} + +void GMainWindow::keyReleaseEvent(QKeyEvent* event) { + if (render_window) { + render_window->ForwardKeyReleaseEvent(event); + } +} + static bool IsSingleFileDropEvent(QDropEvent* event) { const QMimeData* mimeData = event->mimeData(); return mimeData->hasUrls() && mimeData->urls().length() == 1; @@ -2227,18 +2259,6 @@ void GMainWindow::dragMoveEvent(QDragMoveEvent* event) { event->acceptProposedAction(); } -void GMainWindow::keyPressEvent(QKeyEvent* event) { - if (render_window) { - render_window->ForwardKeyPressEvent(event); - } -} - -void GMainWindow::keyReleaseEvent(QKeyEvent* event) { - if (render_window) { - render_window->ForwardKeyReleaseEvent(event); - } -} - bool GMainWindow::ConfirmChangeGame() { if (emu_thread == nullptr) return true; @@ -2290,8 +2310,16 @@ void GMainWindow::UpdateUITheme() { QStringList theme_paths(default_theme_paths); if (is_default_theme || current_theme.isEmpty()) { - qApp->setStyleSheet({}); - setStyleSheet({}); + const QString theme_uri(QStringLiteral(":default/style.qss")); + QFile f(theme_uri); + if (f.open(QFile::ReadOnly | QFile::Text)) { + QTextStream ts(&f); + qApp->setStyleSheet(ts.readAll()); + setStyleSheet(ts.readAll()); + } else { + qApp->setStyleSheet({}); + setStyleSheet({}); + } theme_paths.append(default_icons); QIcon::setThemeName(default_icons); } else { diff --git a/src/yuzu/main.h b/src/yuzu/main.h index a56f9a981..8eba2172c 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -27,6 +27,7 @@ class LoadingScreen; class MicroProfileDialog; class ProfilerWidget; class QLabel; +class QPushButton; class WaitTreeWidget; enum class GameListOpenTarget; class GameListPlaceholder; @@ -130,7 +131,6 @@ private: void PreventOSSleep(); void AllowOSSleep(); - QStringList GetUnsupportedGLExtensions(); bool LoadROM(const QString& filename); void BootGame(const QString& filename); void ShutdownGame(); @@ -229,6 +229,9 @@ private: QLabel* emu_speed_label = nullptr; QLabel* game_fps_label = nullptr; QLabel* emu_frametime_label = nullptr; + QPushButton* async_status_button = nullptr; + QPushButton* renderer_status_button = nullptr; + QPushButton* dock_status_button = nullptr; QTimer status_bar_update_timer; std::unique_ptr<Config> config; diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt index b5f06ab9e..a15719a0f 100644 --- a/src/yuzu_cmd/CMakeLists.txt +++ b/src/yuzu_cmd/CMakeLists.txt @@ -8,11 +8,22 @@ add_executable(yuzu-cmd emu_window/emu_window_sdl2_gl.h emu_window/emu_window_sdl2.cpp emu_window/emu_window_sdl2.h + emu_window/emu_window_sdl2_gl.cpp + emu_window/emu_window_sdl2_gl.h resource.h yuzu.cpp yuzu.rc ) +if (ENABLE_VULKAN) + target_sources(yuzu-cmd PRIVATE + emu_window/emu_window_sdl2_vk.cpp + emu_window/emu_window_sdl2_vk.h) + + target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include) + target_compile_definitions(yuzu-cmd PRIVATE HAS_VULKAN) +endif() + create_target_directory_groups(yuzu-cmd) target_link_libraries(yuzu-cmd PRIVATE common core input_common) diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 161583b54..b01a36023 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -371,6 +371,12 @@ void Config::ReadValues() { Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); // Renderer + const int renderer_backend = sdl2_config->GetInteger( + "Renderer", "backend", static_cast<int>(Settings::RendererBackend::OpenGL)); + Settings::values.renderer_backend = static_cast<Settings::RendererBackend>(renderer_backend); + Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); + Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); + Settings::values.resolution_factor = static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index e829f8695..00fd88279 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -98,6 +98,17 @@ udp_pad_index= use_multi_core= [Renderer] +# Which backend API to use. +# 0 (default): OpenGL, 1: Vulkan +backend = + +# Enable graphics API debugging mode. +# 0 (default): Disabled, 1: Enabled +debug = + +# Which Vulkan physical device to use (defaults to 0) +vulkan_device = + # Whether to use software or hardware rendering. # 0: Software, 1 (default): Hardware use_hw_renderer = diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index b1c512db1..e96139885 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -89,6 +89,10 @@ bool EmuWindow_SDL2::IsOpen() const { return is_open; } +bool EmuWindow_SDL2::IsShown() const { + return is_shown; +} + void EmuWindow_SDL2::OnResize() { int width, height; SDL_GetWindowSize(render_window, &width, &height); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h index eaa971f77..b38f56661 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h @@ -21,6 +21,9 @@ public: /// Whether the window is still open, and a close request hasn't yet been sent bool IsOpen() const; + /// Returns if window is shown (not minimized) + bool IsShown() const override; + protected: /// Called by PollEvents when a key is pressed or released. void OnKeyEvent(int key, u8 state); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 6fde694a2..7ffa0ac09 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -9,6 +9,7 @@ #include <SDL.h> #include <fmt/format.h> #include <glad/glad.h> +#include "common/assert.h" #include "common/logging/log.h" #include "common/scm_rev.h" #include "common/string_util.h" @@ -151,6 +152,12 @@ void EmuWindow_SDL2_GL::DoneCurrent() { SDL_GL_MakeCurrent(render_window, nullptr); } +void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const { + // Should not have been called from OpenGL + UNREACHABLE(); +} + std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { return std::make_unique<SDLGLContext>(); } diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h index 630deba93..c753085a8 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h @@ -22,6 +22,10 @@ public: /// Releases the GL context from the caller thread void DoneCurrent() override; + /// Ignored in OpenGL + void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const override; + std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; private: diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp new file mode 100644 index 000000000..a203f0da9 --- /dev/null +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp @@ -0,0 +1,162 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <string> +#include <vector> +#include <SDL.h> +#include <SDL_vulkan.h> +#include <fmt/format.h> +#include <vulkan/vulkan.h> +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/scm_rev.h" +#include "core/settings.h" +#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" + +EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(bool fullscreen) : EmuWindow_SDL2(fullscreen) { + if (SDL_Vulkan_LoadLibrary(nullptr) != 0) { + LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError()); + exit(EXIT_FAILURE); + } + + vkGetInstanceProcAddr = + reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr()); + if (vkGetInstanceProcAddr == nullptr) { + LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); + exit(EXIT_FAILURE); + } + + const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name, + Common::g_scm_branch, Common::g_scm_desc); + render_window = + SDL_CreateWindow(window_title.c_str(), + SDL_WINDOWPOS_UNDEFINED, // x position + SDL_WINDOWPOS_UNDEFINED, // y position + Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, + SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN); + + const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr); + + u32 extra_ext_count{}; + if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) { + LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}", + SDL_GetError()); + exit(1); + } + + auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count); + if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) { + LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! {}", SDL_GetError()); + exit(1); + } + std::vector<const char*> enabled_extensions; + enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(), + extra_ext_names.get() + extra_ext_count); + + std::vector<const char*> enabled_layers; + if (use_standard_layers) { + enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation"); + } + + VkApplicationInfo app_info{}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.apiVersion = VK_API_VERSION_1_1; + app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); + app_info.pApplicationName = "yuzu-emu"; + app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); + app_info.pEngineName = "yuzu-emu"; + + VkInstanceCreateInfo instance_ci{}; + instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_ci.pApplicationInfo = &app_info; + instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size()); + instance_ci.ppEnabledExtensionNames = enabled_extensions.data(); + if (Settings::values.renderer_debug) { + instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size()); + instance_ci.ppEnabledLayerNames = enabled_layers.data(); + } + + const auto vkCreateInstance = + reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance")); + if (vkCreateInstance == nullptr || + vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) { + LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!"); + exit(EXIT_FAILURE); + } + + vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( + vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance")); + if (vkDestroyInstance == nullptr) { + LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); + exit(EXIT_FAILURE); + } + + if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) { + LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! {}", SDL_GetError()); + exit(EXIT_FAILURE); + } + + OnResize(); + OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); + SDL_PumpEvents(); + LOG_INFO(Frontend, "yuzu Version: {} | {}-{} (Vulkan)", Common::g_build_name, + Common::g_scm_branch, Common::g_scm_desc); +} + +EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() { + vkDestroyInstance(vk_instance, nullptr); +} + +void EmuWindow_SDL2_VK::SwapBuffers() {} + +void EmuWindow_SDL2_VK::MakeCurrent() { + // Unused on Vulkan +} + +void EmuWindow_SDL2_VK::DoneCurrent() { + // Unused on Vulkan +} + +void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const { + const auto instance_proc_addr = vkGetInstanceProcAddr; + std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); + std::memcpy(instance, &vk_instance, sizeof(vk_instance)); + std::memcpy(surface, &vk_surface, sizeof(vk_surface)); +} + +std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { + return nullptr; +} + +bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const { + if (!Settings::values.renderer_debug) { + return false; + } + + const auto vkEnumerateInstanceLayerProperties = + reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>( + vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties")); + if (vkEnumerateInstanceLayerProperties == nullptr) { + LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); + return false; + } + + u32 available_layers_count{}; + if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) { + LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!"); + return false; + } + std::vector<VkLayerProperties> layers(available_layers_count); + if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) { + LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!"); + return false; + } + + return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) { + return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation"); + }) != layers.end(); +} diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h new file mode 100644 index 000000000..2a7c06a24 --- /dev/null +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h @@ -0,0 +1,39 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vulkan/vulkan.h> +#include "core/frontend/emu_window.h" +#include "yuzu_cmd/emu_window/emu_window_sdl2.h" + +class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { +public: + explicit EmuWindow_SDL2_VK(bool fullscreen); + ~EmuWindow_SDL2_VK(); + + /// Swap buffers to display the next frame + void SwapBuffers() override; + + /// Makes the graphics context current for the caller thread + void MakeCurrent() override; + + /// Releases the GL context from the caller thread + void DoneCurrent() override; + + /// Retrieves Vulkan specific handlers from the window + void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const override; + + std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; + +private: + bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const; + + VkInstance vk_instance{}; + VkSurfaceKHR vk_surface{}; + + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; + PFN_vkDestroyInstance vkDestroyInstance{}; +}; diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 3ee088a91..325795321 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -32,6 +32,9 @@ #include "yuzu_cmd/config.h" #include "yuzu_cmd/emu_window/emu_window_sdl2.h" #include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h" +#ifdef HAS_VULKAN +#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" +#endif #include "core/file_sys/registered_cache.h" @@ -174,7 +177,20 @@ int main(int argc, char** argv) { Settings::values.use_gdbstub = use_gdbstub; Settings::Apply(); - std::unique_ptr<EmuWindow_SDL2> emu_window{std::make_unique<EmuWindow_SDL2_GL>(fullscreen)}; + std::unique_ptr<EmuWindow_SDL2> emu_window; + switch (Settings::values.renderer_backend) { + case Settings::RendererBackend::OpenGL: + emu_window = std::make_unique<EmuWindow_SDL2_GL>(fullscreen); + break; + case Settings::RendererBackend::Vulkan: +#ifdef HAS_VULKAN + emu_window = std::make_unique<EmuWindow_SDL2_VK>(fullscreen); + break; +#else + LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!"); + return 1; +#endif + } if (!Settings::values.use_multi_core) { // Single core mode must acquire OpenGL context for entire emulation session diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp index e7fe8decf..f2cc4a797 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp @@ -5,10 +5,15 @@ #include <algorithm> #include <cstdlib> #include <string> + +#include <fmt/format.h> + #define SDL_MAIN_HANDLED #include <SDL.h> -#include <fmt/format.h> + #include <glad/glad.h> + +#include "common/assert.h" #include "common/logging/log.h" #include "common/scm_rev.h" #include "core/settings.h" @@ -120,3 +125,11 @@ void EmuWindow_SDL2_Hide::MakeCurrent() { void EmuWindow_SDL2_Hide::DoneCurrent() { SDL_GL_MakeCurrent(render_window, nullptr); } + +bool EmuWindow_SDL2_Hide::IsShown() const { + return false; +} + +void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const { + UNREACHABLE(); +} diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h index 1a8953c75..c7fccc002 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h @@ -25,6 +25,13 @@ public: /// Releases the GL context from the caller thread void DoneCurrent() override; + /// Whether the screen is being shown or not. + bool IsShown() const override; + + /// Retrieves Vulkan specific handlers from the window + void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, + void* surface) const override; + /// Whether the window is still open, and a close request hasn't yet been sent bool IsOpen() const; |