diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/core.cpp | 25 | ||||
-rw-r--r-- | src/core/file_sys/submission_package.cpp | 26 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 8 | ||||
-rw-r--r-- | src/core/loader/nsp.cpp | 27 | ||||
-rw-r--r-- | src/core/memory.cpp | 9 | ||||
-rw-r--r-- | src/core/perf_stats.cpp | 47 | ||||
-rw-r--r-- | src/core/perf_stats.h | 21 | ||||
-rw-r--r-- | src/core/settings.h | 1 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 2 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 18 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 87 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 50 | ||||
-rw-r--r-- | src/video_core/shader/decode/warp.cpp | 47 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 10 | ||||
-rw-r--r-- | src/yuzu/configuration/config.cpp | 5 | ||||
-rw-r--r-- | src/yuzu/configuration/configure_input.cpp | 2 | ||||
-rw-r--r-- | src/yuzu/main.cpp | 8 | ||||
-rw-r--r-- | src/yuzu_cmd/config.cpp | 2 | ||||
-rw-r--r-- | src/yuzu_cmd/default_ini.h | 2 |
21 files changed, 352 insertions, 52 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp index 3d0978cbf..9ab174de2 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -160,10 +160,6 @@ struct System::Impl { LOG_DEBUG(Core, "Initialized OK"); - // Reset counters and set time origin to current frame - GetAndResetPerfStats(); - perf_stats.BeginSystemFrame(); - return ResultStatus::Success; } @@ -206,6 +202,16 @@ struct System::Impl { main_process->Run(load_parameters->main_thread_priority, load_parameters->main_thread_stack_size); + u64 title_id{0}; + if (app_loader->ReadProgramId(title_id) != Loader::ResultStatus::Success) { + LOG_ERROR(Core, "Failed to find title id for ROM (Error {})", + static_cast<u32>(load_result)); + } + perf_stats = std::make_unique<PerfStats>(title_id); + // Reset counters and set time origin to current frame + GetAndResetPerfStats(); + perf_stats->BeginSystemFrame(); + status = ResultStatus::Success; return status; } @@ -219,6 +225,8 @@ struct System::Impl { perf_results.game_fps); telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", perf_results.frametime * 1000.0); + telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS", + perf_stats->GetMeanFrametime()); is_powered_on = false; @@ -229,6 +237,7 @@ struct System::Impl { service_manager.reset(); cheat_engine.reset(); telemetry_session.reset(); + perf_stats.reset(); gpu_core.reset(); // Close all CPU/threading state @@ -286,7 +295,7 @@ struct System::Impl { } PerfStatsResults GetAndResetPerfStats() { - return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs()); + return perf_stats->GetAndResetStats(core_timing.GetGlobalTimeUs()); } Timing::CoreTiming core_timing; @@ -327,7 +336,7 @@ struct System::Impl { ResultStatus status = ResultStatus::Success; std::string status_details = ""; - Core::PerfStats perf_stats; + std::unique_ptr<Core::PerfStats> perf_stats; Core::FrameLimiter frame_limiter; }; @@ -480,11 +489,11 @@ const Timing::CoreTiming& 
System::CoreTiming() const { } Core::PerfStats& System::GetPerfStats() { - return impl->perf_stats; + return *impl->perf_stats; } const Core::PerfStats& System::GetPerfStats() const { - return impl->perf_stats; + return *impl->perf_stats; } Core::FrameLimiter& System::FrameLimiter() { diff --git a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp index 8b3b14e25..730221fd6 100644 --- a/src/core/file_sys/submission_package.cpp +++ b/src/core/file_sys/submission_package.cpp @@ -14,6 +14,7 @@ #include "core/file_sys/content_archive.h" #include "core/file_sys/nca_metadata.h" #include "core/file_sys/partition_filesystem.h" +#include "core/file_sys/program_metadata.h" #include "core/file_sys/submission_package.h" #include "core/loader/loader.h" @@ -78,6 +79,10 @@ Loader::ResultStatus NSP::GetStatus() const { } Loader::ResultStatus NSP::GetProgramStatus(u64 title_id) const { + if (IsExtractedType() && GetExeFS() != nullptr && FileSys::IsDirectoryExeFS(GetExeFS())) { + return Loader::ResultStatus::Success; + } + const auto iter = program_status.find(title_id); if (iter == program_status.end()) return Loader::ResultStatus::ErrorNSPMissingProgramNCA; @@ -85,12 +90,29 @@ Loader::ResultStatus NSP::GetProgramStatus(u64 title_id) const { } u64 NSP::GetFirstTitleID() const { + if (IsExtractedType()) { + return GetProgramTitleID(); + } + if (program_status.empty()) return 0; return program_status.begin()->first; } u64 NSP::GetProgramTitleID() const { + if (IsExtractedType()) { + if (GetExeFS() == nullptr || !IsDirectoryExeFS(GetExeFS())) { + return 0; + } + + ProgramMetadata meta; + if (meta.Load(GetExeFS()->GetFile("main.npdm")) == Loader::ResultStatus::Success) { + return meta.GetTitleID(); + } else { + return 0; + } + } + const auto out = GetFirstTitleID(); if ((out & 0x800) == 0) return out; @@ -102,6 +124,10 @@ u64 NSP::GetProgramTitleID() const { } std::vector<u64> NSP::GetTitleIDs() const { + if (IsExtractedType()) { + return 
{GetProgramTitleID()}; + } + std::vector<u64> out; out.reserve(ncas.size()); for (const auto& kv : ncas) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 241dac881..b4ee2a255 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -146,8 +146,8 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp } IoctlSubmitGpfifo params{}; std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", - params.address, params.num_entries, params.flags.raw); + LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, + params.num_entries, params.flags.raw); ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(Tegra::CommandListHeader), @@ -179,8 +179,8 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) } IoctlSubmitGpfifo params{}; std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); - LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", - params.address, params.num_entries, params.flags.raw); + LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, + params.num_entries, params.flags.raw); Tegra::CommandList entries(params.num_entries); Memory::ReadBlock(params.address, entries.data(), diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp index b1171ce65..35c82c99d 100644 --- a/src/core/loader/nsp.cpp +++ b/src/core/loader/nsp.cpp @@ -26,20 +26,18 @@ AppLoader_NSP::AppLoader_NSP(FileSys::VirtualFile file) if (nsp->GetStatus() != ResultStatus::Success) return; - if (nsp->IsExtractedType()) - return; - - const auto control_nca = - nsp->GetNCA(nsp->GetProgramTitleID(), FileSys::ContentRecordType::Control); - if (control_nca == nullptr || 
control_nca->GetStatus() != ResultStatus::Success) - return; - - std::tie(nacp_file, icon_file) = - FileSys::PatchManager(nsp->GetProgramTitleID()).ParseControlNCA(*control_nca); if (nsp->IsExtractedType()) { secondary_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(nsp->GetExeFS()); } else { + const auto control_nca = + nsp->GetNCA(nsp->GetProgramTitleID(), FileSys::ContentRecordType::Control); + if (control_nca == nullptr || control_nca->GetStatus() != ResultStatus::Success) + return; + + std::tie(nacp_file, icon_file) = + FileSys::PatchManager(nsp->GetProgramTitleID()).ParseControlNCA(*control_nca); + if (title_id == 0) return; @@ -56,11 +54,11 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& file) { if (nsp.GetStatus() == ResultStatus::Success) { // Extracted Type case if (nsp.IsExtractedType() && nsp.GetExeFS() != nullptr && - FileSys::IsDirectoryExeFS(nsp.GetExeFS()) && nsp.GetRomFS() != nullptr) { + FileSys::IsDirectoryExeFS(nsp.GetExeFS())) { return FileType::NSP; } - // Non-Ectracted Type case + // Non-Extracted Type case if (!nsp.IsExtractedType() && nsp.GetNCA(nsp.GetFirstTitleID(), FileSys::ContentRecordType::Program) != nullptr && AppLoader_NCA::IdentifyType(nsp.GetNCAFile( @@ -77,7 +75,7 @@ AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) { return {ResultStatus::ErrorAlreadyLoaded, {}}; } - if (title_id == 0) { + if (!nsp->IsExtractedType() && title_id == 0) { return {ResultStatus::ErrorNSPMissingProgramNCA, {}}; } @@ -91,7 +89,8 @@ AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) { return {nsp_program_status, {}}; } - if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) { + if (!nsp->IsExtractedType() && + nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) { if (!Core::Crypto::KeyManager::KeyFileExists(false)) { return {ResultStatus::ErrorMissingProductionKeyFile, {}}; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 
8555691c0..9e030789d 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -43,8 +43,13 @@ static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* me // During boot, current_page_table might not be set yet, in which case we need not flush if (Core::System::GetInstance().IsPoweredOn()) { - Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS, - size * PAGE_SIZE); + auto& gpu = Core::System::GetInstance().GPU(); + for (u64 i = 0; i < size; i++) { + const auto page = base + i; + if (page_table.attributes[page] == Common::PageType::RasterizerCachedMemory) { + gpu.FlushAndInvalidateRegion(page << PAGE_BITS, PAGE_SIZE); + } + } } VAddr end = base + size; diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp index 4afd6c8a3..d2c69d1a0 100644 --- a/src/core/perf_stats.cpp +++ b/src/core/perf_stats.cpp @@ -4,8 +4,14 @@ #include <algorithm> #include <chrono> +#include <iterator> #include <mutex> +#include <numeric> +#include <sstream> #include <thread> +#include <fmt/chrono.h> +#include <fmt/format.h> +#include "common/file_util.h" #include "common/math_util.h" #include "core/perf_stats.h" #include "core/settings.h" @@ -15,8 +21,31 @@ using DoubleSecs = std::chrono::duration<double, std::chrono::seconds::period>; using std::chrono::duration_cast; using std::chrono::microseconds; +// Purposefully ignore the first five frames, as there's a significant amount of overhead in +// booting that we shouldn't account for +constexpr std::size_t IgnoreFrames = 5; + namespace Core { +PerfStats::PerfStats(u64 title_id) : title_id(title_id) {} + +PerfStats::~PerfStats() { + if (!Settings::values.record_frame_times || title_id == 0) { + return; + } + + const std::time_t t = std::time(nullptr); + std::ostringstream stream; + std::copy(perf_history.begin() + IgnoreFrames, perf_history.begin() + current_index, + std::ostream_iterator<double>(stream, "\n")); + const std::string& path = 
FileUtil::GetUserPath(FileUtil::UserPath::LogDir); + // %F Date format expanded is "%Y-%m-%d" + const std::string filename = + fmt::format("{}/{:%F-%H-%M}_{:016X}.csv", path, *std::localtime(&t), title_id); + FileUtil::IOFile file(filename, "w"); + file.WriteString(stream.str()); +} + void PerfStats::BeginSystemFrame() { std::lock_guard lock{object_mutex}; @@ -27,7 +56,12 @@ void PerfStats::EndSystemFrame() { std::lock_guard lock{object_mutex}; auto frame_end = Clock::now(); - accumulated_frametime += frame_end - frame_begin; + const auto frame_time = frame_end - frame_begin; + if (current_index < perf_history.size()) { + perf_history[current_index++] = + std::chrono::duration<double, std::milli>(frame_time).count(); + } + accumulated_frametime += frame_time; system_frames += 1; previous_frame_length = frame_end - previous_frame_end; @@ -40,6 +74,17 @@ void PerfStats::EndGameFrame() { game_frames += 1; } +double PerfStats::GetMeanFrametime() { + std::lock_guard lock{object_mutex}; + + if (current_index <= IgnoreFrames) { + return 0; + } + const double sum = std::accumulate(perf_history.begin() + IgnoreFrames, + perf_history.begin() + current_index, 0); + return sum / (current_index - IgnoreFrames); +} + PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) { std::lock_guard lock{object_mutex}; diff --git a/src/core/perf_stats.h b/src/core/perf_stats.h index 222ac1a63..d9a64f072 100644 --- a/src/core/perf_stats.h +++ b/src/core/perf_stats.h @@ -4,7 +4,9 @@ #pragma once +#include <array> #include <chrono> +#include <cstddef> #include <mutex> #include "common/common_types.h" @@ -27,6 +29,10 @@ struct PerfStatsResults { */ class PerfStats { public: + explicit PerfStats(u64 title_id); + + ~PerfStats(); + using Clock = std::chrono::high_resolution_clock; void BeginSystemFrame(); @@ -36,13 +42,26 @@ public: PerfStatsResults GetAndResetStats(std::chrono::microseconds current_system_time_us); /** + * Returns the Arthimetic Mean of all frametime 
values stored in the performance history. + */ + double GetMeanFrametime(); + + /** * Gets the ratio between walltime and the emulated time of the previous system frame. This is * useful for scaling inputs or outputs moving between the two time domains. */ double GetLastFrameTimeScale(); private: - std::mutex object_mutex; + std::mutex object_mutex{}; + + /// Title ID for the game that is running. 0 if there is no game running yet + u64 title_id{0}; + /// Current index for writing to the perf_history array + std::size_t current_index{0}; + /// Stores an hour of historical frametime data useful for processing and tracking performance + /// regressions with code changes. + std::array<double, 216000> perf_history = {}; /// Point when the cumulative counters were reset Clock::time_point reset_point = Clock::now(); diff --git a/src/core/settings.h b/src/core/settings.h index 6638ce8f9..d4b70ec4c 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -409,6 +409,7 @@ struct Values { float volume; // Debugging + bool record_frame_times; bool use_gdbstub; u16 gdbstub_port; std::string program_args; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c7a3c85a0..fb3d1112c 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -541,7 +541,7 @@ void Maxwell3D::ProcessSyncPoint() { } void Maxwell3D::DrawArrays() { - LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 203e7758c..28272ef6f 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -566,6 +566,13 @@ enum class 
ImageAtomicOperation : u64 { Exch = 8, }; +enum class ShuffleOperation : u64 { + Idx = 0, // shuffleNV + Up = 1, // shuffleUpNV + Down = 2, // shuffleDownNV + Bfly = 3, // shuffleXorNV +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -600,6 +607,15 @@ union Instruction { } vote; union { + BitField<30, 2, ShuffleOperation> operation; + BitField<48, 3, u64> pred48; + BitField<28, 1, u64> is_index_imm; + BitField<29, 1, u64> is_mask_imm; + BitField<20, 5, u64> index_imm; + BitField<34, 13, u64> mask_imm; + } shfl; + + union { BitField<8, 8, Register> gpr; BitField<20, 24, s64> offset; } gmem; @@ -1547,6 +1563,7 @@ public: BRK, DEPBAR, VOTE, + SHFL, BFE_C, BFE_R, BFE_IMM, @@ -1842,6 +1859,7 @@ private: INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), + INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 909ccb82c..0dbc4c02f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn std::string source = "#version 430 core\n" "#extension GL_ARB_separate_shader_objects : enable\n" "#extension GL_NV_gpu_shader5 : enable\n" - "#extension GL_NV_shader_thread_group : enable\n"; + "#extension GL_NV_shader_thread_group : enable\n" + "#extension GL_NV_shader_thread_shuffle : enable\n"; if (entries.shader_viewport_layer_array) { source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } diff --git 
a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 14834d86a..76439e7ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1021,10 +1021,10 @@ private: return {std::move(temporary), value.GetType()}; } - Expression GetOutputAttribute(const AbufNode* abuf) { + std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { switch (const auto attribute = abuf->GetIndex()) { case Attribute::Index::Position: - return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}; + return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}}; case Attribute::Index::LayerViewportPointSize: switch (abuf->GetElement()) { case 0: @@ -1034,25 +1034,25 @@ private: if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { return {}; } - return {"gl_Layer", Type::Int}; + return {{"gl_Layer", Type::Int}}; case 2: if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { return {}; } - return {"gl_ViewportIndex", Type::Int}; + return {{"gl_ViewportIndex", Type::Int}}; case 3: UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); - return {"gl_PointSize", Type::Float}; + return {{"gl_PointSize", Type::Float}}; } return {}; case Attribute::Index::ClipDistances0123: - return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}; + return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}}; case Attribute::Index::ClipDistances4567: - return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}; + return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; default: if (IsGenericAttribute(attribute)) { - return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), - Type::Float}; + return { + {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; } 
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); return {}; @@ -1292,7 +1292,11 @@ private: target = {GetRegister(gpr->GetIndex()), Type::Float}; } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - target = GetOutputAttribute(abuf); + auto output = GetOutputAttribute(abuf); + if (!output) { + return {}; + } + target = std::move(*output); } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { if (stage == ProgramType::Compute) { LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); @@ -1953,8 +1957,7 @@ private: Expression BallotThread(Operation operation) { const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia warp intrinsics are not available and its required by a shader"); + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); // Stub on non-Nvidia devices by simulating all threads voting the same as the active // one. return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; @@ -1965,8 +1968,7 @@ private: Expression Vote(Operation operation, const char* func) { const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia vote intrinsics are not available and its required by a shader"); + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); // Stub with a warp size of one. return {value, Type::Bool}; } @@ -1983,15 +1985,54 @@ private: Expression VoteEqual(Operation operation) { if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia vote intrinsics are not available and its required by a shader"); - // We must return true here since a stub for a theoretical warp size of 1 will always - // return an equal result for all its votes. 
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); + // We must return true here since a stub for a theoretical warp size of 1. + // This will always return an equal result across all votes. return {"true", Type::Bool}; } return Vote(operation, "allThreadsEqualNV"); } + template <const std::string_view& func> + Expression Shuffle(Operation operation) { + const std::string value = VisitOperand(operation, 0).AsFloat(); + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); + // On a "single-thread" device we are either on the same thread or out of bounds. Both + // cases return the passed value. + return {value, Type::Float}; + } + + const std::string index = VisitOperand(operation, 1).AsUint(); + const std::string width = VisitOperand(operation, 2).AsUint(); + return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; + } + + template <const std::string_view& func> + Expression InRangeShuffle(Operation operation) { + const std::string index = VisitOperand(operation, 0).AsUint(); + const std::string width = VisitOperand(operation, 1).AsUint(); + if (!device.HasWarpIntrinsics()) { + // On a "single-thread" device we are only in bounds when the requested index is 0. 
+ return {fmt::format("({} == 0U)", index), Type::Bool}; + } + + const std::string in_range = code.GenerateTemporary(); + code.AddLine("bool {};", in_range); + code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); + return {in_range, Type::Bool}; + } + + struct Func final { + Func() = delete; + ~Func() = delete; + + static constexpr std::string_view ShuffleIndexed = "shuffleNV"; + static constexpr std::string_view ShuffleUp = "shuffleUpNV"; + static constexpr std::string_view ShuffleDown = "shuffleDownNV"; + static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; + }; + static constexpr std::array operation_decompilers = { &GLSLDecompiler::Assign, @@ -2154,6 +2195,16 @@ private: &GLSLDecompiler::VoteAll, &GLSLDecompiler::VoteAny, &GLSLDecompiler::VoteEqual, + + &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, + &GLSLDecompiler::Shuffle<Func::ShuffleUp>, + &GLSLDecompiler::Shuffle<Func::ShuffleDown>, + &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, + + &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ea77dd211..9ed738171 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -145,7 +145,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::None: return GL_LINEAR; case Tegra::Texture::TextureMipmapFilter::Nearest: - return GL_NEAREST_MIPMAP_LINEAR; + return GL_LINEAR_MIPMAP_NEAREST; case Tegra::Texture::TextureMipmapFilter::Linear: return GL_LINEAR_MIPMAP_LINEAR; } @@ -157,7 +157,7 @@ inline GLenum 
TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Nearest: return GL_NEAREST_MIPMAP_NEAREST; case Tegra::Texture::TextureMipmapFilter::Linear: - return GL_LINEAR_MIPMAP_NEAREST; + return GL_NEAREST_MIPMAP_LINEAR; } } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b9153934e..f7fbbb6e4 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1127,6 +1127,46 @@ private: return {}; } + Id ShuffleIndexed(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id ShuffleUp(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id ShuffleDown(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id ShuffleButterfly(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id InRangeShuffleIndexed(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id InRangeShuffleUp(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id InRangeShuffleDown(Operation) { + UNIMPLEMENTED(); + return {}; + } + + Id InRangeShuffleButterfly(Operation) { + UNIMPLEMENTED(); + return {}; + } + Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, const std::string& name) { const Id id = OpVariable(type, storage); @@ -1431,6 +1471,16 @@ private: &SPIRVDecompiler::VoteAll, &SPIRVDecompiler::VoteAny, &SPIRVDecompiler::VoteEqual, + + &SPIRVDecompiler::ShuffleIndexed, + &SPIRVDecompiler::ShuffleUp, + &SPIRVDecompiler::ShuffleDown, + &SPIRVDecompiler::ShuffleButterfly, + + &SPIRVDecompiler::InRangeShuffleIndexed, + &SPIRVDecompiler::InRangeShuffleUp, + &SPIRVDecompiler::InRangeShuffleDown, + &SPIRVDecompiler::InRangeShuffleButterfly, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index 04ca74f46..a8e481b3c 100644 --- 
a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -13,6 +13,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; +using Tegra::Shader::ShuffleOperation; using Tegra::Shader::VoteOperation; namespace { @@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { SetPredicate(bb, instr.vote.dest_pred, vote); break; } + case OpCode::Id::SHFL: { + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + Node width = [&] { + // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has + // been done reversing Nvidia's math. It won't work on all cases due to SHFL having + // different parameters that don't properly map to GLSL's interface, but it should work + // for cases emitted by Nvidia's compiler. + if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation( + OperationCode::ILogicalShiftRight, + Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), + Immediate(8)); + } else { + return Operation(OperationCode::ILogicalShiftRight, + Operation(OperationCode::IAdd, Immediate(0x201F), + Operation(OperationCode::INegate, std::move(mask))), + Immediate(8)); + } + }(); + + const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { + switch (instr.shfl.operation) { + case ShuffleOperation::Idx: + return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; + case ShuffleOperation::Up: + return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + case ShuffleOperation::Down: + return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + case ShuffleOperation::Bfly: + return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + } + UNREACHABLE_MSG("Invalid SHFL operation: {}", + static_cast<u64>(instr.shfl.operation.Value())); + return {}; + }(); + + // 
Setting the predicate before the register is intentional to avoid overwriting. + Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); + SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + SetRegister( + bb, instr.gpr0, + Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + break; + } default: UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); break; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 425111cc4..abf2cb1ab 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -181,6 +181,16 @@ enum class OperationCode { VoteAny, /// (bool) -> bool VoteEqual, /// (bool) -> bool + ShuffleIndexed, /// (uint value, uint index, uint width) -> uint + ShuffleUp, /// (uint value, uint index, uint width) -> uint + ShuffleDown, /// (uint value, uint index, uint width) -> uint + ShuffleButterfly, /// (uint value, uint index, uint width) -> uint + + InRangeShuffleIndexed, /// (uint index, uint width) -> bool + InRangeShuffleUp, /// (uint index, uint width) -> bool + InRangeShuffleDown, /// (uint index, uint width) -> bool + InRangeShuffleButterfly, /// (uint index, uint width) -> bool + Amount, }; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index f594106bf..3f54f54fb 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -466,6 +466,9 @@ void Config::ReadDataStorageValues() { void Config::ReadDebuggingValues() { qt_config->beginGroup(QStringLiteral("Debugging")); + // Intentionally not using the QT default setting as this is intended to be changed in the ini + Settings::values.record_frame_times = + qt_config->value(QStringLiteral("record_frame_times"), false).toBool(); Settings::values.use_gdbstub = ReadSetting(QStringLiteral("use_gdbstub"), false).toBool(); Settings::values.gdbstub_port = 
ReadSetting(QStringLiteral("gdbstub_port"), 24689).toInt(); Settings::values.program_args = @@ -879,6 +882,8 @@ void Config::SaveDataStorageValues() { void Config::SaveDebuggingValues() { qt_config->beginGroup(QStringLiteral("Debugging")); + // Intentionally not using the QT default setting as this is intended to be changed in the ini + qt_config->setValue(QStringLiteral("record_frame_times"), Settings::values.record_frame_times); WriteSetting(QStringLiteral("use_gdbstub"), Settings::values.use_gdbstub, false); WriteSetting(QStringLiteral("gdbstub_port"), Settings::values.gdbstub_port, 24689); WriteSetting(QStringLiteral("program_args"), diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp index 7613197f2..f2977719c 100644 --- a/src/yuzu/configuration/configure_input.cpp +++ b/src/yuzu/configuration/configure_input.cpp @@ -182,6 +182,8 @@ void ConfigureInput::UpdateUIEnabled() { players_configure[i]->setEnabled(players_controller[i]->currentIndex() != 0); } + ui->handheld_connected->setChecked(ui->handheld_connected->isChecked() && + !ui->use_docked_mode->isChecked()); ui->handheld_connected->setEnabled(!ui->use_docked_mode->isChecked()); ui->handheld_configure->setEnabled(ui->handheld_connected->isChecked() && !ui->use_docked_mode->isChecked()); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 8304c6517..1dcfac258 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -54,6 +54,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include <QProgressDialog> #include <QShortcut> #include <QStatusBar> +#include <QSysInfo> #include <QtConcurrent/QtConcurrent> #include <fmt/format.h> @@ -66,6 +67,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "common/microprofile.h" #include "common/scm_rev.h" #include "common/scope_exit.h" +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/cpu_detect.h" +#endif #include "common/telemetry.h" 
#include "core/core.h" #include "core/crypto/key_manager.h" @@ -205,6 +209,10 @@ GMainWindow::GMainWindow() LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, Common::g_scm_desc); +#ifdef ARCHITECTURE_x86_64 + LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); +#endif + LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); UpdateWindowTitle(); show(); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 067d58d80..5cadfd191 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -374,6 +374,8 @@ void Config::ReadValues() { Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false); // Debugging + Settings::values.record_frame_times = + sdl2_config->GetBoolean("Debugging", "record_frame_times", false); Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false); Settings::values.gdbstub_port = static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689)); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 0cfc111a6..f9f244522 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -213,6 +213,8 @@ region_value = log_filter = *:Trace [Debugging] +# Record frame time data, can be found in the log directory. Boolean value +record_frame_times = # Port for listening to GDB connections. use_gdbstub=false gdbstub_port=24689 |