diff options
Diffstat (limited to 'src/video_core')
79 files changed, 53 insertions, 19513 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e4de55f4d..c5ce71706 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,7 +29,6 @@ add_library(video_core STATIC dirty_flags.h dma_pusher.cpp dma_pusher.h - engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_interface.h engines/engine_upload.cpp @@ -61,8 +60,6 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h - guest_driver.cpp - guest_driver.h memory_manager.cpp memory_manager.h query_cache.h @@ -71,8 +68,6 @@ add_library(video_core STATIC rasterizer_interface.h renderer_base.cpp renderer_base.h - renderer_opengl/gl_arb_decompiler.cpp - renderer_opengl/gl_arb_decompiler.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_device.cpp @@ -85,10 +80,6 @@ add_library(video_core STATIC renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h - renderer_opengl/gl_shader_decompiler.cpp - renderer_opengl/gl_shader_decompiler.h - renderer_opengl/gl_shader_disk_cache.cpp - renderer_opengl/gl_shader_disk_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp @@ -128,8 +119,6 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h - renderer_vulkan/vk_graphics_pipeline.cpp - renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp @@ -142,8 +131,6 @@ add_library(video_core STATIC renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - renderer_vulkan/vk_shader_decompiler.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_staging_buffer_pool.cpp @@ -159,57 +146,6 @@ add_library(video_core STATIC shader_cache.h shader_notify.cpp shader_notify.h - shader/decode/arithmetic.cpp - shader/decode/arithmetic_immediate.cpp - shader/decode/bfe.cpp - shader/decode/bfi.cpp - shader/decode/shift.cpp - shader/decode/arithmetic_integer.cpp - shader/decode/arithmetic_integer_immediate.cpp - shader/decode/arithmetic_half.cpp - shader/decode/arithmetic_half_immediate.cpp - shader/decode/ffma.cpp - shader/decode/hfma2.cpp - shader/decode/conversion.cpp - shader/decode/memory.cpp - shader/decode/texture.cpp - shader/decode/image.cpp - shader/decode/float_set_predicate.cpp - shader/decode/integer_set_predicate.cpp - shader/decode/half_set_predicate.cpp - shader/decode/predicate_set_register.cpp - shader/decode/predicate_set_predicate.cpp - shader/decode/register_set_predicate.cpp - shader/decode/float_set.cpp - shader/decode/integer_set.cpp - shader/decode/half_set.cpp - shader/decode/video.cpp - shader/decode/warp.cpp - shader/decode/xmad.cpp - shader/decode/other.cpp - shader/ast.cpp - shader/ast.h - shader/async_shaders.cpp - shader/async_shaders.h - shader/compiler_settings.cpp - shader/compiler_settings.h - shader/control_flow.cpp - shader/control_flow.h - shader/decode.cpp - shader/expr.cpp - shader/expr.h - shader/memory_util.cpp - shader/memory_util.h - shader/node_helper.cpp - shader/node_helper.h - shader/node.h - shader/registry.cpp - shader/registry.h - shader/shader_ir.cpp - shader/shader_ir.h - shader/track.cpp - shader/transform_feedback.cpp - shader/transform_feedback.h surface.cpp surface.h texture_cache/accelerated_swizzle.cpp diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <type_traits> -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" -#include "video_core/textures/texture.h" - -namespace Tegra::Engines { - -struct SamplerDescriptor { - union { - u32 raw = 0; - BitField<0, 2, Tegra::Shader::TextureType> texture_type; - BitField<2, 3, Tegra::Texture::ComponentType> r_type; - BitField<5, 1, u32> is_array; - BitField<6, 1, u32> is_buffer; - BitField<7, 1, u32> is_shadow; - BitField<8, 3, Tegra::Texture::ComponentType> g_type; - BitField<11, 3, Tegra::Texture::ComponentType> b_type; - BitField<14, 3, Tegra::Texture::ComponentType> a_type; - BitField<17, 7, Tegra::Texture::TextureFormat> format; - }; - - bool operator==(const SamplerDescriptor& rhs) const noexcept { - return raw == rhs.raw; - } - - bool operator!=(const SamplerDescriptor& rhs) const noexcept { - return !operator==(rhs); - } - - static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { - using Tegra::Shader::TextureType; - SamplerDescriptor result; - - result.format.Assign(tic.format.Value()); - result.r_type.Assign(tic.r_type.Value()); - result.g_type.Assign(tic.g_type.Value()); - result.b_type.Assign(tic.b_type.Value()); - result.a_type.Assign(tic.a_type.Value()); - - switch (tic.texture_type.Value()) { - case Tegra::Texture::TextureType::Texture1D: - result.texture_type.Assign(TextureType::Texture1D); - return result; - case Tegra::Texture::TextureType::Texture2D: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::Texture3D: - result.texture_type.Assign(TextureType::Texture3D); - return result; - case Tegra::Texture::TextureType::TextureCubemap: - result.texture_type.Assign(TextureType::TextureCube); - return result; - case Tegra::Texture::TextureType::Texture1DArray: - result.texture_type.Assign(TextureType::Texture1D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DArray: - result.texture_type.Assign(TextureType::Texture2D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture1DBuffer: - result.texture_type.Assign(TextureType::Texture1D); - result.is_buffer.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DNoMipmap: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::TextureCubeArray: - result.texture_type.Assign(TextureType::TextureCube); - result.is_array.Assign(1); - return result; - default: - result.texture_type.Assign(TextureType::Texture2D); - return result; - } - } -}; -static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); - -class ConstBufferEngineInterface { -public: - virtual ~ConstBufferEngineInterface() = default; - virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; - virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; - virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const = 0; - virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; - virtual u32 GetBoundBuffer() const = 0; - - virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; - virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; -}; - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a9b75091e..cae93c470 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& buffer = launch_description.const_buffer_config[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); - return result; -} - -SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; - return AccessSampler(memory_manager.Read<u32>(tex_info_address)); -} - -SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); - - const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; - LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - - rasterizer->DispatchCompute(code_addr); + rasterizer->DispatchCompute(); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..0d7683c2d 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,7 +10,6 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/engines/shader_type.h" @@ -40,7 +39,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { +class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); @@ -209,23 +208,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - private: void ProcessLaunch(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..103a51fd0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } -u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - return memory_manager.Read<u32>(buffer.address + offset); -} - -SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; - const auto& tex_info_buffer = shader.const_buffers[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.address + offset; - return AccessSampler(memory_manager.Read<u32>(tex_info_address)); -} - -SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..cbf94412b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -17,7 +17,6 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" @@ -49,7 +48,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { +class Maxwell3D final : public EngineInterface { public: explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); ~Maxwell3D(); @@ -1424,23 +1423,6 @@ public: void FlushMMEInlineDraw(); - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - bool ShouldExecute() const { return execute_on; } diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp deleted file mode 100644 index f058f2744..000000000 --- a/src/video_core/guest_driver.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <limits> -#include <vector> - -#include "common/common_types.h" -#include "video_core/guest_driver.h" - -namespace VideoCore { - -void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) { - if (texture_handler_size) { - return; - } - const std::size_t size = bound_offsets.size(); - if (size < 2) { - return; - } - std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); - u32 min_val = std::numeric_limits<u32>::max(); - for (std::size_t i = 1; i < size; ++i) { - if (bound_offsets[i] == bound_offsets[i - 1]) { - continue; - } - const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; - min_val = std::min(min_val, new_min); - } - if (min_val > 2) { - return; - } - texture_handler_size = min_texture_handler_size * min_val; -} - -} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h deleted file mode 100644 index 21e569ba1..000000000 --- a/src/video_core/guest_driver.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <optional> -#include <vector> - -#include "common/common_types.h" - -namespace VideoCore { - -/** - * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect - * information necessary for impossible to avoid HLE methods like shader tracks as they are - * Entscheidungsproblems. - */ -class GuestDriverProfile { -public: - explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional<u32> texture_handler_size_) - : texture_handler_size{texture_handler_size_} {} - - void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); - - u32 GetTextureHandlerSize() const { - return texture_handler_size.value_or(default_texture_handler_size); - } - - bool IsTextureHandlerSizeKnown() const { - return texture_handler_size.has_value(); - } - -private: - // Minimum size of texture handler any driver can use. - static constexpr u32 min_texture_handler_size = 4; - - // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. - // Thus, certain drivers may squish the size. - static constexpr u32 default_texture_handler_size = 8; - - std::optional<u32> texture_handler_size = default_texture_handler_size; -}; - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 58014c1c3..b094fc064 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" -#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -45,7 +44,7 @@ public: virtual void Clear() = 0; /// Dispatches a compute shader invocation - virtual void DispatchCompute(GPUVAddr code_addr) = 0; + virtual void DispatchCompute() = 0; /// Resets the counter of a query virtual void ResetCounter(QueryType type) = 0; @@ -136,18 +135,5 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const DiskResourceLoadCallback& callback) {} - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() { - return guest_driver_profile; - } - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const { - return guest_driver_profile; - } - -private: - GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null @@ -1,2124 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <array> -#include <cstddef> -#include <string> -#include <string_view> -#include <utility> -#include <variant> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -// Predicates in the decompiled code follow the convention that -1 means true and 0 means false. -// GLASM lacks booleans, so they have to be implemented as integers. -// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to -// select between two values, because -1 will be evaluated as true and 0 as false. - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; -using Operation = const OperationNode&; - -constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; - -char Swizzle(std::size_t component) { - static constexpr std::string_view SWIZZLE{"xyzw"}; - return SWIZZLE.at(component); -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); -} - -std::string_view Modifiers(Operation operation) { - const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); - if (meta && meta->precise) { - return ".PREC"; - } - return ""; -} - -std::string_view GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return ""; - case PixelImap::Constant: - return "FLAT "; - case PixelImap::ScreenLinear: - return "NOPERSPECTIVE "; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; -} - -std::string_view ImageType(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "BUFFER"; - case Tegra::Shader::ImageType::Texture1DArray: - return "ARRAY1D"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "ARRAY2D"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - } - UNREACHABLE(); - return {}; -} - -std::string_view StackName(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "SSY"; - case MetaStackClass::Pbk: - return "PBK"; - } - UNREACHABLE(); - return ""; -}; - -std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - return "POINTS"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return "LINES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - return "LINES_ADJACENCY"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - return "TRIANGLES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return "TRIANGLES_ADJACENCY"; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return "POINTS"; - } -} - -std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "POINTS"; - case Tegra::Shader::OutputTopology::LineStrip: - return "LINE_STRIP"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "TRIANGLE_STRIP"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -std::string_view StageInputName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - case ShaderType::Geometry: - return "vertex"; - case ShaderType::Fragment: - return "fragment"; - case ShaderType::Compute: - return "invocation"; - default: - UNREACHABLE(); - return ""; - } -} - -std::string TextureType(const MetaTexture& meta) { - if (meta.sampler.is_buffer) { - return "BUFFER"; - } - std::string type; - if (meta.sampler.is_shadow) { - type += "SHADOW"; - } - if (meta.sampler.is_array) { - type += "ARRAY"; - } - type += [&meta] { - switch (meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return "1D"; - case Tegra::Shader::TextureType::Texture2D: - return "2D"; - case Tegra::Shader::TextureType::Texture3D: - return "3D"; - case Tegra::Shader::TextureType::TextureCube: - return "CUBE"; - } - UNREACHABLE(); - return "2D"; - }(); - return type; -} - -class ARBDecompiler final { -public: - explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier); - - std::string Code() const { - return shader_source; - } - -private: - void DefineGlobalMemory(); - - void DeclareHeader(); - void DeclareVertex(); - void DeclareGeometry(); - void DeclareFragment(); - void DeclareCompute(); - void DeclareInputAttributes(); - void DeclareOutputAttributes(); - void DeclareLocalMemory(); - void DeclareGlobalMemory(); - void DeclareConstantBuffers(); - void DeclareRegisters(); - void DeclareTemporaries(); - void DeclarePredicates(); - void DeclareInternalFlags(); - - void InitializeVariables(); - - void DecompileAST(); - void DecompileBranchMode(); - - void VisitAST(const ASTNode& node); - std::string VisitExpression(const Expr& node); - - void VisitBlock(const NodeBlock& bb); - - std::string Visit(const Node& node); - - std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation); - std::string BuildAoffi(Operation); - std::string GlobalMemoryPointer(const GmemNode& gmem); - void Exit(); - - std::string Assign(Operation); - std::string Select(Operation); - std::string FClamp(Operation); - std::string FCastHalf0(Operation); - std::string FCastHalf1(Operation); - std::string FSqrt(Operation); - std::string FSwizzleAdd(Operation); - std::string HAdd2(Operation); - std::string HMul2(Operation); - std::string HFma2(Operation); - std::string HAbsolute(Operation); - std::string HNegate(Operation); - std::string HClamp(Operation); - std::string HCastFloat(Operation); - std::string HUnpack(Operation); - std::string HMergeF32(Operation); - std::string HMergeH0(Operation); - std::string HMergeH1(Operation); - std::string HPack2(Operation); - std::string LogicalAssign(Operation); - std::string LogicalPick2(Operation); - std::string LogicalAnd2(Operation); - std::string FloatOrdered(Operation); - std::string FloatUnordered(Operation); - std::string LogicalAddCarry(Operation); - std::string Texture(Operation); - std::string TextureGather(Operation); - std::string TextureQueryDimensions(Operation); - std::string TextureQueryLod(Operation); - std::string TexelFetch(Operation); - std::string TextureGradient(Operation); - std::string ImageLoad(Operation); - std::string ImageStore(Operation); - std::string Branch(Operation); - std::string BranchIndirect(Operation); - std::string PushFlowStack(Operation); - std::string PopFlowStack(Operation); - std::string Exit(Operation); - std::string Discard(Operation); - std::string EmitVertex(Operation); - std::string EndPrimitive(Operation); - std::string InvocationId(Operation); - std::string YNegate(Operation); - std::string ThreadId(Operation); - std::string ShuffleIndexed(Operation); - std::string Barrier(Operation); - std::string MemoryBarrierGroup(Operation); - std::string MemoryBarrierGlobal(Operation); - - template <const std::string_view& op> - std::string Unary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); - return temporary; - } - - template <const std::string_view& op> - std::string Binary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1])); - return temporary; - } - - template <const std::string_view& op> - std::string Trinary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1]), Visit(operation[2])); - return temporary; - } - - template <const std::string_view& op, bool unordered> - std::string FloatComparison(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NE.x), -1;", temporary); - - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - if constexpr (unordered) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - } else if (op == SNE_F) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - } - return temporary; - } - - template <const std::string_view& op, bool is_nan> - std::string HalfComparison(Operation operation) { - std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - AddLine("UP2H.F {}, {};", tmp1, op_a); - AddLine("UP2H.F {}, {};", tmp2, op_b); - AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); - AddLine("TRUNC.U.CC RC.xy, {};", tmp1); - AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); - AddLine("MOV.S {}.x (NE.x), -1;", tmp1); - AddLine("MOV.S {}.y (NE.y), -1;", tmp1); - if constexpr (is_nan) { - AddLine("MOVC.F RC.x, {};", op_a); - AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); - AddLine("MOVC.F RC.x, {};", op_b); - AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); - } - return tmp1; - } - - template <const std::string_view& op, const std::string_view& type> - std::string AtomicImage(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - - AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, - image_id, ImageType(meta.image.type)); - return fmt::format("{}.x", coord); - } - - template <const std::string_view& op, const std::string_view& type> - std::string Atomic(Operation operation) { - std::string temporary = AllocTemporary(); - std::string address; - std::string_view opname; - bool robust = false; - if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - address = GlobalMemoryPointer(*gmem); - opname = "ATOM"; - robust = true; - } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { - address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); - opname = "ATOMS"; - } else { - UNREACHABLE(); - return "{0, 0, 0, 0}"; - } - if (robust) { - AddLine("IF NE.x;"); - } - AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); - if (robust) { - AddLine("ELSE;"); - AddLine("MOV.S {}, 0;", temporary); - AddLine("ENDIF;"); - } - return temporary; - } - - template <char type> - std::string Negate(Operation operation) { - std::string temporary = AllocTemporary(); - if constexpr (type == 'F') { - AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); - } else { - AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); - } - return temporary; - } - - template <char type> - std::string Absolute(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); - return temporary; - } - - template <char type> - std::string BitfieldInsert(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); - AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), - Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template <char type> - std::string BitfieldExtract(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); - AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template <char swizzle> - std::string LocalInvocationId(Operation) { - return fmt::format("invocation.localid.{}", swizzle); - } - - template <char swizzle> - std::string WorkGroupId(Operation) { - return fmt::format("invocation.groupid.{}", swizzle); - } - - template <char c1, char c2> - std::string ThreadMask(Operation) { - return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); - } - - template <typename... Args> - void AddExpression(std::string_view text, Args&&... args) { - shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...); - } - - template <typename... Args> - void AddLine(std::string_view text, Args&&... args) { - AddExpression(text, std::forward<Args>(args)...); - shader_source += '\n'; - } - - std::string AllocLongVectorTemporary() { - max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); - return fmt::format("L{}", num_long_temporaries++); - } - - std::string AllocLongTemporary() { - return fmt::format("{}.x", AllocLongVectorTemporary()); - } - - std::string AllocVectorTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}", num_temporaries++); - } - - std::string AllocTemporary() { - return fmt::format("{}.x", AllocVectorTemporary()); - } - - void ResetTemporaries() noexcept { - num_temporaries = 0; - num_long_temporaries = 0; - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - - std::size_t num_temporaries = 0; - std::size_t max_temporaries = 0; - - std::size_t num_long_temporaries = 0; - std::size_t max_long_temporaries = 0; - - std::map<GlobalMemoryBase, u32> global_memory_names; - - std::string shader_source; - - static constexpr std::string_view ADD_F32 = "ADD.F32"; - static constexpr std::string_view ADD_S = "ADD.S"; - static constexpr std::string_view ADD_U = "ADD.U"; - static constexpr std::string_view MUL_F32 = "MUL.F32"; - static constexpr std::string_view MUL_S = "MUL.S"; - static constexpr std::string_view MUL_U = "MUL.U"; - static constexpr std::string_view DIV_F32 = "DIV.F32"; - static constexpr std::string_view DIV_S = "DIV.S"; - static constexpr std::string_view DIV_U = "DIV.U"; - static constexpr std::string_view MAD_F32 = "MAD.F32"; - static constexpr std::string_view RSQ_F32 = "RSQ.F32"; - static constexpr std::string_view COS_F32 = "COS.F32"; - static constexpr std::string_view SIN_F32 = "SIN.F32"; - static constexpr std::string_view EX2_F32 = "EX2.F32"; - static constexpr std::string_view LG2_F32 = "LG2.F32"; - static constexpr std::string_view SLT_F = "SLT.F32"; - static constexpr std::string_view SLT_S = "SLT.S"; - static constexpr std::string_view SLT_U = "SLT.U"; - static constexpr std::string_view SEQ_F = "SEQ.F32"; - static constexpr std::string_view SEQ_S = "SEQ.S"; - static constexpr std::string_view SEQ_U = "SEQ.U"; - static constexpr std::string_view SLE_F = "SLE.F32"; - static constexpr std::string_view SLE_S = "SLE.S"; - static constexpr std::string_view SLE_U = "SLE.U"; - static constexpr std::string_view SGT_F = "SGT.F32"; - static constexpr std::string_view SGT_S = "SGT.S"; - static constexpr std::string_view SGT_U = "SGT.U"; - static constexpr std::string_view SNE_F = "SNE.F32"; - static constexpr std::string_view SNE_S = "SNE.S"; - static constexpr std::string_view SNE_U = "SNE.U"; - static constexpr std::string_view SGE_F = "SGE.F32"; - static constexpr std::string_view SGE_S = "SGE.S"; - static constexpr std::string_view SGE_U = "SGE.U"; - static constexpr std::string_view AND_S = "AND.S"; - static constexpr std::string_view AND_U = "AND.U"; - static constexpr std::string_view TRUNC_F = "TRUNC.F"; - static constexpr std::string_view TRUNC_S = "TRUNC.S"; - static constexpr std::string_view TRUNC_U = "TRUNC.U"; - static constexpr std::string_view SHL_S = "SHL.S"; - static constexpr std::string_view SHL_U = "SHL.U"; - static constexpr std::string_view SHR_S = "SHR.S"; - static constexpr std::string_view SHR_U = "SHR.U"; - static constexpr std::string_view OR_S = "OR.S"; - static constexpr std::string_view OR_U = "OR.U"; - static constexpr std::string_view XOR_S = "XOR.S"; - static constexpr std::string_view XOR_U = "XOR.U"; - static constexpr std::string_view NOT_S = "NOT.S"; - static constexpr std::string_view NOT_U = "NOT.U"; - static constexpr std::string_view BTC_S = "BTC.S"; - static constexpr std::string_view BTC_U = "BTC.U"; - static constexpr std::string_view BTFM_S = "BTFM.S"; - static constexpr std::string_view BTFM_U = "BTFM.U"; - static constexpr std::string_view ROUND_F = "ROUND.F"; - static constexpr std::string_view CEIL_F = "CEIL.F"; - static constexpr std::string_view FLR_F = "FLR.F"; - static constexpr std::string_view I2F_S = "I2F.S"; - static constexpr std::string_view I2F_U = "I2F.U"; - static constexpr std::string_view MIN_F = "MIN.F"; - static constexpr std::string_view MIN_S = "MIN.S"; - static constexpr std::string_view MIN_U = "MIN.U"; - static constexpr std::string_view MAX_F = "MAX.F"; - static constexpr std::string_view MAX_S = "MAX.S"; - static constexpr std::string_view MAX_U = "MAX.U"; - static constexpr std::string_view MOV_U = "MOV.U"; - static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; - static constexpr std::string_view TGALL_U = "TGALL.U"; - static constexpr std::string_view TGANY_U = "TGANY.U"; - static constexpr std::string_view TGEQ_U = "TGEQ.U"; - static constexpr std::string_view EXCH = "EXCH"; - static constexpr std::string_view ADD = "ADD"; - static constexpr std::string_view MIN = "MIN"; - static constexpr std::string_view MAX = "MAX"; - static constexpr std::string_view AND = "AND"; - static constexpr std::string_view OR = "OR"; - static constexpr std::string_view XOR = "XOR"; - static constexpr std::string_view U32 = "U32"; - static constexpr std::string_view S32 = "S32"; - - static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); - using DecompilerType = std::string (ARBDecompiler::*)(Operation); - static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { - &ARBDecompiler::Assign, - - &ARBDecompiler::Select, - - &ARBDecompiler::Binary<ADD_F32>, - &ARBDecompiler::Binary<MUL_F32>, - &ARBDecompiler::Binary<DIV_F32>, - &ARBDecompiler::Trinary<MAD_F32>, - &ARBDecompiler::Negate<'F'>, - &ARBDecompiler::Absolute<'F'>, - &ARBDecompiler::FClamp, - &ARBDecompiler::FCastHalf0, - &ARBDecompiler::FCastHalf1, - &ARBDecompiler::Binary<MIN_F>, - &ARBDecompiler::Binary<MAX_F>, - &ARBDecompiler::Unary<COS_F32>, - &ARBDecompiler::Unary<SIN_F32>, - &ARBDecompiler::Unary<EX2_F32>, - &ARBDecompiler::Unary<LG2_F32>, - &ARBDecompiler::Unary<RSQ_F32>, - &ARBDecompiler::FSqrt, - &ARBDecompiler::Unary<ROUND_F>, - &ARBDecompiler::Unary<FLR_F>, - &ARBDecompiler::Unary<CEIL_F>, - &ARBDecompiler::Unary<TRUNC_F>, - &ARBDecompiler::Unary<I2F_S>, - &ARBDecompiler::Unary<I2F_U>, - &ARBDecompiler::FSwizzleAdd, - - &ARBDecompiler::Binary<ADD_S>, - &ARBDecompiler::Binary<MUL_S>, - &ARBDecompiler::Binary<DIV_S>, - &ARBDecompiler::Negate<'S'>, - &ARBDecompiler::Absolute<'S'>, - &ARBDecompiler::Binary<MIN_S>, - &ARBDecompiler::Binary<MAX_S>, - - &ARBDecompiler::Unary<TRUNC_S>, - &ARBDecompiler::Unary<MOV_U>, - &ARBDecompiler::Binary<SHL_S>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<SHR_S>, - &ARBDecompiler::Binary<AND_S>, - &ARBDecompiler::Binary<OR_S>, - &ARBDecompiler::Binary<XOR_S>, - &ARBDecompiler::Unary<NOT_S>, - &ARBDecompiler::BitfieldInsert<'S'>, - &ARBDecompiler::BitfieldExtract<'S'>, - &ARBDecompiler::Unary<BTC_S>, - &ARBDecompiler::Unary<BTFM_S>, - - &ARBDecompiler::Binary<ADD_U>, - &ARBDecompiler::Binary<MUL_U>, - &ARBDecompiler::Binary<DIV_U>, - &ARBDecompiler::Binary<MIN_U>, - &ARBDecompiler::Binary<MAX_U>, - &ARBDecompiler::Unary<TRUNC_U>, - &ARBDecompiler::Unary<MOV_U>, - &ARBDecompiler::Binary<SHL_U>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<SHR_U>, - &ARBDecompiler::Binary<AND_U>, - &ARBDecompiler::Binary<OR_U>, - &ARBDecompiler::Binary<XOR_U>, - &ARBDecompiler::Unary<NOT_U>, - &ARBDecompiler::BitfieldInsert<'U'>, - &ARBDecompiler::BitfieldExtract<'U'>, - &ARBDecompiler::Unary<BTC_U>, - &ARBDecompiler::Unary<BTFM_U>, - - &ARBDecompiler::HAdd2, - &ARBDecompiler::HMul2, - &ARBDecompiler::HFma2, - &ARBDecompiler::HAbsolute, - &ARBDecompiler::HNegate, - &ARBDecompiler::HClamp, - &ARBDecompiler::HCastFloat, - &ARBDecompiler::HUnpack, - &ARBDecompiler::HMergeF32, - &ARBDecompiler::HMergeH0, - &ARBDecompiler::HMergeH1, - &ARBDecompiler::HPack2, - - &ARBDecompiler::LogicalAssign, - &ARBDecompiler::Binary<AND_U>, - &ARBDecompiler::Binary<OR_U>, - &ARBDecompiler::Binary<XOR_U>, - &ARBDecompiler::Unary<NOT_U>, - &ARBDecompiler::LogicalPick2, - &ARBDecompiler::LogicalAnd2, - - &ARBDecompiler::FloatComparison<SLT_F, false>, - &ARBDecompiler::FloatComparison<SEQ_F, false>, - &ARBDecompiler::FloatComparison<SLE_F, false>, - &ARBDecompiler::FloatComparison<SGT_F, false>, - &ARBDecompiler::FloatComparison<SNE_F, false>, - &ARBDecompiler::FloatComparison<SGE_F, false>, - &ARBDecompiler::FloatOrdered, - &ARBDecompiler::FloatUnordered, - &ARBDecompiler::FloatComparison<SLT_F, true>, - &ARBDecompiler::FloatComparison<SEQ_F, true>, - &ARBDecompiler::FloatComparison<SLE_F, true>, - &ARBDecompiler::FloatComparison<SGT_F, true>, - &ARBDecompiler::FloatComparison<SNE_F, true>, - &ARBDecompiler::FloatComparison<SGE_F, true>, - - &ARBDecompiler::Binary<SLT_S>, - &ARBDecompiler::Binary<SEQ_S>, - &ARBDecompiler::Binary<SLE_S>, - &ARBDecompiler::Binary<SGT_S>, - &ARBDecompiler::Binary<SNE_S>, - &ARBDecompiler::Binary<SGE_S>, - - &ARBDecompiler::Binary<SLT_U>, - &ARBDecompiler::Binary<SEQ_U>, - &ARBDecompiler::Binary<SLE_U>, - &ARBDecompiler::Binary<SGT_U>, - &ARBDecompiler::Binary<SNE_U>, - &ARBDecompiler::Binary<SGE_U>, - - &ARBDecompiler::LogicalAddCarry, - - &ARBDecompiler::HalfComparison<SLT_F, false>, - &ARBDecompiler::HalfComparison<SEQ_F, false>, - &ARBDecompiler::HalfComparison<SLE_F, false>, - &ARBDecompiler::HalfComparison<SGT_F, false>, - &ARBDecompiler::HalfComparison<SNE_F, false>, - &ARBDecompiler::HalfComparison<SGE_F, false>, - &ARBDecompiler::HalfComparison<SLT_F, true>, - &ARBDecompiler::HalfComparison<SEQ_F, true>, - &ARBDecompiler::HalfComparison<SLE_F, true>, - &ARBDecompiler::HalfComparison<SGT_F, true>, - &ARBDecompiler::HalfComparison<SNE_F, true>, - &ARBDecompiler::HalfComparison<SGE_F, true>, - - &ARBDecompiler::Texture, - &ARBDecompiler::Texture, - &ARBDecompiler::TextureGather, - &ARBDecompiler::TextureQueryDimensions, - &ARBDecompiler::TextureQueryLod, - &ARBDecompiler::TexelFetch, - &ARBDecompiler::TextureGradient, - - &ARBDecompiler::ImageLoad, - &ARBDecompiler::ImageStore, - - &ARBDecompiler::AtomicImage<ADD, U32>, - &ARBDecompiler::AtomicImage<AND, U32>, - &ARBDecompiler::AtomicImage<OR, U32>, - &ARBDecompiler::AtomicImage<XOR, U32>, - &ARBDecompiler::AtomicImage<EXCH, U32>, - - &ARBDecompiler::Atomic<EXCH, U32>, - &ARBDecompiler::Atomic<ADD, U32>, - &ARBDecompiler::Atomic<MIN, U32>, - &ARBDecompiler::Atomic<MAX, U32>, - &ARBDecompiler::Atomic<AND, U32>, - &ARBDecompiler::Atomic<OR, U32>, - &ARBDecompiler::Atomic<XOR, U32>, - - &ARBDecompiler::Atomic<EXCH, S32>, - &ARBDecompiler::Atomic<ADD, S32>, - &ARBDecompiler::Atomic<MIN, S32>, - &ARBDecompiler::Atomic<MAX, S32>, - &ARBDecompiler::Atomic<AND, S32>, - &ARBDecompiler::Atomic<OR, S32>, - &ARBDecompiler::Atomic<XOR, S32>, - - &ARBDecompiler::Atomic<ADD, U32>, - &ARBDecompiler::Atomic<MIN, U32>, - &ARBDecompiler::Atomic<MAX, U32>, - &ARBDecompiler::Atomic<AND, U32>, - &ARBDecompiler::Atomic<OR, U32>, - &ARBDecompiler::Atomic<XOR, U32>, - - &ARBDecompiler::Atomic<ADD, S32>, - &ARBDecompiler::Atomic<MIN, S32>, - &ARBDecompiler::Atomic<MAX, S32>, - &ARBDecompiler::Atomic<AND, S32>, - &ARBDecompiler::Atomic<OR, S32>, - &ARBDecompiler::Atomic<XOR, S32>, - - &ARBDecompiler::Branch, - &ARBDecompiler::BranchIndirect, - &ARBDecompiler::PushFlowStack, - &ARBDecompiler::PopFlowStack, - &ARBDecompiler::Exit, - &ARBDecompiler::Discard, - - &ARBDecompiler::EmitVertex, - &ARBDecompiler::EndPrimitive, - - &ARBDecompiler::InvocationId, - &ARBDecompiler::YNegate, - &ARBDecompiler::LocalInvocationId<'x'>, - &ARBDecompiler::LocalInvocationId<'y'>, - &ARBDecompiler::LocalInvocationId<'z'>, - &ARBDecompiler::WorkGroupId<'x'>, - &ARBDecompiler::WorkGroupId<'y'>, - &ARBDecompiler::WorkGroupId<'z'>, - - &ARBDecompiler::Unary<TGBALLOT_U>, - &ARBDecompiler::Unary<TGALL_U>, - &ARBDecompiler::Unary<TGANY_U>, - &ARBDecompiler::Unary<TGEQ_U>, - - &ARBDecompiler::ThreadId, - &ARBDecompiler::ThreadMask<'e', 'q'>, - &ARBDecompiler::ThreadMask<'g', 'e'>, - &ARBDecompiler::ThreadMask<'g', 't'>, - &ARBDecompiler::ThreadMask<'l', 'e'>, - &ARBDecompiler::ThreadMask<'l', 't'>, - &ARBDecompiler::ShuffleIndexed, - - &ARBDecompiler::Barrier, - &ARBDecompiler::MemoryBarrierGroup, - &ARBDecompiler::MemoryBarrierGlobal, - }; -}; - -ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { - DefineGlobalMemory(); - - AddLine("TEMP RC;"); - AddLine("TEMP FSWZA[4];"); - AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - AddLine("END"); - - const std::string code = std::move(shader_source); - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareLocalMemory(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareRegisters(); - DeclareTemporaries(); - DeclarePredicates(); - DeclareInternalFlags(); - - shader_source += code; -} - -std::string_view HeaderStageName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - return "vp"; - case ShaderType::Geometry: - return "gp"; - case ShaderType::Fragment: - return "fp"; - case ShaderType::Compute: - return "cp"; - default: - UNREACHABLE(); - return ""; - } -} - -void ARBDecompiler::DefineGlobalMemory() { - u32 binding = 0; - for (const auto& pair : ir.GetGlobalMemory()) { - const GlobalMemoryBase base = pair.first; - global_memory_names.emplace(base, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareHeader() { - AddLine("!!NV{}5.0", HeaderStageName(stage)); - // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D - AddLine("OPTION NV_internal;"); - AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_thread_group;"); - if (ir.UsesWarps() && device.HasWarpIntrinsics()) { - AddLine("OPTION NV_shader_thread_shuffle;"); - } - if (stage == ShaderType::Vertex) { - if (device.HasNvViewportArray2()) { - AddLine("OPTION NV_viewport_array2;"); - } - } - if (stage == ShaderType::Fragment) { - AddLine("OPTION ARB_draw_buffers;"); - } - if (device.HasImageLoadFormatted()) { - AddLine("OPTION EXT_shader_image_load_formatted;"); - } -} - -void ARBDecompiler::DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); -} - -void ARBDecompiler::DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const auto& header = ir.GetHeader(); - AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); - AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); - AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); - AddLine("ATTRIB vertex_position = vertex.position;"); -} - -void ARBDecompiler::DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - AddLine("OUTPUT result_color7 = result.color[7];"); - AddLine("OUTPUT result_color6 = result.color[6];"); - AddLine("OUTPUT result_color5 = result.color[5];"); - AddLine("OUTPUT result_color4 = result.color[4];"); - AddLine("OUTPUT result_color3 = result.color[3];"); - AddLine("OUTPUT result_color2 = result.color[2];"); - AddLine("OUTPUT result_color1 = result.color[1];"); - AddLine("OUTPUT result_color0 = result.color;"); -} - -void ARBDecompiler::DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const ComputeInfo& info = registry.GetComputeInfo(); - AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], - info.workgroup_size[2]); - if (info.shared_memory_size_in_words == 0) { - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - u32 size_in_bytes = info.shared_memory_size_in_words * 4; - if (size_in_bytes > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size_in_bytes, limit); - size_in_bytes = limit; - } - - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); -} - -void ARBDecompiler::DeclareInputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - const std::string_view stage_name = StageInputName(stage); - for (const auto attribute : ir.GetInputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - - std::string_view suffix; - if (stage == ShaderType::Fragment) { - const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix = GetInputFlags(input_mode); - } - AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, - index); - } -} - -void ARBDecompiler::DeclareOutputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); - } -} - -void ARBDecompiler::DeclareLocalMemory() { - u64 size = 0; - if (stage == ShaderType::Compute) { - size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - size = ir.GetHeader().GetLocalMemorySize(); - } - if (size == 0) { - return; - } - const u64 element_count = Common::AlignUp(size, 4) / 4; - AddLine("TEMP lmem[{}];", element_count); -} - -void ARBDecompiler::DeclareGlobalMemory() { - const size_t num_entries = ir.GetGlobalMemory().size(); - if (num_entries > 0) { - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); - } -} - -void ARBDecompiler::DeclareConstantBuffers() { - u32 binding = 0; - for (const auto& cbuf : ir.GetConstantBuffers()) { - AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - AddLine("TEMP R{};", gpr); - } -} - -void ARBDecompiler::DeclareTemporaries() { - for (std::size_t i = 0; i < max_temporaries; ++i) { - AddLine("TEMP T{};", i); - } - for (std::size_t i = 0; i < max_long_temporaries; ++i) { - AddLine("LONG TEMP L{};", i); - } -} - -void ARBDecompiler::DeclarePredicates() { - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("TEMP P{};", static_cast<u64>(pred)); - } -} - -void ARBDecompiler::DeclareInternalFlags() { - for (const char* name : INTERNAL_FLAG_NAMES) { - AddLine("TEMP {};", name); - } -} - -void ARBDecompiler::InitializeVariables() { - AddLine("MOV.F32 FSWZA[0], -1;"); - AddLine("MOV.F32 FSWZA[1], 1;"); - AddLine("MOV.F32 FSWZA[2], -1;"); - AddLine("MOV.F32 FSWZA[3], 0;"); - AddLine("MOV.F32 FSWZB[0], -1;"); - AddLine("MOV.F32 FSWZB[1], -1;"); - AddLine("MOV.F32 FSWZB[2], 1;"); - AddLine("MOV.F32 FSWZB[3], -1;"); - - if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { - AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); - } - for (const u32 gpr : ir.GetRegisters()) { - AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); - } - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); - } -} - -void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i); - } - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); - } - - InitializeVariables(); - - VisitAST(ir.GetASTProgram()); -} - -void ARBDecompiler::DecompileBranchMode() { - static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); - AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); - AddLine("TEMP SSY_TOP;"); - AddLine("TEMP PBK_TOP;"); - } - - AddLine("TEMP PC;"); - - if (!ir.IsFlowStackDisabled()) { - AddLine("MOV.U SSY_TOP.x, 0;"); - AddLine("MOV.U PBK_TOP.x, 0;"); - } - - InitializeVariables(); - - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); - const u32 first_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", first_address); - - AddLine("REP;"); - - std::size_t num_blocks = 0; - while (basic_block_it != basic_block_end) { - const auto& [address, bb] = *basic_block_it; - ++num_blocks; - - AddLine("SEQ.S.CC RC.x, PC.x, {};", address); - AddLine("IF NE.x;"); - - VisitBlock(bb); - - ++basic_block_it; - - if (basic_block_it != basic_block_end) { - const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); - if (!op || op->GetCode() != OperationCode::Branch) { - const u32 next_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", next_address); - AddLine("CONT;"); - } - } - - AddLine("ELSE;"); - } - AddLine("RET;"); - while (num_blocks--) { - AddLine("ENDIF;"); - } - - AddLine("ENDREP;"); -} - -void ARBDecompiler::VisitAST(const ASTNode& node) { - if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) { - const std::string condition = VisitExpression(if_then->condition); - ResetTemporaries(); - - AddLine("MOVC.U RC.x, {};", condition); - AddLine("IF NE.x;"); - for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("ENDIF;"); - } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) { - AddLine("ELSE;"); - for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { - VisitBlock(decoded->nodes); - } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); - ResetTemporaries(); - } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { - const std::string condition = VisitExpression(do_while->condition); - ResetTemporaries(); - AddLine("REP;"); - for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("MOVC.U RC.x, {};", condition); - AddLine("BRK (NE.x);"); - AddLine("ENDREP;"); - } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast_return->condition); - if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); - AddLine("IF NE.x;"); - ResetTemporaries(); - } - if (ast_return->kills) { - AddLine("KIL TR;"); - } else { - Exit(); - } - if (!is_true) { - AddLine("ENDIF;"); - } - } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) { - if (ExprIsTrue(ast_break->condition)) { - AddLine("BRK;"); - } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); - AddLine("BRK (NE.x);"); - ResetTemporaries(); - } - } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { - // Nothing to do - } else { - UNREACHABLE(); - } -} - -std::string ARBDecompiler::VisitExpression(const Expr& node) { - if (const auto expr = std::get_if<ExprAnd>(&*node)) { - std::string result = AllocTemporary(); - AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if<ExprOr>(&*node)) { - std::string result = AllocTemporary(); - AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if<ExprNot>(&*node)) { - std::string result = AllocTemporary(); - AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); - return result; - } - if (const auto expr = std::get_if<ExprPredicate>(&*node)) { - return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); - } - if (const auto expr = std::get_if<ExprCondCode>(&*node)) { - return Visit(ir.GetConditionCode(expr->cc)); - } - if (const auto expr = std::get_if<ExprVar>(&*node)) { - return fmt::format("F{}.x", expr->var_index); - } - if (const auto expr = std::get_if<ExprBoolean>(&*node)) { - return expr->value ? "0xffffffff" : "0"; - } - if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { - std::string result = AllocTemporary(); - AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); - return result; - } - UNREACHABLE(); - return "0"; -} - -void ARBDecompiler::VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } -} - -std::string ARBDecompiler::Visit(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - const std::size_t index = static_cast<std::size_t>(operation->GetCode()); - if (index >= OPERATION_DECOMPILERS.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", index); - return {}; - } - const auto decompiler = OPERATION_DECOMPILERS[index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if<GprNode>(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return "{0, 0, 0, 0}.x"; - } - return fmt::format("R{}.x", index); - } - - if (const auto cv = std::get_if<CustomVarNode>(&*node)) { - return fmt::format("CV{}.x", cv->GetIndex()); - } - - if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); - return temporary; - } - - if (const auto predicate = std::get_if<PredicateNode>(&*node)) { - std::string temporary = AllocTemporary(); - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - AddLine("MOV.S {}, -1;", temporary); - break; - case Tegra::Shader::Pred::NeverExecute: - AddLine("MOV.S {}, 0;", temporary); - break; - default: - AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); - break; - } - if (predicate->IsNegated()) { - AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); - } - return temporary; - } - - if (const auto abuf = std::get_if<AbufNode>(&*node)) { - if (abuf->IsPhysicalBuffer()) { - UNIMPLEMENTED_MSG("Physical buffers are not implemented"); - return "{0, 0, 0, 0}.x"; - } - - const Attribute::Index index = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (index) { - case Attribute::Index::Position: { - if (stage == ShaderType::Geometry) { - return fmt::format("{}_position[{}].{}", StageInputName(stage), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.position.{}", StageInputName(stage), swizzle); - } - } - case Attribute::Index::TessCoordInstanceIDVertexID: - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - return "vertex.instance"; - case 3: - return "vertex.id"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - break; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "fragment.pointcoord.x"; - case 1: - return "fragment.pointcoord.y"; - } - UNIMPLEMENTED(); - break; - case Attribute::Index::FrontFacing: { - ASSERT(stage == ShaderType::Fragment); - ASSERT(element == 3); - const std::string temporary = AllocVectorTemporary(); - AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); - AddLine("MOV.U.CC RC.x, -RC;"); - AddLine("MOV.S {}.x, 0;", temporary); - AddLine("MOV.S {}.x (NE.x), -1;", temporary); - return fmt::format("{}.x", temporary); - } - default: - if (IsGenericAttribute(index)) { - if (stage == ShaderType::Geometry) { - return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.attrib[{}].{}", StageInputName(stage), - GetGenericAttributeIndex(index), swizzle); - } - } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); - break; - } - return "{0, 0, 0, 0}.x"; - } - - if (const auto cbuf = std::get_if<CbufNode>(&*node)) { - std::string offset_string; - const auto& offset = cbuf->GetOffset(); - if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { - offset_string = std::to_string(imm->GetValue()); - } else { - offset_string = Visit(offset); - } - std::string temporary = AllocTemporary(); - AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); - return temporary; - } - - if (const auto gmem = std::get_if<GmemNode>(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV {}, 0;", temporary); - AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); - return temporary; - } - - if (const auto lmem = std::get_if<LmemNode>(&*node)) { - std::string temporary = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", temporary, temporary); - AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); - return temporary; - } - - if (const auto smem = std::get_if<SmemNode>(&*node)) { - std::string temporary = Visit(smem->GetAddress()); - AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); - return temporary; - } - - if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); - return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); - AddLine("IF NE.x;"); - VisitBlock(conditional->GetCode()); - AddLine("ENDIF;"); - return {}; - } - - if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) { - // Uncommenting this will generate invalid code. GLASM lacks comments. - // AddLine("// {}", cmt->GetText()); - return {}; - } - - UNIMPLEMENTED(); - return {}; -} - -std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.sampler.is_indexed); - - const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube; - const std::size_t count = operation.GetOperandsCount(); - std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); - ++i; - } - if (meta.sampler.is_shadow) { - std::string compare = Visit(meta.depth_compare); - if (is_extended) { - ASSERT(i == 4); - std::string extra_coord = AllocVectorTemporary(); - AddLine("MOV.F {}.x, {};", extra_coord, compare); - return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; - } - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); - ++i; - } - return {temporary, temporary, i}; -} - -std::string ARBDecompiler::BuildAoffi(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - if (meta.aoffi.empty()) { - return {}; - } - const std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (auto& node : meta.aoffi) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); - } - return fmt::format(", offset({})", temporary); -} - -std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { - // Read a bindless SSBO, return its address and set CC accordingly - // address = c[binding].xy - // length = c[binding].z - const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - - const std::string pointer = AllocLongVectorTemporary(); - std::string temporary = AllocTemporary(); - - AddLine("PK64.U {}, c[{}];", pointer, binding); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), - Visit(gmem.GetBaseAddress())); - AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); - // Compare offset to length and set CC - AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); - return fmt::format("{}.x", pointer); -} - -void ARBDecompiler::Exit() { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return; - } - - const auto safe_get_register = [this](u32 reg) -> std::string { - if (ir.GetRegisters().contains(reg)) { - return fmt::format("R{}.x", reg); - } - return "{0, 0, 0, 0}.x"; - }; - - const auto& header = ir.GetHeader(); - u32 current_reg = 0; - for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), - safe_get_register(current_reg)); - ++current_reg; - } - } - if (header.ps.omap.depth) { - AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); - } - - AddLine("RET;"); -} - -std::string ARBDecompiler::Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string dest_name; - if (const auto gpr = std::get_if<GprNode>(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op - return {}; - } - dest_name = fmt::format("R{}.x", gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (const Attribute::Index index = abuf->GetIndex()) { - case Attribute::Index::Position: - dest_name = fmt::format("result.position.{}", swizzle); - break; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return {}; - case 1: - case 2: - if (!device.HasNvViewportArray2()) { - LOG_ERROR( - Render_OpenGL, - "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); - return {}; - } - dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; - break; - case 3: - dest_name = "result.pointsize.x"; - break; - } - break; - case Attribute::Index::ClipDistances0123: - dest_name = fmt::format("result.clip[{}].x", element); - break; - case Attribute::Index::ClipDistances4567: - dest_name = fmt::format("result.clip[{}].x", element + 4); - break; - default: - if (!IsGenericAttribute(index)) { - UNREACHABLE(); - return {}; - } - dest_name = - fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); - break; - } - } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - const std::string address = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", address, address); - dest_name = fmt::format("lmem[{}].x", address); - } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); - ResetTemporaries(); - return {}; - } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - AddLine("IF NE.x;"); - AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); - AddLine("ENDIF;"); - ResetTemporaries(); - return {}; - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", dest_name, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::Select(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), - Visit(operation[2])); - return temporary; -} - -std::string ARBDecompiler::FClamp(Operation operation) { - // 1.0f in hex, replace with std::bit_cast on C++20 - static constexpr u32 POSITIVE_ONE = 0x3f800000; - - std::string temporary = AllocTemporary(); - const Node& value = operation[0]; - const Node& low = operation[1]; - const Node& high = operation[2]; - const auto* const imm_low = std::get_if<ImmediateNode>(&*low); - const auto* const imm_high = std::get_if<ImmediateNode>(&*high); - if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { - AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); - } else { - AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); - AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); - } - return temporary; -} - -std::string ARBDecompiler::FCastHalf0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FCastHalf1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); - AddLine("MOV {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FSqrt(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); - AddLine("RCP.F32 {}, {};", temporary, temporary); - return temporary; -} - -std::string ARBDecompiler::FSwizzleAdd(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); - return fmt::format("{}.x", temporary); - } - - AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); - AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); - AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); - AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); - AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); - AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); - AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HAdd2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HMul2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HFma2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string tmp3 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); - AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HAbsolute(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HNegate(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("MOVC.S RC.x, {};", Visit(operation[1])); - AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); - AddLine("MOVC.S RC.x, {};", Visit(operation[2])); - AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HClamp(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HCastFloat(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); - AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HUnpack(Operation operation) { - std::string operand = Visit(operation[0]); - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H0_H0: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H1_H1: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - } - UNREACHABLE(); - return "{0, 0, 0, 0}.x"; -} - -std::string ARBDecompiler::HMergeF32(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.x, {}.z;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.w;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HPack2(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); - AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if<PredicateNode>(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const Tegra::Shader::Pred index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = fmt::format("P{}.x", static_cast<u64>(index)); - } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { - const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); - target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", target, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::LogicalPick2(Operation operation) { - std::string temporary = AllocTemporary(); - const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); - AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); - return temporary; -} - -std::string ARBDecompiler::LogicalAnd2(Operation operation) { - std::string temporary = AllocTemporary(); - const std::string op = Visit(operation[0]); - AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); - return temporary; -} - -std::string ARBDecompiler::FloatOrdered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, -1;", temporary); - AddLine("MOV.S {} (NAN.x), 0;", temporary); - AddLine("MOV.S {} (NAN.y), 0;", temporary); - return temporary; -} - -std::string ARBDecompiler::FloatUnordered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NAN.x), -1;", temporary); - AddLine("MOV.S {} (NAN.y), -1;", temporary); - return temporary; -} - -std::string ARBDecompiler::LogicalAddCarry(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("IF CF.x;"); - AddLine("MOV.S {}, -1;", temporary); - AddLine("ENDIF;"); - return temporary; -} - -std::string ARBDecompiler::Texture(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string_view opcode = "TEX"; - std::string extra; - if (meta.bias) { - ASSERT(!meta.lod); - opcode = "TXB"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); - } else { - const std::string bias = AllocTemporary(); - AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); - extra = fmt::format(" {},", bias); - } - } - if (meta.lod) { - ASSERT(!meta.bias); - opcode = "TXL"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } else { - const std::string lod = AllocTemporary(); - AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); - extra = fmt::format(" {},", lod); - } - } - - AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGather(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string comp; - if (!meta.sampler.is_shadow) { - const auto& immediate = std::get<ImmediateNode>(*meta.component); - comp = fmt::format(".{}", Swizzle(immediate.GetValue())); - } - - AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; - AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryLod(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::size_t count = operation.GetOperandsCount(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); - AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); - AddLine("TRUNC.S {}, {};", temporary, temporary); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TexelFetch(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - if (!meta.sampler.is_buffer) { - ASSERT(swizzle < 4); - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), - BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGradient(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const std::string ddx = AllocVectorTemporary(); - const std::string ddy = AllocVectorTemporary(); - const std::string coord = std::get<1>(BuildCoords(operation)); - - const std::size_t num_components = meta.derivates.size() / 2; - for (std::size_t index = 0; index < num_components; ++index) { - const char swizzle = Swizzle(index); - AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); - AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); - } - - const std::string_view result = coord; - AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); - return fmt::format("{}.x", result); -} - -std::string ARBDecompiler::ImageLoad(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t count = operation.GetOperandsCount(); - const std::string_view type = ImageType(meta.image.type); - - const std::string temporary = AllocVectorTemporary(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); - AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::ImageStore(Operation operation) { - const auto& meta = std::get<MetaImage>(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - const std::string_view type = ImageType(meta.image.type); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); - return {}; -} - -std::string ARBDecompiler::Branch(Operation operation) { - const auto target = std::get<ImmediateNode>(*operation[0]); - AddLine("MOV.U PC.x, {};", target.GetValue()); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::BranchIndirect(Operation operation) { - AddLine("MOV.U PC.x, {};", Visit(operation[0])); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::PushFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); - const std::string_view stack_name = StackName(stack); - AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); - AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - return {}; -} - -std::string ARBDecompiler::PopFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const std::string_view stack_name = StackName(stack); - AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::Exit(Operation) { - Exit(); - return {}; -} - -std::string ARBDecompiler::Discard(Operation) { - AddLine("KIL TR;"); - return {}; -} - -std::string ARBDecompiler::EmitVertex(Operation) { - AddLine("EMIT;"); - return {}; -} - -std::string ARBDecompiler::EndPrimitive(Operation) { - AddLine("ENDPRIM;"); - return {}; -} - -std::string ARBDecompiler::InvocationId(Operation) { - return "primitive.invocation"; -} - -std::string ARBDecompiler::YNegate(Operation) { - LOG_WARNING(Render_OpenGL, "(STUBBED)"); - std::string temporary = AllocTemporary(); - AddLine("MOV.F {}, 1;", temporary); - return temporary; -} - -std::string ARBDecompiler::ThreadId(Operation) { - return fmt::format("{}.threadid", StageInputName(stage)); -} - -std::string ARBDecompiler::ShuffleIndexed(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - return Visit(operation[0]); - } - const std::string temporary = AllocVectorTemporary(); - AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), - Visit(operation[1])); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::Barrier(Operation) { - AddLine("BAR;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGroup(Operation) { - AddLine("MEMBAR.CTA;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { - AddLine("MEMBAR;"); - return {}; -} - -} // Anonymous namespace - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier) { - return ARBDecompiler(device, ir, registry, stage, identifier).Code(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <string> -#include <string_view> - -#include "common/common_types.h" - -namespace Tegra::Engines { -enum class ShaderType : u32; -} - -namespace VideoCommon::Shader { -class ShaderIR; -class Registry; -} // namespace VideoCommon::Shader - -namespace OpenGL { - -class Device; - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..3551dbdcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -54,40 +54,6 @@ namespace { constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template <typename Engine, typename Entry> -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - ShaderType shader_type, size_t index = 0) { - if constexpr (std::is_same_v<Entry, SamplerEntry>) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::Buffer; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window_) { - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } -} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { } } -void RasterizerOpenGL::SetupShaders(bool is_indexed) { - u32 clip_distances = 0; - - std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeGraphicsDescriptors(); - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = maxwell3d.regs.shader_config[index]; - const auto program{static_cast<Maxwell::ShaderProgram>(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - switch (program) { - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(0); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(0); - break; - default: - break; - } - continue; - } - // Currently this stages are not supported in the OpenGL backend. - // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl || - program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; - switch (program) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - program_manager.UseVertexShader(program_handle); - break; - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(program_handle); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(program_handle); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - break; - } - - // Stage indices are 0 - 5 - const size_t stage = index == 0 ? 0 : index - 1; - shaders[stage] = shader; - - SetupDrawTextures(shader, stage); - SetupDrawImages(shader, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); - - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : shader->GetEntries().global_memory_entries) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - - // Workaround for Intel drivers. - // When a clip distance is enabled but not set in the shader it crops parts of the screen - // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the - // clip distances only when it's written by a shader stage. - clip_distances |= shader->GetEntries().clip_distances; - - // When VertexA is enabled, we have dual vertex shaders - if (program == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } - } - SyncClipEnabled(clip_distances); - maxwell3d.dirty.flags[Dirty::Shaders] = false; - - buffer_cache.UpdateGraphicsBuffers(is_indexed); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader* const shader = shaders[stage]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - const auto& base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } -} - void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(title_id, stop_loading, callback); -} + const VideoCore::DiskResourceLoadCallback& callback) {} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); @@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - SetupShaders(is_indexed); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); @@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { gpu.TickWork(); } -void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - BindComputeTextures(kernel); - - const auto& entries = kernel->GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_memory_entries) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); - - const auto& launch_desc = kepler_compute.launch_description; - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); - ++num_queued_commands; +void RasterizerOpenGL::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeTextures(kernel); - SetupComputeImages(kernel); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - program_manager.BindCompute(kernel->GetHandle()); - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); -} - -void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, - GLuint base_image, size_t& image_view_index, - size_t& texture_index, size_t& image_index) { - const GLuint* const samplers = sampler_handles.data() + texture_index; - const GLuint* const textures = texture_handles.data() + texture_index; - const GLuint* const images = image_handles.data() + image_index; - - const size_t num_samplers = entries.samplers.size(); - for (const auto& sampler : entries.samplers) { - for (size_t i = 0; i < sampler.size; ++i) { - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); - texture_handles[texture_index++] = handle; - } - } - const size_t num_images = entries.images.size(); - for (size_t unit = 0; unit < num_images; ++unit) { - // TODO: Mark as modified - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); - image_handles[image_index] = handle; - ++image_index; - } - if (num_samplers > 0) { - glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); - glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); - } - if (num_images > 0) { - glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); - } -} - -void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().samplers) { - const auto shader_type = static_cast<ShaderType>(stage_index); - for (size_t index = 0; index < entry.size; ++index) { - const auto handle = - GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); - const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : kernel->GetEntries().samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); - const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().images) { - const auto shader_type = static_cast<ShaderType>(stage_index); - const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : shader->GetEntries().images) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); - image_view_indices.push_back(handle.image); - } -} - void RasterizerOpenGL::SyncState() { SyncViewport(); SyncRasterizeEnable(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..1f58f8791 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,11 +28,9 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core::Memory { @@ -81,7 +79,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -118,36 +116,11 @@ public: return num_queued_commands > 0; } - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - void BindComputeTextures(Shader* kernel); - - void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, - size_t& image_view_index, size_t& texture_index, size_t& image_index); - - /// Configures the current textures to use for the draw command. - void SetupDrawTextures(const Shader* shader, size_t stage_index); - - /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader* kernel); - - /// Configures images in a graphics shader. - void SetupDrawImages(const Shader* shader, size_t stage_index); - - /// Configures images in a compute shader. - void SetupComputeImages(const Shader* shader); - /// Syncs state to match guest's void SyncState(); @@ -230,8 +203,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - void SetupShaders(bool is_indexed); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -251,8 +222,6 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; - VideoCommon::Shader::AsyncShaders async_shaders; - boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..4dd166156 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -20,307 +20,19 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" namespace OpenGL { -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::GetUniqueIdentifier; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::Registry; -using VideoCommon::Shader::ShaderIR; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; - -namespace { - -constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; - -/// Gets the shader type from a Maxwell program type -constexpr GLenum GetGLShaderType(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_SHADER; - case ShaderType::Geometry: - return GL_GEOMETRY_SHADER; - case ShaderType::Fragment: - return GL_FRAGMENT_SHADER; - case ShaderType::Compute: - return GL_COMPUTE_SHADER; - default: - return GL_NONE; - } -} - -constexpr const char* GetShaderTypeName(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return "VS"; - case ShaderType::TesselationControl: - return "HS"; - case ShaderType::TesselationEval: - return "DS"; - case ShaderType::Geometry: - return "GS"; - case ShaderType::Fragment: - return "FS"; - case ShaderType::Compute: - return "CS"; - } - return "UNK"; -} - -constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { - switch (program_type) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - } - return {}; -} - -constexpr GLenum AssemblyEnum(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_PROGRAM_NV; - case ShaderType::TesselationControl: - return GL_TESS_CONTROL_PROGRAM_NV; - case ShaderType::TesselationEval: - return GL_TESS_EVALUATION_PROGRAM_NV; - case ShaderType::Geometry: - return GL_GEOMETRY_PROGRAM_NV; - case ShaderType::Fragment: - return GL_FRAGMENT_PROGRAM_NV; - case ShaderType::Compute: - return GL_COMPUTE_PROGRAM_NV; - } - return {}; -} - -std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { - return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); -} - -std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { - const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, - entry.graphics_info, entry.compute_info}; - auto registry = std::make_shared<Registry>(entry.type, info); - for (const auto& [address, value] : entry.keys) { - const auto [buffer, offset] = address; - registry->InsertKey(buffer, offset, value); - } - for (const auto& [offset, sampler] : entry.bound_samplers) { - registry->InsertBoundSampler(offset, sampler); - } - for (const auto& [key, sampler] : entry.bindless_samplers) { - const auto [buffer, offset] = key; - registry->InsertBindlessSampler(buffer, offset, sampler); - } - return registry; -} - -std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector<GLint> formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set<GLenum> supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast<GLenum>(format)); - } - return supported_formats; -} - -} // Anonymous namespace - -ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { - if (device.UseDriverCache()) { - // Ignore hint retrievable if we are using the driver cache - hint_retrievable = false; - } - const std::string shader_id = MakeShaderID(unique_identifier, shader_type); - LOG_INFO(Render_OpenGL, "{}", shader_id); - - auto program = std::make_shared<ProgramHandle>(); - - if (device.UseAssemblyShaders()) { - const std::string arb = - DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); - - GLuint& arb_prog = program->assembly_program.handle; - -// Commented out functions signal OpenGL errors but are compatible with apitrace. -// Use them only to capture and replay on apitrace. -#if 0 - glGenProgramsNV(1, &arb_prog); - glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), - reinterpret_cast<const GLubyte*>(arb.data())); -#else - glGenProgramsARB(1, &arb_prog); - glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast<GLsizei>(arb.size()), arb.data()); -#endif - const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "\n{}", arb); - } - } else { - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); - - program->source_program.Create(true, hint_retrievable, shader.handle); - } - - return program; -} - -Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built{is_built_} { - handle = program->assembly_program.handle; - if (handle == 0) { - handle = program->source_program.handle; - } - if (is_built) { - ASSERT(handle != 0); - } -} +Shader::Shader() = default; Shader::~Shader() = default; -GLuint Shader::GetHandle() const { - DEBUG_ASSERT(registry->IsConsistent()); - return handle; -} - -bool Shader::IsBuilt() const { - return is_built; -} - -void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { - program->source_program = std::move(new_program); - handle = program->source_program.handle; - is_built = true; -} - -void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { - program->assembly_program = std::move(new_program); - handle = program->assembly_program.handle; - is_built = true; -} - -std::unique_ptr<Shader> Shader::CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { - const auto shader_type = GetShaderType(program_type); - - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); - if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = - BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr<Shader>(new Shader(std::move(registry), - MakeEntries(params.device, ir, shader_type), - std::move(program), true)); - } else { - // Required for entries - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto entries = MakeEntries(params.device, ir, shader_type); - - async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, - std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, - COMPILER_SETTINGS, *registry, cpu_addr); - - auto program = std::make_shared<ProgramHandle>(); - return std::unique_ptr<Shader>( - new Shader(std::move(registry), std::move(entries), std::move(program), false)); - } -} - -std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code) { - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - const u64 uid = params.unique_identifier; - auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); - - ShaderDiskCacheEntry entry; - entry.type = ShaderType::Compute; - entry.code = std::move(code); - entry.unique_identifier = uid; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.compute_info = registry->GetComputeInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr<Shader>(new Shader(std::move(registry), - MakeEntries(params.device, ir, ShaderType::Compute), - std::move(program))); -} - -std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader) { - return std::unique_ptr<Shader>(new Shader( - precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); -} - ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; -void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - disk_cache.BindTitleID(title_id); - const std::optional transferable = disk_cache.LoadTransferable(); - - LOG_INFO(Render_OpenGL, "Total Shader Count: {}", - transferable.has_value() ? transferable->size() : 0); - - if (!transferable) { - return; - } - - std::vector<ShaderDiskCachePrecompiled> gl_cache; - if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { - // Only load precompiled cache when we are not using assembly shaders - gl_cache = disk_cache.LoadPrecompiled(); - } - const auto supported_formats = GetSupportedFormats(); - - // Track if precompiled cache was altered during loading to know if we have to - // serialize the virtual precompiled cache file back to the hard drive - bool precompiled_cache_altered = false; - - // Inform the frontend about shader build initialization - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); - } - - std::mutex mutex; - std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool gl_cache_failed = false; - - const auto find_precompiled = [&gl_cache](u64 id) { - return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); - }; - - const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { - const auto scope = context->Acquire(); - - for (std::size_t i = begin; i < end; ++i) { - if (stop_loading.stop_requested()) { - return; - } - const auto& entry = (*transferable)[i]; - const u64 uid = entry.unique_identifier; - const auto it = find_precompiled(uid); - const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; - - const bool is_compute = entry.type == ShaderType::Compute; - const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - - ProgramSharedPtr program; - if (precompiled_entry) { - // If the shader is precompiled, attempt to load it with - program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); - if (!program) { - gl_cache_failed = true; - } - } - if (!program) { - // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, uid, ir, *registry, true); - } - - PrecompiledShader shader; - shader.program = std::move(program); - shader.registry = std::move(registry); - shader.entries = MakeEntries(device, ir, entry.type); - - std::scoped_lock lock{mutex}; - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - transferable->size()); - } - runtime_cache.emplace(entry.unique_identifier, std::move(shader)); - } - }; - - const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; - const std::size_t bucket_size{transferable->size() / num_workers}; - std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); - std::vector<std::thread> threads(num_workers); - for (std::size_t i = 0; i < num_workers; ++i) { - const bool is_last_worker = i + 1 == num_workers; - const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; - - // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(worker, contexts[i].get(), start, end); - } - for (auto& thread : threads) { - thread.join(); - } - - if (gl_cache_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - return; - } - if (stop_loading.stop_requested()) { - return; - } - - if (device.UseAssemblyShaders() || device.UseDriverCache()) { - // Don't store precompiled binaries for assembly shaders or when using the driver cache - return; - } - - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw - // before precompiling them - - for (std::size_t i = 0; i < transferable->size(); ++i) { - const u64 id = (*transferable)[i].unique_identifier; - const auto it = find_precompiled(id); - if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->source_program.handle; - disk_cache.SavePrecompiled(id, program); - precompiled_cache_altered = true; - } - } - - if (precompiled_cache_altered) { - disk_cache.SaveVirtualPrecompiledFile(); - } -} - -ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set<GLenum>& supported_formats) { - if (!supported_formats.contains(precompiled_entry.binary_format)) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); - return {}; - } - - auto program = std::make_shared<ProgramHandle>(); - GLuint& handle = program->source_program.handle; - handle = glCreateProgram(); - glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), - static_cast<GLsizei>(precompiled_entry.binary.size())); - - GLint link_status; - glGetProgramiv(handle, GL_LINK_STATUS, &link_status); - if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); - return {}; - } - - return program; -} - -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!maxwell3d.dirty.flags[Dirty::Shaders]) { - auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; - if (last_shader->IsBuilt()) { - return last_shader; - } - } - - const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; - - if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { - auto completed_work = async_shaders.GetCompletedWork(); - for (auto& work : completed_work) { - Shader* shader = TryGet(work.cpu_address); - gpu.ShaderNotify().MarkShaderComplete(); - if (shader == nullptr) { - continue; - } - using namespace VideoCommon::Shader; - if (work.backend == AsyncShaders::Backend::OpenGL) { - shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); - } else if (work.backend == AsyncShaders::Backend::GLASM) { - shader->AsyncGLASMBuilt(std::move(work.program.glasm)); - } - - auto& registry = shader->GetRegistry(); - - ShaderDiskCacheEntry entry; - entry.type = work.shader_type; - entry.code = std::move(work.code); - entry.code_b = std::move(work.code_b); - entry.unique_identifier = work.uid; - entry.bound_buffer = registry.GetBoundBuffer(); - entry.graphics_info = registry.GetGraphicsInfo(); - entry.keys = registry.GetKeys(); - entry.bound_samplers = registry.GetBoundSamplers(); - entry.bindless_samplers = registry.GetBindlessSamplers(); - disk_cache.SaveEntry(std::move(entry)); - } - } - - // Look up shader in the cache based on address - const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; - if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { - return last_shaders[static_cast<std::size_t>(program)] = shader; - } - - const u8* const host_ptr{gpu_memory.GetPointer(address)}; - - // No shader found - create a new one - ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; - ProgramCode code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = gpu_memory.GetPointer(address_b); - code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); - } - const std::size_t code_size = code.size() * sizeof(u64); - - const u64 unique_identifier = GetUniqueIdentifier( - GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - - const ShaderParameters params{gpu, maxwell3d, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr<Shader> shader; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), - async_shaders, cpu_addr.value_or(0)); - } else { - shader = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = shader.get(); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code_size); - } else { - null_shader = std::move(shader); - } - - return last_shaders[static_cast<std::size_t>(program)] = result; -} - -Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; - - if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { - return kernel; - } - - // No kernel found, create a new one - const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; - ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; - const std::size_t code_size{code.size() * sizeof(u64)}; - const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - - const ShaderParameters params{gpu, kepler_compute, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr<Shader> kernel; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - kernel = Shader::CreateKernelFromMemory(params, std::move(code)); - } else { - kernel = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = kernel.get(); - if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code_size); - } else { - null_kernel = std::move(kernel); - } - return result; -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..ad3d15a76 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -19,10 +19,6 @@ #include "common/common_types.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -33,10 +29,6 @@ namespace Core::Frontend { class EmuWindow; } -namespace VideoCommon::Shader { -class AsyncShaders; -} - namespace OpenGL { class Device; @@ -44,77 +36,10 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct ProgramHandle { - OGLProgram source_program; - OGLAssemblyProgram assembly_program; -}; -using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; - -struct PrecompiledShader { - ProgramSharedPtr program; - std::shared_ptr<VideoCommon::Shader::Registry> registry; - ShaderEntries entries; -}; - -struct ShaderParameters { - Tegra::GPU& gpu; - Tegra::Engines::ConstBufferEngineInterface& engine; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - VAddr cpu_addr; - const u8* host_ptr; - u64 unique_identifier; -}; - -ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, - u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - bool hint_retrievable = false); - -class Shader final { +class Shader { public: + explicit Shader(); ~Shader(); - - /// Gets the GL program handle for the shader - GLuint GetHandle() const; - - bool IsBuilt() const; - - /// Gets the shader entries for the shader - const ShaderEntries& GetEntries() const { - return entries; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return *registry; - } - - /// Mark a OpenGL shader as built - void AsyncOpenGLBuilt(OGLProgram new_program); - - /// Mark a GLASM shader as built - void AsyncGLASMBuilt(OGLAssemblyProgram new_program); - - static std::unique_ptr<Shader> CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b, - VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); - - static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code); - - static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader); - -private: - explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built_ = true); - - std::shared_ptr<VideoCommon::Shader::Registry> registry; - ShaderEntries entries; - ProgramSharedPtr program; - GLuint handle = 0; - bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { @@ -126,36 +51,13 @@ public: Tegra::MemoryManager& gpu_memory_, const Device& device_); ~ShaderCacheOpenGL() override; - /// Loads disk cache for the current game - void LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback); - - /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders); - - /// Gets a compute kernel in the passed address - Shader* GetComputeKernel(GPUVAddr code_addr); - private: - ProgramSharedPtr GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set<GLenum>& supported_formats); - Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; - - ShaderDiskCacheOpenGL disk_cache; - std::unordered_map<u64, PrecompiledShader> runtime_cache; - - std::unique_ptr<Shader> null_shader; - std::unique_ptr<Shader> null_kernel; - - std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null @@ -1,2986 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <array> -#include <string> -#include <string_view> -#include <utility> -#include <variant> -#include <vector> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/div_ceil.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::Header; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::IpaMode; -using Tegra::Shader::IpaSampleMode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::TextureType; - -using namespace VideoCommon::Shader; -using namespace std::string_literals; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; - -constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; -constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; - -struct TextureOffset {}; -struct TextureDerivates {}; -using TextureArgument = std::pair<Type, Node>; -using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; - -constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); - -constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt -#define ftou floatBitsToUint -#define itof intBitsToFloat -#define utof uintBitsToFloat - -bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ - bvec2 is_nan1 = isnan(pair1); - bvec2 is_nan2 = isnan(pair2); - return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); -}} - -const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); -const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); -)"; - -class ShaderWriter final { -public: - void AddExpression(std::string_view text) { - DEBUG_ASSERT(scope >= 0); - if (!text.empty()) { - AppendIndentation(); - } - shader_source += text; - } - - // Forwards all arguments directly to libfmt. - // Note that all formatting requirements for fmt must be - // obeyed when using this function. (e.g. {{ must be used - // printing the character '{' is desirable. Ditto for }} and '}', - // etc). - template <typename... Args> - void AddLine(std::string_view text, Args&&... args) { - AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...)); - AddNewLine(); - } - - void AddNewLine() { - DEBUG_ASSERT(scope >= 0); - shader_source += '\n'; - } - - std::string GenerateTemporary() { - return fmt::format("tmp{}", temporary_index++); - } - - std::string GetResult() { - return std::move(shader_source); - } - - s32 scope = 0; - -private: - void AppendIndentation() { - shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); - } - - std::string shader_source; - u32 temporary_index = 1; -}; - -class Expression final { -public: - Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { - ASSERT(type != Type::Void); - } - Expression() : type{Type::Void} {} - - Type GetType() const { - return type; - } - - std::string GetCode() const { - return code; - } - - void CheckVoid() const { - ASSERT(type == Type::Void); - } - - std::string As(Type type_) const { - switch (type_) { - case Type::Bool: - return AsBool(); - case Type::Bool2: - return AsBool2(); - case Type::Float: - return AsFloat(); - case Type::Int: - return AsInt(); - case Type::Uint: - return AsUint(); - case Type::HalfFloat: - return AsHalfFloat(); - default: - UNREACHABLE_MSG("Invalid type"); - return code; - } - } - - std::string AsBool() const { - switch (type) { - case Type::Bool: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsBool2() const { - switch (type) { - case Type::Bool2: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsFloat() const { - switch (type) { - case Type::Float: - return code; - case Type::Uint: - return fmt::format("utof({})", code); - case Type::Int: - return fmt::format("itof({})", code); - case Type::HalfFloat: - return fmt::format("utof(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsInt() const { - switch (type) { - case Type::Float: - return fmt::format("ftoi({})", code); - case Type::Uint: - return fmt::format("int({})", code); - case Type::Int: - return code; - case Type::HalfFloat: - return fmt::format("int(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsUint() const { - switch (type) { - case Type::Float: - return fmt::format("ftou({})", code); - case Type::Uint: - return code; - case Type::Int: - return fmt::format("uint({})", code); - case Type::HalfFloat: - return fmt::format("packHalf2x16({})", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsHalfFloat() const { - switch (type) { - case Type::Float: - return fmt::format("unpackHalf2x16(ftou({}))", code); - case Type::Uint: - return fmt::format("unpackHalf2x16({})", code); - case Type::Int: - return fmt::format("unpackHalf2x16(int({}))", code); - case Type::HalfFloat: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - -private: - std::string code; - Type type{}; -}; - -const char* GetTypeString(Type type) { - switch (type) { - case Type::Bool: - return "bool"; - case Type::Bool2: - return "bvec2"; - case Type::Float: - return "float"; - case Type::Int: - return "int"; - case Type::Uint: - return "uint"; - case Type::HalfFloat: - return "vec2"; - default: - UNREACHABLE_MSG("Invalid type"); - return "<invalid type>"; - } -} - -const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } -} - -/// Describes primitive behavior on geometry shaders -std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { - switch (topology) { - case Maxwell::PrimitiveTopology::Points: - return {"points", 1}; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineStrip: - return {"lines", 2}; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return {"lines_adjacency", 4}; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return {"triangles", 3}; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return {"triangles_adjacency", 6}; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return {"points", 1}; - } -} - -/// Generates code to use for a swizzle operation. -constexpr const char* GetSwizzle(std::size_t element) { - constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; - return swizzle.at(element); -} - -constexpr const char* GetColorSwizzle(std::size_t element) { - constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; - return swizzle.at(element); -} - -/// Translate topology -std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "points"; - case Tegra::Shader::OutputTopology::LineStrip: - return "line_strip"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "triangle_strip"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { - return arithmetic->precise; - } - return false; -} - -bool IsPrecise(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - return IsPrecise(*operation); - } - return false; -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -constexpr bool IsLegacyTexCoord(Attribute::Index index) { - return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) && - static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7); -} - -constexpr Attribute::Index ToGenericAttribute(u64 value) { - return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); -} - -constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { - return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0); -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); -} - -constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "ssy"; - case MetaStackClass::Pbk: - return "pbk"; - } - return {}; -} - -std::string FlowStackName(MetaStackClass stack) { - return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); -} - -std::string FlowStackTopName(MetaStackClass stack) { - return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); -} - -struct GenericVaryingDescription { - std::string name; - u8 first_element = 0; - bool is_scalar = false; -}; - -class GLSLDecompiler final { -public: - explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier_, - std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, - identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); - } - } - - void Decompile() { - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareImages(); - DeclareSamplers(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareLocalMemory(); - DeclareRegisters(); - DeclarePredicates(); - DeclareInternalFlags(); - DeclareCustomVariables(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void main() {{"); - ++code.scope; - - if (stage == ShaderType::Vertex) { - code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - } - - std::string GetResult() { - return code.GetResult(); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - void DecompileBranchMode() { - // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; - code.AddLine("uint jmp_to = {}U;", first_address); - - // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems - // unlikely that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); - } - } - - code.AddLine("while (true) {{"); - ++code.scope; - - code.AddLine("switch (jmp_to) {{"); - - for (const auto& pair : ir.GetBasicBlocks()) { - const auto& [address, bb] = pair; - code.AddLine("case 0x{:X}U: {{", address); - ++code.scope; - - VisitBlock(bb); - - --code.scope; - code.AddLine("}}"); - } - - code.AddLine("default: return;"); - code.AddLine("}}"); - - --code.scope; - code.AddLine("}}"); - } - - void DecompileAST(); - - void DeclareHeader() { - if (!identifier.empty()) { - code.AddLine("// {}", identifier); - } - const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); - code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); - code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); - if (device.HasShaderBallot()) { - code.AddLine("#extension GL_ARB_shader_ballot : require"); - } - if (device.HasVertexViewportLayer()) { - code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); - } - if (device.HasImageLoadFormatted()) { - code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); - } - if (device.HasTextureShadowLod()) { - code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); - } - if (device.HasWarpIntrinsics()) { - code.AddLine("#extension GL_NV_gpu_shader5 : require"); - code.AddLine("#extension GL_NV_shader_thread_group : require"); - code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 - // operations) on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - code.AddLine("#pragma optionNV(fastmath off)"); - - code.AddNewLine(); - - code.AddLine(COMMON_DECLARATIONS); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - - DeclareVertexRedeclarations(); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - - const auto& info = registry.GetGraphicsInfo(); - const auto input_topology = info.primitive_topology; - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); - max_input_vertices = max_vertices; - code.AddLine("layout ({}) in;", glsl_topology); - - const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_output_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); - code.AddNewLine(); - - code.AddLine("in gl_PerVertex {{"); - ++code.scope; - code.AddLine("vec4 gl_Position;"); - --code.scope; - code.AddLine("}} gl_in[];"); - - DeclareVertexRedeclarations(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - if (ir.UsesLegacyVaryings()) { - code.AddLine("in gl_PerFragment {{"); - ++code.scope; - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_Color;"); - code.AddLine("vec4 gl_SecondaryColor;"); - --code.scope; - code.AddLine("}};"); - } - - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); - } - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const auto& info = registry.GetComputeInfo(); - if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (size > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size, limit); - size = limit; - } - - code.AddLine("shared uint smem[{}];", size / 4); - code.AddNewLine(); - } - code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", - info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - code.AddNewLine(); - } - - void DeclareVertexRedeclarations() { - code.AddLine("out gl_PerVertex {{"); - ++code.scope; - - auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); - if (!pos_xfb.empty()) { - pos_xfb = fmt::format("layout ({}) ", pos_xfb); - } - const char* pos_type = - FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); - code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); - - for (const auto attribute : ir.GetOutputAttributes()) { - if (attribute == Attribute::Index::ClipDistances0123 || - attribute == Attribute::Index::ClipDistances4567) { - code.AddLine("float gl_ClipDistance[];"); - break; - } - } - - if (stage != ShaderType::Geometry && - (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { - if (ir.UsesLayer()) { - code.AddLine("int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("int gl_ViewportIndex;"); - } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && - !device.HasVertexViewportLayer()) { - LOG_ERROR( - Render_OpenGL, - "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); - } - - if (ir.UsesPointSize()) { - code.AddLine("float gl_PointSize;"); - } - - if (ir.UsesLegacyVaryings()) { - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_FrontColor;"); - code.AddLine("vec4 gl_FrontSecondaryColor;"); - code.AddLine("vec4 gl_BackColor;"); - code.AddLine("vec4 gl_BackSecondaryColor;"); - } - - --code.scope; - code.AddLine("}};"); - code.AddNewLine(); - - if (stage == ShaderType::Geometry) { - if (ir.UsesLayer()) { - code.AddLine("out int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("out int gl_ViewportIndex;"); - } - } - code.AddNewLine(); - } - - void DeclareRegisters() { - const auto& registers = ir.GetRegisters(); - for (const u32 gpr : registers) { - code.AddLine("float {} = 0.0f;", GetRegister(gpr)); - } - if (!registers.empty()) { - code.AddNewLine(); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); - } - if (num_custom_variables > 0) { - code.AddNewLine(); - } - } - - void DeclarePredicates() { - const auto& predicates = ir.GetPredicates(); - for (const auto pred : predicates) { - code.AddLine("bool {} = false;", GetPredicate(pred)); - } - if (!predicates.empty()) { - code.AddNewLine(); - } - } - - void DeclareLocalMemory() { - u64 local_memory_size = 0; - if (stage == ShaderType::Compute) { - local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - local_memory_size = header.GetLocalMemorySize(); - } - if (local_memory_size == 0) { - return; - } - const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; - code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); - code.AddNewLine(); - } - - void DeclareInternalFlags() { - for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { - const auto flag_code = static_cast<InternalFlag>(flag); - code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); - } - code.AddNewLine(); - } - - const char* GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return "smooth"; - case PixelImap::Constant: - return "flat"; - case PixelImap::ScreenLinear: - return "noperspective"; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; - } - - void DeclareInputAttributes() { - if (ir.HasPhysicalAttributes()) { - const u32 num_inputs{GetNumPhysicalInputAttributes()}; - for (u32 i = 0; i < num_inputs; ++i) { - DeclareInputAttribute(ToGenericAttribute(i), true); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetInputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareInputAttribute(index, false); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 location{GetGenericAttributeIndex(index)}; - - std::string name{GetGenericInputAttribute(index)}; - if (stage == ShaderType::Geometry) { - name = "gs_" + name + "[]"; - } - - std::string suffix_; - if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetPixelImap(location)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix_ = GetInputFlags(input_mode); - } - - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); - } - - void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { - for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { - DeclareOutputAttribute(ToGenericAttribute(i)); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetOutputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareOutputAttribute(index); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return std::nullopt; - } - return it->second.components; - } - - std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - - const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, - tfb.offset, tfb.stride); - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - u8 element = 0; - while (element < 4) { - auto xfb = GetTransformFeedbackDecoration(index, element); - if (!xfb.empty()) { - xfb = fmt::format(", {}", xfb); - } - const std::size_t remainder = 4 - element; - const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); - const char* const type = FLOAT_TYPES.at(num_components - 1); - - const u32 location = GetGenericAttributeIndex(index); - - GenericVaryingDescription description; - description.first_element = static_cast<u8>(element); - description.is_scalar = num_components == 1; - description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); - if (element != 0 || num_components != 4) { - const std::string_view name_swizzle = swizzle.substr(element, num_components); - description.name = fmt::format("{}_{}", description.name, name_swizzle); - } - for (std::size_t i = 0; i < num_components; ++i) { - const u8 offset = static_cast<u8>(location * 4 + element + i); - varying_description.insert({offset, description}); - } - - code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, - xfb, type, description.name); - - element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); - } - } - - void DeclareConstantBuffers() { - u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, info] : ir.GetConstantBuffers()) { - const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); - const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, - GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareGlobalMemory() { - u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - // Since we don't know how the shader will use the shader, hint the driver to disable as - // much optimizations as possible - std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) { - qualifier += " readonly"; - } else if (usage.is_written && !usage.is_read) { - qualifier += " writeonly"; - } - - code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, - GetGlobalMemoryBlock(base)); - code.AddLine(" uint {}[];", GetGlobalMemory(base)); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareSamplers() { - u32 binding = device.GetBaseBindings(stage).sampler; - for (const auto& sampler : ir.GetSamplers()) { - const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); - binding += sampler.is_indexed ? sampler.size : 1; - - std::string sampler_type = [&]() { - if (sampler.is_buffer) { - return "samplerBuffer"; - } - switch (sampler.type) { - case TextureType::Texture1D: - return "sampler1D"; - case TextureType::Texture2D: - return "sampler2D"; - case TextureType::Texture3D: - return "sampler3D"; - case TextureType::TextureCube: - return "samplerCube"; - default: - UNREACHABLE(); - return "sampler2D"; - } - }(); - if (sampler.is_array) { - sampler_type += "Array"; - } - if (sampler.is_shadow) { - sampler_type += "Shadow"; - } - - if (!sampler.is_indexed) { - code.AddLine("{} {} {};", description, sampler_type, name); - } else { - code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); - } - } - if (!ir.GetSamplers().empty()) { - code.AddNewLine(); - } - } - - void DeclarePhysicalAttributeReader() { - if (!ir.HasPhysicalAttributes()) { - return; - } - code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); - ++code.scope; - code.AddLine("switch (physical_address) {{"); - - // Just declare generic attributes for now. - const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; - for (u32 index = 0; index < num_attributes; ++index) { - const auto attribute{ToGenericAttribute(index)}; - for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base = 0x80; - constexpr u32 generic_stride = 16; - constexpr u32 element_stride = 4; - const u32 address{generic_base + index * generic_stride + element * element_stride}; - - const bool declared = stage != ShaderType::Fragment || - header.ps.GetPixelImap(index) != PixelImap::Unused; - const std::string value = - declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; - code.AddLine("case 0x{:X}U: return {};", address, value); - } - } - - code.AddLine("default: return 0;"); - - code.AddLine("}}"); - --code.scope; - code.AddLine("}}"); - code.AddNewLine(); - } - - void DeclareImages() { - u32 binding = device.GetBaseBindings(stage).image; - for (const auto& image : ir.GetImages()) { - std::string qualifier = "coherent volatile"; - if (image.is_read && !image.is_written) { - qualifier += " readonly"; - } else if (image.is_written && !image.is_read) { - qualifier += " writeonly"; - } - - const char* format = image.is_atomic ? "r32ui, " : ""; - const char* type_declaration = GetImageTypeDeclaration(image.type); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, - qualifier, type_declaration, GetImage(image)); - } - if (!ir.GetImages().empty()) { - code.AddNewLine(); - } - } - - void VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node).CheckVoid(); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - const auto operation_index = static_cast<std::size_t>(operation->GetCode()); - if (operation_index >= operation_decompilers.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); - return {}; - } - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", operation_index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if<GprNode>(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {"0U", Type::Uint}; - } - return {GetRegister(index), Type::Float}; - } - - if (const auto cv = std::get_if<CustomVarNode>(&*node)) { - const u32 index = cv->GetIndex(); - return {GetCustomVariable(index), Type::Float}; - } - - if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { - const u32 value = immediate->GetValue(); - if (value < 10) { - // For eyecandy avoid using hex numbers on single digits - return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; - } - return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if<PredicateNode>(&*node)) { - const auto value = [&]() -> std::string { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return "true"; - case Tegra::Shader::Pred::NeverExecute: - return "false"; - default: - return GetPredicate(index); - } - }(); - if (predicate->IsNegated()) { - return {fmt::format("!({})", value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if<AbufNode>(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, - "Physical attributes in geometry shaders are not implemented"); - if (abuf->IsPhysicalBuffer()) { - return {fmt::format("ReadPhysicalAttribute({})", - Visit(abuf->GetPhysicalAddress()).AsUint()), - Type::Float}; - } - return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); - } - - if (const auto cbuf = std::get_if<CbufNode>(&*node)) { - const Node offset = cbuf->GetOffset(); - - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } - - // AMD's proprietary GLSL compiler emits ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, - GetSwizzle(swizzle)); - } - return {result, Type::Uint}; - } - - if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } - - if (const auto lmem = std::get_if<LmemNode>(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } - - if (const auto smem = std::get_if<SmemNode>(&*node)) { - return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } - - if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { - return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); - ++code.scope; - - VisitBlock(conditional->GetCode()); - - --code.scope; - code.AddLine("}}"); - return {}; - } - - if (const auto comment = std::get_if<CommentNode>(&*node)) { - code.AddLine("// " + comment->GetText()); - return {}; - } - - UNREACHABLE(); - return {}; - } - - Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { - const auto GeometryPass = [&](std::string_view name) { - if (stage == ShaderType::Geometry && buffer) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), - max_input_vertices.value()); - } - return std::string(name); - }; - - switch (attribute) { - case Attribute::Index::Position: - switch (stage) { - case ShaderType::Geometry: - return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), - GetSwizzle(element)), - Type::Float}; - case ShaderType::Fragment: - return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; - default: - UNREACHABLE(); - return {"0", Type::Int}; - } - case Attribute::Index::FrontColor: - return {"gl_Color"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::FrontSecondaryColor: - return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return {"gl_PointCoord.x", Type::Float}; - case 1: - return {"gl_PointCoord.y", Type::Float}; - case 2: - case 3: - return {"0.0f", Type::Float}; - } - UNREACHABLE(); - return {"0", Type::Int}; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. - return {"gl_InstanceID", Type::Int}; - case 3: - return {"gl_VertexID", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {"0", Type::Int}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - switch (element) { - case 3: - return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {"0", Type::Int}; - default: - if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), - Type::Float}; - } - if (IsLegacyTexCoord(attribute)) { - UNIMPLEMENTED_IF(stage == ShaderType::Geometry); - return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}; - } - break; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {"0", Type::Int}; - } - - Expression ApplyPrecise(Operation operation, std::string value, Type type) { - if (!IsPrecise(operation)) { - return {std::move(value), type}; - } - // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to - // be found in fragment shaders, so we disable precise there. There are vertex shaders that - // also fail to build but nobody seems to care about those. - // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; - - std::string temporary = code.GenerateTemporary(); - code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), - temporary, value); - return {std::move(temporary), type}; - } - - Expression VisitOperand(Operation operation, std::size_t operand_index) { - const auto& operand = operation[operand_index]; - const bool parent_precise = IsPrecise(operation); - const bool child_precise = IsPrecise(operand); - const bool child_trivial = !std::holds_alternative<OperationNode>(*operand); - if (!parent_precise || child_precise || child_trivial) { - return Visit(operand); - } - - Expression value = Visit(operand); - std::string temporary = code.GenerateTemporary(); - code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); - return {std::move(temporary), value.GetType()}; - } - - std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex()) { - case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return std::nullopt; - case 1: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_Layer", Type::Int}}; - case 2: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_ViewportIndex", Type::Int}}; - case 3: - return {{"gl_PointSize", Type::Float}}; - } - return std::nullopt; - case Attribute::Index::FrontColor: - return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::FrontSecondaryColor: - return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackColor: - return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackSecondaryColor: - return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::ClipDistances0123: - return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; - case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; - default: - if (IsGenericAttribute(attribute)) { - return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; - } - if (IsLegacyTexCoord(attribute)) { - return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); - return std::nullopt; - } - } - - Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, - Type type_a) { - std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - const std::string op_d = VisitOperand(operation, 3).As(type_d); - std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector<TextureIR>& extras, bool separate_dc = false) { - constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; - - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - const std::size_t count = operation.GetOperandsCount(); - const bool has_array = meta->sampler.is_array; - const bool has_shadow = meta->sampler.is_shadow; - const bool workaround_lod_array_shadow_as_grad = - !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube); - - std::string expr = "texture"; - - if (workaround_lod_array_shadow_as_grad) { - expr += "Grad"; - } else { - expr += function_suffix; - } - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } else if (!meta->ptp.empty()) { - expr += "Offsets"; - } - if (!meta->sampler.is_indexed) { - expr += '(' + GetSampler(meta->sampler) + ", "; - } else { - expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; - } - expr += coord_constructors.at(count + (has_array ? 1 : 0) + - (has_shadow && !separate_dc ? 1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]).AsFloat(); - - const std::size_t next = i + 1; - if (next < count) - expr += ", "; - } - if (has_array) { - expr += ", float(" + Visit(meta->array).AsInt() + ')'; - } - if (has_shadow) { - if (separate_dc) { - expr += "), " + Visit(meta->depth_compare).AsFloat(); - } else { - expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; - } - } else { - expr += ')'; - } - - if (workaround_lod_array_shadow_as_grad) { - switch (meta->sampler.type) { - case TextureType::Texture2D: - return expr + ", vec2(0.0), vec2(0.0))"; - case TextureType::TextureCube: - return expr + ", vec3(0.0), vec3(0.0))"; - default: - UNREACHABLE(); - break; - } - } - - for (const auto& variant : extras) { - if (const auto argument = std::get_if<TextureArgument>(&variant)) { - expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative<TextureOffset>(variant)) { - if (!meta->aoffi.empty()) { - expr += GenerateTextureAoffi(meta->aoffi); - } else if (!meta->ptp.empty()) { - expr += GenerateTexturePtp(meta->ptp); - } - } else if (std::holds_alternative<TextureDerivates>(variant)) { - expr += GenerateTextureDerivates(meta->derivates); - } else { - UNREACHABLE(); - } - } - - return expr + ')'; - } - - std::string GenerateTextureArgument(const TextureArgument& argument) { - const auto& [type, operand] = argument; - if (operand == nullptr) { - return {}; - } - - std::string expr = ", "; - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast<s32>(immediate->GetValue())); - } else { - expr += Visit(operand).AsInt(); - } - break; - case Type::Float: - expr += Visit(operand).AsFloat(); - break; - default: { - const auto type_int = static_cast<u32>(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; - } - } - return expr; - } - - std::string ReadTextureOffset(const Node& value) { - if (const auto immediate = std::get_if<ImmediateNode>(&*value)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - return std::to_string(static_cast<s32>(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - return Visit(value).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. - return "0"; - } - } - - std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { - if (aoffi.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; - std::string expr = ", "; - expr += coord_constructors.at(aoffi.size() - 1); - expr += '('; - - for (std::size_t index = 0; index < aoffi.size(); ++index) { - expr += ReadTextureOffset(aoffi.at(index)); - if (index + 1 < aoffi.size()) { - expr += ", "; - } - } - expr += ')'; - - return expr; - } - - std::string GenerateTexturePtp(const std::vector<Node>& ptp) { - static constexpr std::size_t num_vectors = 4; - ASSERT(ptp.size() == num_vectors * 2); - - std::string expr = ", ivec2[]("; - for (std::size_t vector = 0; vector < num_vectors; ++vector) { - const bool has_next = vector + 1 < num_vectors; - expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), - ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : ""); - } - expr += ')'; - return expr; - } - - std::string GenerateTextureDerivates(const std::vector<Node>& derivates) { - if (derivates.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; - std::string expr = ", "; - const std::size_t components = derivates.size() / 2; - std::string dx = coord_constructors.at(components - 1); - std::string dy = coord_constructors.at(components - 1); - dx += '('; - dy += '('; - - for (std::size_t index = 0; index < components; ++index) { - const auto& operand_x{derivates.at(index * 2)}; - const auto& operand_y{derivates.at(index * 2 + 1)}; - dx += Visit(operand_x).AsFloat(); - dy += Visit(operand_y).AsFloat(); - - if (index + 1 < components) { - dx += ", "; - dy += ", "; - } - } - dx += ')'; - dy += ')'; - expr += dx + ", " + dy; - - return expr; - } - - std::string BuildIntegerCoordinates(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; - const std::size_t coords_count{operation.GetOperandsCount()}; - std::string expr = constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - std::string BuildImageValues(Operation operation) { - constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - - const std::size_t values_count{meta.values.size()}; - std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsUint(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target; - if (const auto gpr = std::get_if<GprNode>(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit the source - // as it might have side effects. - code.AddLine("{};", Visit(src).GetCode()); - return {}; - } - target = {GetRegister(gpr->GetIndex()), Type::Float}; - } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { - UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - auto output = GetOutputAttribute(abuf); - if (!output) { - return {}; - } - target = std::move(*output); - } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - target = { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - ASSERT(stage == ShaderType::Compute); - target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { - target = {GetCustomVariable(cv->GetIndex()), Type::Float}; - } else { - UNREACHABLE_MSG("Assign called without a proper target"); - } - - code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); - return {}; - } - - template <Type type> - Expression Add(Operation operation) { - return GenerateBinaryInfix(operation, "+", type, type, type); - } - - template <Type type> - Expression Mul(Operation operation) { - return GenerateBinaryInfix(operation, "*", type, type, type); - } - - template <Type type> - Expression Div(Operation operation) { - return GenerateBinaryInfix(operation, "/", type, type, type); - } - - template <Type type> - Expression Fma(Operation operation) { - return GenerateTernary(operation, "fma", type, type, type, type); - } - - template <Type type> - Expression Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type); - } - - template <Type type> - Expression Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type); - } - - Expression FClamp(Operation operation) { - return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, - Type::Float); - } - - Expression FCastHalf0(Operation operation) { - return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression FCastHalf1(Operation operation) { - return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - template <Type type> - Expression Min(Operation operation) { - return GenerateBinaryCall(operation, "min", type, type, type); - } - - template <Type type> - Expression Max(Operation operation) { - return GenerateBinaryCall(operation, "max", type, type, type); - } - - Expression Select(Operation operation) { - const std::string condition = Visit(operation[0]).AsBool(); - const std::string true_case = Visit(operation[1]).AsUint(); - const std::string false_case = Visit(operation[2]).AsUint(); - std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); - - return ApplyPrecise(operation, std::move(op_str), Type::Uint); - } - - Expression FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float); - } - - Expression FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float); - } - - Expression FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float); - } - - Expression FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float); - } - - Expression FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); - } - - Expression FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); - } - - Expression FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); - } - - Expression FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float); - } - - Expression FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float); - } - - Expression FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float); - } - - template <Type type> - Expression FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type); - } - - Expression FSwizzleAdd(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsFloat(); - const std::string op_b = VisitOperand(operation, 1).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {fmt::format("{} + {}", op_a, op_b), Type::Float}; - } - - const std::string instr_mask = VisitOperand(operation, 2).AsUint(); - const std::string mask = code.GenerateTemporary(); - code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, - instr_mask); - - const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); - const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); - return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), - Type::Float}; - } - - Expression ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float); - } - - Expression ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint); - } - - template <Type type> - Expression LogicalShiftLeft(Operation operation) { - return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); - } - - Expression ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - const std::string op_b = VisitOperand(operation, 1).AsUint(); - std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), Type::Int); - } - - Expression IArithmeticShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); - } - - template <Type type> - Expression BitwiseAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&", type, type, type); - } - - template <Type type> - Expression BitwiseOr(Operation operation) { - return GenerateBinaryInfix(operation, "|", type, type, type); - } - - template <Type type> - Expression BitwiseXor(Operation operation) { - return GenerateBinaryInfix(operation, "^", type, type, type); - } - - template <Type type> - Expression BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type); - } - - Expression UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float); - } - - Expression UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int); - } - - Expression UShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); - } - - template <Type type> - Expression BitfieldInsert(Operation operation) { - return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, - Type::Int); - } - - template <Type type> - Expression BitfieldExtract(Operation operation) { - return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); - } - - template <Type type> - Expression BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type); - } - - template <Type type> - Expression BitMSB(Operation operation) { - return GenerateUnary(operation, "findMSB", type, type); - } - - Expression HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; - }; - return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), - GetNegate(1), GetNegate(2)), - Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsHalfFloat(); - const std::string min = VisitOperand(operation, 1).AsFloat(); - const std::string max = VisitOperand(operation, 2).AsFloat(); - std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); - } - - Expression HCastFloat(Operation operation) { - return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), - Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = VisitOperand(operation, 0); - switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H0_H0: - return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H1_H1: - return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression HMergeF32(Operation operation) { - return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression HMergeH0(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), - Type::HalfFloat}; - } - - Expression HMergeH1(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), - Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::HalfFloat}; - } - - template <const std::string_view& op, Type type, bool unordered = false> - Expression Comparison(Operation operation) { - static_assert(!unordered || type == Type::Float); - - Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); - - if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { - // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's - // and Nvidia's proprietary stacks. Manually force an ordered comparison. - return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - if constexpr (!unordered) { - return expr; - } - // Unordered comparisons are always true for NaN operands. - return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FOrdered(Operation operation) { - return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FUnordered(Operation operation) { - return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression LogicalAddCarry(Operation operation) { - const std::string carry = code.GenerateTemporary(); - code.AddLine("uint {};", carry); - code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), - VisitOperand(operation, 1).AsUint(), carry); - return {fmt::format("({} != 0)", carry), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if<PredicateNode>(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = GetPredicate(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { - target = GetInternalFlag(flag->GetFlag()); - } - - code.AddLine("{} = {};", target, Visit(src).AsBool()); - return {}; - } - - Expression LogicalAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalOr(Operation operation) { - return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalXor(Operation operation) { - return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool); - } - - Expression LogicalPick2(Operation operation) { - return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), - VisitOperand(operation, 1).AsUint()), - Type::Bool}; - } - - Expression LogicalAnd2(Operation operation) { - return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); - } - - template <bool with_nan> - Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { - Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat); - if constexpr (!with_nan) { - return comparison; - } - return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), - VisitOperand(operation, 0).AsHalfFloat(), - VisitOperand(operation, 1).AsHalfFloat()), - Type::Bool2}; - } - - template <bool with_nan> - Expression Logical2HLessThan(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "lessThan"); - } - - template <bool with_nan> - Expression Logical2HEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "equal"); - } - - template <bool with_nan> - Expression Logical2HLessEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); - } - - template <bool with_nan> - Expression Logical2HGreaterThan(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "greaterThan"); - } - - template <bool with_nan> - Expression Logical2HNotEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "notEqual"); - } - - template <bool with_nan> - Expression Logical2HGreaterEqual(Operation operation) { - return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); - } - - Expression Texture(Operation operation) { - const auto meta = std::get<MetaTexture>(operation.GetMeta()); - const bool separate_dc = meta.sampler.type == TextureType::TextureCube && - meta.sampler.is_array && meta.sampler.is_shadow; - // TODO: Replace this with an array and make GenerateTexture use C++20 std::span - const std::vector<TextureIR> extras{ - TextureOffset{}, - TextureArgument{Type::Float, meta.bias}, - }; - std::string expr = GenerateTexture(operation, "", extras, separate_dc); - if (meta.sampler.is_shadow) { - expr = fmt::format("vec4({})", expr); - } - return {expr + GetSwizzle(meta.element), Type::Float}; - } - - Expression TextureLod(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - std::string expr{}; - - if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube)) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); - expr = GenerateTexture(operation, "Lod", {}); - } else { - expr = GenerateTexture(operation, "Lod", - {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); - } - - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; - } - return {expr + GetSwizzle(meta->element), Type::Float}; - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - - const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int; - const bool separate_dc = meta.sampler.is_shadow; - - std::vector<TextureIR> ir_; - if (meta.sampler.is_shadow) { - ir_ = {TextureOffset{}}; - } else { - ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; - } - return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), - Type::Float}; - } - - Expression TextureQueryDimensions(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0).AsInt(); - - switch (meta->element) { - case 0: - case 1: - return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), - Type::Int}; - case 3: - return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - - if (meta->element < 2) { - return {fmt::format("int(({} * vec2(256)){})", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)), - Type::Int}; - } - return {"0", Type::Int}; - } - - Expression TexelFetch(Operation operation) { - constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; - const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); - ASSERT(meta); - UNIMPLEMENTED_IF(meta->sampler.is_array); - const std::size_t count = operation.GetOperandsCount(); - - std::string expr = "texelFetch("; - expr += GetSampler(meta->sampler); - expr += ", "; - - expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - if (i > 0) { - expr += ", "; - } - expr += VisitOperand(operation, i).AsInt(); - } - if (meta->array) { - expr += ", "; - expr += Visit(meta->array).AsInt(); - } - expr += ')'; - - if (meta->lod && !meta->sampler.is_buffer) { - expr += ", "; - expr += Visit(meta->lod).AsInt(); - } - expr += ')'; - expr += GetSwizzle(meta->element); - - return {std::move(expr), Type::Float}; - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - std::string expr = - GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); - return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; - } - - Expression ImageLoad(Operation operation) { - if (!device.HasImageLoadFormatted()) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); - return {"0", Type::Int}; - } - - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), - BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), - Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), - BuildIntegerCoordinates(operation), BuildImageValues(operation)); - return {}; - } - - template <const std::string_view& opname> - Expression AtomicImage(Operation operation) { - const auto& meta{std::get<MetaImage>(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), - BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), - Type::Uint}; - } - - template <const std::string_view& opname, Type type> - Expression Atomic(Operation operation) { - if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); - return {}; - } - return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), - Visit(operation[1]).AsUint()), - Type::Uint}; - } - - template <const std::string_view& opname, Type type> - Expression Reduce(Operation operation) { - code.AddLine("{};", Atomic<opname, type>(operation).GetCode()); - return {}; - } - - Expression Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); - code.AddLine("break;"); - return {}; - } - - Expression BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - - code.AddLine("jmp_to = {};", op_a); - code.AddLine("break;"); - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - const auto target = std::get_if<ImmediateNode>(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), - target->GetValue()); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto stack = std::get<MetaStackClass>(operation.GetMeta()); - code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); - code.AddLine("break;"); - return {}; - } - - void PreExit() { - if (stage != ShaderType::Fragment) { - return; - } - const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> Expression { - // TODO(Rodrigo): Replace with contains once C++20 releases - if (used_registers.find(reg) != used_registers.end()) { - return {GetRegister(reg), Type::Float}; - } - return {"0.0f", Type::Float}; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), - SafeGetRegister(current_reg).AsFloat()); - ++current_reg; - } - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and current_reg - // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); - } - } - - Expression Exit(Operation operation) { - PreExit(); - code.AddLine("return;"); - return {}; - } - - Expression Discard(Operation operation) { - // Enclose "discard" in a conditional, so that GLSL compilation does not complain - // about unexecuted instructions that may follow this. - code.AddLine("if (true) {{"); - ++code.scope; - code.AddLine("discard;"); - --code.scope; - code.AddLine("}}"); - return {}; - } - - Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EmitVertex is expected to be used in a geometry shader."); - code.AddLine("EmitVertex();"); - return {}; - } - - Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); - return {}; - } - - Expression InvocationId(Operation operation) { - return {"gl_InvocationID", Type::Int}; - } - - Expression YNegate(Operation operation) { - // Y_NEGATE is mapped to this uniform value - return {"gl_FrontMaterial.ambient.a", Type::Float}; - } - - template <u32 element> - Expression LocalInvocationId(Operation) { - return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; - } - - template <u32 element> - Expression WorkGroupId(Operation) { - return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub on non-Nvidia devices by simulating all threads voting the same as the active - // one. - return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; - } - return {fmt::format("ballotThreadNV({})", value), Type::Uint}; - } - - Expression Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub with a warp size of one. - return {value, Type::Bool}; - } - return {fmt::format("{}({})", func, value), Type::Bool}; - } - - Expression VoteAll(Operation operation) { - return Vote(operation, "allThreadsNV"); - } - - Expression VoteAny(Operation operation) { - return Vote(operation, "anyThreadNV"); - } - - Expression VoteEqual(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // We must return true here since a stub for a theoretical warp size of 1. - // This will always return an equal result across all votes. - return {"true", Type::Bool}; - } - return Vote(operation, "allThreadsEqualNV"); - } - - Expression ThreadId(Operation operation) { - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {"0U", Type::Uint}; - } - return {"gl_SubGroupInvocationARB", Type::Uint}; - } - - template <const std::string_view& comparison> - Expression ThreadMask(Operation) { - if (device.HasWarpIntrinsics()) { - return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; - } - if (device.HasShaderBallot()) { - return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; - } - LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); - return {"0U", Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - std::string value = VisitOperand(operation, 0).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {std::move(value), Type::Float}; - } - - const std::string index = VisitOperand(operation, 1).AsUint(); - return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); - return {}; - } - code.AddLine("barrier();"); - return {}; - } - - Expression MemoryBarrierGroup(Operation) { - code.AddLine("groupMemoryBarrier();"); - return {}; - } - - Expression MemoryBarrierGlobal(Operation) { - code.AddLine("memoryBarrier();"); - return {}; - } - - struct Func final { - Func() = delete; - ~Func() = delete; - - static constexpr std::string_view LessThan = "<"; - static constexpr std::string_view Equal = "=="; - static constexpr std::string_view LessEqual = "<="; - static constexpr std::string_view GreaterThan = ">"; - static constexpr std::string_view NotEqual = "!="; - static constexpr std::string_view GreaterEqual = ">="; - - static constexpr std::string_view Eq = "Eq"; - static constexpr std::string_view Ge = "Ge"; - static constexpr std::string_view Gt = "Gt"; - static constexpr std::string_view Le = "Le"; - static constexpr std::string_view Lt = "Lt"; - - static constexpr std::string_view Add = "Add"; - static constexpr std::string_view Min = "Min"; - static constexpr std::string_view Max = "Max"; - static constexpr std::string_view And = "And"; - static constexpr std::string_view Or = "Or"; - static constexpr std::string_view Xor = "Xor"; - static constexpr std::string_view Exchange = "Exchange"; - }; - - static constexpr std::array operation_decompilers = { - &GLSLDecompiler::Assign, - - &GLSLDecompiler::Select, - - &GLSLDecompiler::Add<Type::Float>, - &GLSLDecompiler::Mul<Type::Float>, - &GLSLDecompiler::Div<Type::Float>, - &GLSLDecompiler::Fma<Type::Float>, - &GLSLDecompiler::Negate<Type::Float>, - &GLSLDecompiler::Absolute<Type::Float>, - &GLSLDecompiler::FClamp, - &GLSLDecompiler::FCastHalf0, - &GLSLDecompiler::FCastHalf1, - &GLSLDecompiler::Min<Type::Float>, - &GLSLDecompiler::Max<Type::Float>, - &GLSLDecompiler::FCos, - &GLSLDecompiler::FSin, - &GLSLDecompiler::FExp2, - &GLSLDecompiler::FLog2, - &GLSLDecompiler::FInverseSqrt, - &GLSLDecompiler::FSqrt, - &GLSLDecompiler::FRoundEven, - &GLSLDecompiler::FFloor, - &GLSLDecompiler::FCeil, - &GLSLDecompiler::FTrunc, - &GLSLDecompiler::FCastInteger<Type::Int>, - &GLSLDecompiler::FCastInteger<Type::Uint>, - &GLSLDecompiler::FSwizzleAdd, - - &GLSLDecompiler::Add<Type::Int>, - &GLSLDecompiler::Mul<Type::Int>, - &GLSLDecompiler::Div<Type::Int>, - &GLSLDecompiler::Negate<Type::Int>, - &GLSLDecompiler::Absolute<Type::Int>, - &GLSLDecompiler::Min<Type::Int>, - &GLSLDecompiler::Max<Type::Int>, - - &GLSLDecompiler::ICastFloat, - &GLSLDecompiler::ICastUnsigned, - &GLSLDecompiler::LogicalShiftLeft<Type::Int>, - &GLSLDecompiler::ILogicalShiftRight, - &GLSLDecompiler::IArithmeticShiftRight, - &GLSLDecompiler::BitwiseAnd<Type::Int>, - &GLSLDecompiler::BitwiseOr<Type::Int>, - &GLSLDecompiler::BitwiseXor<Type::Int>, - &GLSLDecompiler::BitwiseNot<Type::Int>, - &GLSLDecompiler::BitfieldInsert<Type::Int>, - &GLSLDecompiler::BitfieldExtract<Type::Int>, - &GLSLDecompiler::BitCount<Type::Int>, - &GLSLDecompiler::BitMSB<Type::Int>, - - &GLSLDecompiler::Add<Type::Uint>, - &GLSLDecompiler::Mul<Type::Uint>, - &GLSLDecompiler::Div<Type::Uint>, - &GLSLDecompiler::Min<Type::Uint>, - &GLSLDecompiler::Max<Type::Uint>, - &GLSLDecompiler::UCastFloat, - &GLSLDecompiler::UCastSigned, - &GLSLDecompiler::LogicalShiftLeft<Type::Uint>, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::BitwiseAnd<Type::Uint>, - &GLSLDecompiler::BitwiseOr<Type::Uint>, - &GLSLDecompiler::BitwiseXor<Type::Uint>, - &GLSLDecompiler::BitwiseNot<Type::Uint>, - &GLSLDecompiler::BitfieldInsert<Type::Uint>, - &GLSLDecompiler::BitfieldExtract<Type::Uint>, - &GLSLDecompiler::BitCount<Type::Uint>, - &GLSLDecompiler::BitMSB<Type::Uint>, - - &GLSLDecompiler::Add<Type::HalfFloat>, - &GLSLDecompiler::Mul<Type::HalfFloat>, - &GLSLDecompiler::Fma<Type::HalfFloat>, - &GLSLDecompiler::Absolute<Type::HalfFloat>, - &GLSLDecompiler::HNegate, - &GLSLDecompiler::HClamp, - &GLSLDecompiler::HCastFloat, - &GLSLDecompiler::HUnpack, - &GLSLDecompiler::HMergeF32, - &GLSLDecompiler::HMergeH0, - &GLSLDecompiler::HMergeH1, - &GLSLDecompiler::HPack2, - - &GLSLDecompiler::LogicalAssign, - &GLSLDecompiler::LogicalAnd, - &GLSLDecompiler::LogicalOr, - &GLSLDecompiler::LogicalXor, - &GLSLDecompiler::LogicalNegate, - &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAnd2, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>, - &GLSLDecompiler::FOrdered, - &GLSLDecompiler::FUnordered, - &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Int>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>, - - &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>, - &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>, - &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>, - &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>, - &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>, - &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>, - - &GLSLDecompiler::LogicalAddCarry, - - &GLSLDecompiler::Logical2HLessThan<false>, - &GLSLDecompiler::Logical2HEqual<false>, - &GLSLDecompiler::Logical2HLessEqual<false>, - &GLSLDecompiler::Logical2HGreaterThan<false>, - &GLSLDecompiler::Logical2HNotEqual<false>, - &GLSLDecompiler::Logical2HGreaterEqual<false>, - &GLSLDecompiler::Logical2HLessThan<true>, - &GLSLDecompiler::Logical2HEqual<true>, - &GLSLDecompiler::Logical2HLessEqual<true>, - &GLSLDecompiler::Logical2HGreaterThan<true>, - &GLSLDecompiler::Logical2HNotEqual<true>, - &GLSLDecompiler::Logical2HGreaterEqual<true>, - - &GLSLDecompiler::Texture, - &GLSLDecompiler::TextureLod, - &GLSLDecompiler::TextureGather, - &GLSLDecompiler::TextureQueryDimensions, - &GLSLDecompiler::TextureQueryLod, - &GLSLDecompiler::TexelFetch, - &GLSLDecompiler::TextureGradient, - - &GLSLDecompiler::ImageLoad, - &GLSLDecompiler::ImageStore, - - &GLSLDecompiler::AtomicImage<Func::Add>, - &GLSLDecompiler::AtomicImage<Func::And>, - &GLSLDecompiler::AtomicImage<Func::Or>, - &GLSLDecompiler::AtomicImage<Func::Xor>, - &GLSLDecompiler::AtomicImage<Func::Exchange>, - - &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Min, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Max, Type::Uint>, - &GLSLDecompiler::Atomic<Func::And, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Or, Type::Uint>, - &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>, - - &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>, - &GLSLDecompiler::Atomic<Func::Add, Type::Int>, - &GLSLDecompiler::Atomic<Func::Min, Type::Int>, - &GLSLDecompiler::Atomic<Func::Max, Type::Int>, - &GLSLDecompiler::Atomic<Func::And, Type::Int>, - &GLSLDecompiler::Atomic<Func::Or, Type::Int>, - &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, - - &GLSLDecompiler::Reduce<Func::Add, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Min, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Max, Type::Uint>, - &GLSLDecompiler::Reduce<Func::And, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Or, Type::Uint>, - &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>, - - &GLSLDecompiler::Reduce<Func::Add, Type::Int>, - &GLSLDecompiler::Reduce<Func::Min, Type::Int>, - &GLSLDecompiler::Reduce<Func::Max, Type::Int>, - &GLSLDecompiler::Reduce<Func::And, Type::Int>, - &GLSLDecompiler::Reduce<Func::Or, Type::Int>, - &GLSLDecompiler::Reduce<Func::Xor, Type::Int>, - - &GLSLDecompiler::Branch, - &GLSLDecompiler::BranchIndirect, - &GLSLDecompiler::PushFlowStack, - &GLSLDecompiler::PopFlowStack, - &GLSLDecompiler::Exit, - &GLSLDecompiler::Discard, - - &GLSLDecompiler::EmitVertex, - &GLSLDecompiler::EndPrimitive, - - &GLSLDecompiler::InvocationId, - &GLSLDecompiler::YNegate, - &GLSLDecompiler::LocalInvocationId<0>, - &GLSLDecompiler::LocalInvocationId<1>, - &GLSLDecompiler::LocalInvocationId<2>, - &GLSLDecompiler::WorkGroupId<0>, - &GLSLDecompiler::WorkGroupId<1>, - &GLSLDecompiler::WorkGroupId<2>, - - &GLSLDecompiler::BallotThread, - &GLSLDecompiler::VoteAll, - &GLSLDecompiler::VoteAny, - &GLSLDecompiler::VoteEqual, - - &GLSLDecompiler::ThreadId, - &GLSLDecompiler::ThreadMask<Func::Eq>, - &GLSLDecompiler::ThreadMask<Func::Ge>, - &GLSLDecompiler::ThreadMask<Func::Gt>, - &GLSLDecompiler::ThreadMask<Func::Le>, - &GLSLDecompiler::ThreadMask<Func::Lt>, - &GLSLDecompiler::ShuffleIndexed, - - &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGroup, - &GLSLDecompiler::MemoryBarrierGlobal, - }; - static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); - - std::string GetRegister(u32 index) const { - return AppendSuffix(index, "gpr"); - } - - std::string GetCustomVariable(u32 index) const { - return AppendSuffix(index, "custom_var"); - } - - std::string GetPredicate(Tegra::Shader::Pred pred) const { - return AppendSuffix(static_cast<u32>(pred), "pred"); - } - - std::string GetGenericInputAttribute(Attribute::Index attribute) const { - return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); - } - - std::unordered_map<u8, GenericVaryingDescription> varying_description; - - std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { - const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); - const auto& description = varying_description.at(offset); - if (description.is_scalar) { - return description.name; - } - return fmt::format("{}[{}]", description.name, element - description.first_element); - } - - std::string GetConstBuffer(u32 index) const { - return AppendSuffix(index, "cbuf"); - } - - std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); - } - - std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, - suffix); - } - - std::string GetConstBufferBlock(u32 index) const { - return AppendSuffix(index, "cbuf_block"); - } - - std::string GetLocalMemory() const { - if (suffix.empty()) { - return "lmem"; - } else { - return "lmem_" + std::string{suffix}; - } - } - - std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", - "overflow_flag"}; - const auto index = static_cast<u32>(flag); - ASSERT(index < static_cast<u32>(InternalFlag::Amount)); - - if (suffix.empty()) { - return InternalFlagNames[index]; - } else { - return fmt::format("{}_{}", InternalFlagNames[index], suffix); - } - } - - std::string GetSampler(const SamplerEntry& sampler) const { - return AppendSuffix(sampler.index, "sampler"); - } - - std::string GetImage(const ImageEntry& image) const { - return AppendSuffix(image.index, "image"); - } - - std::string AppendSuffix(u32 index, std::string_view name) const { - if (suffix.empty()) { - return fmt::format("{}{}", name, index); - } else { - return fmt::format("{}{}_{}", name, index, suffix); - } - } - - u32 GetNumPhysicalInputAttributes() const { - return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); - } - - u32 GetNumPhysicalAttributes() const { - return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); - } - - u32 GetNumPhysicalVaryings() const { - return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - const std::string_view identifier; - const std::string_view suffix; - const Header header; - std::unordered_map<u8, VaryingTFB> transform_feedback; - - ShaderWriter code; - - std::optional<u32> max_input_vertices; -}; - -std::string GetFlowVariable(u32 index) { - return fmt::format("flow_var{}", index); -} - -class ExprDecompiler { -public: - explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ExprAnd& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += '!'; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); - inner += decomp.GetPredicate(pred); - } - - void operator()(const ExprCondCode& expr) { - inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); - } - - void operator()(const ExprVar& expr) { - inner += GetFlowVariable(expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - GLSLDecompiler& decomp; - std::string inner; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()(const ASTIfElse& ast) { - decomp.code.AddLine("else {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - decomp.code.AddLine("// Label_{}:", ast.index); - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("do {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}} while({});", expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - if (ast.kills) { - decomp.code.AddLine("discard;"); - } else { - decomp.PreExit(); - decomp.code.AddLine("return;"); - } - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void operator()(const ASTBreak& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - decomp.code.AddLine("break;"); - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - GLSLDecompiler& decomp; -}; - -void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - code.AddLine("bool {} = false;", GetFlowVariable(i)); - } - - ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); -} - -} // Anonymous namespace - -ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, - usage.is_written); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); - } - const auto clip_distances = ir.GetClipDistances(); - for (std::size_t i = 0; i < std::size(clip_distances); ++i) { - entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.shader_length = ir.GetLength(); - return entries; -} - -std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); - decompiler.Decompile(); - return decompiler.GetResult(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <string> -#include <string_view> -#include <utility> -#include <vector> -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) - : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} - - u32 GetIndex() const { - return index; - } - -private: - u32 index = 0; -}; - -struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, - bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ - is_written_} {} - - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - bool is_read = false; - bool is_written = false; -}; - -struct ShaderEntries { - std::vector<ConstBufferEntry> const_buffers; - std::vector<GlobalMemoryEntry> global_memory_entries; - std::vector<SamplerEntry> samplers; - std::vector<ImageEntry> images; - std::size_t shader_length{}; - u32 clip_distances{}; - u32 enabled_uniform_buffers{}; -}; - -ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage); - -std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier, - std::string_view suffix = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/fs/file.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/logging/log.h" -#include "common/scm_rev.h" -#include "common/settings.h" -#include "common/zstd_compression.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" - -namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::BindlessSamplerMap; -using VideoCommon::Shader::BoundSamplerMap; -using VideoCommon::Shader::KeyMap; -using VideoCommon::Shader::SeparateSamplerKey; -using ShaderCacheVersionHash = std::array<u8, 64>; - -struct ConstBufferKey { - u32 cbuf = 0; - u32 offset = 0; - u32 value = 0; -}; - -struct BoundSamplerEntry { - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct SeparateSamplerEntry { - u32 cbuf1 = 0; - u32 cbuf2 = 0; - u32 offset1 = 0; - u32 offset2 = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerEntry { - u32 cbuf = 0; - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -namespace { - -constexpr u32 NativeVersion = 21; - -ShaderCacheVersionHash GetShaderCacheVersionHash() { - ShaderCacheVersionHash hash{}; - const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); - std::memcpy(hash.data(), Common::g_shader_cache_version, length); - return hash; -} - -} // Anonymous namespace - -ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; - -ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; - -bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { - if (!file.ReadObject(type)) { - return false; - } - u32 code_size; - u32 code_size_b; - if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { - return false; - } - code.resize(code_size); - code_b.resize(code_size_b); - if (file.Read(code) != code_size) { - return false; - } - if (HasProgramA() && file.Read(code_b) != code_size_b) { - return false; - } - - u8 is_texture_handler_size_known; - u32 texture_handler_size_value; - u32 num_keys; - u32 num_bound_samplers; - u32 num_separate_samplers; - u32 num_bindless_samplers; - if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || - !file.ReadObject(is_texture_handler_size_known) || - !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || - !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || - !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || - !file.ReadObject(num_bindless_samplers)) { - return false; - } - if (is_texture_handler_size_known) { - texture_handler_size = texture_handler_size_value; - } - - std::vector<ConstBufferKey> flat_keys(num_keys); - std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); - std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); - std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); - if (file.Read(flat_keys) != flat_keys.size() || - file.Read(flat_bound_samplers) != flat_bound_samplers.size() || - file.Read(flat_separate_samplers) != flat_separate_samplers.size() || - file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { - return false; - } - for (const auto& entry : flat_keys) { - keys.insert({{entry.cbuf, entry.offset}, entry.value}); - } - for (const auto& entry : flat_bound_samplers) { - bound_samplers.emplace(entry.offset, entry.sampler); - } - for (const auto& entry : flat_separate_samplers) { - SeparateSamplerKey key; - key.buffers = {entry.cbuf1, entry.cbuf2}; - key.offsets = {entry.offset1, entry.offset2}; - separate_samplers.emplace(key, entry.sampler); - } - for (const auto& entry : flat_bindless_samplers) { - bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); - } - - return true; -} - -bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { - if (!file.WriteObject(static_cast<u32>(type)) || - !file.WriteObject(static_cast<u32>(code.size())) || - !file.WriteObject(static_cast<u32>(code_b.size()))) { - return false; - } - if (file.Write(code) != code.size()) { - return false; - } - if (HasProgramA() && file.Write(code_b) != code_b.size()) { - return false; - } - - if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || - !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) || - !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || - !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) || - !file.WriteObject(static_cast<u32>(bound_samplers.size())) || - !file.WriteObject(static_cast<u32>(separate_samplers.size())) || - !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) { - return false; - } - - std::vector<ConstBufferKey> flat_keys; - flat_keys.reserve(keys.size()); - for (const auto& [address, value] : keys) { - flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); - } - - std::vector<BoundSamplerEntry> flat_bound_samplers; - flat_bound_samplers.reserve(bound_samplers.size()); - for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); - } - - std::vector<SeparateSamplerEntry> flat_separate_samplers; - flat_separate_samplers.reserve(separate_samplers.size()); - for (const auto& [key, sampler] : separate_samplers) { - SeparateSamplerEntry entry; - std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; - std::tie(entry.offset1, entry.offset2) = key.offsets; - entry.sampler = sampler; - flat_separate_samplers.push_back(entry); - } - - std::vector<BindlessSamplerEntry> flat_bindless_samplers; - flat_bindless_samplers.reserve(bindless_samplers.size()); - for (const auto& [address, sampler] : bindless_samplers) { - flat_bindless_samplers.push_back( - BindlessSamplerEntry{address.first, address.second, sampler}); - } - - return file.Write(flat_keys) == flat_keys.size() && - file.Write(flat_bound_samplers) == flat_bound_samplers.size() && - file.Write(flat_separate_samplers) == flat_separate_samplers.size() && - file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); -} - -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; - -ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; - -void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { - title_id = title_id_; -} - -std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { - // Skip games without title id - const bool has_title_id = title_id != 0; - if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return std::nullopt; - } - - Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache found"); - is_usable = true; - return std::nullopt; - } - - u32 version{}; - if (!file.ReadObject(version)) { - LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return std::nullopt; - } - - if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); - file.Close(); - InvalidateTransferable(); - is_usable = true; - return std::nullopt; - } - if (version > NativeVersion) { - LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator, skipping"); - return std::nullopt; - } - - // Version is valid, load the shaders - std::vector<ShaderDiskCacheEntry> entries; - while (static_cast<u64>(file.Tell()) < file.GetSize()) { - ShaderDiskCacheEntry& entry = entries.emplace_back(); - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return std::nullopt; - } - } - - is_usable = true; - return {std::move(entries)}; -} - -std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!is_usable) { - return {}; - } - - Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); - return {}; - } - - if (const auto result = LoadPrecompiledFile(file)) { - return *result; - } - - LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); - file.Close(); - InvalidatePrecompiled(); - return {}; -} - -std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - Common::FS::IOFile& file) { - // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector<u8> compressed(file.GetSize()); - if (file.Read(compressed) != file.GetSize()) { - return std::nullopt; - } - const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); - precompiled_cache_virtual_file_offset = 0; - - ShaderCacheVersionHash file_hash{}; - if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - if (GetShaderCacheVersionHash() != file_hash) { - LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - - std::vector<ShaderDiskCachePrecompiled> entries; - while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 binary_size; - auto& entry = entries.emplace_back(); - if (!LoadObjectFromPrecompiled(entry.unique_identifier) || - !LoadObjectFromPrecompiled(entry.binary_format) || - !LoadObjectFromPrecompiled(binary_size)) { - return std::nullopt; - } - - entry.binary.resize(binary_size); - if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return std::nullopt; - } - } - return entries; -} - -void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if (!Common::FS::RemoveFile(GetTransferablePath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", - Common::FS::PathToUTF8String(GetTransferablePath())); - } - InvalidatePrecompiled(); -} - -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file - precompiled_cache_virtual_file.Resize(0); - - if (!Common::FS::RemoveFile(GetPrecompiledPath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", - Common::FS::PathToUTF8String(GetPrecompiledPath())); - } -} - -void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { - if (!is_usable) { - return; - } - - const u64 id = entry.unique_identifier; - if (stored_transferable.contains(id)) { - // The shader already exists - return; - } - - Common::FS::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) { - return; - } - if (!entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - return; - } - - stored_transferable.insert(id); -} - -void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { - if (!is_usable) { - return; - } - - // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header - // when writing the dump. This should be done the moment I get access to write to the virtual - // file. - if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - GLint binary_length; - glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - - GLenum binary_format; - std::vector<u8> binary(binary_length); - glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - - if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || - !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - unique_identifier); - InvalidatePrecompiled(); - } -} - -Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) { - return {}; - } - - const auto transferable_path{GetTransferablePath()}; - const bool existed = Common::FS::Exists(transferable_path); - - Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - if (!existed || file.GetSize() == 0) { - // If the file didn't exist, write its version - if (!file.WriteObject(NativeVersion)) { - LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - } - return file; -} - -void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { - const auto hash{GetShaderCacheVersionHash()}; - if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { - LOG_ERROR( - Render_OpenGL, - "Failed to write precompiled cache version hash to virtual precompiled cache file"); - } -} - -void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { - precompiled_cache_virtual_file_offset = 0; - const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector<u8> compressed = - Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); - - const auto precompiled_path = GetPrecompiledPath(); - Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, - Common::FS::FileType::BinaryFile}; - - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - return; - } - if (file.Write(compressed) != compressed.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - } -} - -bool ShaderDiskCacheOpenGL::EnsureDirectories() const { - const auto CreateDir = [](const std::filesystem::path& dir) { - if (!Common::FS::CreateDir(dir)) { - LOG_ERROR(Render_OpenGL, "Failed to create directory={}", - Common::FS::PathToUTF8String(dir)); - return false; - } - return true; - }; - - return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && - CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { - return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { - return GetBaseDir() / "transferable"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { - return GetBaseDir() / "precompiled"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { - return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; -} - -std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", title_id); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <filesystem> -#include <optional> -#include <string> -#include <tuple> -#include <type_traits> -#include <unordered_map> -#include <unordered_set> -#include <utility> -#include <vector> - -#include <glad/glad.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/file_sys/vfs_vector.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace Common::FS { -class IOFile; -} - -namespace OpenGL { - -using ProgramCode = std::vector<u64>; - -/// Describes a shader and how it's used by the guest GPU -struct ShaderDiskCacheEntry { - ShaderDiskCacheEntry(); - ~ShaderDiskCacheEntry(); - - bool Load(Common::FS::IOFile& file); - - bool Save(Common::FS::IOFile& file) const; - - bool HasProgramA() const { - return !code.empty() && !code_b.empty(); - } - - Tegra::Engines::ShaderType type{}; - ProgramCode code; - ProgramCode code_b; - - u64 unique_identifier = 0; - std::optional<u32> texture_handler_size; - u32 bound_buffer = 0; - VideoCommon::Shader::GraphicsInfo graphics_info; - VideoCommon::Shader::ComputeInfo compute_info; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::SeparateSamplerMap separate_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; -}; - -/// Contains an OpenGL dumped binary program -struct ShaderDiskCachePrecompiled { - u64 unique_identifier = 0; - GLenum binary_format = 0; - std::vector<u8> binary; -}; - -class ShaderDiskCacheOpenGL { -public: - explicit ShaderDiskCacheOpenGL(); - ~ShaderDiskCacheOpenGL(); - - /// Binds a title ID for all future operations. - void BindTitleID(u64 title_id); - - /// Loads transferable cache. If file has a old version or on failure, it deletes the file. - std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); - - /// Loads current game's precompiled cache. Invalidates on failure. - std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); - - /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable(); - - /// Removes the precompiled cache file and clears virtual precompiled cache file. - void InvalidatePrecompiled(); - - /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveEntry(const ShaderDiskCacheEntry& entry); - - /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SavePrecompiled(u64 unique_identifier, GLuint program); - - /// Serializes virtual precompiled shader cache file to real file - void SaveVirtualPrecompiledFile(); - -private: - /// Loads the transferable cache. Returns empty on failure. - std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( - Common::FS::IOFile& file); - - /// Opens current game's transferable file and write it's header if it doesn't exist - Common::FS::IOFile AppendTransferableFile() const; - - /// Save precompiled header to precompiled_cache_in_memory - void SavePrecompiledHeaderToVirtualPrecompiledCache(); - - /// Create shader disk cache directories. Returns true on success. - bool EnsureDirectories() const; - - /// Gets current game's transferable file path - std::filesystem::path GetTransferablePath() const; - - /// Gets current game's precompiled file path - std::filesystem::path GetPrecompiledPath() const; - - /// Get user's transferable directory path - std::filesystem::path GetTransferableDir() const; - - /// Get user's precompiled directory path - std::filesystem::path GetPrecompiledDir() const; - - /// Get user's shader directory path - std::filesystem::path GetBaseDir() const; - - /// Get current game's title id - std::string GetTitleID() const; - - template <typename T> - bool SaveArrayToPrecompiled(const T* data, std::size_t length) { - const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += write_length; - return write_length == sizeof(T) * length; - } - - template <typename T> - bool LoadArrayFromPrecompiled(T* data, std::size_t length) { - const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += read_length; - return read_length == sizeof(T) * length; - } - - template <typename T> - bool SaveObjectToPrecompiled(const T& object) { - return SaveArrayToPrecompiled(&object, 1); - } - - bool SaveObjectToPrecompiled(bool object) { - const auto value = static_cast<u8>(object); - return SaveArrayToPrecompiled(&value, 1); - } - - template <typename T> - bool LoadObjectFromPrecompiled(T& object) { - return LoadArrayFromPrecompiled(&object, 1); - } - - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file - FileSys::VectorVfsFile precompiled_cache_virtual_file; - // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset = 0; - - // Stored transferable shaders - std::unordered_set<u64> stored_transferable; - - /// Title ID to operate on - u64 title_id = 0; - - // The cache has been loaded at boot - bool is_usable = false; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c0d5c7f4 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } - } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..7a3660496 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -8,146 +8,14 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_) - : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, - descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} +ComputePipeline::ComputePipeline() = default; -VKComputePipeline::~VKComputePipeline() = default; - -VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector<VkDescriptorSetLayoutBinding> bindings; - u32 binding = 0; - const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { - // TODO(Rodrigo): Maybe make individual bindings here? - for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - } - }; - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast<u32>(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - return device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }); -} - -vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; - u32 binding = 0; - u32 offset = 0; - FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. - return {}; - } - - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }); -} - -vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { - device.SaveShader(code); - - return device.GetLogical().CreateShaderModule({ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = code.size() * sizeof(u32), - .pCode = code.data(), - }); -} - -vk::Pipeline VKComputePipeline::CreatePipeline() const { - - VkComputePipelineCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *shader_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *layout, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - ci.stage.pNext = &subgroup_size_ci; - } - - return device.GetLogical().CreateComputePipeline(ci); -} +ComputePipeline::~ComputePipeline() = default; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..433d8bb3d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,7 +6,6 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -15,50 +14,10 @@ class Device; class VKScheduler; class VKUpdateDescriptorQueue; -class VKComputePipeline final { +class ComputePipeline { public: - explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_); - ~VKComputePipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - - vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; - - vk::Pipeline CreatePipeline() const; - - const Device& device; - VKScheduler& scheduler; - ShaderEntries entries; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - vk::ShaderModule shader_module; - vk::Pipeline pipeline; + explicit ComputePipeline(); + ~ComputePipeline(); }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp deleted file mode 100644 index fc6dd83eb..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ /dev/null @@ -1,484 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <array> -#include <cstring> -#include <vector> - -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -MICROPROFILE_DECLARE(Vulkan_PipelineCache); - -namespace { - -template <class StencilFace> -VkStencilOpState GetStencilFaceState(const StencilFace& face) { - return { - .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), - .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), - .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), - .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), - .compareMask = 0, - .writeMask = 0, - .reference = 0, - }; -} - -bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { - static constexpr std::array unsupported_topologies = { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; - return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), - topology) == std::end(unsupported_topologies); -} - -VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { - union Swizzle { - u32 raw; - BitField<0, 3, Maxwell::ViewportSwizzle> x; - BitField<4, 3, Maxwell::ViewportSwizzle> y; - BitField<8, 3, Maxwell::ViewportSwizzle> z; - BitField<12, 3, Maxwell::ViewportSwizzle> w; - }; - const Swizzle unpacked{swizzle}; - - return { - .x = MaxwellToVK::ViewportSwizzle(unpacked.x), - .y = MaxwellToVK::ViewportSwizzle(unpacked.y), - .z = MaxwellToVK::ViewportSwizzle(unpacked.z), - .w = MaxwellToVK::ViewportSwizzle(unpacked.w), - }; -} - -VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { - switch (msaa_mode) { - case Tegra::Texture::MsaaMode::Msaa1x1: - return VK_SAMPLE_COUNT_1_BIT; - case Tegra::Texture::MsaaMode::Msaa2x1: - case Tegra::Texture::MsaaMode::Msaa2x1_D3D: - return VK_SAMPLE_COUNT_2_BIT; - case Tegra::Texture::MsaaMode::Msaa2x2: - case Tegra::Texture::MsaaMode::Msaa2x2_VC4: - case Tegra::Texture::MsaaMode::Msaa2x2_VC12: - return VK_SAMPLE_COUNT_4_BIT; - case Tegra::Texture::MsaaMode::Msaa4x2: - case Tegra::Texture::MsaaMode::Msaa4x2_D3D: - case Tegra::Texture::MsaaMode::Msaa4x2_VC8: - case Tegra::Texture::MsaaMode::Msaa4x2_VC24: - return VK_SAMPLE_COUNT_8_BIT; - case Tegra::Texture::MsaaMode::Msaa4x4: - return VK_SAMPLE_COUNT_16_BIT; - default: - UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); - return VK_SAMPLE_COUNT_1_BIT; - } -} - -} // Anonymous namespace - -VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - const SPIRVProgram& program, u32 num_color_buffers) - : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, - modules(CreateShaderModules(program)), - pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} - -VKGraphicsPipeline::~VKGraphicsPipeline() = default; - -VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - vk::Span<VkDescriptorSetLayoutBinding> bindings) const { - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = bindings.size(), - .pBindings = bindings.data(), - }; - return device.GetLogical().CreateDescriptorSetLayout(ci); -} - -vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - return device.GetLogical().CreatePipelineLayout(ci); -} - -vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const { - std::vector<VkDescriptorUpdateTemplateEntry> template_entries; - u32 binding = 0; - u32 offset = 0; - for (const auto& stage : program) { - if (stage) { - FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); - } - } - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. - return {}; - } - - const VkDescriptorUpdateTemplateCreateInfoKHR ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); -} - -std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( - const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = 0, - .pCode = nullptr, - }; - - std::vector<vk::ShaderModule> shader_modules; - shader_modules.reserve(Maxwell::MaxShaderStage); - for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { - const auto& stage = program[i]; - if (!stage) { - continue; - } - - device.SaveShader(stage->code); - - ci.codeSize = stage->code.size() * sizeof(u32); - ci.pCode = stage->code.data(); - shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); - } - return shader_modules; -} - -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, - VkRenderPass renderpass, - u32 num_color_buffers) const { - const auto& state = cache_key.fixed_state; - const auto& viewport_swizzles = state.viewport_swizzles; - - FixedPipelineState::DynamicState dynamic; - if (device.IsExtExtendedDynamicStateSupported()) { - // Insert dummy values, as long as they are valid they don't matter as extended dynamic - // state is ignored - dynamic.raw1 = 0; - dynamic.raw2 = 0; - dynamic.vertex_strides.fill(0); - } else { - dynamic = state.dynamic_state; - } - - std::vector<VkVertexInputBindingDescription> vertex_bindings; - std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast<u32>(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast<u32>(index), - .divisor = state.binding_divisors[index], - }); - } - } - - std::vector<VkVertexInputAttributeDescription> vertex_attributes; - const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; - if (!attribute.enabled) { - continue; - } - if (!input_attributes.contains(static_cast<u32>(index))) { - // Skip attributes not used by the vertex shaders. - continue; - } - vertex_attributes.push_back({ - .location = static_cast<u32>(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); - } - - VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()), - .pVertexBindingDescriptions = vertex_bindings.data(), - .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), - .pVertexAttributeDescriptions = vertex_attributes.data(), - }; - - const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()), - .pVertexBindingDivisors = vertex_binding_divisors.data(), - }; - if (!vertex_binding_divisors.empty()) { - vertex_input_ci.pNext = &input_divisor_ci; - } - - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), - .primitiveRestartEnable = state.primitive_restart_enable != 0 && - SupportsPrimitiveRestart(input_assembly_topology), - }; - - const VkPipelineTessellationStateCreateInfo tessellation_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, - }; - - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; - - std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; - std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewportSwizzles = swizzles.data(), - }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = - static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), - .rasterizerDiscardEnable = - static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = static_cast<VkCullModeFlags>( - dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), - .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisample_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthTestEnable = dynamic.depth_test_enable, - .depthWriteEnable = dynamic.depth_write_enable, - .depthCompareOp = dynamic.depth_test_enable - ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) - : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, - .stencilTestEnable = dynamic.stencil_enable, - .front = GetStencilFaceState(dynamic.front), - .back = GetStencilFaceState(dynamic.back), - .minDepthBounds = 0.0f, - .maxDepthBounds = 0.0f, - }; - - std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; - for (std::size_t index = 0; index < num_color_buffers; ++index) { - static constexpr std::array COMPONENT_TABLE{ - VK_COLOR_COMPONENT_R_BIT, - VK_COLOR_COMPONENT_G_BIT, - VK_COLOR_COMPONENT_B_BIT, - VK_COLOR_COMPONENT_A_BIT, - }; - const auto& blend = state.attachments[index]; - - VkColorComponentFlags color_components = 0; - for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { - if (blend.Mask()[i]) { - color_components |= COMPONENT_TABLE[i]; - } - } - - cb_attachments[index] = { - .blendEnable = blend.enable != 0, - .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), - .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), - .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), - .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), - .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), - .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), - .colorWriteMask = color_components, - }; - } - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = num_color_buffers, - .pAttachments = cb_attachments.data(), - .blendConstants = {}, - }; - - std::vector dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }; - if (device.IsExtExtendedDynamicStateSupported()) { - static constexpr std::array extended{ - VK_DYNAMIC_STATE_CULL_MODE_EXT, - VK_DYNAMIC_STATE_FRONT_FACE_EXT, - VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, - VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, - VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_OP_EXT, - }; - dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); - } - - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast<u32>(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - std::vector<VkPipelineShaderStageCreateInfo> shader_stages; - std::size_t module_index = 0; - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - if (!program[stage]) { - continue; - } - - VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); - stage_ci.module = *modules[module_index++]; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { - stage_ci.pNext = &subgroup_size_ci; - } - } - return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast<u32>(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = &tessellation_ci, - .pViewportState = &viewport_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisample_ci, - .pDepthStencilState = &depth_stencil_ci, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h deleted file mode 100644 index 8b6a98fe0..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <optional> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsPipelineCacheKey { - VkRenderPass renderpass; - std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); - -class Device; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - -using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; - -class VKGraphicsPipeline final { -public: - explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span<VkDescriptorSetLayoutBinding> bindings, - const SPIRVProgram& program, u32 num_color_buffers); - ~VKGraphicsPipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - GraphicsPipelineCacheKey GetCacheKey() const { - return cache_key; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout( - vk::Span<VkDescriptorSetLayoutBinding> bindings) const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const; - - std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; - - vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, - u32 num_color_buffers) const; - - const Device& device; - VKScheduler& scheduler; - const GraphicsPipelineCacheKey cache_key; - const u64 hash; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - std::vector<vk::ShaderModule> modules; - - vk::Pipeline pipeline; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..7d0ba1180 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -19,49 +19,27 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; namespace { - -constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; -constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; -constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - -constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - .depth = VideoCommon::Shader::CompileDepth::FullDecompile, - .disable_else_derivation = true, -}; - -constexpr std::size_t GetStageFromProgram(std::size_t program) { +size_t StageFromProgram(size_t program) { return program == 0 ? 0 : program - 1; } -constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { - return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); +ShaderType StageFromProgram(Maxwell::ShaderProgram program) { + return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program))); } ShaderType GetShaderType(Maxwell::ShaderProgram program) { @@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { return ShaderType::Vertex; } } - -template <VkDescriptorType descriptor_type, class Container> -void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, - VkShaderStageFlags stage_flags, const Container& container) { - const u32 num_entries = static_cast<u32>(std::size(container)); - for (std::size_t i = 0; i < num_entries; ++i) { - u32 count = 1; - if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - // Combined image samplers can be arrayed. - count = container[i].size; - } - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = count, - .stageFlags = stage_flags, - .pImmutableSamplers = nullptr, - }); - } -} - -u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector<VkDescriptorSetLayoutBinding>& bindings, - Maxwell::ShaderProgram program_type, u32 base_binding) { - const ShaderType stage = GetStageFromProgram(program_type); - const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); - - u32 binding = base_binding; - AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); - AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); - AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); - AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); - AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); - AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); - return binding; -} - } // Anonymous namespace -std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); - return static_cast<std::size_t>(hash); -} - -bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, Size()) == 0; -} - -std::size_t ComputePipelineCacheKey::Hash() const noexcept { +size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); - return static_cast<std::size_t>(hash); + return static_cast<size_t>(hash); } bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, - GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), - shader_ir(program_code, main_offset_, compiler_settings, registry), - entries(GenerateShaderEntries(shader_ir)) {} +Shader::Shader() = default; Shader::~Shader() = default; -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} -VKPipelineCache::~VKPipelineCache() = default; - -std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { - std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto program{static_cast<Maxwell::ShaderProgram>(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - - const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - if (!result) { - const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; - - // No shader found - create a new one - static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, - std::move(code), stage_offset); - result = shader.get(); - - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, size_in_bytes); - } else { - null_shader = std::move(shader); - } - } - shaders[index] = result; - } - return last_shaders = shaders; -} - -VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders) { - MICROPROFILE_SCOPE(Vulkan_PipelineCache); - - if (last_graphics_pipeline && last_graphics_key == key) { - return last_graphics_pipeline; - } - last_graphics_key = key; - - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { - std::unique_lock lock{pipeline_cache}; - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, bindings, program, key, - num_color_buffers); - } - last_graphics_pipeline = pair->second.get(); - return last_graphics_pipeline; - } - - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - auto& entry = pair->second; - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, - update_descriptor_queue, key, bindings, - program, num_color_buffers); - gpu.ShaderNotify().MarkShaderComplete(); - } - last_graphics_pipeline = entry.get(); - return last_graphics_pipeline; -} +PipelineCache::~PipelineCache() = default; -VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); @@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - - const GPUVAddr gpu_addr = key.shader; - - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); - if (!shader) { - // No shader found - create a new one - const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, - *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); - shader = shader_info.get(); - - if (cpu_addr) { - Register(std::move(shader_info), *cpu_addr, size_in_bytes); - } else { - null_kernel = std::move(shader_info); - } - } - - const Specialization specialization{ - .base_binding = 0, - .workgroup_size = key.workgroup_size, - .shared_memory_size = key.shared_memory_size, - .point_size = std::nullopt, - .enabled_attributes = {}, - .attribute_types = {}, - .ndc_minus_one_to_one = false, - }; - const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, - shader->GetRegistry(), specialization), - shader->GetEntries()}; - entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, - update_descriptor_queue, spirv_shader); - return *entry; -} - -void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { - gpu.ShaderNotify().MarkShaderComplete(); - std::unique_lock lock{pipeline_cache}; - graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); -} - -void VKPipelineCache::OnShaderRemoval(Shader* shader) { - bool finished = false; - const auto Finish = [&] { - // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and - // flush. - if (finished) { - return; - } - finished = true; - scheduler.Finish(); - }; - - const GPUVAddr invalidated_addr = shader->GetGpuAddr(); - for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { - auto& entry = it->first; - if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == - entry.shaders.end()) { - ++it; - continue; - } - Finish(); - it = graphics_cache.erase(it); - } - for (auto it = compute_cache.begin(); it != compute_cache.end();) { - auto& entry = it->first; - if (entry.shader != invalidated_addr) { - ++it; - continue; - } - Finish(); - it = compute_cache.erase(it); - } -} - -std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> -VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - Specialization specialization; - if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { - float point_size; - std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); - specialization.point_size = point_size; - ASSERT(point_size != 0.0f); - } - for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.attributes[i]; - specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; - specialization.attribute_types[i] = attribute.Type(); - } - specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; - specialization.early_fragment_tests = fixed_state.early_z; - - // Alpha test - specialization.alpha_test_func = - FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); - - SPIRVProgram program; - std::vector<VkDescriptorSetLayoutBinding> bindings; - - for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { - const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - - const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 - const ShaderType program_type = GetShaderType(program_enum); - const auto& entries = shader->GetEntries(); - program[stage] = { - Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries, - }; - - const u32 old_binding = specialization.base_binding; - specialization.base_binding = - FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); - ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); - } - return {std::move(program), std::move(bindings)}; + throw "Bad"; } -template <VkDescriptorType descriptor_type, class Container> -void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, - u32& offset, const Container& container) { - static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); - const u32 count = static_cast<u32>(std::size(container)); - - if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { - for (u32 i = 0; i < count; ++i) { - const u32 num_samplers = container[i].size; - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = num_samplers, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - - ++binding; - offset += num_samplers * entry_size; - } - return; - } - - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || - descriptor_type == STORAGE_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple texels at once causes the driver to crash. - // Note: Fixed in driver Windows 443.24, Linux 440.66.15 - for (u32 i = 0; i < count; ++i) { - template_entries.push_back({ - .dstBinding = binding + i, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = descriptor_type, - .offset = static_cast<std::size_t>(offset + i * entry_size), - .stride = entry_size, - }); - } - } else if (count > 0) { - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = count, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - } - offset += count * entry_size; - binding += count; -} - -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { - AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); - AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); - AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); - AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); - AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); - AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); -} +void PipelineCache::OnShaderRemoval(Shader*) {} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..e3e63340d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -15,15 +15,8 @@ #include <boost/functional/hash.hpp> #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/async_shaders.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -35,7 +28,7 @@ namespace Vulkan { class Device; class RasterizerVulkan; -class VKComputePipeline; +class ComputePipeline; class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -47,7 +40,7 @@ struct ComputePipelineCacheKey { u32 shared_memory_size; std::array<u32, 3> workgroup_size; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; @@ -64,15 +57,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); namespace std { template <> -struct hash<Vulkan::GraphicsPipelineCacheKey> { - std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -template <> struct hash<Vulkan::ComputePipelineCacheKey> { - std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { return k.Hash(); } }; @@ -83,66 +69,26 @@ namespace Vulkan { class Shader { public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, - Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); + explicit Shader(); ~Shader(); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - VideoCommon::Shader::ShaderIR& GetIR() { - return shader_ir; - } - - const VideoCommon::Shader::ShaderIR& GetIR() const { - return shader_ir; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - GPUVAddr gpu_addr{}; - VideoCommon::Shader::ProgramCode program_code; - VideoCommon::Shader::Registry registry; - VideoCommon::Shader::ShaderIR shader_ir; - ShaderEntries entries; }; -class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { +class PipelineCache final : public VideoCommon::ShaderCache<Shader> { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); - ~VKPipelineCache() override; - - std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~PipelineCache() override; - VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, - u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders); - - VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); - - void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); + ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); protected: void OnShaderRemoval(Shader* shader) final; private: - std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( - const FixedPipelineState& fixed_state); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -158,17 +104,8 @@ private: std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; - GraphicsPipelineCacheKey last_graphics_key; - VKGraphicsPipeline* last_graphics_pipeline = nullptr; - std::mutex pipeline_cache; - std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> - graphics_cache; - std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; + std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; }; -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..f152297d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( - const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { - std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; - for (size_t i = 0; i < std::size(addresses); ++i) { - addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; - } - return addresses; -} - struct TextureHandle { constexpr TextureHandle(u32 data, bool via_header_index) { const Tegra::Texture::TextureHandle handle{data}; @@ -117,98 +107,6 @@ struct TextureHandle { u32 sampler; }; -template <typename Engine, typename Entry> -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - size_t stage, size_t index = 0) { - const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); - if constexpr (std::is_same_v<Entry, SamplerEntry>) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::e2D; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, - ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { - for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const VkSampler sampler = *sampler_ptr++; - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddSampledImage(handle, sampler); - } - } - for ([[maybe_unused]] const auto& entry : entries.storage_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.images) { - // TODO: Mark as modified - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddImage(handle); - } -} - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ @@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra descriptor_pool, update_descriptor_queue), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { + wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } } RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - MICROPROFILE_SCOPE(Vulkan_Drawing); - - SCOPE_EXIT({ gpu.TickWork(); }); - FlushWork(); - - query_cache.UpdateCounters(); - - graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); - - const auto shaders = pipeline_cache.GetShaders(); - graphics_key.shaders = GetShaderAddresses(shaders); - - SetupShaderDescriptors(shaders, is_indexed); - - const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - graphics_key.renderpass = framebuffer->RenderPass(); - - VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( - graphics_key, framebuffer->NumColorBuffers(), async_shaders); - if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { - // Async graphics pipeline was not ready. - return; - } - - BeginTransformFeedback(); - - scheduler.RequestRenderpass(framebuffer); - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - UpdateDynamicStates(); - - const auto& regs = maxwell3d.regs; - const u32 num_instances = maxwell3d.mme_draw.instance_count; - const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); - const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); - const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); - } else { - cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, - draw_params.base_vertex, draw_params.base_instance); - } - }); - - EndTransformFeedback(); + UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); } void RasterizerVulkan::Clear() { @@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { }); } -void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { - MICROPROFILE_SCOPE(Vulkan_Compute); - - query_cache.UpdateCounters(); - - const auto& launch_desc = kepler_compute.launch_description; - auto& pipeline = pipeline_cache.GetComputePipeline({ - .shader = code_addr, - .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size{ - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, - }); - - // Compute dispatches can't be executed inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - image_view_indices.clear(); - sampler_handles.clear(); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - const auto& entries = pipeline.GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeUniformTexels(entries); - SetupComputeTextures(entries); - SetupComputeStorageTexels(entries); - SetupComputeImages(entries); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - update_descriptor_queue.Acquire(); - - buffer_cache.BindHostComputeBuffers(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); - - const VkPipeline pipeline_handle = pipeline.GetHandle(); - const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); - const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, - descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - cmdbuf.Dispatch(grid_x, grid_y, grid_z); - }); +void RasterizerVulkan::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { @@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } -void RasterizerVulkan::SetupShaderDescriptors( - const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { - image_view_indices.clear(); - sampler_handles.clear(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - const ShaderEntries& entries = shader->GetEntries(); - SetupGraphicsUniformTexels(entries, stage); - SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - update_descriptor_queue.Acquire(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - // Skip VertexA stage - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, - image_view_id_ptr, sampler_ptr); - } -} - void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d.regs; UpdateViewportsState(regs); @@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = - GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.images) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, - COMPUTE_SHADER_INDEX, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.images) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..31017dc2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -28,7 +28,6 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/async_shaders.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -73,7 +72,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -103,19 +102,6 @@ public: bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - - /// Maximum supported size that a constbuffer can have in bytes. - static constexpr size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -125,40 +111,12 @@ private: void FlushWork(); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, - bool is_indexed); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - /// Setup uniform texels in the graphics pipeline. - void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup textures in the graphics pipeline. - void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); - - /// Setup storage texels in the graphics pipeline. - void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup images in the graphics pipeline. - void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - - /// Setup texel buffers in the compute pipeline. - void SetupComputeUniformTexels(const ShaderEntries& entries); - - /// Setup textures in the compute pipeline. - void SetupComputeTextures(const ShaderEntries& entries); - - /// Setup storage texels in the compute pipeline. - void SetupComputeStorageTexels(const ShaderEntries& entries); - - /// Setup images in the compute pipeline. - void SetupComputeImages(const ShaderEntries& entries); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -198,13 +156,12 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - VKPipelineCache pipeline_cache; + PipelineCache pipeline_cache; VKQueryCache query_cache; AccelerateDMA accelerate_dma; VKFenceManager fence_manager; vk::Event wfi_event; - VideoCommon::Shader::AsyncShaders async_shaders; boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null @@ -1,752 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <string> -#include <string_view> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { - -ASTZipper::ASTZipper() = default; - -void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { - ASSERT(new_first->manager == nullptr); - first = new_first; - last = new_first; - - ASTNode current = first; - while (current) { - current->manager = this; - current->parent = parent; - last = current; - current = current->next; - } -} - -void ASTZipper::PushBack(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous = last; - if (last) { - last->next = new_node; - } - new_node->next.reset(); - last = new_node; - if (!first) { - first = new_node; - } - new_node->manager = this; -} - -void ASTZipper::PushFront(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous.reset(); - new_node->next = first; - if (first) { - first->previous = new_node; - } - if (last == first) { - last = new_node; - } - first = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushFront(new_node); - return; - } - const ASTNode next = at_node->next; - if (next) { - next->previous = new_node; - } - new_node->previous = at_node; - if (at_node == last) { - last = new_node; - } - new_node->next = next; - at_node->next = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushBack(new_node); - return; - } - const ASTNode previous = at_node->previous; - if (previous) { - previous->next = new_node; - } - new_node->next = at_node; - if (at_node == first) { - first = new_node; - } - new_node->previous = previous; - at_node->previous = new_node; - new_node->manager = this; -} - -void ASTZipper::DetachTail(ASTNode node) { - ASSERT(node->manager == this); - if (node == first) { - first.reset(); - last.reset(); - return; - } - - last = node->previous; - last->next.reset(); - node->previous.reset(); - - ASTNode current = std::move(node); - while (current) { - current->manager = nullptr; - current->parent.reset(); - current = current->next; - } -} - -void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { - ASSERT(start->manager == this && end->manager == this); - if (start == end) { - DetachSingle(start); - return; - } - const ASTNode prev = start->previous; - const ASTNode post = end->next; - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - start->previous.reset(); - end->next.reset(); - ASTNode current = start; - bool found = false; - while (current) { - current->manager = nullptr; - current->parent.reset(); - found |= current == end; - current = current->next; - } - ASSERT(found); -} - -void ASTZipper::DetachSingle(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode prev = node->previous; - const ASTNode post = node->next; - node->previous.reset(); - node->next.reset(); - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - - node->manager = nullptr; - node->parent.reset(); -} - -void ASTZipper::Remove(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode next = node->next; - const ASTNode previous = node->previous; - if (previous) { - previous->next = next; - } - if (next) { - next->previous = previous; - } - node->parent.reset(); - node->manager = nullptr; - if (node == last) { - last = previous; - } - if (node == first) { - first = next; - } -} - -class ExprPrinter final { -public: - void operator()(const ExprAnd& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += "!"; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - inner += fmt::format("P{}", expr.predicate); - } - - void operator()(const ExprCondCode& expr) { - inner += fmt::format("CC{}", expr.cc); - } - - void operator()(const ExprVar& expr) { - inner += fmt::format("V{}", expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(const ExprGprEqual& expr) { - inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string inner; -}; - -class ASTPrinter { -public: - void operator()(const ASTProgram& ast) { - scope++; - inner += "program {\n"; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - inner += "}\n"; - scope--; - } - - void operator()(const ASTIfThen& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}}\n", Indent()); - } - - void operator()(const ASTIfElse& ast) { - inner += Indent(); - inner += "else {\n"; - - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - - inner += Indent(); - inner += "}\n"; - } - - void operator()(const ASTBlockEncoded& ast) { - inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); - } - - void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { - inner += Indent(); - inner += "Block;\n"; - } - - void operator()(const ASTVarSet& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - inner += fmt::format("Label_{}:\n", ast.index); - } - - void operator()(const ASTGoto& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += - fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); - } - - void operator()(const ASTDoWhile& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}do {{\n", Indent()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), - ast.kills ? "discard" : "exit"); - } - - void operator()(const ASTBreak& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string_view Indent() { - if (space_segment_scope == scope) { - return space_segment; - } - - // Ensure that we don't exceed our view. - ASSERT(scope * 2 < spaces.size()); - - space_segment = spaces.substr(0, scope * 2); - space_segment_scope = scope; - return space_segment; - } - - std::string inner{}; - std::string_view space_segment; - - u32 scope{}; - u32 space_segment_scope{}; - - static constexpr std::string_view spaces{" "}; -}; - -std::string ASTManager::Print() const { - ASTPrinter printer{}; - printer.Visit(main_node); - return printer.GetResult(); -} - -ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) - : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} - -ASTManager::~ASTManager() { - Clear(); -} - -void ASTManager::Init() { - main_node = ASTBase::Make<ASTProgram>(ASTNode{}); - program = std::get_if<ASTProgram>(main_node->GetInnerData()); - false_condition = MakeExpr<ExprBoolean>(false); -} - -void ASTManager::DeclareLabel(u32 address) { - const auto pair = labels_map.emplace(address, labels_count); - if (pair.second) { - labels_count++; - labels.resize(labels_count); - } -} - -void ASTManager::InsertLabel(u32 address) { - const u32 index = labels_map[address]; - const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index); - labels[index] = label; - program->nodes.PushBack(label); -} - -void ASTManager::InsertGoto(Expr condition, u32 address) { - const u32 index = labels_map[address]; - const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index); - gotos.push_back(goto_node); - program->nodes.PushBack(goto_node); -} - -void ASTManager::InsertBlock(u32 start_address, u32 end_address) { - ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address); - program->nodes.PushBack(std::move(block)); -} - -void ASTManager::InsertReturn(Expr condition, bool kills) { - ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills); - program->nodes.PushBack(std::move(node)); -} - -// The decompile algorithm is based on -// "Taming control flow: A structured approach to eliminating goto statements" -// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be -// on the same structured level as the label which they jump to. This is done, -// through outward/inward movements and lifting. Once they are at the same -// level, you can enclose them in an "if" structure or a "do-while" structure. -void ASTManager::Decompile() { - auto it = gotos.begin(); - while (it != gotos.end()) { - const ASTNode goto_node = *it; - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - const ASTNode label = labels[*label_index]; - if (!full_decompile) { - // We only decompile backward jumps - if (!IsBackwardsJump(goto_node, label)) { - it++; - continue; - } - } - if (IndirectlyRelated(goto_node, label)) { - while (!DirectlyRelated(goto_node, label)) { - MoveOutward(goto_node); - } - } - if (DirectlyRelated(goto_node, label)) { - u32 goto_level = goto_node->GetLevel(); - const u32 label_level = label->GetLevel(); - while (label_level < goto_level) { - MoveOutward(goto_node); - goto_level--; - } - // TODO(Blinkhawk): Implement Lifting and Inward Movements - } - if (label->GetParent() == goto_node->GetParent()) { - bool is_loop = false; - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label) { - is_loop = true; - break; - } - current = current->GetPrevious(); - } - - if (is_loop) { - EncloseDoWhile(goto_node, label); - } else { - EncloseIfThen(goto_node, label); - } - it = gotos.erase(it); - continue; - } - it++; - } - if (full_decompile) { - for (const ASTNode& label : labels) { - auto& manager = label->GetManager(); - manager.Remove(label); - } - labels.clear(); - } else { - auto label_it = labels.begin(); - while (label_it != labels.end()) { - bool can_remove = true; - ASTNode label = *label_it; - for (const ASTNode& goto_node : gotos) { - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - ASTNode& glabel = labels[*label_index]; - if (glabel == label) { - can_remove = false; - break; - } - } - if (can_remove) { - label->MarkLabelUnused(); - } - } - } -} - -bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { - u32 goto_level = goto_node->GetLevel(); - u32 label_level = label_node->GetLevel(); - while (goto_level > label_level) { - goto_level--; - goto_node = goto_node->GetParent(); - } - while (label_level > goto_level) { - label_level--; - label_node = label_node->GetParent(); - } - while (goto_node->GetParent() != label_node->GetParent()) { - goto_node = goto_node->GetParent(); - label_node = label_node->GetParent(); - } - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label_node) { - return true; - } - current = current->GetPrevious(); - } - return false; -} - -bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { - return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); -} - -bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { - if (first->GetParent() == second->GetParent()) { - return false; - } - const u32 first_level = first->GetLevel(); - const u32 second_level = second->GetLevel(); - u32 min_level; - u32 max_level; - ASTNode max; - ASTNode min; - if (first_level > second_level) { - min_level = second_level; - min = second; - max_level = first_level; - max = first; - } else { - min_level = first_level; - min = first; - max_level = second_level; - max = second; - } - - while (max_level > min_level) { - max_level--; - max = max->GetParent(); - } - - return min->GetParent() == max->GetParent(); -} - -void ASTManager::ShowCurrentState(std::string_view state) const { - LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); - SanityCheck(); -} - -void ASTManager::SanityCheck() const { - for (const auto& label : labels) { - if (!label->GetParent()) { - LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); - } - } -} - -void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode loop_start = label->GetNext(); - if (loop_start == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode parent = label->GetParent(); - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSegment(loop_start, goto_node); - const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition); - ASTZipper* sub_zipper = do_while_node->GetSubNodes(); - sub_zipper->Init(loop_start, do_while_node); - zipper.InsertAfter(do_while_node, label); - sub_zipper->Remove(goto_node); -} - -void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode if_end = label->GetPrevious(); - if (if_end == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode prev = goto_node->GetPrevious(); - const Expr condition = goto_node->GetGotoCondition(); - bool do_else = false; - if (!disable_else_derivation && prev->IsIfThen()) { - const Expr if_condition = prev->GetIfCondition(); - do_else = ExprAreEqual(if_condition, condition); - } - const ASTNode parent = label->GetParent(); - zipper.DetachSegment(goto_node, if_end); - ASTNode if_node; - if (do_else) { - if_node = ASTBase::Make<ASTIfElse>(parent); - } else { - Expr neg_condition = MakeExprNot(condition); - if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition); - } - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(goto_node, if_node); - zipper.InsertAfter(if_node, prev); - sub_zipper->Remove(goto_node); -} - -void ASTManager::MoveOutward(ASTNode goto_node) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode parent = goto_node->GetParent(); - ASTZipper& zipper2 = parent->GetManager(); - const ASTNode grandpa = parent->GetParent(); - const bool is_loop = parent->IsLoop(); - const bool is_else = parent->IsIfElse(); - const bool is_if = parent->IsIfThen(); - - const ASTNode prev = goto_node->GetPrevious(); - const ASTNode post = goto_node->GetNext(); - - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSingle(goto_node); - if (is_loop) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr<ExprVar>(var_index); - const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); - zipper2.InsertBefore(var_node_init, parent); - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition); - zipper.InsertAfter(break_node, var_node); - } else if (is_if || is_else) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr<ExprVar>(var_index); - const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); - if (is_if) { - zipper2.InsertBefore(var_node_init, parent); - } else { - zipper2.InsertBefore(var_node_init, parent->GetPrevious()); - } - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - if (post) { - zipper.DetachTail(post); - const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition)); - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(post, if_node); - zipper.InsertAfter(if_node, var_node); - } - } else { - UNREACHABLE(); - } - const ASTNode next = parent->GetNext(); - if (is_if && next && next->IsIfElse()) { - zipper2.InsertAfter(goto_node, next); - goto_node->SetParent(grandpa); - return; - } - zipper2.InsertAfter(goto_node, parent); - goto_node->SetParent(grandpa); -} - -class ASTClearer { -public: - ASTClearer() = default; - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) { - ast.nodes.clear(); - } - - void operator()([[maybe_unused]] const ASTVarSet& ast) {} - - void operator()([[maybe_unused]] const ASTLabel& ast) {} - - void operator()([[maybe_unused]] const ASTGoto& ast) {} - - void operator()(const ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTReturn& ast) {} - - void operator()([[maybe_unused]] const ASTBreak& ast) {} - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - node->Clear(); - } -}; - -void ASTManager::Clear() { - if (!main_node) { - return; - } - ASTClearer clearer{}; - clearer.Visit(main_node); - main_node.reset(); - program = nullptr; - labels_map.clear(); - labels.clear(); - gotos.clear(); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <functional> -#include <list> -#include <memory> -#include <optional> -#include <string> -#include <unordered_map> -#include <vector> - -#include "video_core/shader/expr.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -class ASTBase; -class ASTBlockDecoded; -class ASTBlockEncoded; -class ASTBreak; -class ASTDoWhile; -class ASTGoto; -class ASTIfElse; -class ASTIfThen; -class ASTLabel; -class ASTProgram; -class ASTReturn; -class ASTVarSet; - -using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded, - ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>; - -using ASTNode = std::shared_ptr<ASTBase>; - -enum class ASTZipperType : u32 { - Program, - IfThen, - IfElse, - Loop, -}; - -class ASTZipper final { -public: - explicit ASTZipper(); - - void Init(ASTNode first, ASTNode parent); - - ASTNode GetFirst() const { - return first; - } - - ASTNode GetLast() const { - return last; - } - - void PushBack(ASTNode new_node); - void PushFront(ASTNode new_node); - void InsertAfter(ASTNode new_node, ASTNode at_node); - void InsertBefore(ASTNode new_node, ASTNode at_node); - void DetachTail(ASTNode node); - void DetachSingle(ASTNode node); - void DetachSegment(ASTNode start, ASTNode end); - void Remove(ASTNode node); - - ASTNode first; - ASTNode last; -}; - -class ASTProgram { -public: - ASTZipper nodes{}; -}; - -class ASTIfThen { -public: - explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTIfElse { -public: - ASTZipper nodes{}; -}; - -class ASTBlockEncoded { -public: - explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} - u32 start; - u32 end; -}; - -class ASTBlockDecoded { -public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} - NodeBlock nodes; -}; - -class ASTVarSet { -public: - explicit ASTVarSet(u32 index_, Expr condition_) - : index{index_}, condition{std::move(condition_)} {} - - u32 index; - Expr condition; -}; - -class ASTLabel { -public: - explicit ASTLabel(u32 index_) : index{index_} {} - u32 index; - bool unused{}; -}; - -class ASTGoto { -public: - explicit ASTGoto(Expr condition_, u32 label_) - : condition{std::move(condition_)}, label{label_} {} - - Expr condition; - u32 label; -}; - -class ASTDoWhile { -public: - explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTReturn { -public: - explicit ASTReturn(Expr condition_, bool kills_) - : condition{std::move(condition_)}, kills{kills_} {} - - Expr condition; - bool kills; -}; - -class ASTBreak { -public: - explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; -}; - -class ASTBase { -public: - explicit ASTBase(ASTNode parent_, ASTData data_) - : data{std::move(data_)}, parent{std::move(parent_)} {} - - template <class U, class... Args> - static ASTNode Make(ASTNode parent, Args&&... args) { - return std::make_shared<ASTBase>(std::move(parent), - ASTData(U(std::forward<Args>(args)...))); - } - - void SetParent(ASTNode new_parent) { - parent = std::move(new_parent); - } - - ASTNode& GetParent() { - return parent; - } - - const ASTNode& GetParent() const { - return parent; - } - - u32 GetLevel() const { - u32 level = 0; - auto next_parent = parent; - while (next_parent) { - next_parent = next_parent->GetParent(); - level++; - } - return level; - } - - ASTData* GetInnerData() { - return &data; - } - - const ASTData* GetInnerData() const { - return &data; - } - - ASTNode GetNext() const { - return next; - } - - ASTNode GetPrevious() const { - return previous; - } - - ASTZipper& GetManager() { - return *manager; - } - - const ASTZipper& GetManager() const { - return *manager; - } - - std::optional<u32> GetGotoLabel() const { - if (const auto* inner = std::get_if<ASTGoto>(&data)) { - return {inner->label}; - } - return std::nullopt; - } - - Expr GetGotoCondition() const { - if (const auto* inner = std::get_if<ASTGoto>(&data)) { - return inner->condition; - } - return nullptr; - } - - void MarkLabelUnused() { - if (auto* inner = std::get_if<ASTLabel>(&data)) { - inner->unused = true; - } - } - - bool IsLabelUnused() const { - if (const auto* inner = std::get_if<ASTLabel>(&data)) { - return inner->unused; - } - return true; - } - - std::optional<u32> GetLabelIndex() const { - if (const auto* inner = std::get_if<ASTLabel>(&data)) { - return {inner->index}; - } - return std::nullopt; - } - - Expr GetIfCondition() const { - if (const auto* inner = std::get_if<ASTIfThen>(&data)) { - return inner->condition; - } - return nullptr; - } - - void SetGotoCondition(Expr new_condition) { - if (auto* inner = std::get_if<ASTGoto>(&data)) { - inner->condition = std::move(new_condition); - } - } - - bool IsIfThen() const { - return std::holds_alternative<ASTIfThen>(data); - } - - bool IsIfElse() const { - return std::holds_alternative<ASTIfElse>(data); - } - - bool IsBlockEncoded() const { - return std::holds_alternative<ASTBlockEncoded>(data); - } - - void TransformBlockEncoded(NodeBlock&& nodes) { - data = ASTBlockDecoded(std::move(nodes)); - } - - bool IsLoop() const { - return std::holds_alternative<ASTDoWhile>(data); - } - - ASTZipper* GetSubNodes() { - if (std::holds_alternative<ASTProgram>(data)) { - return &std::get_if<ASTProgram>(&data)->nodes; - } - if (std::holds_alternative<ASTIfThen>(data)) { - return &std::get_if<ASTIfThen>(&data)->nodes; - } - if (std::holds_alternative<ASTIfElse>(data)) { - return &std::get_if<ASTIfElse>(&data)->nodes; - } - if (std::holds_alternative<ASTDoWhile>(data)) { - return &std::get_if<ASTDoWhile>(&data)->nodes; - } - return nullptr; - } - - void Clear() { - next.reset(); - previous.reset(); - parent.reset(); - manager = nullptr; - } - -private: - friend class ASTZipper; - - ASTData data; - ASTNode parent; - ASTNode next; - ASTNode previous; - ASTZipper* manager{}; -}; - -class ASTManager final { -public: - explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); - ~ASTManager(); - - ASTManager(const ASTManager& o) = delete; - ASTManager& operator=(const ASTManager& other) = delete; - - ASTManager(ASTManager&& other) noexcept = default; - ASTManager& operator=(ASTManager&& other) noexcept = default; - - void Init(); - - void DeclareLabel(u32 address); - - void InsertLabel(u32 address); - - void InsertGoto(Expr condition, u32 address); - - void InsertBlock(u32 start_address, u32 end_address); - - void InsertReturn(Expr condition, bool kills); - - std::string Print() const; - - void Decompile(); - - void ShowCurrentState(std::string_view state) const; - - void SanityCheck() const; - - void Clear(); - - bool IsFullyDecompiled() const { - if (full_decompile) { - return gotos.empty(); - } - - for (ASTNode goto_node : gotos) { - auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return false; - } - ASTNode glabel = labels[*label_index]; - if (IsBackwardsJump(goto_node, glabel)) { - return false; - } - } - return true; - } - - ASTNode GetProgram() const { - return main_node; - } - - u32 GetVariables() const { - return variables; - } - - const std::vector<ASTNode>& GetLabels() const { - return labels; - } - -private: - bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; - - bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - void EncloseDoWhile(ASTNode goto_node, ASTNode label); - - void EncloseIfThen(ASTNode goto_node, ASTNode label); - - void MoveOutward(ASTNode goto_node); - - u32 NewVariable() { - return variables++; - } - - bool full_decompile{}; - bool disable_else_derivation{}; - std::unordered_map<u32, u32> labels_map{}; - u32 labels_count{}; - std::vector<ASTNode> labels{}; - std::list<ASTNode> gotos{}; - u32 variables{}; - ASTProgram* program{}; - ASTNode main_node{}; - Expr false_condition{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <condition_variable> -#include <mutex> -#include <thread> -#include <vector> -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/shader/async_shaders.h" - -namespace VideoCommon::Shader { - -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} - -AsyncShaders::~AsyncShaders() { - KillWorkers(); -} - -void AsyncShaders::AllocateWorkers() { - // Use at least one thread - u32 num_workers = 1; - - // Deduce how many more threads we can use - const u32 thread_count = std::thread::hardware_concurrency(); - if (thread_count >= 8) { - // Increase async workers by 1 for every 2 threads >= 8 - num_workers += 1 + (thread_count - 8) / 2; - } - - // If we already have workers queued, ignore - if (num_workers == worker_threads.size()) { - return; - } - - // If workers already exist, clear them - if (!worker_threads.empty()) { - FreeWorkers(); - } - - // Create workers - for (std::size_t i = 0; i < num_workers; i++) { - context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, - context_list[i].get()); - } -} - -void AsyncShaders::FreeWorkers() { - // Mark all threads to quit - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.join(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -void AsyncShaders::KillWorkers() { - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.detach(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -bool AsyncShaders::HasWorkQueued() const { - return !pending_queue.empty(); -} - -bool AsyncShaders::HasCompletedWork() const { - std::shared_lock lock{completed_mutex}; - return !finished_work.empty(); -} - -bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { - const auto& regs = gpu.Maxwell3D().regs; - - // If something is using depth, we can assume that games are not rendering anything which will - // be used one time. - if (regs.zeta_enable) { - return true; - } - - // If games are using a small index count, we can assume these are full screen quads. Usually - // these shaders are only used once for building textures so we can assume they can't be built - // async - if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { - return false; - } - - return true; -} - -std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { - std::vector<Result> results; - { - std::unique_lock lock{completed_mutex}; - results = std::move(finished_work); - finished_work.clear(); - } - return results; -} - -void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, - Tegra::Engines::ShaderType shader_type, u64 uid, - std::vector<u64> code, std::vector<u64> code_b, - u32 main_offset, CompilerSettings compiler_settings, - const Registry& registry, VAddr cpu_addr) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, - .device = &device, - .shader_type = shader_type, - .uid = uid, - .code = std::move(code), - .code_b = std::move(code_b), - .main_offset = main_offset, - .compiler_settings = compiler_settings, - .registry = registry, - .cpu_address = cpu_addr, - .pp_cache = nullptr, - .vk_device = nullptr, - .scheduler = nullptr, - .descriptor_pool = nullptr, - .update_descriptor_queue = nullptr, - .bindings{}, - .program{}, - .key{}, - .num_color_buffers = 0, - }); - cv.notify_one(); -} - -void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector<VkDescriptorSetLayoutBinding> bindings, - Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = Backend::Vulkan, - .device = nullptr, - .shader_type{}, - .uid = 0, - .code{}, - .code_b{}, - .main_offset = 0, - .compiler_settings{}, - .registry{}, - .cpu_address = 0, - .pp_cache = pp_cache, - .vk_device = &device, - .scheduler = &scheduler, - .descriptor_pool = &descriptor_pool, - .update_descriptor_queue = &update_descriptor_queue, - .bindings = std::move(bindings), - .program = std::move(program), - .key = key, - .num_color_buffers = num_color_buffers, - }); - cv.notify_one(); -} - -void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - while (!is_thread_exiting.load(std::memory_order_relaxed)) { - std::unique_lock lock{queue_mutex}; - cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); - if (is_thread_exiting) { - return; - } - - // Partial lock to allow all threads to read at the same time - if (!HasWorkQueued()) { - continue; - } - // Another thread beat us, just unlock and wait for the next load - if (pending_queue.empty()) { - continue; - } - - // Pull work from queue - WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop(); - lock.unlock(); - - if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); - const auto scope = context->Acquire(); - auto program = - OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); - Result result{}; - result.backend = work.backend; - result.cpu_address = work.cpu_address; - result.uid = work.uid; - result.code = std::move(work.code); - result.code_b = std::move(work.code_b); - result.shader_type = work.shader_type; - - if (work.backend == Backend::OpenGL) { - result.program.opengl = std::move(program->source_program); - } else if (work.backend == Backend::GLASM) { - result.program.glasm = std::move(program->assembly_program); - } - - { - std::unique_lock complete_lock(completed_mutex); - finished_work.push_back(std::move(result)); - } - } else if (work.backend == Backend::Vulkan) { - auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( - *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, work.key, work.bindings, work.program, - work.num_color_buffers); - - work.pp_cache->EmplacePipeline(std::move(pipeline)); - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- a/src/video_core/shader/async_shaders.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <condition_variable> -#include <memory> -#include <shared_mutex> -#include <thread> - -#include <glad/glad.h> - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Core::Frontend { -class EmuWindow; -class GraphicsContext; -} // namespace Core::Frontend - -namespace Tegra { -class GPU; -} - -namespace Vulkan { -class VKPipelineCache; -} - -namespace VideoCommon::Shader { - -class AsyncShaders { -public: - enum class Backend { - OpenGL, - GLASM, - Vulkan, - }; - - struct ResultPrograms { - OpenGL::OGLProgram opengl; - OpenGL::OGLAssemblyProgram glasm; - }; - - struct Result { - u64 uid; - VAddr cpu_address; - Backend backend; - ResultPrograms program; - std::vector<u64> code; - std::vector<u64> code_b; - Tegra::Engines::ShaderType shader_type; - }; - - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); - ~AsyncShaders(); - - /// Start up shader worker threads - void AllocateWorkers(); - - /// Clear the shader queue and kill all worker threads - void FreeWorkers(); - - // Force end all threads - void KillWorkers(); - - /// Check to see if any shaders have actually been compiled - [[nodiscard]] bool HasCompletedWork() const; - - /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build - /// every shader async as some shaders are only built and executed once. We try to "guess" which - /// shader would be used only once - [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; - - /// Pulls completed compiled shaders - [[nodiscard]] std::vector<Result> GetCompletedWork(); - - void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, - u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, - CompilerSettings compiler_settings, const Registry& registry, - VAddr cpu_addr); - - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, - Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector<VkDescriptorSetLayoutBinding> bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, - u32 num_color_buffers); - -private: - void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); - - /// Check our worker queue to see if we have any work queued already - [[nodiscard]] bool HasWorkQueued() const; - - struct WorkerParams { - Backend backend; - // For OGL - const OpenGL::Device* device; - Tegra::Engines::ShaderType shader_type; - u64 uid; - std::vector<u64> code; - std::vector<u64> code_b; - u32 main_offset; - CompilerSettings compiler_settings; - std::optional<Registry> registry; - VAddr cpu_address; - - // For Vulkan - Vulkan::VKPipelineCache* pp_cache; - const Vulkan::Device* vk_device; - Vulkan::VKScheduler* scheduler; - Vulkan::VKDescriptorPool* descriptor_pool; - Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - std::vector<VkDescriptorSetLayoutBinding> bindings; - Vulkan::SPIRVProgram program; - Vulkan::GraphicsPipelineCacheKey key; - u32 num_color_buffers; - }; - - std::condition_variable cv; - mutable std::mutex queue_mutex; - mutable std::shared_mutex completed_mutex; - std::atomic<bool> is_thread_exiting{}; - std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; - std::vector<std::thread> worker_threads; - std::queue<WorkerParams> pending_queue; - std::vector<Result> finished_work; - Core::Frontend::EmuWindow& emu_window; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/shader/compiler_settings.h" - -namespace VideoCommon::Shader { - -std::string CompileDepthAsString(const CompileDepth cd) { - switch (cd) { - case CompileDepth::BruteForce: - return "Brute Force Compile"; - case CompileDepth::FlowStack: - return "Simple Flow Stack Mode"; - case CompileDepth::NoFlowStack: - return "Remove Flow Stack"; - case CompileDepth::DecompileBackwards: - return "Decompile Backward Jumps"; - case CompileDepth::FullDecompile: - return "Full Decompilation"; - default: - return "Unknown Compiler Process"; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class CompileDepth : u32 { - BruteForce = 0, - FlowStack = 1, - NoFlowStack = 2, - DecompileBackwards = 3, - FullDecompile = 4, -}; - -std::string CompileDepthAsString(CompileDepth cd); - -struct CompilerSettings { - CompileDepth depth{CompileDepth::NoFlowStack}; - bool disable_else_derivation{true}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <list> -#include <map> -#include <set> -#include <stack> -#include <unordered_map> -#include <vector> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -constexpr s32 unassigned_branch = -2; - -struct Query { - u32 address{}; - std::stack<u32> ssy_stack{}; - std::stack<u32> pbk_stack{}; -}; - -struct BlockStack { - BlockStack() = default; - explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - std::stack<u32> ssy_stack{}; - std::stack<u32> pbk_stack{}; -}; - -template <typename T, typename... Args> -BlockBranchInfo MakeBranchInfo(Args&&... args) { - static_assert(std::is_convertible_v<T, BranchData>); - return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); -} - -bool BlockBranchIsIgnored(BlockBranchInfo first) { - bool ignore = false; - if (std::holds_alternative<SingleBranch>(*first)) { - const auto branch = std::get_if<SingleBranch>(first.get()); - ignore = branch->ignore; - } - return ignore; -} - -struct BlockInfo { - u32 start{}; - u32 end{}; - bool visited{}; - BlockBranchInfo branch{}; - - bool IsInside(const u32 address) const { - return start <= address && address <= end; - } -}; - -struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) - : program_code{program_code_}, registry{registry_}, start{start_} {} - - const ProgramCode& program_code; - Registry& registry; - u32 start{}; - std::vector<BlockInfo> block_info; - std::list<u32> inspect_queries; - std::list<Query> queries; - std::unordered_map<u32, u32> registered; - std::set<u32> labels; - std::map<u32, u32> ssy_labels; - std::map<u32, u32> pbk_labels; - std::unordered_map<u32, BlockStack> stacks; - ASTManager* manager{}; -}; - -enum class BlockCollision : u32 { None, Found, Inside }; - -std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { - const auto& blocks = state.block_info; - for (u32 index = 0; index < blocks.size(); index++) { - if (blocks[index].start == address) { - return {BlockCollision::Found, index}; - } - if (blocks[index].IsInside(address)) { - return {BlockCollision::Inside, index}; - } - } - return {BlockCollision::None, 0xFFFFFFFF}; -} - -struct ParseInfo { - BlockBranchInfo branch_info{}; - u32 end_address{}; -}; - -BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { - auto& it = state.block_info.emplace_back(); - it.start = start; - it.end = end; - const u32 index = static_cast<u32>(state.block_info.size() - 1); - state.registered.insert({start, index}); - return it; -} - -Pred GetPredicate(u32 index, bool negated) { - return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); -} - -enum class ParseResult : u32 { - ControlCaught, - BlockEnd, - AbnormalFlow, -}; - -struct BranchIndirectInfo { - u32 buffer{}; - u32 offset{}; - u32 entries{}; - s32 relative_position{}; -}; - -struct BufferInfo { - u32 index; - u32 offset; -}; - -std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() != OpCode::Id::BRX) { - return std::nullopt; - } - if (instr.brx.constant_buffer != 0) { - return std::nullopt; - } - --pos; - return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); -} - -template <typename Result, typename TestCallable, typename PackCallable> -// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> -// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> -std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, - PackCallable pack) { - for (; pos >= state.start; --pos) { - if (IsSchedInstruction(pos, state.start)) { - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (!opcode) { - continue; - } - if (test(instr, opcode->get())) { - --pos; - return std::make_optional(pack(instr, opcode->get())); - } - } - return std::nullopt; -} - -std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, - u64 brx_tracked_register) { - return TrackInstruction<std::pair<BufferInfo, u64>>( - state, pos, - [brx_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::LD_C && - instr.gpr0.Value() == brx_tracked_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; - }, - [](auto instr, const auto& opcode) { - const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), - static_cast<u32>(instr.cbuf36.GetOffset())}; - return std::make_pair(info, instr.gpr8.Value()); - }); -} - -std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, - u64 ldc_tracked_register) { - return TrackInstruction<u64>( - state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); -} - -std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, - u64 shl_tracked_register) { - return TrackInstruction<u32>( - state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); - }); -} - -std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { - const auto brx_info = GetBRXInfo(state, pos); - if (!brx_info) { - return std::nullopt; - } - const auto [relative_position, brx_tracked_register] = *brx_info; - - const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); - if (!ldc_info) { - return std::nullopt; - } - const auto [buffer_info, ldc_tracked_register] = *ldc_info; - - const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); - if (!shl_tracked_register) { - return std::nullopt; - } - - const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); - if (!entries) { - return std::nullopt; - } - - return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; -} - -std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { - u32 offset = static_cast<u32>(address); - const u32 end_address = static_cast<u32>(state.program_code.size()); - ParseInfo parse_info{}; - SingleBranch single_branch{}; - - const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { - const auto pair = rebuild_state.labels.emplace(label_address); - if (pair.second) { - rebuild_state.inspect_queries.push_back(label_address); - } - }; - - while (true) { - if (offset >= end_address) { - // ASSERT_OR_EXECUTE can't be used, as it ignores the break - ASSERT_MSG(false, "Shader passed the current limit!"); - - single_branch.address = exit_branch; - single_branch.ignore = false; - break; - } - if (state.registered.contains(offset)) { - single_branch.address = offset; - single_branch.ignore = true; - break; - } - if (IsSchedInstruction(offset, state.start)) { - offset++; - continue; - } - const Instruction instr = {state.program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { - offset++; - continue; - } - - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRA: { - if (instr.bra.constant_buffer != 0) { - return {ParseResult::AbnormalFlow, parse_info}; - } - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - const u32 branch_offset = offset + instr.bra.GetBranchTarget(); - if (branch_offset == 0) { - single_branch.address = exit_branch; - } else { - single_branch.address = branch_offset; - } - insert_label(state, branch_offset); - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SYNC: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = true; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRK: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = true; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::KIL: { - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = true; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SSY: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.ssy_labels.emplace(offset, target); - break; - } - case OpCode::Id::PBK: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.pbk_labels.emplace(offset, target); - break; - } - case OpCode::Id::BRX: { - const auto tmp = TrackBranchIndirectInfo(state, offset); - if (!tmp) { - LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); - return {ParseResult::AbnormalFlow, parse_info}; - } - - const auto result = *tmp; - const s32 pc_target = offset + result.relative_position; - std::vector<CaseBranch> branches; - for (u32 i = 0; i < result.entries; i++) { - auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); - if (!key) { - return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = *key; - u32 target = static_cast<u32>((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<MultiBranch>( - static_cast<u32>(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } - default: - break; - } - - offset++; - } - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - parse_info.end_address = offset - 1; - parse_info.branch_info = MakeBranchInfo<SingleBranch>( - single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, - single_branch.is_brk, single_branch.ignore); - return {ParseResult::BlockEnd, parse_info}; -} - -bool TryInspectAddress(CFGRebuildState& state) { - if (state.inspect_queries.empty()) { - return false; - } - - const u32 address = state.inspect_queries.front(); - state.inspect_queries.pop_front(); - const auto [result, block_index] = TryGetBlock(state, address); - switch (result) { - case BlockCollision::Found: { - return true; - } - case BlockCollision::Inside: { - // This case is the tricky one: - // We need to split the block into 2 separate blocks - const u32 end = state.block_info[block_index].end; - BlockInfo& new_block = CreateBlockInfo(state, address, end); - BlockInfo& current_block = state.block_info[block_index]; - current_block.end = address - 1; - new_block.branch = std::move(current_block.branch); - BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); - const auto branch = std::get_if<SingleBranch>(forward_branch.get()); - branch->address = address; - branch->ignore = true; - current_block.branch = std::move(forward_branch); - return true; - } - default: - break; - } - const auto [parse_result, parse_info] = ParseCode(state, address); - if (parse_result == ParseResult::AbnormalFlow) { - // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction - return false; - } - - BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); - block_info.branch = parse_info.branch_info; - if (std::holds_alternative<SingleBranch>(*block_info.branch)) { - const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); - if (branch->condition.IsUnconditional()) { - return true; - } - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); - return true; - } - return true; -} - -bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, - BlockInfo& block) { - auto gather_start = labels.lower_bound(block.start); - const auto gather_end = labels.upper_bound(block.end); - while (gather_start != gather_end) { - cc.push(gather_start->second); - ++gather_start; - } - }; - if (state.queries.empty()) { - return false; - } - - Query& q = state.queries.front(); - const u32 block_index = state.registered[q.address]; - BlockInfo& block = state.block_info[block_index]; - // If the block is visited, check if the stacks match, else gather the ssy/pbk - // labels into the current stack and look if the branch at the end of the block - // consumes a label. Schedule new queries accordingly - if (block.visited) { - BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); - state.queries.pop_front(); - return all_okay; - } - block.visited = true; - state.stacks.insert_or_assign(q.address, BlockStack{q}); - - Query q2(q); - state.queries.pop_front(); - gather_labels(q2.ssy_stack, state.ssy_labels, block); - gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (std::holds_alternative<SingleBranch>(*block.branch)) { - auto* branch = std::get_if<SingleBranch>(block.branch.get()); - if (!branch->condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } - - auto& conditional_query = state.queries.emplace_back(q2); - if (branch->is_sync) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.ssy_stack.top(); - } - conditional_query.ssy_stack.pop(); - } - if (branch->is_brk) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.pbk_stack.top(); - } - conditional_query.pbk_stack.pop(); - } - conditional_query.address = branch->address; - return true; - } - - const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); - for (const auto& branch_case : multi_branch->branches) { - auto& conditional_query = state.queries.emplace_back(q2); - conditional_query.address = branch_case.address; - } - - return true; -} - -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = [](const Condition& cond) -> Expr { - Expr result; - if (cond.cc != ConditionCode::T) { - result = MakeExpr<ExprCondCode>(cond.cc); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast<u32>(cond.predicate); - bool negate = false; - if (pred > 7) { - negate = true; - pred -= 8; - } - Expr extra = MakeExpr<ExprPredicate>(pred); - if (negate) { - extra = MakeExpr<ExprNot>(std::move(extra)); - } - if (result) { - return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); - } - return extra; - } - if (result) { - return result; - } - return MakeExpr<ExprBoolean>(true); - }; - - if (std::holds_alternative<SingleBranch>(*branch_info)) { - const auto* branch = std::get_if<SingleBranch>(branch_info.get()); - if (branch->address < 0) { - if (branch->kill) { - mm.InsertReturn(get_expr(branch->condition), true); - return; - } - mm.InsertReturn(get_expr(branch->condition), false); - return; - } - mm.InsertGoto(get_expr(branch->condition), branch->address); - return; - } - const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); - for (const auto& branch_case : multi_branch->branches) { - mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), - branch_case.address); - } -} - -void DecompileShader(CFGRebuildState& state) { - state.manager->Init(); - for (auto label : state.labels) { - state.manager->DeclareLabel(label); - } - for (const auto& block : state.block_info) { - if (state.labels.contains(block.start)) { - state.manager->InsertLabel(block.start); - } - const bool ignore = BlockBranchIsIgnored(block.branch); - const u32 end = ignore ? block.end + 1 : block.end; - state.manager->InsertBlock(block.start, end); - if (!ignore) { - InsertBranch(*state.manager, block.branch); - } - } - state.manager->Decompile(); -} - -} // Anonymous namespace - -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry) { - auto result_out = std::make_unique<ShaderCharacteristics>(); - if (settings.depth == CompileDepth::BruteForce) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - - CFGRebuildState state{program_code, start_address, registry}; - // Inspect Code and generate blocks - state.labels.clear(); - state.labels.emplace(start_address); - state.inspect_queries.push_back(state.start); - while (!state.inspect_queries.empty()) { - if (!TryInspectAddress(state)) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - } - - bool use_flow_stack = true; - - bool decompiled = false; - - if (settings.depth != CompileDepth::FlowStack) { - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; - } - } - } - - use_flow_stack = !decompiled; - - // Sort and organize results - std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - if (decompiled && settings.depth != CompileDepth::NoFlowStack) { - ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, - settings.disable_else_derivation}; - state.manager = &manager; - DecompileShader(state); - decompiled = state.manager->IsFullyDecompiled(); - if (!decompiled) { - if (settings.depth == CompileDepth::FullDecompile) { - LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); - } else { - LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); - } - state.manager->ShowCurrentState("Of Shader"); - state.manager->Clear(); - } else { - auto characteristics = std::make_unique<ShaderCharacteristics>(); - characteristics->start = start_address; - characteristics->settings.depth = settings.depth; - characteristics->manager = std::move(manager); - characteristics->end = state.block_info.back().end + 1; - return characteristics; - } - } - - result_out->start = start_address; - result_out->settings.depth = - use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; - result_out->blocks.clear(); - for (auto& block : state.block_info) { - ShaderBlock new_block{}; - new_block.start = block.start; - new_block.end = block.end; - new_block.ignore_branch = BlockBranchIsIgnored(block.branch); - if (!new_block.ignore_branch) { - new_block.branch = block.branch; - } - result_out->end = std::max(result_out->end, block.end); - result_out->blocks.push_back(new_block); - } - if (!use_flow_stack) { - result_out->labels = std::move(state.labels); - return result_out; - } - - auto back = result_out->blocks.begin(); - auto next = std::next(back); - while (next != result_out->blocks.end()) { - if (!state.labels.contains(next->start) && next->start == back->end + 1) { - back->end = next->end; - next = result_out->blocks.erase(next); - continue; - } - back = next; - ++next; - } - - return result_out; -} -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <list> -#include <optional> -#include <set> -#include <variant> - -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -constexpr s32 exit_branch = -1; - -struct Condition { - Pred predicate{Pred::UnusedIndex}; - ConditionCode cc{ConditionCode::T}; - - bool IsUnconditional() const { - return predicate == Pred::UnusedIndex && cc == ConditionCode::T; - } - - bool operator==(const Condition& other) const { - return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); - } - - bool operator!=(const Condition& other) const { - return !operator==(other); - } -}; - -class SingleBranch { -public: - SingleBranch() = default; - explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, - bool is_brk_, bool ignore_) - : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, - ignore{ignore_} {} - - bool operator==(const SingleBranch& b) const { - return std::tie(condition, address, kill, is_sync, is_brk, ignore) == - std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); - } - - bool operator!=(const SingleBranch& b) const { - return !operator==(b); - } - - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; - -struct CaseBranch { - explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} - u32 cmp_value; - u32 address; -}; - -class MultiBranch { -public: - explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) - : gpr{gpr_}, branches{std::move(branches_)} {} - - u32 gpr{}; - std::vector<CaseBranch> branches{}; -}; - -using BranchData = std::variant<SingleBranch, MultiBranch>; -using BlockBranchInfo = std::shared_ptr<BranchData>; - -bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); - -struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; - BlockBranchInfo branch{}; - - bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch) == - std::tie(sb.start, sb.end, sb.ignore_branch) && - BlockBranchInfoAreEqual(branch, sb.branch); - } - - bool operator!=(const ShaderBlock& sb) const { - return !operator==(sb); - } -}; - -struct ShaderCharacteristics { - std::list<ShaderBlock> blocks{}; - std::set<u32> labels{}; - u32 start{}; - u32 end{}; - ASTManager manager{true, true}; - CompilerSettings settings{}; -}; - -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> -#include <limits> -#include <set> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { - -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list<SamplerEntry>& used_samplers) { - if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { - return; - } - u32 count{}; - std::vector<u32> bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - ++count; - bound_offsets.emplace_back(sampler.offset); - } - if (count > 1) { - gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); - } -} - -std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, - VideoCore::GuestDriverProfile& gpu_driver, - const std::list<SamplerEntry>& used_samplers) { - const u32 base_offset = sampler_to_deduce.offset; - u32 max_offset{std::numeric_limits<u32>::max()}; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - if (sampler.offset > base_offset) { - max_offset = std::min(sampler.offset, max_offset); - } - } - if (max_offset == std::numeric_limits<u32>::max()) { - return std::nullopt; - } - return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); -} - -} // Anonymous namespace - -class ASTDecoder { -public: - explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} - - void operator()(ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) {} - - void operator()(ASTVarSet& ast) {} - - void operator()(ASTLabel& ast) {} - - void operator()(ASTGoto& ast) {} - - void operator()(ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTReturn& ast) {} - - void operator()(ASTBreak& ast) {} - - void Visit(ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - if (node->IsBlockEncoded()) { - auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); - NodeBlock bb = ir.DecodeRange(block->start, block->end); - node->TransformBlockEncoded(std::move(bb)); - } - } - -private: - ShaderIR& ir; -}; - -void ShaderIR::Decode() { - std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - - decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, registry); - auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast<u32>(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast<u32>(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = GetASTProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast<u32>(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); - } -} - -NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - NodeBlock basic_block; - DecodeRangeInner(basic_block, begin, end); - return basic_block; -} - -void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { - for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(bb, pc); - } -} - -void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { - Node result = n; - if (cond.cc != ConditionCode::T) { - result = Conditional(GetConditionCode(cond.cc), {result}); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast<u32>(cond.predicate); - const bool is_neg = pred > 7; - if (is_neg) { - pred -= 8; - } - result = Conditional(GetPredicate(pred, is_neg), {result}); - } - return result; - }; - if (std::holds_alternative<SingleBranch>(*block.branch)) { - auto branch = std::get_if<SingleBranch>(block.branch.get()); - if (branch->address < 0) { - if (branch->kill) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Branch, Immediate(branch->address)); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); - Node op_a = GetRegister(multi_branch->gpr); - for (auto& branch_case : multi_branch->branches) { - Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); - Node op_b = Immediate(branch_case.cmp_value); - Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); - auto result = Conditional(condition, {n}); - bb.push_back(result); - global_code.push_back(result); - } -} - -u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { - // Ignore sched instructions when generating code. - if (IsSchedInstruction(pc, main_offset)) { - return pc + 1; - } - - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - const u32 nv_address = ConvertAddressToNvidiaSpace(pc); - - // Decoding failure - if (!opcode) { - UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); - bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", - nv_address, instr.value))); - return pc + 1; - } - - bb.push_back(Comment( - fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); - - using Tegra::Shader::Pred; - UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, - "NeverExecute predicate not implemented"); - - static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, - {OpCode::Type::Image, &ShaderIR::DecodeImage}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; - - std::vector<Node> tmp_block; - if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, pc); - } else { - pc = DecodeOther(tmp_block, pc); - } - - // Some instructions (like SSY) don't have a predicate field, they are always unconditionally - // executed. - const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - - if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { - const Node conditional = - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); - global_code.push_back(conditional); - bb.push_back(conditional); - } else { - for (auto& node : tmp_block) { - global_code.push_back(node); - bb.push_back(node); - } - } - - return pc + 1; -} - -void ShaderIR::PostDecode() { - // Deduce texture handler size if needed - auto gpu_driver = registry.AccessGuestDriverProfile(); - DeduceTextureHandlerSize(gpu_driver, used_samplers); - // Deduce Indexed Samplers - if (!uses_indexed_samplers) { - return; - } - for (auto& sampler : used_samplers) { - if (!sampler.is_indexed) { - continue; - } - if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { - sampler.size = *size; - } else { - LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); - sampler.size = 1; - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::SubOp; - -u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - - Node op_b = [&] { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV_C: - case OpCode::Id::MOV_R: { - // MOV does not have neither 'abs' nor 'neg' bits. - SetRegister(bb, instr.gpr0, op_b); - break; - } - case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: - case OpCode::Id::FMUL_IMM: { - // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - if (instr.fmul.tab5cb8_2 != 0) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - } - if (instr.fmul.tab5c68_0 != 1) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); - } - - op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - - static constexpr std::array FmulPostFactor = { - 1.000f, // None - 0.500f, // Divide 2 - 0.250f, // Divide 4 - 0.125f, // Divide 8 - 8.000f, // Mul 8 - 4.000f, // Mul 4 - 2.000f, // Mul 2 - }; - - if (instr.fmul.postfactor != 0) { - op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, - Immediate(FmulPostFactor[instr.fmul.postfactor])); - } - - // TODO(Rodrigo): Should precise be used when there's a postfactor? - Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); - - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: - case OpCode::Id::FADD_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::MUFU: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - - Node value = [&]() { - switch (instr.sub_op) { - case SubOp::Cos: - return Operation(OperationCode::FCos, PRECISE, op_a); - case SubOp::Sin: - return Operation(OperationCode::FSin, PRECISE, op_a); - case SubOp::Ex2: - return Operation(OperationCode::FExp2, PRECISE, op_a); - case SubOp::Lg2: - return Operation(OperationCode::FLog2, PRECISE, op_a); - case SubOp::Rcp: - return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); - case SubOp::Rsq: - return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); - case SubOp::Sqrt: - return Operation(OperationCode::FSqrt, PRECISE, op_a); - default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); - return Immediate(0); - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FMNMX_C: - case OpCode::Id::FMNMX_R: - case OpCode::Id::FMNMX_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); - - const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); - const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: - case OpCode::Id::FCMP_IMMR: { - UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); - Node op_c = GetRegister(instr.gpr39); - Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); - break; - } - case OpCode::Id::RRO_C: - case OpCode::Id::RRO_R: - case OpCode::Id::RRO_IMM: { - LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); - - // Currently RRO is only implemented as a register move. - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - SetRegister(bb, instr.gpr0, op_b); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - bool negate_a = false; - bool negate_b = false; - bool absolute_a = false; - bool absolute_b = false; - - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_R: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HADD2_C: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 56) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - case OpCode::Id::HMUL2_R: - negate_a = ((instr.value >> 43) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HMUL2_C: - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - default: - UNREACHABLE(); - break; - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - - auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HMUL2_C: - return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HADD2_R: - case OpCode::Id::HMUL2_R: - return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; - default: - UNREACHABLE(); - return {HalfType::F32, Immediate(0)}; - } - }(); - op_b = UnpackHalfFloat(op_b, type_b); - op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - - Node value = [this, opcode, op_a, op_b = op_b] { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_C: - case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); - return Immediate(0); - } - }(); - value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); - - const Node op_b = UnpackHalfImmediate(instr, true); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); - SetRegister(bb, instr.gpr0, value); - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV32_IMM: { - SetRegister(bb, instr.gpr0, GetImmediate32(instr)); - break; - } - case OpCode::Id::FMUL32_IMM: { - Node value = - Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); - value = GetSaturatedFloat(value, instr.fmul32.saturate); - - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD32I: { - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, - instr.fadd32i.negate_a); - const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, - instr.fadd32i.negate_b); - - const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::IAdd3Height; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD_C: - case OpCode::Id::IADD_R: - case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); - UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - Node value = Operation(OperationCode::UAdd, op_a, op_b); - - if (instr.iadd.x) { - Node carry = GetInternalFlag(InternalFlag::Carry); - Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); - } - - if (instr.generates_cc) { - const Node i0 = Immediate(0); - - Node zero = Operation(OperationCode::LogicalIEqual, value, i0); - Node sign = Operation(OperationCode::LogicalILessThan, value, i0); - Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); - - Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); - Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); - Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); - Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); - - SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); - SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); - SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); - SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::IADD3_C: - case OpCode::Id::IADD3_R: - case OpCode::Id::IADD3_IMM: { - Node op_c = GetRegister(instr.gpr39); - - const auto ApplyHeight = [&](IAdd3Height height, Node value) { - switch (height) { - case IAdd3Height::None: - return value; - case IAdd3Height::LowerHalfWord: - return BitfieldExtract(value, 0, 16); - case IAdd3Height::UpperHalfWord: - return BitfieldExtract(value, 16, 16); - default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); - return Immediate(0); - } - }; - - if (opcode->get().GetId() == OpCode::Id::IADD3_R) { - op_a = ApplyHeight(instr.iadd3.height_a, op_a); - op_b = ApplyHeight(instr.iadd3.height_b, op_b); - op_c = ApplyHeight(instr.iadd3.height_c, op_c); - } - - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); - op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - - const Node value = [&] { - Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); - if (opcode->get().GetId() != OpCode::Id::IADD3_R) { - return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); - } - const Node shifted = [&] { - switch (instr.iadd3.mode) { - case Tegra::Shader::IAdd3Mode::RightShift: - // TODO(tech4me): According to - // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 - // The addition between op_a and op_b should be done in uint33, more - // investigation required - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, - Immediate(16)); - case Tegra::Shader::IAdd3Mode::LeftShift: - return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, - Immediate(16)); - default: - return add_ab; - } - }(); - return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); - }(); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ISCADD_C: - case OpCode::Id::ISCADD_R: - case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); - const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::POPC_C: - case OpCode::Id::POPC_R: - case OpCode::Id::POPC_IMM: { - if (instr.popc.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - } - const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FLO_R: - case OpCode::Id::FLO_C: - case OpCode::Id::FLO_IMM: { - Node value; - if (instr.flo.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.is_signed) { - value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); - } else { - value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.sh) { - value = - Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::SEL_C: - case OpCode::Id::SEL_R: - case OpCode::Id::SEL_IMM: { - const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); - const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ICMP_CR: - case OpCode::Id::ICMP_R: - case OpCode::Id::ICMP_RC: - case OpCode::Id::ICMP_IMM: { - const Node zero = Immediate(0); - - const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_R: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::ICMP_IMM: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {zero, zero}; - } - }(); - const Node op_lhs = GetRegister(instr.gpr8); - const Node comparison = - GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); - break; - } - case OpCode::Id::LOP_C: - case OpCode::Id::LOP_R: - case OpCode::Id::LOP_IMM: { - if (instr.alu.lop.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); - if (instr.alu.lop.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, - instr.generates_cc); - break; - } - case OpCode::Id::LOP3_C: - case OpCode::Id::LOP3_R: - case OpCode::Id::LOP3_IMM: { - const Node op_c = GetRegister(instr.gpr39); - const Node lut = [&]() { - if (opcode->get().GetId() == OpCode::Id::LOP3_R) { - return Immediate(instr.alu.lop3.GetImmLut28()); - } else { - return Immediate(instr.alu.lop3.GetImmLut48()); - } - }(); - - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); - break; - } - case OpCode::Id::IMNMX_C: - case OpCode::Id::IMNMX_R: - case OpCode::Id::IMNMX_IMM: { - UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - - const bool is_signed = instr.imnmx.is_signed; - - const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); - const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); - const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::LEA_R2: - case OpCode::Id::LEA_R1: - case OpCode::Id::LEA_IMM: - case OpCode::Id::LEA_RZ: - case OpCode::Id::LEA_HI: { - auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::LEA_R2: { - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), - Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; - } - case OpCode::Id::LEA_R1: { - const bool neg = instr.lea.r1.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - GetRegister(instr.gpr20), - Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; - } - case OpCode::Id::LEA_IMM: { - const bool neg = instr.lea.imm.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast<u32>(instr.lea.imm.entry_a)), - Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; - } - case OpCode::Id::LEA_RZ: { - const bool neg = instr.lea.rz.neg != 0; - return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; - } - case OpCode::Id::LEA_HI: - default: - UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); - - return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), - Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), - "Unhandled LEA Predicate"); - - Node value = - Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); - value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); - SetRegister(bb, instr.gpr0, std::move(value)); - - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut, bool sets_cc) { - const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { - Node value = Immediate(0); - const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); - if (imm.GetValue() & 0x01) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x02) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x04) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x08) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x10) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x20) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x40) { - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x80) { - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - return value; - }(op_a, op_b, op_c, imm_lut); - - SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); - SetRegister(bb, dest, lop3_fast); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::LogicOperation; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredicateResultMode; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - - op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - - Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) { - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); - } - - if (instr.alu.lop32i.invert_b) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), - std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, - instr.op_32.generates_cc != 0); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, - Node op_b, PredicateResultMode predicate_mode, Pred predicate, - bool sets_cc) { - Node result = [&] { - switch (logic_op) { - case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::PassB: - return op_b; - default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); - return Immediate(0); - } - }(); - - SetInternalFlagsFromInteger(bb, result, sets_cc); - SetRegister(bb, dest, result); - - // Write the predicate value depending on the predicate mode. - switch (predicate_mode) { - case PredicateResultMode::None: - // Do nothing. - return; - case PredicateResultMode::NotZero: { - // Set the predicate to true if the result is not zero. - Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); - SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::BFE_R: - return GetRegister(instr.gpr20); - case OpCode::Id::BFE_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::BFE_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); - - const bool is_signed = instr.bfe.is_signed; - - // using reverse parallel method in - // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel - // note for later if possible to implement faster method. - if (instr.bfe.brev) { - const auto swap = [&](u32 s, u32 mask) { - Node v1 = - SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); - if (mask != 0) { - v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), - Immediate(mask)); - } - Node v2 = op_a; - if (mask != 0) { - v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), - Immediate(mask)); - } - v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), - Immediate(s)); - return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), - std::move(v2)); - }; - op_a = swap(1, 0x55555555U); - op_a = swap(2, 0x33333333U); - op_a = swap(4, 0x0F0F0F0FU); - op_a = swap(8, 0x00FF00FFU); - op_a = swap(16, 0); - } - - const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(0), Immediate(8)); - const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(8), Immediate(8)); - auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); - SetRegister(bb, instr.gpr0, std::move(result)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::BFI_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::BFI_IMM_R: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {Immediate(0), Immediate(0)}; - } - }(); - const Node insert = GetRegister(instr.gpr8); - const Node offset = BitfieldExtract(packed_shift, 0, 8); - const Node bits = BitfieldExtract(packed_shift, 8, 8); - - const Node value = - Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <limits> -#include <optional> -#include <utility> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; - -namespace { - -constexpr OperationCode GetFloatSelector(u64 selector) { - return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; -} - -constexpr u32 SizeInBits(Register::Size size) { - switch (size) { - case Register::Size::Byte: - return 8; - case Register::Size::Short: - return 16; - case Register::Size::Word: - return 32; - case Register::Size::Long: - return 64; - } - return 0; -} - -constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, - Register::Size dst_size, - bool src_signed, - bool dst_signed) { - const u32 dst_bits = SizeInBits(dst_size); - if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { - if (src_signed == dst_signed) { - return std::nullopt; - } - return std::make_pair(0, std::numeric_limits<s32>::max()); - } - if (dst_signed) { - // Signed destination, clamp to [-128, 127] for instance - return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); - } else { - // Unsigned destination - if (dst_bits == 32) { - // Avoid shifting by 32, that is undefined behavior - return std::make_pair(0, s32(std::numeric_limits<u32>::max())); - } - return std::make_pair(0, (1 << dst_bits) - 1); - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - case OpCode::Id::I2I_C: - case OpCode::Id::I2I_IMM: { - const bool src_signed = instr.conversion.is_input_signed; - const bool dst_signed = instr.conversion.is_output_signed; - const Register::Size src_size = instr.conversion.src_size; - const Register::Size dst_size = instr.conversion.dst_size; - const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); - - Node value = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2I_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - // Ensure the source selector is valid - switch (instr.conversion.src_size) { - case Register::Size::Byte: - break; - case Register::Size::Short: - ASSERT(selector == 0 || selector == 2); - break; - default: - ASSERT(selector == 0); - break; - } - - if (src_size != Register::Size::Word || selector != 0) { - value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), - Immediate(selector * 8), Immediate(SizeInBits(src_size))); - } - - value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, - instr.conversion.negate_a, src_signed); - - if (instr.alu.saturate_d) { - if (src_signed && !dst_signed) { - Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, - Immediate(1 << (SizeInBits(src_size) - 1))); - value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), - std::move(value)); - - // Simplify generated expressions, this can be removed without semantic impact - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - - if (dst_size != Register::Size::Word) { - const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); - Node is_large = - Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); - value = Operation(OperationCode::Select, std::move(is_large), limit, - std::move(value)); - } - } else if (const std::optional bounds = - IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { - value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), - Immediate(bounds->first)); - value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), - Immediate(bounds->second)); - } - } else if (dst_size != Register::Size::Word) { - // No saturation, we only have to mask the result - Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); - value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: - case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); - - Node value = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2F_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const bool input_signed = instr.conversion.is_input_signed; - - if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { - ASSERT(instr.conversion.src_size == Register::Size::Byte || - instr.conversion.src_size == Register::Size::Short); - if (instr.conversion.src_size == Register::Size::Short) { - ASSERT(offset == 0 || offset == 2); - } - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset * 8)); - } - - value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); - value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); - value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); - value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: - case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2F_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&] { - if (instr.conversion.src_size != instr.conversion.dst_size) { - // Rounding operations only matter when the source and destination conversion size - // is the same. - return value; - } - switch (instr.conversion.f2f.GetRoundingMode()) { - case Tegra::Shader::F2fRoundingOp::None: - return value; - case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, value); - case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, value); - case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, value); - case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - instr.conversion.f2f.rounding.Value()); - return value; - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: - case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2I_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&]() { - switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::RoundEven: - return Operation(OperationCode::FRoundEven, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - instr.conversion.f2i.rounding.Value()); - return Immediate(0); - } - }(); - const bool is_signed = instr.conversion.is_output_signed; - value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); - - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - if (instr.ffma.tab5980_0 != 1) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); - } - if (instr.ffma.tab5980_1 != 0) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - } - - const Node op_a = GetRegister(instr.gpr8); - - auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - } - case OpCode::Id::FFMA_RR: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::FFMA_RC: { - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - } - case OpCode::Id::FFMA_IMM: - return {GetImmediate19(instr), GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); - return {Immediate(0), Immediate(0)}; - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); - op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); - - Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, - instr.fset.neg_a != 0); - - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); - - // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the - // condition is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fset.op); - const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.fset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, - instr.fsetp.neg_a != 0); - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); - - // We can't use the constant predicate as destination. - ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node predicate = - GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); - const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); - const Node value = Operation(combiner, predicate, second_pred); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.fsetp.pred3, value); - - if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - const Node second_value = Operation(combiner, negated_pred, second_pred); - SetPredicate(bb, instr.fsetp.pred0, second_value); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <array> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - PredCondition cond{}; - bool bf = false; - bool ftz = false; - bool neg_a = false; - bool abs_a = false; - bool neg_b = false; - bool abs_b = false; - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - case OpCode::Id::HSET2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - bf = instr.Bit(53); - ftz = instr.Bit(54); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(56); - abs_b = instr.Bit(54); - break; - case OpCode::Id::HSET2_R: - cond = instr.hsetp2.reg.cond; - bf = instr.Bit(49); - ftz = instr.Bit(50); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(31); - abs_b = instr.Bit(30); - break; - default: - UNREACHABLE(); - } - - Node op_b = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - // Inform as unimplemented as this is not tested. - UNIMPLEMENTED_MSG("HSET2_C is not implemented"); - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::HSET2_R: - return GetRegister(instr.gpr20); - case OpCode::Id::HSET2_IMM: - return UnpackHalfImmediate(instr, true); - default: - UNREACHABLE(); - return Node{}; - } - }(); - - if (!ftz) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); - - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_R: - op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); - [[fallthrough]]; - case OpCode::Id::HSET2_C: - op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); - break; - default: - break; - } - - Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - - Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); - - // HSET2 operates on each half float in the pack. - std::array<Node, 2> values; - for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = bf ? 0x3c00 : 0xffff; - Node true_value = Immediate(raw_value << (i * 16)); - Node false_value = Immediate(0); - - Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - Node predicate = Operation(combiner, comparison, second_pred); - values[i] = - Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); - } - - Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); - SetRegister(bb, instr.gpr0, move(value)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (instr.hsetp2.ftz != 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - - Tegra::Shader::PredCondition cond{}; - bool h_and{}; - Node op_b{}; - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_C: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); - // F32 is hardcoded in hardware - op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); - break; - case OpCode::Id::HSETP2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = UnpackHalfImmediate(instr, true); - break; - case OpCode::Id::HSETP2_R: - cond = instr.hsetp2.reg.cond; - h_and = instr.hsetp2.reg.h_and; - op_b = - GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), - instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); - break; - default: - UNREACHABLE(); - op_b = Immediate(0); - } - - const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); - - const auto Write = [&](u64 dest, Node src) { - SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); - }; - - const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred3; - const u64 second = instr.hsetp2.pred0; - if (h_and) { - Node joined = Operation(OperationCode::LogicalAnd2, comparison); - Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); - } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <tuple> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfPrecision; -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); - } else { - DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); - } - - constexpr auto identity = HalfType::H0_H1; - bool neg_b{}, neg_c{}; - auto [saturate, type_b, op_b, type_c, - op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::HFMA2_CR: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, HalfType::F32, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_RC: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HFMA2_RR: - neg_b = instr.hfma2.rr.negate_b; - neg_c = instr.hfma2.rr.negate_c; - return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), - instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_IMM_R: - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - default: - return {false, identity, Immediate(0), identity, Immediate(0)}; - } - }(); - - const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); - op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); - op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - - Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedHalfFloat(value, saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <vector> -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/textures/texture.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; -using Tegra::Shader::StoreType; -using Tegra::Texture::ComponentType; -using Tegra::Texture::TextureFormat; -using Tegra::Texture::TICEntry; - -namespace { - -ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, - std::size_t component) { - const TextureFormat format{descriptor.format}; - switch (format) { - case TextureFormat::R16G16B16A16: - case TextureFormat::R32G32B32A32: - case TextureFormat::R32G32B32: - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.b_type; - } - if (component == 3) { - return descriptor.a_type; - } - break; - case TextureFormat::A8R8G8B8: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.r_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.b_type; - } - break; - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.r_type; - } - break; - case TextureFormat::R32_B24G8: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - break; - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - if (component == 0) { - return descriptor.b_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.r_type; - } - break; - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - if (component == 0) { - return descriptor.g_type; - } - if (component == 1) { - return descriptor.r_type; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return ComponentType::FLOAT; -} - -bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array<u8, 16> mask = { - 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), - (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask)}.test(component); -} - -u32 GetComponentSize(TextureFormat format, std::size_t component) { - switch (format) { - case TextureFormat::R32G32B32A32: - return 32; - case TextureFormat::R16G16B16A16: - return 16; - case TextureFormat::R32G32B32: - return component <= 2 ? 32 : 0; - case TextureFormat::R32G32: - return component <= 1 ? 32 : 0; - case TextureFormat::R16G16: - return component <= 1 ? 16 : 0; - case TextureFormat::R32: - return component == 0 ? 32 : 0; - case TextureFormat::R16: - return component == 0 ? 16 : 0; - case TextureFormat::R8: - return component == 0 ? 8 : 0; - case TextureFormat::R1: - return component == 0 ? 1 : 0; - case TextureFormat::A8R8G8B8: - return 8; - case TextureFormat::A2B10G10R10: - return (component == 3 || component == 2 || component == 1) ? 10 : 2; - case TextureFormat::A4B4G4R4: - return 4; - case TextureFormat::A5B5G5R1: - return (component == 0 || component == 1 || component == 2) ? 5 : 1; - case TextureFormat::A1B5G5R5: - return (component == 1 || component == 2 || component == 3) ? 5 : 1; - case TextureFormat::R32_B24G8: - if (component == 0) { - return 32; - } - if (component == 1) { - return 24; - } - if (component == 2) { - return 8; - } - return 0; - case TextureFormat::B5G6R5: - if (component == 0 || component == 2) { - return 5; - } - if (component == 1) { - return 6; - } - return 0; - case TextureFormat::B6G5R5: - if (component == 1 || component == 2) { - return 5; - } - if (component == 0) { - return 6; - } - return 0; - case TextureFormat::B10G11R11: - if (component == 1 || component == 2) { - return 11; - } - if (component == 0) { - return 10; - } - return 0; - case TextureFormat::R24G8: - if (component == 0) { - return 8; - } - if (component == 1) { - return 24; - } - return 0; - case TextureFormat::R8G24: - if (component == 0) { - return 24; - } - if (component == 1) { - return 8; - } - return 0; - case TextureFormat::R8G8: - return (component == 0 || component == 1) ? 8 : 0; - case TextureFormat::G4R4: - return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; - } -} - -std::size_t GetImageComponentMask(TextureFormat format) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - switch (format) { - case TextureFormat::R32G32B32A32: - case TextureFormat::R16G16B16A16: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - return std::size_t{R | G | B | A}; - case TextureFormat::R32G32B32: - case TextureFormat::R32_B24G8: - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - return std::size_t{R | G | B}; - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - return std::size_t{R | G}; - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; - } -} - -std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - case Tegra::Shader::ImageType::TextureBuffer: - return 1; - case Tegra::Shader::ImageType::Texture1DArray: - case Tegra::Shader::ImageType::Texture2D: - return 2; - case Tegra::Shader::ImageType::Texture2DArray: - case Tegra::Shader::ImageType::Texture3D: - return 3; - } - UNREACHABLE(); - return 1; -} -} // Anonymous namespace - -std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, - Node original_value) { - switch (component_type) { - case ComponentType::SNORM: { - // range [-1.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); - cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); - return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; - } - case ComponentType::SINT: - case ComponentType::UNORM: { - bool is_signed = component_type == ComponentType::SINT; - // range [0.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast<float>(1 << component_size) - 1.f)); - return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), - is_signed}; - } - case ComponentType::UINT: // range [0, (1 << component_size) - 1] - return {std::move(original_value), false}; - case ComponentType::FLOAT: - if (component_size == 16) { - return {Operation(OperationCode::HCastFloat, original_value), true}; - } else { - return {std::move(original_value), true}; - } - default: - UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); - return {std::move(original_value), true}; - } -} - -u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { - std::vector<Node> coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; - coords.reserve(num_coords); - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - return coords; - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkRead(); - - if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; - } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { - UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && - instr.suldst.GetStoreDataLayout() != StoreType::Bits64); - - auto descriptor = [this, instr] { - std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; - if (instr.suldst.is_immediate) { - sampler_descriptor = - registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); - } else { - const Node image_register = GetRegister(instr.gpr39); - const auto result = TrackCbuf(image_register, global_code, - static_cast<s64>(global_code.size())); - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); - } - if (!sampler_descriptor) { - UNREACHABLE_MSG("Failed to obtain image descriptor"); - } - return *sampler_descriptor; - }(); - - const auto comp_mask = GetImageComponentMask(descriptor.format); - - switch (instr.suldst.GetStoreDataLayout()) { - case StoreType::Bits32: - case StoreType::Bits64: { - u32 indexer = 0; - u32 shifted_counter = 0; - Node value = Immediate(0); - for (u32 element = 0; element < 4; ++element) { - if (!IsComponentEnabled(comp_mask, element)) { - continue; - } - const auto component_type = GetComponentType(descriptor, element); - const auto component_size = GetComponentSize(descriptor.format, element); - MetaImage meta{image, {}, element}; - - auto [converted_value, is_signed] = GetComponentValue( - component_type, component_size, - Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); - - // shift element to correct position - const auto shifted = shifted_counter; - if (shifted > 0) { - converted_value = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, - std::move(converted_value), Immediate(shifted)); - } - shifted_counter += component_size; - - // add value into result - value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); - - // if we shifted enough for 1 byte -> we save it into temp - if (shifted_counter >= 32) { - SetTemporary(bb, indexer++, std::move(value)); - // reset counter and value to prepare pack next byte - value = Immediate(0); - shifted_counter = 0; - } - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNREACHABLE(); - break; - } - } - break; - } - case OpCode::Id::SUST: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA - - std::vector<Node> values; - constexpr std::size_t hardcoded_size{4}; - for (std::size_t i = 0; i < hardcoded_size; ++i) { - values.push_back(GetRegister(instr.gpr0.Value() + i)); - } - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkWrite(); - - MetaImage meta{image, std::move(values)}; - bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); - break; - } - case OpCode::Id::SUATOM: { - UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); - - const OperationCode operation_code = [instr] { - switch (instr.suatom_d.operation_type) { - case Tegra::Shader::ImageAtomicOperationType::S32: - case Tegra::Shader::ImageAtomicOperationType::U32: - switch (instr.suatom_d.operation) { - case Tegra::Shader::ImageAtomicOperation::Add: - return OperationCode::AtomicImageAdd; - case Tegra::Shader::ImageAtomicOperation::And: - return OperationCode::AtomicImageAnd; - case Tegra::Shader::ImageAtomicOperation::Or: - return OperationCode::AtomicImageOr; - case Tegra::Shader::ImageAtomicOperation::Xor: - return OperationCode::AtomicImageXor; - case Tegra::Shader::ImageAtomicOperation::Exch: - return OperationCode::AtomicImageExchange; - default: - break; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", - static_cast<u64>(instr.suatom_d.operation.Value()), - static_cast<u64>(instr.suatom_d.operation_type.Value())); - return OperationCode::AtomicImageAdd; - }(); - - Node value = GetRegister(instr.gpr0); - - const auto type = instr.suatom_d.image_type; - auto& image = GetImage(instr.image, type); - image.MarkAtomic(); - - MetaImage meta{image, {std::move(value)}}; - SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset = static_cast<u32>(image.index.Value()); - - const auto it = - std::find_if(std::begin(used_images), std::end(used_images), - [offset](const ImageEntry& entry) { return entry.offset == offset; }); - if (it != std::end(used_images)) { - ASSERT(!it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast<u32>(used_images.size()); - return used_images.emplace_back(next_index, offset, type); -} - -ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { - const Node image_register = GetRegister(reg); - const auto result = - TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); - - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const ImageEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != std::end(used_images)) { - ASSERT(it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast<u32>(used_images.size()); - return used_images.emplace_back(next_index, offset, buffer, type); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition - // is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); - const Node first_pred = - GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.iset.op); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // We can't use the constant predicate as destination. - ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); - const Node predicate = - GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); - const Node value = Operation(combiner, predicate, second_pred); - SetPredicate(bb, instr.isetp.pred3, value); - - if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <utility> -#include <vector> - -#include <fmt/format.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::AtomicOp; -using Tegra::Shader::AtomicType; -using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::StoreType; - -namespace { - -OperationCode GetAtomOperation(AtomicOp op) { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", op); - return OperationCode::AtomicIAdd; - } -} - -bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { - return uniform_type == Tegra::Shader::UniformType::UnsignedByte || - uniform_type == Tegra::Shader::UniformType::UnsignedShort; -} - -u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 0b11; - case Tegra::Shader::UniformType::UnsignedShort: - return 0b10; - default: - UNREACHABLE(); - return 0; - } -} - -u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 8; - case Tegra::Shader::UniformType::UnsignedShort: - return 16; - case Tegra::Shader::UniformType::Single: - return 32; - case Tegra::Shader::UniformType::Double: - return 64; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 128; - default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); - return 32; - } -} - -Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); -} - -Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), - Immediate(size)); -} - -Node Sign16Extend(Node value) { - Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); - Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::LD_A: { - // Note: Shouldn't this be interp mode flat? As in no interpolation made. - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && - instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, - "Non-32 bits PHYS reads are not implemented"); - - const Node buffer{GetRegister(instr.gpr39)}; - - u64 next_element = instr.attribute.fmt20.element; - auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); - - const auto LoadNextElement = [&](u32 reg_offset) { - const Node attribute{instr.attribute.fmt20.IsPhysical() - ? GetPhysicalInputAttribute(instr.gpr8, buffer) - : GetInputAttribute(static_cast<Attribute::Index>(next_index), - next_element, buffer)}; - - SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); - - // Load the next attribute element into the following register. If the element - // to load goes beyond the vec4 size, load the first element of the next - // attribute. - next_element = (next_element + 1) % 4; - next_index = next_index + (next_element == 0 ? 1 : 0); - }; - - const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - LoadNextElement(reg_offset); - } - break; - } - case OpCode::Id::LD_C: { - UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - - Node index = GetRegister(instr.gpr8); - - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - - switch (instr.ld_c.type.Value()) { - case Tegra::Shader::UniformType::Single: - SetRegister(bb, instr.gpr0, op_a); - break; - - case Tegra::Shader::UniformType::Double: { - const Node op_b = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); - - SetTemporary(bb, 0, op_a); - SetTemporary(bb, 1, op_b); - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); - } - break; - } - case OpCode::Id::LD_L: - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); - [[fallthrough]]; - case OpCode::Id::LD_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); - }; - const auto GetMemory = [&](s32 offset) { - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) - : GetLocalMemory(GetAddress(offset)); - }; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Signed16: - SetRegister(bb, instr.gpr0, - Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); - break; - case StoreType::Bits32: - case StoreType::Bits64: - case StoreType::Bits128: { - const u32 count = [&] { - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits32: - return 1; - case StoreType::Bits64: - return 2; - case StoreType::Bits128: - return 4; - default: - UNREACHABLE(); - return 0; - } - }(); - for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i, GetMemory(i * 4)); - } - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::LD: - case OpCode::Id::LDG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::LD: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); - return instr.generic.type; - case OpCode::Id::LDG: - return instr.ldg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, false); - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - if (!real_address_base || !base_address) { - // Tracking failed, load zeroes. - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); - } - break; - } - - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - - // To handle unaligned loads get the bytes used to dereference global memory and extract - // those bytes from the loaded u32. - if (IsUnaligned(type)) { - gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); - } - - SetTemporary(bb, i, gmem); - } - - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::ST_A: { - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - - u64 element = instr.attribute.fmt20.element; - auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); - - const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - Node dest; - if (instr.attribute.fmt20.patch) { - const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); - dest = MakeNode<PatchNode>(offset); - } else { - dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, - GetRegister(instr.gpr39)); - } - const auto src = GetRegister(instr.gpr0.Value() + reg_offset); - - bb.push_back(Operation(OperationCode::Assign, dest, src)); - - // Load the next attribute element into the following register. If the element to load - // goes beyond the vec4 size, load the first element of the next attribute. - element = (element + 1) % 4; - index = index + (element == 0 ? 1 : 0); - } - break; - } - case OpCode::Id::ST_L: - LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); - [[fallthrough]]; - case OpCode::Id::ST_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); - }; - - const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; - const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; - const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits128: - (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); - (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); - [[fallthrough]]; - case StoreType::Bits64: - (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); - [[fallthrough]]; - case StoreType::Bits32: - (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); - break; - case StoreType::Unsigned16: - case StoreType::Signed16: { - Node address = GetAddress(0); - Node memory = (this->*get_memory)(address); - (this->*set_memory)( - bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); - break; - } - default: - UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::ST: - case OpCode::Id::STG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::ST: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); - return instr.generic.type; - case OpCode::Id::STG: - return instr.stg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - // For unaligned reads we have to read memory too. - const bool is_read = IsUnaligned(type); - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, is_read, true); - if (!real_address_base || !base_address) { - // Tracking failed, skip the store. - break; - } - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0.Value() + i); - - if (IsUnaligned(type)) { - const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, move(value), real_address, mask, size); - } - - bb.push_back(Operation(OperationCode::Assign, gmem, value)); - } - break; - } - case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - instr.red.type.Value()); - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); - break; - } - case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || - instr.atom.operation == AtomicOp::Dec || - instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", instr.atom.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64 || - instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || - instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", instr.atom.type.Value()); - - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - - const bool is_signed = - instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; - Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || - instr.atoms.operation == AtomicOp::Dec, - "operation={}", instr.atoms.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || - instr.atoms.type == AtomicType::U64, - "type={}", instr.atoms.type.Value()); - const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; - const s32 offset = instr.atoms.GetImmediateOffset(); - Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, - GetSharedMemory(move(address)), GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::AL2P: { - // Ignore al2p.direction since we don't care about it. - - // Calculate emulation fake physical address. - const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; - const Node reg{GetRegister(instr.gpr8)}; - const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; - - // Set the fake address to target register. - SetRegister(bb, instr.gpr0, fake_address); - - // Signal the shader IR to declare all possible attributes and varyings - uses_physical_attributes = true; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_read, bool is_write) { - const auto addr_register{GetRegister(instr.gmem.gpr)}; - const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; - - const auto [base_address, index, offset] = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT_OR_EXECUTE_MSG( - base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); - - bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); - - const GlobalMemoryBase descriptor{index, offset}; - const auto& entry = used_global_memory.try_emplace(descriptor).first; - auto& usage = entry->second; - usage.is_written |= is_write; - usage.is_read |= is_read; - - const auto real_address = - Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); - - return {real_address, base_address, descriptor}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::OpCode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::SystemVariable; - -using Index = Tegra::Shader::Attribute::Index; - -u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::NOP: { - UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); - UNIMPLEMENTED_IF(instr.nop.trigger != 0); - // With the previous preconditions, this instruction is a no-operation. - break; - } - case OpCode::Id::EXIT: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); - - switch (instr.flow.cond) { - case Tegra::Shader::FlowCondition::Always: - bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, - // otherwise we have to account for the possibility of the condition - // not being met, so continue processing the next instruction. - pc = MAX_PROGRAM_LENGTH - 1; - } - break; - - case Tegra::Shader::FlowCondition::Fcsm_Tr: - // TODO(bunnei): What is this used for? If we assume this conditon is not - // satisifed, dual vertex shaders in Farming Simulator make more sense - UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); - break; - - default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); - } - break; - } - case OpCode::Id::KIL: { - UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); - - bb.push_back(Operation(OperationCode::Discard)); - break; - } - case OpCode::Id::S2R: { - const Node value = [this, instr] { - switch (instr.sys20) { - case SystemVariable::LaneId: - return Operation(OperationCode::ThreadId); - case SystemVariable::InvocationId: - return Operation(OperationCode::InvocationId); - case SystemVariable::Ydirection: - uses_y_negate = true; - return Operation(OperationCode::YNegate); - case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0x00ff'0000U); - case SystemVariable::WscaleFactorXY: - UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); - return Immediate(0U); - case SystemVariable::WscaleFactorZ: - UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); - return Immediate(0U); - case SystemVariable::Tid: { - Node val = Immediate(0); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return val; - } - case SystemVariable::TidX: - return Operation(OperationCode::LocalInvocationIdX); - case SystemVariable::TidY: - return Operation(OperationCode::LocalInvocationIdY); - case SystemVariable::TidZ: - return Operation(OperationCode::LocalInvocationIdZ); - case SystemVariable::CtaIdX: - return Operation(OperationCode::WorkGroupIdX); - case SystemVariable::CtaIdY: - return Operation(OperationCode::WorkGroupIdY); - case SystemVariable::CtaIdZ: - return Operation(OperationCode::WorkGroupIdZ); - case SystemVariable::EqMask: - case SystemVariable::LtMask: - case SystemVariable::LeMask: - case SystemVariable::GtMask: - case SystemVariable::GeMask: - uses_warps = true; - switch (instr.sys20) { - case SystemVariable::EqMask: - return Operation(OperationCode::ThreadEqMask); - case SystemVariable::LtMask: - return Operation(OperationCode::ThreadLtMask); - case SystemVariable::LeMask: - return Operation(OperationCode::ThreadLeMask); - case SystemVariable::GtMask: - return Operation(OperationCode::ThreadGtMask); - case SystemVariable::GeMask: - return Operation(OperationCode::ThreadGeMask); - default: - UNREACHABLE(); - return Immediate(0u); - } - default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); - return Immediate(0u); - } - }(); - SetRegister(bb, instr.gpr0, value); - - break; - } - case OpCode::Id::BRA: { - Node branch; - if (instr.bra.constant_buffer == 0) { - const u32 target = pc + instr.bra.GetBranchTarget(); - branch = Operation(OperationCode::Branch, Immediate(target)); - } else { - const u32 target = pc + 1; - const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - const Node operand = - Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - branch = Operation(OperationCode::BranchIndirect, operand); - } - - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::BRX: { - Node operand; - if (instr.brx.constant_buffer != 0) { - const s32 target = pc + 1; - const Node index = GetRegister(instr.gpr8); - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } else { - const s32 target = pc + instr.brx.GetBranchExtend(); - const Node op_a = GetRegister(instr.gpr8); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } - const Node branch = Operation(OperationCode::BranchIndirect, operand); - - const ConditionCode cc = instr.flow_condition_code; - if (cc != ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::SSY: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer flow is not supported"); - - if (disable_flow_stack) { - break; - } - - // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); - break; - } - case OpCode::Id::PBK: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer PBK is not supported"); - - if (disable_flow_stack) { - break; - } - - // PBK pushes to a stack the address where BRK will jump to. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); - break; - } - case OpCode::Id::SYNC: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); - - if (decompiled) { - break; - } - - // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); - break; - } - case OpCode::Id::BRK: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); - if (decompiled) { - break; - } - - // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); - break; - } - case OpCode::Id::IPA: { - const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Index index = attribute.index; - - Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(index, attribute.element); - - // Code taken from Ryujinx. - if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { - const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); - if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { - Node position_w = GetInputAttribute(Index::Position, 3); - value = Operation(OperationCode::FMul, move(value), move(position_w)); - } - } - - if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); - } - - value = GetSaturatedFloat(move(value), instr.ipa.saturate); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::OUT_R: { - UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, - "Stream buffer is not supported"); - - if (instr.out.emit) { - // gpr0 is used to store the next address and gpr8 contains the address to emit. - // Hardware uses pointers here but we just ignore it - bb.push_back(Operation(OperationCode::EmitVertex)); - SetRegister(bb, instr.gpr0, Immediate(0)); - } - if (instr.out.cut) { - bb.push_back(Operation(OperationCode::EndPrimitive)); - } - break; - } - case OpCode::Id::ISBERD: { - UNIMPLEMENTED_IF(instr.isberd.o != 0); - UNIMPLEMENTED_IF(instr.isberd.skew != 0); - UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); - UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); - LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); - SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); - break; - } - case OpCode::Id::BAR: { - UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); - bb.push_back(Operation(OperationCode::Barrier)); - break; - } - case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - const OperationCode type = [instr] { - switch (instr.membar.type) { - case Tegra::Shader::MembarType::CTA: - return OperationCode::MemoryBarrierGroup; - case Tegra::Shader::MembarType::GL: - return OperationCode::MemoryBarrierGlobal; - default: - UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); - return OperationCode::MemoryBarrierGlobal; - } - }(); - bb.push_back(Operation(type)); - break; - } - case OpCode::Id::DEPBAR: { - LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::PSETP: { - const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - - // We can't use the constant predicate as destination. - ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); - const Node predicate = Operation(combiner, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); - - if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled - SetPredicate(bb, instr.psetp.pred0, - Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), - second_pred)); - } - break; - } - case OpCode::Id::CSETP: { - const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); - const Node condition_code = GetConditionCode(instr.csetp.cc); - - const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); - - if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { - SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); - } - if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); - SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); - - const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); - const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); - - const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.pset.op); - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); - const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.pset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <utility> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { -constexpr u64 NUM_CONDITION_CODES = 4; -constexpr u64 NUM_PREDICATES = 7; -} // namespace - -u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node apply_mask = [this, opcode, instr] { - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: - case OpCode::Id::P2R_IMM: - return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; - - const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; - const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; - const auto get_entry = [this, cc](u64 entry) { - return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: { - Node mask = GetRegister(instr.gpr8); - - for (u64 entry = 0; entry < num_entries; ++entry) { - const u32 shift = static_cast<u32>(entry); - - Node apply = BitfieldExtract(apply_mask, shift, 1); - Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - - Node compare = BitfieldExtract(mask, offset + shift, 1); - Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - - Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); - bb.push_back(Conditional(condition, {move(code)})); - } - break; - } - case OpCode::Id::P2R_IMM: { - Node value = Immediate(0); - for (u64 entry = 0; entry < num_entries; ++entry) { - Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), - Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); - } - value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::ShfType; -using Tegra::Shader::ShfXmode; - -namespace { - -Node IsFull(Node shift) { - return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); -} - -Node Shift(OperationCode opcode, Node value, Node shift) { - Node shifted = Operation(opcode, move(value), shift); - return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); -} - -Node ClampShift(Node shift, s32 size = 32) { - shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); - return Operation(OperationCode::IMin, move(shift), Immediate(size)); -} - -Node WrapShift(Node shift, s32 size = 32) { - return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); -} - -Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); - Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); - } - - // And these when it's larger than or 32 - const bool is_signed = type == ShfType::S64; - const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(opcode, high, move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); - Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); - } - - // And these when it's larger than or 32 - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [this, instr] { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (const auto opid = opcode->get().GetId(); opid) { - case OpCode::Id::SHR_C: - case OpCode::Id::SHR_R: - case OpCode::Id::SHR_IMM: { - op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); - - Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - move(op_a), move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHL_C: - case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: { - Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHF_RIGHT_R: - case OpCode::Id::SHF_RIGHT_IMM: - case OpCode::Id::SHF_LEFT_R: - case OpCode::Id::SHF_LEFT_IMM: { - UNIMPLEMENTED_IF(instr.generates_cc); - UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - instr.shf.xmode.Value()); - - if (instr.is_b_imm) { - op_b = Immediate(static_cast<u32>(instr.shf.immediate)); - } - const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; - Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); - - Node negated_shift = Operation(OperationCode::INegate, shift); - Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); - - const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; - Node value = (is_right ? ShiftRight : ShiftLeft)( - move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null @@ -1,935 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <vector> -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); - return 0; - } -} - -u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - bool is_bindless = false; - switch (opcode->get().GetId()) { - case OpCode::Id::TEX: { - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); - break; - } - case OpCode::Id::TEX_B: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const TextureType texture_type{instr.tex_b.texture_type}; - const bool is_array = instr.tex_b.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat(bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, - is_array, is_aoffi, {instr.gpr20})); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.texs.GetTextureProcessMode(); - - const Node4 components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4_B: { - is_bindless = true; - [[fallthrough]]; - } - case OpCode::Id::TLD4: { - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) - : instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) - : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) - : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, - is_ptp, is_bindless)); - break; - } - case OpCode::Id::TLD4S: { - constexpr std::size_t num_coords = 2; - const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - std::vector<Node> coords; - std::vector<Node> aoffi; - Node depth_compare; - if (is_depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - if (is_aoffi) { - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - depth_compare = GetRegister(instr.gpr20.Value() + 1); - } else { - depth_compare = op_b; - } - } else { - // There's no depth compare - coords.push_back(op_a); - if (is_aoffi) { - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - } else { - coords.push_back(op_b); - } - } - const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); - - SamplerInfo info; - info.is_shadow = is_depth_compare; - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, - {}, {}, component, element, {}}; - values[element] = Operation(OperationCode::TextureGather, meta, coords); - } - - if (instr.tld4s.fp16_flag) { - WriteTexsInstructionHalfFloat(bb, instr, values, true); - } else { - WriteTexsInstructionFloat(bb, instr, values, true); - } - break; - } - case OpCode::Id::TXD_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXD: { - UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const bool is_array = instr.txd.is_array != 0; - const auto derivate_reg = instr.gpr20.Value(); - const auto texture_type = instr.txd.texture_type.Value(); - const auto coord_count = GetCoordCount(texture_type); - u64 base_reg = instr.gpr8.Value(); - Node index_var; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); - WriteTexInstructionFloat(bb, instr, values); - break; - } - - if (is_bindless) { - base_reg++; - } - - std::vector<Node> coords; - std::vector<Node> derivates; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(base_reg + i)); - const std::size_t derivate = i * 2; - derivates.push_back(GetRegister(derivate_reg + derivate)); - derivates.push_back(GetRegister(derivate_reg + derivate + 1)); - } - - Node array_node = {}; - if (is_array) { - const Node info_reg = GetRegister(base_reg + coord_count); - array_node = BitfieldExtract(info_reg, 0, 16); - } - - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, - {}, {}, {}, element, index_var}; - values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); - } - - WriteTexInstructionFloat(bb, instr, values); - - break; - } - case OpCode::Id::TXQ_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXQ: { - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, - GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); - } - break; - } - case OpCode::Id::TMML_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - const auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) - : GetSampler(instr.sampler, info); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - const u64 base_index = is_array ? 1 : 0; - const u64 num_components = [texture_type] { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); - return 2; - } - }(); - // TODO: What's the array component used for? - - std::vector<Node> coords; - coords.reserve(num_components); - for (u64 component = 0; component < num_components; ++component) { - coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); - } - - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - Node value = Operation(OperationCode::TextureQueryLod, meta, coords); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::TLD: { - UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); - break; - } - case OpCode::Id::TLDS: { - const TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - const Node4 components = GetTldsCode(instr, texture_type, is_array); - - if (instr.tlds.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( - SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { - if (info.IsComplete()) { - return info; - } - if (!sampler) { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); - info.is_array = info.is_array.value_or(false); - info.is_shadow = info.is_shadow.value_or(false); - info.is_buffer = info.is_buffer.value_or(false); - return info; - } - info.type = info.type.value_or(sampler->texture_type); - info.is_array = info.is_array.value_or(sampler->is_array != 0); - info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); - info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); - return info; -} - -std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { - const u32 offset = static_cast<u32>(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const SamplerEntry& entry) { return entry.offset == offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); -} - -std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, - SamplerInfo info, Node& index_var) { - const Node sampler_register = GetRegister(reg); - const auto [base_node, tracked_sampler_info] = - TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); - if (!base_node) { - UNREACHABLE(); - return std::nullopt; - } - - if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { - const u32 buffer = sampler_info->index; - const u32 offset = sampler_info->offset; - info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const SamplerEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); - } - if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { - const std::pair indices = sampler_info->indices; - const std::pair offsets = sampler_info->offsets; - info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); - - // Try to use an already created sampler if it exists - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [indices, offsets](const SamplerEntry& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const u32 next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer); - } - if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { - const u32 base_offset = sampler_info->base_offset / 4; - index_var = GetCustomVariable(sampler_info->bindless_var); - info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), - [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && - it->is_indexed); - return *it; - } - - uses_indexed_samplers = true; - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, true); - } - return std::nullopt; -} - -void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporary(bb, dest_elem++, components[elem]); - } - // After writing values in temporals, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, - bool ignore_mask) { - // TEXS has two destination registers and a swizzle. The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - SetTemporary(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components, bool ignore_mask) { - // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half - // float instruction). - - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporary(bb, 0, first_value); - SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr28, GetTemporary(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, - std::vector<Node> aoffi, - std::optional<Tegra::Shader::Register> bindless_reg) { - const bool is_array = array != nullptr; - const bool is_shadow = depth_compare != nullptr; - const bool is_bindless = bindless_reg.has_value(); - - ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, - "Illegal texture type"); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = is_shadow; - info.is_buffer = false; - - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); - if (!sampler) { - return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; - } - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; - - Node bias; - Node lod; - switch (process_mode) { - case TextureProcessMode::None: - break; - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 field with - // an offset depending on the usage of the other registers. - bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); - break; - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, - lod, {}, element, index_var}; - values[element] = Operation(opcode, meta, coords); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { - const bool lod_bias_enabled{ - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; - - const bool is_bindless = bindless_reg.has_value(); - - u64 parameter_register = instr.gpr20.Value(); - if (is_bindless) { - ++parameter_register; - } - - const u32 bias_lod_offset = (is_bindless ? 1 : 0); - if (lod_bias_enabled) { - ++parameter_register; - } - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 5); - const auto coord_count = std::get<0>(coord_counts); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in OpenGL the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - std::vector<Node> aoffi; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); - } - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - dc = GetRegister(parameter_register++); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, - aoffi, bindless_reg); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 4); - const auto coord_count = std::get<0>(coord_counts); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - const u32 bias_offset = coord_count > 2 ? 1 : 0; - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, - {}); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { - ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); - - const std::size_t coord_count = GetCoordCount(texture_type); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - - u64 parameter_register = instr.gpr20.Value(); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = depth_compare; - - Node index_var; - const std::optional<SamplerEntry> sampler = - is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; - } - - std::vector<Node> aoffi, ptp; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); - } else if (is_ptp) { - ptp = GetPtpCoordinates( - {GetRegister(parameter_register++), GetRegister(parameter_register++)}); - } - - Node dc; - if (depth_compare) { - dc = GetRegister(parameter_register++); - } - - const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) - : Immediate(static_cast<u32>(instr.tld4.component)); - - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, - index_var}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { - const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array != 0}; - const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; - const std::size_t coord_count{GetCoordCount(texture_type)}; - - u64 gpr8_cursor{instr.gpr8.Value()}; - const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; - - std::vector<Node> coords; - coords.reserve(coord_count); - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(gpr8_cursor++)); - } - - u64 gpr20_cursor{instr.gpr20.Value()}; - // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; - const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; - // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; - // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = false; - const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); - - const std::size_t type_coord_count = GetCoordCount(texture_type); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); - - const u64 last_coord_register = - ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - - std::vector<Node> coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { - const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back( - GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - // When lod is used always is in gpr20 - const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - - std::vector<Node> aoffi; - if (aoffi_enabled) { - aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - return values; -} - -std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( - TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, - std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); - - std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); - if (total_coord_count > max_coords || total_reg_count > max_inputs) { - UNIMPLEMENTED_MSG("Unsupported Texture operation"); - total_coord_count = std::min(total_coord_count, max_coords); - } - // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. - total_coord_count += - (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; - - return {coord_count, total_coord_count}; -} - -std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, - bool is_tld4) { - const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; - const u32 size = is_tld4 ? 6 : 4; - const s32 wrap_value = is_tld4 ? 32 : 8; - const s32 diff_value = is_tld4 ? 64 : 16; - const u32 mask = (1U << size) - 1; - - std::vector<Node> aoffi; - aoffi.reserve(coord_count); - - const auto aoffi_immediate{ - TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; - if (!aoffi_immediate) { - // Variable access, not supported on AMD. - LOG_WARNING(HW_GPU, - "AOFFI constant folding failed, some hardware might have graphical issues"); - for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); - aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return aoffi; - } - - for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; - if (value >= wrap_value) { - value -= diff_value; - } - aoffi.push_back(Immediate(value)); - } - return aoffi; -} - -std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { - static constexpr u32 num_entries = 8; - - std::vector<Node> ptp; - ptp.reserve(num_entries); - - const auto global_size = static_cast<s64>(global_code.size()); - const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); - const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); - if (!low || !high) { - for (u32 entry = 0; entry < num_entries; ++entry) { - const u32 reg = entry / 4; - const u32 offset = entry % 4; - const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); - ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return ptp; - } - - const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); - for (u32 entry = 0; entry < num_entries; ++entry) { - s32 value = (immediate >> (entry * 8)) & 0b111111; - if (value >= 32) { - value -= 64; - } - ptp.push_back(Immediate(value)); - } - - return ptp; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::VideoType; -using Tegra::Shader::VmadShr; -using Tegra::Shader::VmnmxOperation; -using Tegra::Shader::VmnmxType; - -u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::VMNMX) { - DecodeVMNMX(bb, instr); - return pc; - } - - const Node op_a = - GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, - instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [this, instr] { - if (instr.video.use_register_b) { - return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, - instr.video.signed_b, instr.video.type_b, - instr.video.byte_height_b); - } - if (instr.video.signed_b) { - const auto imm = static_cast<s16>(instr.alu.GetImm20_16()); - return Immediate(static_cast<u32>(imm)); - } else { - return Immediate(instr.alu.GetImm20_16()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::VMAD: { - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node op_c = GetRegister(instr.gpr39); - - Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); - value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); - - if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { - const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); - value = - SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::VSETP: { - // We can't use the constant predicate as destination. - ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); - - const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); - const Node second_pred = GetPredicate(instr.vsetp.pred39, false); - - const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); - - if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, - u64 byte_height) { - if (!is_chunk) { - return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); - } - - switch (type) { - case VideoType::Size16_Low: - return BitfieldExtract(op, 0, 16); - case VideoType::Size16_High: - return BitfieldExtract(op, 16, 16); - case VideoType::Size32: - // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used - // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. - UNIMPLEMENTED(); - return Immediate(0); - case VideoType::Invalid: - UNREACHABLE_MSG("Invalid instruction encoding"); - return Immediate(0); - default: - UNREACHABLE(); - return Immediate(0); - } -} - -void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { - UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); - UNIMPLEMENTED_IF(instr.vmnmx.sat); - UNIMPLEMENTED_IF(instr.generates_cc); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node op_c = GetRegister(instr.gpr39); - - const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed - const bool is_oper2_signed = instr.vmnmx.is_dest_signed; - - const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; - Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); - - switch (instr.vmnmx.operation) { - case VmnmxOperation::Mrg_16H: - value = BitfieldInsert(move(op_c), move(value), 16, 16); - break; - case VmnmxOperation::Mrg_16L: - value = BitfieldInsert(move(op_c), move(value), 0, 16); - break; - case VmnmxOperation::Mrg_8B0: - value = BitfieldInsert(move(op_c), move(value), 0, 8); - break; - case VmnmxOperation::Mrg_8B2: - value = BitfieldInsert(move(op_c), move(value), 16, 8); - break; - case VmnmxOperation::Acc: - value = Operation(OperationCode::IAdd, move(value), move(op_c)); - break; - case VmnmxOperation::Min: - value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Max: - value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Nop: - break; - default: - UNREACHABLE(); - break; - } - - SetRegister(bb, instr.gpr0, move(value)); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::ShuffleOperation; -using Tegra::Shader::VoteOperation; - -namespace { - -OperationCode GetOperationCode(VoteOperation vote_op) { - switch (vote_op) { - case VoteOperation::All: - return OperationCode::VoteAll; - case VoteOperation::Any: - return OperationCode::VoteAny; - case VoteOperation::Eq: - return OperationCode::VoteEqual; - default: - UNREACHABLE_MSG("Invalid vote operation={}", vote_op); - return OperationCode::VoteAll; - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - // Signal the backend that this shader uses warp instructions. - uses_warps = true; - - switch (opcode->get().GetId()) { - case OpCode::Id::VOTE: { - const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); - const Node active = Operation(OperationCode::BallotThread, value); - const Node vote = Operation(GetOperationCode(instr.vote.operation), value); - SetRegister(bb, instr.gpr0, active); - SetPredicate(bb, instr.vote.dest_pred, vote); - break; - } - case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - - Node thread_id = Operation(OperationCode::ThreadId); - Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); - Node seg_mask = BitfieldExtract(mask, 8, 16); - - Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); - Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); - Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, - Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); - - Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { - switch (instr.shfl.operation) { - case ShuffleOperation::Idx: - return Operation(OperationCode::IBitwiseOr, - Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), - min_thread_id); - case ShuffleOperation::Down: - return Operation(OperationCode::IAdd, thread_id, index); - case ShuffleOperation::Up: - return Operation(OperationCode::IAdd, thread_id, - Operation(OperationCode::INegate, index)); - case ShuffleOperation::Bfly: - return Operation(OperationCode::IBitwiseXor, thread_id, index); - } - UNREACHABLE(); - return Immediate(0U); - }(); - - Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); - } else { - return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); - } - }(); - - SetPredicate(bb, instr.shfl.pred48, in_bounds); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); - break; - } - case OpCode::Id::FSWZADD: { - UNIMPLEMENTED_IF(instr.fswzadd.ndv); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); - SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF(instr.xmad.sign_a); - UNIMPLEMENTED_IF(instr.xmad.sign_b); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in XMAD is not implemented"); - - Node op_a = GetRegister(instr.gpr8); - - // TODO(bunnei): Needs to be fixed once op_a or op_b is signed - UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); - const bool is_signed_a = instr.xmad.sign_a == 1; - const bool is_signed_b = instr.xmad.sign_b == 1; - const bool is_signed_c = is_signed_a; - - auto [is_merge, is_psl, is_high_b, mode, op_b_binding, - op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::XMAD_CR: - return {instr.xmad.merge_56, - instr.xmad.product_shift_left_second, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, - instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RC: - return {false, - false, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, - instr.xmad.product_shift_left, - false, - instr.xmad.mode, - Immediate(static_cast<u32>(instr.xmad.imm20_16)), - GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; - } - }(); - - op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), - instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); - - const Node original_b = op_b_binding; - const Node op_b = - SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), - is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); - - // we already check sign_a and sign_b is difference or not before so just use one in here. - Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); - if (is_psl) { - product = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); - } - SetTemporary(bb, 0, product); - product = GetTemporary(0); - - Node original_c = op_c; - const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&] { - switch (set_mode) { - case Tegra::Shader::XmadMode::None: - return original_c; - case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(std::move(original_c), 0, 16); - case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(std::move(original_c), 16, 16); - case Tegra::Shader::XmadMode::CBcc: { - Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), - std::move(shifted_b)); - } - case Tegra::Shader::XmadMode::CSfu: { - const Node comp_a = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); - const Node comp_b = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); - const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); - - const Node comp_minus_a = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_a, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, - Immediate(0x80000000)), - Immediate(0)); - const Node comp_minus_b = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_b, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, - Immediate(0x80000000)), - Immediate(0)); - - Node new_c = Operation( - OperationCode::Select, comp_minus_a, - SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), - original_c); - new_c = Operation( - OperationCode::Select, comp_minus_b, - SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), - std::move(new_c)); - - return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); - } - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - SetTemporary(bb, 1, op_c); - op_c = GetTemporary(1); - - // TODO(Rodrigo): Use an appropiate sign for this operation - Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); - SetTemporary(bb, 2, sum); - sum = GetTemporary(2); - if (is_merge) { - const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), - Immediate(0), Immediate(16)); - const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, - Immediate(16)); - sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); - } - - SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(sum)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <memory> -#include <variant> - -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { -namespace { -bool ExprIsBoolean(const Expr& expr) { - return std::holds_alternative<ExprBoolean>(*expr); -} - -bool ExprBooleanGet(const Expr& expr) { - return std::get_if<ExprBoolean>(expr.get())->value; -} -} // Anonymous namespace - -bool ExprAnd::operator==(const ExprAnd& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprAnd::operator!=(const ExprAnd& b) const { - return !operator==(b); -} - -bool ExprOr::operator==(const ExprOr& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprOr::operator!=(const ExprOr& b) const { - return !operator==(b); -} - -bool ExprNot::operator==(const ExprNot& b) const { - return *operand1 == *b.operand1; -} - -bool ExprNot::operator!=(const ExprNot& b) const { - return !operator==(b); -} - -Expr MakeExprNot(Expr first) { - if (std::holds_alternative<ExprNot>(*first)) { - return std::get_if<ExprNot>(first.get())->operand1; - } - return MakeExpr<ExprNot>(std::move(first)); -} - -Expr MakeExprAnd(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? second : first; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? first : second; - } - return MakeExpr<ExprAnd>(std::move(first), std::move(second)); -} - -Expr MakeExprOr(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? first : second; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? second : first; - } - return MakeExpr<ExprOr>(std::move(first), std::move(second)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second) { - return (*first) == (*second); -} - -bool ExprAreOpposite(const Expr& first, const Expr& second) { - if (std::holds_alternative<ExprNot>(*first)) { - return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); - } - if (std::holds_alternative<ExprNot>(*second)) { - return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); - } - return false; -} - -bool ExprIsTrue(const Expr& first) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first); - } - return false; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <memory> -#include <variant> - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -class ExprAnd; -class ExprBoolean; -class ExprCondCode; -class ExprGprEqual; -class ExprNot; -class ExprOr; -class ExprPredicate; -class ExprVar; - -using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, - ExprBoolean, ExprGprEqual>; -using Expr = std::shared_ptr<ExprData>; - -class ExprAnd final { -public: - explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprAnd& b) const; - bool operator!=(const ExprAnd& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprOr final { -public: - explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprOr& b) const; - bool operator!=(const ExprOr& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprNot final { -public: - explicit ExprNot(Expr a) : operand1{std::move(a)} {} - - bool operator==(const ExprNot& b) const; - bool operator!=(const ExprNot& b) const; - - Expr operand1; -}; - -class ExprVar final { -public: - explicit ExprVar(u32 index) : var_index{index} {} - - bool operator==(const ExprVar& b) const { - return var_index == b.var_index; - } - - bool operator!=(const ExprVar& b) const { - return !operator==(b); - } - - u32 var_index; -}; - -class ExprPredicate final { -public: - explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} - - bool operator==(const ExprPredicate& b) const { - return predicate == b.predicate; - } - - bool operator!=(const ExprPredicate& b) const { - return !operator==(b); - } - - u32 predicate; -}; - -class ExprCondCode final { -public: - explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} - - bool operator==(const ExprCondCode& b) const { - return cc == b.cc; - } - - bool operator!=(const ExprCondCode& b) const { - return !operator==(b); - } - - ConditionCode cc; -}; - -class ExprBoolean final { -public: - explicit ExprBoolean(bool val) : value{val} {} - - bool operator==(const ExprBoolean& b) const { - return value == b.value; - } - - bool operator!=(const ExprBoolean& b) const { - return !operator==(b); - } - - bool value; -}; - -class ExprGprEqual final { -public: - explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} - - bool operator==(const ExprGprEqual& b) const { - return gpr == b.gpr && value == b.value; - } - - bool operator!=(const ExprGprEqual& b) const { - return !operator==(b); - } - - u32 gpr; - u32 value; -}; - -template <typename T, typename... Args> -Expr MakeExpr(Args&&... args) { - static_assert(std::is_convertible_v<T, ExprData>); - return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second); - -bool ExprAreOpposite(const Expr& first, const Expr& second); - -Expr MakeExprNot(Expr first); - -Expr MakeExprAnd(Expr first, Expr second); - -Expr MakeExprOr(Expr first, Expr second); - -bool ExprIsTrue(const Expr& first); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <cstddef> - -#include <boost/container_hash/hash.hpp> - -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; - return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; -} - -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. - static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; - static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; - - const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - std::size_t offset = start_offset; - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & MASK) == SELF_JUMPING_BRANCH) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - ++offset; - } - // The last instruction is included in the program size - return std::min(offset + 1, program.size()); -} - -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute) { - ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); - memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); - code.resize(CalculateProgramSize(code, is_compute)); - return code; -} - -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b) { - size_t unique_identifier = boost::hash_value(code); - if (is_a) { - // VertexA programs include two programs - boost::hash_combine(unique_identifier, boost::hash_value(code_b)); - } - return static_cast<u64>(unique_identifier); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <cstddef> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon::Shader { - -using ProgramCode = std::vector<u64>; - -constexpr u32 STAGE_MAIN_OFFSET = 10; -constexpr u32 KERNEL_MAIN_OFFSET = 0; - -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); - -/// Gets if the current instruction offset is a scheduler instruction -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute); - -/// Hashes one (or two) program streams -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b = {}); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <cstddef> -#include <memory> -#include <optional> -#include <string> -#include <tuple> -#include <utility> -#include <variant> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float - FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - FSwizzleAdd, /// (float a, float b, uint mask) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - IBitMSB, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - UBitMSB, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAnd2, /// (bool2 a) -> bool - - LogicalFOrdLessThan, /// (float a, float b) -> bool - LogicalFOrdEqual, /// (float a, float b) -> bool - LogicalFOrdLessEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterThan, /// (float a, float b) -> bool - LogicalFOrdNotEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterEqual, /// (float a, float b) -> bool - LogicalFOrdered, /// (float a, float b) -> bool - LogicalFUnordered, /// (float a, float b) -> bool - LogicalFUnordLessThan, /// (float a, float b) -> bool - LogicalFUnordEqual, /// (float a, float b) -> bool - LogicalFUnordLessEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterThan, /// (float a, float b) -> bool - LogicalFUnordNotEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterEqual, /// (float a, float b) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// (uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - LogicalAddCarry, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 - - ImageLoad, /// (MetaImage, int[N] coords) -> void - ImageStore, /// (MetaImage, int[N] coords) -> void - - AtomicImageAdd, /// (MetaImage, int[N] coords) -> void - AtomicImageAnd, /// (MetaImage, int[N] coords) -> void - AtomicImageOr, /// (MetaImage, int[N] coords) -> void - AtomicImageXor, /// (MetaImage, int[N] coords) -> void - AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - - AtomicUExchange, /// (memory, uint) -> uint - AtomicUAdd, /// (memory, uint) -> uint - AtomicUMin, /// (memory, uint) -> uint - AtomicUMax, /// (memory, uint) -> uint - AtomicUAnd, /// (memory, uint) -> uint - AtomicUOr, /// (memory, uint) -> uint - AtomicUXor, /// (memory, uint) -> uint - - AtomicIExchange, /// (memory, int) -> int - AtomicIAdd, /// (memory, int) -> int - AtomicIMin, /// (memory, int) -> int - AtomicIMax, /// (memory, int) -> int - AtomicIAnd, /// (memory, int) -> int - AtomicIOr, /// (memory, int) -> int - AtomicIXor, /// (memory, int) -> int - - ReduceUAdd, /// (memory, uint) -> void - ReduceUMin, /// (memory, uint) -> void - ReduceUMax, /// (memory, uint) -> void - ReduceUAnd, /// (memory, uint) -> void - ReduceUOr, /// (memory, uint) -> void - ReduceUXor, /// (memory, uint) -> void - - ReduceIAdd, /// (memory, int) -> void - ReduceIMin, /// (memory, int) -> void - ReduceIMax, /// (memory, int) -> void - ReduceIAnd, /// (memory, int) -> void - ReduceIOr, /// (memory, int) -> void - ReduceIXor, /// (memory, int) -> void - - Branch, /// (uint branch_target) -> void - BranchIndirect, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - InvocationId, /// () -> int - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - BallotThread, /// (bool) -> uint - VoteAll, /// (bool) -> bool - VoteAny, /// (bool) -> bool - VoteEqual, /// (bool) -> bool - - ThreadId, /// () -> uint - ThreadEqMask, /// () -> uint - ThreadGeMask, /// () -> uint - ThreadGtMask, /// () -> uint - ThreadLeMask, /// () -> uint - ThreadLtMask, /// () -> uint - ShuffleIndexed, /// (uint value, uint index) -> uint - - Barrier, /// () -> void - MemoryBarrierGroup, /// () -> void - MemoryBarrierGlobal, /// () -> void - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - -enum class MetaStackClass { - Ssy, - Pbk, -}; - -class OperationNode; -class ConditionalNode; -class GprNode; -class CustomVarNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; -class CbufNode; -class LmemNode; -class PatchNode; -class SmemNode; -class GmemNode; -class CommentNode; - -using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, - InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, - LmemNode, SmemNode, GmemNode, CommentNode>; -using Node = std::shared_ptr<NodeData>; -using Node4 = std::array<Node, 4>; -using NodeBlock = std::vector<Node>; - -struct ArraySamplerNode; -struct BindlessSamplerNode; -struct SeparateSamplerNode; - -using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; -using TrackSampler = std::shared_ptr<TrackSamplerData>; - -struct SamplerEntry { - /// Bound samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, - bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, - is_buffer{is_buffer_}, is_indexed{is_indexed_} {} - - /// Separate sampler constructor - explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_) - : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} - - /// Bindless samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { - } - - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 secondary_offset = 0; ///< Secondary offset in the const buffer. - u32 buffer = 0; ///< Buffer where the bindless sampler is read. - u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. - u32 size = 1; ///< Size of the sampler. - - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. - bool is_separated = false; ///< Whether the image and sampler is separated or not. -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct ArraySamplerNode { - u32 index; - u32 base_offset; - u32 bindless_var; -}; - -/// Represents a tracked separate sampler image pair that was folded statically -struct SeparateSamplerNode { - std::pair<u32, u32> indices; - std::pair<u32, u32> offsets; -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct BindlessSamplerNode { - u32 index; - u32 offset; -}; - -struct ImageEntry { -public: - /// Bound images constructor - explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, type{type_} {} - - /// Bindless samplers constructor - explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} - - void MarkWrite() { - is_written = true; - } - - void MarkRead() { - is_read = true; - } - - void MarkAtomic() { - MarkWrite(); - MarkRead(); - is_atomic = true; - } - - u32 index = 0; - u32 offset = 0; - u32 buffer = 0; - - Tegra::Shader::ImageType type{}; - bool is_bindless = false; - bool is_written = false; - bool is_read = false; - bool is_atomic = false; -}; - -struct GlobalMemoryBase { - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - - [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - -/// Parameters describing an arithmetic operation -struct MetaArithmetic { - bool precise{}; ///< Whether the operation can be constraint or not -}; - -/// Parameters describing a texture sampler -struct MetaTexture { - SamplerEntry sampler; - Node array; - Node depth_compare; - std::vector<Node> aoffi; - std::vector<Node> ptp; - std::vector<Node> derivates; - Node bias; - Node lod; - Node component; - u32 element{}; - Node index; -}; - -struct MetaImage { - const ImageEntry& image; - std::vector<Node> values; - u32 element{}; -}; - -/// Parameters that modify an operation but are not part of any particular operand -using Meta = - std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; - -class AmendNode { -public: - [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const { - if (amend_index == amend_null_index) { - return std::nullopt; - } - return {amend_index}; - } - - void SetAmendIndex(std::size_t index) { - amend_index = index; - } - - void ClearAmend() { - amend_index = amend_null_index; - } - -private: - static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; - std::size_t amend_index{amend_null_index}; -}; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final : public AmendNode { -public: - explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} - - explicit OperationNode(OperationCode code_, Meta meta_) - : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {} - - explicit OperationNode(OperationCode code_, std::vector<Node> operands_) - : OperationNode(code_, Meta{}, std::move(operands_)) {} - - explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_) - : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} - - template <typename... Args> - explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) - : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} - - [[nodiscard]] OperationCode GetCode() const { - return code; - } - - [[nodiscard]] const Meta& GetMeta() const { - return meta; - } - - [[nodiscard]] std::size_t GetOperandsCount() const { - return operands.size(); - } - - [[nodiscard]] const Node& operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - OperationCode code{}; - Meta meta{}; - std::vector<Node> operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final : public AmendNode { -public: - explicit ConditionalNode(Node condition_, std::vector<Node>&& code_) - : condition{std::move(condition_)}, code{std::move(code_)} {} - - [[nodiscard]] const Node& GetCondition() const { - return condition; - } - - [[nodiscard]] const std::vector<Node>& GetCode() const { - return code; - } - -private: - Node condition; ///< Condition to be satisfied - std::vector<Node> code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return static_cast<u32>(index); - } - -private: - Tegra::Shader::Register index{}; -}; - -/// A custom variable -class CustomVarNode final { -public: - explicit constexpr CustomVarNode(u32 index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value_) : value{value_} {} - - [[nodiscard]] constexpr u32 GetValue() const { - return value; - } - -private: - u32 value{}; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} - - [[nodiscard]] constexpr InternalFlag GetFlag() const { - return flag; - } - -private: - InternalFlag flag{}; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) - : index{index_}, negated{negated_} {} - - [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { - return index; - } - - [[nodiscard]] constexpr bool IsNegated() const { - return negated; - } - -private: - Tegra::Shader::Pred index{}; - bool negated{}; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) - : buffer{std::move(buffer_)}, index{index_}, element{element_} {} - - // Initialize for physical attributes (index is a variable value). - explicit AbufNode(Node physical_address_, Node buffer_ = {}) - : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} - - [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - [[nodiscard]] u32 GetElement() const { - return element; - } - - [[nodiscard]] const Node& GetBuffer() const { - return buffer; - } - - [[nodiscard]] bool IsPhysicalBuffer() const { - return static_cast<bool>(physical_address); - } - - [[nodiscard]] const Node& GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address; - Node buffer; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Patch memory (used to communicate tessellation stages). -class PatchNode final { -public: - explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} - - [[nodiscard]] constexpr u32 GetOffset() const { - return offset; - } - -private: - u32 offset{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} - - [[nodiscard]] u32 GetIndex() const { - return index; - } - - [[nodiscard]] const Node& GetOffset() const { - return offset; - } - -private: - u32 index{}; - Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit LmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Shared memory node -class SmemNode final { -public: - explicit SmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) - : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, - descriptor{descriptor_} {} - - [[nodiscard]] const Node& GetRealAddress() const { - return real_address; - } - - [[nodiscard]] const Node& GetBaseAddress() const { - return base_address; - } - - [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - Node real_address; - Node base_address; - GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text_) : text{std::move(text_)} {} - - [[nodiscard]] const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> -#include <vector> - -#include "common/common_types.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -Node Conditional(Node condition, std::vector<Node> code) { - return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); -} - -Node Comment(std::string text) { - return MakeNode<CommentNode>(std::move(text)); -} - -Node Immediate(u32 value) { - return MakeNode<ImmediateNode>(value); -} - -Node Immediate(s32 value) { - return Immediate(static_cast<u32>(value)); -} - -Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); -} - -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldExtract: - return OperationCode::UBitfieldExtract; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case OperationCode::AtomicIExchange: - return OperationCode::AtomicUExchange; - case OperationCode::AtomicIAdd: - return OperationCode::AtomicUAdd; - case OperationCode::AtomicIMin: - return OperationCode::AtomicUMin; - case OperationCode::AtomicIMax: - return OperationCode::AtomicUMax; - case OperationCode::AtomicIAnd: - return OperationCode::AtomicUAnd; - case OperationCode::AtomicIOr: - return OperationCode::AtomicUOr; - case OperationCode::AtomicIXor: - return OperationCode::AtomicUXor; - case OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); - return {}; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h deleted file mode 100644 index 1e0886185..000000000 --- a/src/video_core/shader/node_helper.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <memory> -#include <string> -#include <tuple> -#include <type_traits> -#include <utility> -#include <vector> - -#include "common/common_types.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -/// This arithmetic operation cannot be constraint -inline constexpr MetaArithmetic PRECISE = {true}; -/// This arithmetic operation can be optimized away -inline constexpr MetaArithmetic NO_PRECISE = {false}; - -/// Creates a conditional node -Node Conditional(Node condition, std::vector<Node> code); - -/// Creates a commentary node -Node Comment(std::string text); - -/// Creates an u32 immediate -Node Immediate(u32 value); - -/// Creates a s32 immediate -Node Immediate(s32 value); - -/// Creates a f32 immediate -Node Immediate(f32 value); - -/// Converts an signed operation code to an unsigned operation code -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); - -template <typename T, typename... Args> -Node MakeNode(Args&&... args) { - static_assert(std::is_convertible_v<T, NodeData>); - return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); -} - -template <typename T, typename... Args> -TrackSampler MakeTrackSampler(Args&&... args) { - static_assert(std::is_convertible_v<T, TrackSamplerData>); - return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...}); -} - -template <typename... Args> -Node Operation(OperationCode code, Args&&... args) { - if constexpr (sizeof...(args) == 0) { - return MakeNode<OperationNode>(code); - } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, - Meta>) { - return MakeNode<OperationNode>(code, std::forward<Args>(args)...); - } else { - return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...); - } -} - -template <typename... Args> -Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) { - return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp deleted file mode 100644 index 148d91fcb..000000000 --- a/src/video_core/shader/registry.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <tuple> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::ConstBufferEngineInterface; -using Tegra::Engines::SamplerDescriptor; -using Tegra::Engines::ShaderType; - -namespace { - -GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage == ShaderType::Compute) { - return {}; - } - - auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); - - return { - .tfb_layouts = graphics.regs.tfb_layouts, - .tfb_varying_locs = graphics.regs.tfb_varying_locs, - .primitive_topology = graphics.regs.draw.topology, - .tessellation_primitive = graphics.regs.tess_mode.prim, - .tessellation_spacing = graphics.regs.tess_mode.spacing, - .tfb_enabled = graphics.regs.tfb_enabled != 0, - .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, - }; -} - -ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage != ShaderType::Compute) { - return {}; - } - - auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); - const auto& launch = compute.launch_description; - - return { - .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, - .shared_memory_size_in_words = launch.shared_alloc, - .local_memory_size_in_words = launch.local_pos_alloc, - }; -} - -} // Anonymous namespace - -Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) - : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, - bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} - -Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) - : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( - shader_stage, engine_)} {} - -Registry::~Registry() = default; - -std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { - const std::pair<u32, u32> key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( - std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { - SeparateSamplerKey key; - key.buffers = buffers; - key.offsets = offsets; - const auto iter = separate_samplers.find(key); - if (iter != separate_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - - const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); - const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); - const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); - separate_samplers.emplace(key, value); - return value; -} - -std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -bool Registry::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool Registry::HasEqualKeys(const Registry& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -const GraphicsInfo& Registry::GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; -} - -const ComputeInfo& Registry::GetComputeInfo() const { - ASSERT(stage == Tegra::Engines::ShaderType::Compute); - return compute_info; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <optional> -#include <type_traits> -#include <unordered_map> -#include <utility> - -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -struct SeparateSamplerKey { - std::pair<u32, u32> buffers; - std::pair<u32, u32> offsets; -}; - -} // namespace VideoCommon::Shader - -namespace std { - -template <> -struct hash<VideoCommon::Shader::SeparateSamplerKey> { - std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { - return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ - key.offsets.second); - } -}; - -template <> -struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { - bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, - const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { - return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; - } -}; - -} // namespace std - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; -using SeparateSamplerMap = - std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; -using BindlessSamplerMap = - std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -struct GraphicsInfo { - using Maxwell = Tegra::Engines::Maxwell3D::Regs; - - std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> - tfb_layouts{}; - std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; - Maxwell::PrimitiveTopology primitive_topology{}; - Maxwell::TessellationPrimitive tessellation_primitive{}; - Maxwell::TessellationSpacing tessellation_spacing{}; - bool tfb_enabled = false; - bool tessellation_clockwise = false; -}; -static_assert(std::is_trivially_copyable_v<GraphicsInfo> && - std::is_standard_layout_v<GraphicsInfo>); - -struct ComputeInfo { - std::array<u32, 3> workgroup_size{}; - u32 shared_memory_size_in_words = 0; - u32 local_memory_size_in_words = 0; -}; -static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); - -struct SerializedRegistryInfo { - VideoCore::GuestDriverProfile guest_driver_profile; - u32 bound_buffer = 0; - GraphicsInfo graphics; - ComputeInfo compute; -}; - -/** - * The Registry is a class use to interface the 3D and compute engines with the shader compiler. - * With it, the shader can obtain required data from GPU state and store it for disk shader - * compilation. - */ -class Registry { -public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); - - explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine_); - - ~Registry(); - - /// Retrieves a key from the registry, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. - std::optional<u32> ObtainKey(u32 buffer, u32 offset); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( - std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Checks keys and samplers against engine's current const buffers. - /// Returns true if they are the same value, false otherwise. - bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the registry. - bool HasEqualKeys(const Registry& rhs) const; - - /// Returns graphics information from this shader - const GraphicsInfo& GetGraphicsInfo() const; - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - SeparateSamplerMap separate_samplers; - BindlessSamplerMap bindless_samplers; - u32 bound_buffer; - GraphicsInfo graphics_info; - ComputeInfo compute_info; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <array> -#include <cmath> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Attribute; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaMode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredCondition; -using Tegra::Shader::PredOperation; -using Tegra::Shader::Register; - -ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, - Registry& registry_) - : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ - registry_} { - Decode(); - PostDecode(); -} - -ShaderIR::~ShaderIR() = default; - -Node ShaderIR::GetRegister(Register reg) { - if (reg != Register::ZeroIndex) { - used_registers.insert(static_cast<u32>(reg)); - } - return MakeNode<GprNode>(reg); -} - -Node ShaderIR::GetCustomVariable(u32 id) { - return MakeNode<CustomVarNode>(id); -} - -Node ShaderIR::GetImmediate19(Instruction instr) { - return Immediate(instr.alu.GetImm20_19()); -} - -Node ShaderIR::GetImmediate32(Instruction instr) { - return Immediate(instr.alu.GetImm20_32()); -} - -Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { - const auto index = static_cast<u32>(index_); - const auto offset = static_cast<u32>(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); - - return MakeNode<CbufNode>(index, Immediate(offset)); -} - -Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { - const auto index = static_cast<u32>(index_); - const auto offset = static_cast<u32>(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); - - Node final_offset = [&] { - // Attempt to inline constant buffer without a variable offset. This is done to allow - // tracking LDC calls. - if (const auto gpr = std::get_if<GprNode>(&*node)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - return Immediate(offset); - } - } - return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); - }(); - return MakeNode<CbufNode>(index, std::move(final_offset)); -} - -Node ShaderIR::GetPredicate(u64 pred_, bool negated) { - const auto pred = static_cast<Pred>(pred_); - if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { - used_predicates.insert(pred); - } - - return MakeNode<PredicateNode>(pred, negated); -} - -Node ShaderIR::GetPredicate(bool immediate) { - return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); -} - -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_input_attributes.emplace(index); - return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); -} - -Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { - uses_physical_attributes = true; - return MakeNode<AbufNode>(GetRegister(physical_address), buffer); -} - -Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_output_attributes.insert(index); - return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); -} - -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - Node node = MakeNode<InternalFlagNode>(flag); - if (negated) { - return Operation(OperationCode::LogicalNegate, std::move(node)); - } - return node; -} - -Node ShaderIR::GetLocalMemory(Node address) { - return MakeNode<LmemNode>(std::move(address)); -} - -Node ShaderIR::GetSharedMemory(Node address) { - return MakeNode<SmemNode>(std::move(address)); -} - -Node ShaderIR::GetTemporary(u32 id) { - return GetRegister(Register::ZeroIndex + 1 + id); -} - -Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { - switch (size) { - case Register::Size::Byte: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - return value; - case Register::Size::Short: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - return value; - case Register::Size::Word: - // Default - do nothing - return value; - default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", size); - return value; - } -} - -Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { - if (!is_signed) { - // Absolute or negate on an unsigned is pointless - return value; - } - if (absolute) { - value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { - Node value = Immediate(instr.half_imm.PackImmediates()); - if (!has_negation) { - return value; - } - - Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); - Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); - - return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), - std::move(second_negate)); -} - -Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { - return Operation(OperationCode::HUnpack, type, std::move(value)); -} - -Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { - switch (merge) { - case Tegra::Shader::HalfMerge::H0_H1: - return src; - case Tegra::Shader::HalfMerge::F32: - return Operation(OperationCode::HMergeF32, std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H0: - return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H1: - return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); - } - UNREACHABLE(); - return src; -} - -Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), - GetPredicate(true)); - } - return value; -} - -Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - if (condition == PredCondition::T) { - return GetPredicate(true); - } else if (condition == PredCondition::F) { - return GetPredicate(false); - } - - static constexpr std::array comparison_table{ - OperationCode(0), - OperationCode::LogicalFOrdLessThan, // LT - OperationCode::LogicalFOrdEqual, // EQ - OperationCode::LogicalFOrdLessEqual, // LE - OperationCode::LogicalFOrdGreaterThan, // GT - OperationCode::LogicalFOrdNotEqual, // NE - OperationCode::LogicalFOrdGreaterEqual, // GE - OperationCode::LogicalFOrdered, // NUM - OperationCode::LogicalFUnordered, // NAN - OperationCode::LogicalFUnordLessThan, // LTU - OperationCode::LogicalFUnordEqual, // EQU - OperationCode::LogicalFUnordLessEqual, // LEU - OperationCode::LogicalFUnordGreaterThan, // GTU - OperationCode::LogicalFUnordNotEqual, // NEU - OperationCode::LogicalFUnordGreaterEqual, // GEU - }; - const std::size_t index = static_cast<std::size_t>(condition); - ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - - return Operation(comparison_table[index], op_a, op_b); -} - -Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, - std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); -} - -Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, - std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); -} - -OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static constexpr std::array operation_table{ - OperationCode::LogicalAnd, - OperationCode::LogicalOr, - OperationCode::LogicalXor, - }; - - const auto index = static_cast<std::size_t>(operation); - if (index >= operation_table.size()) { - UNIMPLEMENTED_MSG("Unknown predicate operation."); - return {}; - } - - return operation_table[index]; -} - -Node ShaderIR::GetConditionCode(ConditionCode cc) const { - switch (cc) { - case ConditionCode::NEU: - return GetInternalFlag(InternalFlag::Zero, true); - case ConditionCode::FCSM_TR: - UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); - return MakeNode<PredicateNode>(Pred::NeverExecute, false); - default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); - return MakeNode<PredicateNode>(Pred::NeverExecute, false); - } -} - -void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { - bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); -} - -void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); -} - -void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); -} - -void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { - SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); -} - -void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), - Immediate(offset), Immediate(bits)); -} - -Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), - Immediate(bits)); -} - -void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { - switch (index) { - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - break; - case 1: - uses_layer = true; - break; - case 2: - uses_viewport_index = true; - break; - case 3: - uses_point_size = true; - break; - } - break; - case Attribute::Index::TessCoordInstanceIDVertexID: - switch (element) { - case 2: - uses_instance_id = true; - break; - case 3: - uses_vertex_id = true; - break; - } - break; - case Attribute::Index::ClipDistances0123: - case Attribute::Index::ClipDistances4567: { - const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; - used_clip_distances.at(clip_index) = true; - break; - } - case Attribute::Index::FrontColor: - case Attribute::Index::FrontSecondaryColor: - case Attribute::Index::BackColor: - case Attribute::Index::BackSecondaryColor: - uses_legacy_varyings = true; - break; - default: - if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { - uses_legacy_varyings = true; - } - break; - } -} - -std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const auto id = amend_code.size(); - amend_code.push_back(std::move(new_amend)); - return id; -} - -u32 ShaderIR::NewCustomVariable() { - return num_custom_variables++; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <list> -#include <map> -#include <optional> -#include <set> -#include <tuple> -#include <vector> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct ShaderBlock; - -constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; - -struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) - : max_offset{max_offset_}, is_indirect{is_indirect_} {} - - constexpr ConstBuffer() = default; - - void MarkAsUsed(u64 offset) { - max_offset = std::max(max_offset, static_cast<u32>(offset)); - } - - void MarkAsUsedIndirect() { - is_indirect = true; - } - - bool IsIndirect() const { - return is_indirect; - } - - u32 GetSize() const { - return max_offset + static_cast<u32>(sizeof(float)); - } - - u32 GetMaxOffset() const { - return max_offset; - } - -private: - u32 max_offset = 0; - bool is_indirect = false; -}; - -struct GlobalMemoryUsage { - bool is_read{}; - bool is_written{}; -}; - -class ShaderIR final { -public: - explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, - CompilerSettings settings_, Registry& registry_); - ~ShaderIR(); - - const std::map<u32, NodeBlock>& GetBasicBlocks() const { - return basic_blocks; - } - - const std::set<u32>& GetRegisters() const { - return used_registers; - } - - const std::set<Tegra::Shader::Pred>& GetPredicates() const { - return used_predicates; - } - - const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { - return used_input_attributes; - } - - const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { - return used_output_attributes; - } - - const std::map<u32, ConstBuffer>& GetConstantBuffers() const { - return used_cbufs; - } - - const std::list<SamplerEntry>& GetSamplers() const { - return used_samplers; - } - - const std::list<ImageEntry>& GetImages() const { - return used_images; - } - - const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() - const { - return used_clip_distances; - } - - const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { - return used_global_memory; - } - - std::size_t GetLength() const { - return static_cast<std::size_t>(coverage_end * sizeof(u64)); - } - - bool UsesLayer() const { - return uses_layer; - } - - bool UsesViewportIndex() const { - return uses_viewport_index; - } - - bool UsesPointSize() const { - return uses_point_size; - } - - bool UsesInstanceId() const { - return uses_instance_id; - } - - bool UsesVertexId() const { - return uses_vertex_id; - } - - bool UsesLegacyVaryings() const { - return uses_legacy_varyings; - } - - bool UsesYNegate() const { - return uses_y_negate; - } - - bool UsesWarps() const { - return uses_warps; - } - - bool HasPhysicalAttributes() const { - return uses_physical_attributes; - } - - const Tegra::Shader::Header& GetHeader() const { - return header; - } - - bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - u32 GetASTNumVariables() const { - return program_manager.GetVariables(); - } - - u32 ConvertAddressToNvidiaSpace(u32 address) const { - return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); - } - - /// Returns a condition code evaluated from internal flags - Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; - - const Node& GetAmendNode(std::size_t index) const { - return amend_code[index]; - } - - u32 GetNumCustomVariables() const { - return num_custom_variables; - } - -private: - friend class ASTDecoder; - - struct SamplerInfo { - std::optional<Tegra::Shader::TextureType> type; - std::optional<bool> is_array; - std::optional<bool> is_shadow; - std::optional<bool> is_buffer; - - constexpr bool IsComplete() const noexcept { - return type && is_array && is_shadow && is_buffer; - } - }; - - void Decode(); - void PostDecode(); - - NodeBlock DecodeRange(u32 begin, u32 end); - void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); - void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); - - /** - * Decodes a single instruction from Tegra to IR. - * @param bb Basic block where the nodes will be written to. - * @param pc Program counter. Offset to decode. - * @return Next address to decode. - */ - u32 DecodeInstr(NodeBlock& bb, u32 pc); - - u32 DecodeArithmetic(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); - u32 DecodeBfe(NodeBlock& bb, u32 pc); - u32 DecodeBfi(NodeBlock& bb, u32 pc); - u32 DecodeShift(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); - u32 DecodeFfma(NodeBlock& bb, u32 pc); - u32 DecodeHfma2(NodeBlock& bb, u32 pc); - u32 DecodeConversion(NodeBlock& bb, u32 pc); - u32 DecodeWarp(NodeBlock& bb, u32 pc); - u32 DecodeMemory(NodeBlock& bb, u32 pc); - u32 DecodeTexture(NodeBlock& bb, u32 pc); - u32 DecodeImage(NodeBlock& bb, u32 pc); - u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeFloatSet(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); - u32 DecodeHalfSet(NodeBlock& bb, u32 pc); - u32 DecodeVideo(NodeBlock& bb, u32 pc); - u32 DecodeXmad(NodeBlock& bb, u32 pc); - u32 DecodeOther(NodeBlock& bb, u32 pc); - - /// Generates a node for a passed register. - Node GetRegister(Tegra::Shader::Register reg); - /// Generates a node for a custom variable - Node GetCustomVariable(u32 id); - /// Generates a node representing a 19-bit immediate value - Node GetImmediate19(Tegra::Shader::Instruction instr); - /// Generates a node representing a 32-bit immediate value - Node GetImmediate32(Tegra::Shader::Instruction instr); - /// Generates a node representing a constant buffer - Node GetConstBuffer(u64 index, u64 offset); - /// Generates a node representing a constant buffer with a variadic offset - Node GetConstBufferIndirect(u64 index, u64 offset, Node node); - /// Generates a node for a passed predicate. It can be optionally negated - Node GetPredicate(u64 pred, bool negated = false); - /// Generates a predicate node for an immediate true or false value - Node GetPredicate(bool immediate); - /// Generates a node representing an input attribute. Keeps track of used attributes. - Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); - /// Generates a node representing a physical input attribute. - Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); - /// Generates a node representing an output attribute. Keeps track of used attributes. - Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); - /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false) const; - /// Generates a node representing a local memory address - Node GetLocalMemory(Node address); - /// Generates a node representing a shared memory address - Node GetSharedMemory(Node address); - /// Generates a temporary, internally it uses a post-RZ register - Node GetTemporary(u32 id); - - /// Sets a register. src value must be a number-evaluated node. - void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); - /// Sets a predicate. src value must be a bool-evaluated node - void SetPredicate(NodeBlock& bb, u64 dest, Node src); - /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); - /// Sets a local memory address with a value. - void SetLocalMemory(NodeBlock& bb, Node address, Node value); - /// Sets a shared memory address with a value. - void SetSharedMemory(NodeBlock& bb, Node address, Node value); - /// Sets a temporary. Internally it uses a post-RZ register - void SetTemporary(NodeBlock& bb, u32 id, Node value); - - /// Sets internal flags from a float - void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); - /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); - - /// Conditionally absolute/negated float. Absolute is applied first - Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); - /// Conditionally saturates a float - Node GetSaturatedFloat(Node value, bool saturate = true); - - /// Converts an integer to different sizes. - Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); - /// Conditionally absolute/negated integer. Absolute is applied first - Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); - - /// Unpacks a half immediate from an instruction - Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); - /// Unpacks a binary value into a half float pair with a type format - Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); - /// Merges a half pair into another value - Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); - /// Conditionally absolute/negated half float pair. Absolute is applied first - Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); - /// Conditionally saturates a half float pair - Node GetSaturatedHalfFloat(Node value, bool saturate = true); - - /// Get image component value by type and size - std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, - u32 component_size, Node original_value); - - /// Returns a predicate comparing two floats - Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - /// Returns a predicate comparing two integers - Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, - Node op_a, Node op_b); - /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - - /// Returns a predicate combiner operation - OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - - /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, - std::optional<Tegra::Engines::SamplerDescriptor> sampler); - - /// Accesses a texture sampler. - std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); - - /// Accesses a texture sampler for a bindless texture. - std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); - - /// Accesses an image. - ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); - - /// Access a bindless image sampler. - ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - - /// Extracts a sequence of bits from a node - Node BitfieldExtract(Node value, u32 offset, u32 bits); - - /// Inserts a sequence of bits from a node - Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); - - /// Marks the usage of a input or output attribute. - void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); - - /// Decodes VMNMX instruction and inserts its code into the passed basic block. - void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); - - void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); - - void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - - Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi, - std::optional<Tegra::Shader::Register> bindless_reg); - - Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); - - Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, - bool is_bindless); - - Node4 GetTldCode(Tegra::Shader::Instruction instr); - - Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool is_array); - - std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( - Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, - bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); - - std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs); - - Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, - std::optional<Tegra::Shader::Register> bindless_reg); - - Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, - u64 byte_height); - - void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, - Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, - Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate, bool sets_cc); - void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, - Node op_c, Node imm_lut, bool sets_cc); - - std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); - - std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, - const OperationNode& operation, - Node gpr, Node base_offset, - Node tracked, const NodeBlock& code, - s64 cursor); - - std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const; - - std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, - Tegra::Shader::Instruction instr, - bool is_read, bool is_write); - - /// Register new amending code and obtain the reference id. - std::size_t DeclareAmend(Node new_amend); - - u32 NewCustomVariable(); - - const ProgramCode& program_code; - const u32 main_offset; - const CompilerSettings settings; - Registry& registry; - - bool decompiled{}; - bool disable_flow_stack{}; - - u32 coverage_begin{}; - u32 coverage_end{}; - - std::map<u32, NodeBlock> basic_blocks; - NodeBlock global_code; - ASTManager program_manager{true, true}; - std::vector<Node> amend_code; - u32 num_custom_variables{}; - - std::set<u32> used_registers; - std::set<Tegra::Shader::Pred> used_predicates; - std::set<Tegra::Shader::Attribute::Index> used_input_attributes; - std::set<Tegra::Shader::Attribute::Index> used_output_attributes; - std::map<u32, ConstBuffer> used_cbufs; - std::list<SamplerEntry> used_samplers; - std::list<ImageEntry> used_images; - std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; - std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; - bool uses_layer{}; - bool uses_viewport_index{}; - bool uses_point_size{}; - bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes - bool uses_instance_id{}; - bool uses_vertex_id{}; - bool uses_legacy_varyings{}; - bool uses_y_negate{}; - bool uses_warps{}; - bool uses_indexed_samplers{}; - - Tegra::Shader::Header header; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <utility> -#include <variant> - -#include "common/common_types.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, - OperationCode operation_code) { - for (; cursor >= 0; --cursor) { - Node node = code.at(cursor); - - if (const auto operation = std::get_if<OperationNode>(&*node)) { - if (operation->GetCode() == operation_code) { - return {std::move(node), cursor}; - } - } - - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - const auto& conditional_code = conditional->GetCode(); - auto result = FindOperation( - conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); - auto& found = result.first; - if (found) { - return {std::move(found), cursor}; - } - } - } - return {}; -} - -std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { - if (operation.GetCode() != OperationCode::UAdd) { - return std::nullopt; - } - Node gpr; - Node offset; - ASSERT(operation.GetOperandsCount() == 2); - for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { - Node operand = operation[i]; - if (std::holds_alternative<ImmediateNode>(*operand)) { - offset = operation[i]; - } else if (std::holds_alternative<GprNode>(*operand)) { - gpr = operation[i]; - } - } - if (offset && gpr) { - return std::make_pair(gpr, offset); - } - return std::nullopt; -} - -bool AmendNodeCv(std::size_t amend_index, Node node) { - if (const auto operation = std::get_if<OperationNode>(&*node)) { - operation->SetAmendIndex(amend_index); - return true; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { - conditional->SetAmendIndex(amend_index); - return true; - } - return false; -} - -} // Anonymous namespace - -std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { - if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { - const u32 cbuf_index = cbuf->GetIndex(); - - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); - return {tracked, track}; - } - if (const auto operation = std::get_if<OperationNode>(&*offset)) { - const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf_index) { - return {}; - } - if (const std::optional pair = DecoupleIndirectRead(*operation)) { - auto [gpr, base_offset] = *pair; - return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, - code, cursor); - } - } - return {}; - } - if (const auto gpr = std::get_if<GprNode>(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackBindlessSampler(source, code, new_cursor); - } - if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - const OperationNode& op = *operation; - - const OperationCode opcode = operation->GetCode(); - if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { - ASSERT(op.GetOperandsCount() == 2); - auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); - auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); - if (node_a && node_b) { - auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, - std::pair{offset_a, offset_b}); - return {tracked, std::move(track)}; - } - } - std::size_t i = op.GetOperandsCount(); - while (i--) { - if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { - // Constant buffer found in operand. - return found; - } - } - return {}; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackBindlessSampler(tracked, conditional_code, - static_cast<s64>(conditional_code.size())); - } - return {}; -} - -std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( - const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, - const NodeBlock& code, s64 cursor) { - const auto offset_imm = std::get<ImmediateNode>(*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); - Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); - - Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - - // TODO: Implement bindless index custom variable - auto track = - MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); - return {tracked, track}; -} - -std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, - s64 cursor) const { - if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - return {tracked, cbuf->GetIndex(), immediate->GetValue()}; - } - return {}; - } - if (const auto gpr = std::get_if<GprNode>(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackCbuf(source, code, new_cursor); - } - if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { - // Cbuf found in operand. - return found; - } - } - return {}; - } - if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); - } - return {}; -} - -std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register - // that it uses as operand - const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); - const auto& found = result.first; - if (!found) { - return std::nullopt; - } - if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { - return immediate->GetValue(); - } - return std::nullopt; -} - -std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const { - for (; cursor >= 0; --cursor) { - const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); - if (!found_node) { - return {}; - } - const auto operation = std::get_if<OperationNode>(&*found_node); - ASSERT(operation); - - const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if<GprNode>(&*target)) { - if (gpr_target->GetIndex() == tracked->GetIndex()) { - return {(*operation)[1], new_cursor}; - } - } - } - return {}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <array> -#include <unordered_map> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/transform_feedback.h" - -namespace VideoCommon::Shader { - -namespace { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 - -/// Attribute offsets that describe a vector -constexpr std::array VECTORS = { - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] -}; -} // namespace - -std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { - - std::unordered_map<u8, VaryingTFB> tfb; - - for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = info.tfb_varying_locs[buffer]; - const auto& layout = info.tfb_layouts[buffer]; - const std::size_t varying_count = layout.varying_count; - - std::size_t highest = 0; - - for (std::size_t offset = 0; offset < varying_count; ++offset) { - const std::size_t base_offset = offset; - const u8 location = locations[offset]; - - VaryingTFB varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * sizeof(u32); - varying.components = 1; - - if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - - [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; - UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); - - highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); - } - - UNIMPLEMENTED_IF(highest != layout.stride); - } - return tfb; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <unordered_map> - -#include "common/common_types.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct VaryingTFB { - std::size_t buffer; - std::size_t stride; - std::size_t offset; - std::size_t components; -}; - -std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); - -} // namespace VideoCommon::Shader |