diff options
-rw-r--r-- | src/shader_recompiler/backend/spirv/emit_context.cpp | 79 | ||||
-rw-r--r-- | src/shader_recompiler/backend/spirv/emit_context.h | 11 | ||||
-rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 58 | ||||
-rw-r--r-- | src/shader_recompiler/frontend/ir/modifiers.h | 17 | ||||
-rw-r--r-- | src/shader_recompiler/frontend/ir/opcodes.inc | 24 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/texture_pass.cpp | 89 | ||||
-rw-r--r-- | src/shader_recompiler/shader_info.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/pipeline_helper.h | 50 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 46 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 63 |
10 files changed, 284 insertions, 157 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 7f16cb0dc..8e625f8fb 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -380,6 +380,24 @@ Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_p ctx.OpFunctionEnd(); return func; } + +template <typename Desc> +std::string NameOf(const Desc& desc, std::string_view prefix) { + if (desc.count > 1) { + return fmt::format("{}{}_{:02x}x{}", prefix, desc.cbuf_index, desc.cbuf_offset, desc.count); + } else { + return fmt::format("{}{}_{:02x}", prefix, desc.cbuf_index, desc.cbuf_offset); + } +} + +Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { + if (count > 1) { + const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))}; + return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type); + } else { + return pointer_type; + } +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -971,12 +989,15 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - texture_buffers.insert(texture_buffers.end(), desc.count, id); + Name(id, NameOf(desc, "texbuf")); + texture_buffers.push_back({ + .id = id, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } @@ -992,44 +1013,41 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("imgbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - const ImageBufferDefinition def{ + Name(id, NameOf(desc, "imgbuf")); + image_buffers.push_back({ .id = id, .image_type = image_type, - }; - image_buffers.insert(image_buffers.end(), desc.count, def); + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } void EmitContext::DefineTextures(const Info& info, u32& binding) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { - if (desc.count != 1) { - throw NotImplementedException("Array of textures"); - } const Id image_type{ImageType(*this, desc)}; const Id sampled_type{TypeSampledImage(image_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; - const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)}; + const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - for (u32 index = 0; index < desc.count; ++index) { - // TODO: Pass count info - textures.push_back(TextureDefinition{ - .id{id}, - .sampled_type{sampled_type}, - .image_type{image_type}, - }); - } + Name(id, NameOf(desc, "tex")); + textures.push_back({ + .id = id, + .sampled_type = sampled_type, + .pointer_type = pointer_type, + .image_type = image_type, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } @@ -1037,24 +1055,23 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { images.reserve(info.image_descriptors.size()); for (const ImageDescriptor& desc : info.image_descriptors) { if (desc.count != 1) { - throw NotImplementedException("Array of textures"); + throw NotImplementedException("Array of images"); } const Id image_type{ImageType(*this, desc)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("img{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - for (u32 index = 0; index < desc.count; ++index) { - images.push_back(ImageDefinition{ - .id{id}, - .image_type{image_type}, - }); - } + Name(id, NameOf(desc, "img")); + images.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index a4503c7ab..c52544fb7 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -32,17 +32,26 @@ private: struct TextureDefinition { Id id; Id sampled_type; + Id pointer_type; Id image_type; + u32 count; +}; + +struct TextureBufferDefinition { + Id id; + u32 count; }; struct ImageBufferDefinition { Id id; Id image_type; + u32 count; }; struct ImageDefinition { Id id; Id image_type; + u32 count; }; struct UniformDefinitions { @@ -162,7 +171,7 @@ public: std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{}; - std::vector<Id> texture_buffers; + std::vector<TextureBufferDefinition> texture_buffers; std::vector<ImageBufferDefinition> image_buffers; std::vector<TextureDefinition> textures; std::vector<ImageDefinition> images; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 90817f161..6008980af 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -147,24 +147,31 @@ private: spv::ImageOperandsMask mask{}; }; -Id Texture(EmitContext& ctx, const IR::Value& index) { - if (index.IsImmediate()) { - const TextureDefinition def{ctx.textures.at(index.U32())}; +Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))}; + return ctx.OpLoad(def.sampled_type, pointer); + } else { return ctx.OpLoad(def.sampled_type, def.id); } - throw NotImplementedException("Indirect texture sample"); } -Id TextureImage(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { - if (!index.IsImmediate()) { - throw NotImplementedException("Indirect texture sample"); - } +Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { if (info.type == TextureType::Buffer) { - const Id sampler_id{ctx.texture_buffers.at(index.U32())}; + const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + const Id sampler_id{def.id}; const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; return ctx.OpImage(ctx.image_buffer_type, id); } else { - const TextureDefinition def{ctx.textures.at(index.U32())}; + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); } } @@ -311,7 +318,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& bias_lc, offset); return Emit(&EmitContext::OpImageSparseSampleImplicitLod, &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, index), coords, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } else { // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as // if the lod was explicitly zero. This may change on Turing with implicit compute @@ -320,7 +327,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, index), coords, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } } @@ -329,8 +336,8 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), - coords, operands.Mask(), operands.Span()); + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -340,7 +347,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va offset); return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], - Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -349,7 +356,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], - Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); } Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -357,15 +364,17 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, - ctx.F32[4], Texture(ctx, index), coords, ctx.Const(info.gather_component), + ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), operands.Mask(), operands.Span()); } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, - ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.Mask(), + operands.Span()); } Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, @@ -376,12 +385,12 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, index, info), coords, operands.Mask(), operands.Span()); + TextureImage(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { const auto info{inst->Flags<IR::TextureInstInfo>()}; - const Id image{TextureImage(ctx, index, info)}; + const Id image{TextureImage(ctx, info, index)}; const Id zero{ctx.u32_zero_value}; const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { @@ -405,9 +414,10 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i throw LogicError("Unspecified image type {}", info.type.Value()); } -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coords) { +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; const Id zero{ctx.f32_zero_value}; - const Id sampler{Texture(ctx, index)}; + const Id sampler{Texture(ctx, info, index)}; return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), zero, zero); } @@ -418,8 +428,8 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, lod_clamp); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), - coords, operands.Mask(), operands.Span()); + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 5d7efa14c..77cda1f8a 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -34,14 +34,15 @@ static_assert(sizeof(FpControl) <= sizeof(u32)); union TextureInstInfo { u32 raw; - BitField<0, 8, TextureType> type; - BitField<8, 1, u32> is_depth; - BitField<9, 1, u32> has_bias; - BitField<10, 1, u32> has_lod_clamp; - BitField<11, 1, u32> relaxed_precision; - BitField<12, 2, u32> gather_component; - BitField<14, 2, u32> num_derivates; - BitField<16, 3, ImageFormat> image_format; + BitField<0, 16, u32> descriptor_index; + BitField<16, 3, TextureType> type; + BitField<19, 1, u32> is_depth; + BitField<20, 1, u32> has_bias; + BitField<21, 1, u32> has_lod_clamp; + BitField<22, 1, u32> relaxed_precision; + BitField<23, 2, u32> gather_component; + BitField<25, 2, u32> num_derivates; + BitField<27, 3, ImageFormat> image_format; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index b6869d4e4..8f32c9e74 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -482,18 +482,18 @@ OPCODE(BoundImageGradient, F32x4, U32, OPCODE(BoundImageRead, U32x4, U32, Opaque, ) OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) -OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) -OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) -OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageRead, U32x4, U32, Opaque, ) -OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) // Warp operations OPCODE(LaneId, U32, ) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 5ac485522..cfa6b34b9 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <bit> #include <optional> #include <boost/container/small_vector.hpp> @@ -21,6 +22,8 @@ struct ConstBufferAddr { u32 offset; u32 secondary_index; u32 secondary_offset; + IR::U32 dynamic_offset; + u32 count; bool has_secondary; }; @@ -32,6 +35,9 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector<TextureInst, 24>; +constexpr u32 DESCRIPTOR_SIZE = 8; +constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE)); + IR::Opcode IndexedInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: @@ -131,6 +137,9 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { if (lhs->has_secondary || rhs->has_secondary) { return std::nullopt; } + if (lhs->count > 1 || rhs->count > 1) { + return std::nullopt; + } if (lhs->index > rhs->index || lhs->offset > rhs->offset) { std::swap(lhs, rhs); } @@ -139,9 +148,12 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { .offset = lhs->offset, .secondary_index = rhs->index, .secondary_offset = rhs->offset, + .dynamic_offset = {}, + .count = 1, .has_secondary = true, }; } + case IR::Opcode::GetCbufU32x2: case IR::Opcode::GetCbufU32: break; } @@ -152,15 +164,39 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { // but not supported here at the moment return std::nullopt; } - if (!offset.IsImmediate()) { - // TODO: Support arrays of textures + if (offset.IsImmediate()) { + return ConstBufferAddr{ + .index = index.U32(), + .offset = offset.U32(), + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, + }; + } + IR::Inst* const offset_inst{offset.InstRecursive()}; + if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) { + return std::nullopt; + } + u32 base_offset{}; + IR::U32 dynamic_offset; + if (offset_inst->Arg(0).IsImmediate()) { + base_offset = offset_inst->Arg(0).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(1)}; + } else if (offset_inst->Arg(1).IsImmediate()) { + base_offset = offset_inst->Arg(1).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(0)}; + } else { return std::nullopt; } return ConstBufferAddr{ - .index{index.U32()}, - .offset{offset.U32()}, + .index = index.U32(), + .offset = base_offset, .secondary_index = 0, .secondary_offset = 0, + .dynamic_offset = dynamic_offset, + .count = 8, .has_secondary = false, }; } @@ -179,11 +215,13 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { .offset = inst.Arg(0).U32(), .secondary_index = 0, .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, .has_secondary = false, }; } return TextureInst{ - .cbuf{addr}, + .cbuf = addr, .inst = &inst, .block = block, }; @@ -209,18 +247,20 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.has_secondary == existing.has_secondary && - desc.cbuf_index == existing.cbuf_index && + return desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift && + desc.has_secondary == existing.has_secondary; }); } u32 Add(const ImageBufferDescriptor& desc) { return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; }); } @@ -231,7 +271,8 @@ public: desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift; }); } @@ -239,7 +280,8 @@ public: const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; return index; @@ -310,7 +352,6 @@ void TexturePass(Environment& env, IR::Program& program) { // This happens on Fire Emblem: Three Houses flags.type.Assign(TextureType::Buffer); } - inst->SetFlags(flags); break; default: break; @@ -329,7 +370,8 @@ void TexturePass(Environment& env, IR::Program& program) { .is_written = is_written, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { index = descriptors.Add(ImageDescriptor{ @@ -338,7 +380,8 @@ void TexturePass(Environment& env, IR::Program& program) { .is_written = is_written, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } break; @@ -351,7 +394,8 @@ void TexturePass(Environment& env, IR::Program& program) { .cbuf_offset = cbuf.offset, .secondary_cbuf_index = cbuf.secondary_index, .secondary_cbuf_offset = cbuf.secondary_offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { index = descriptors.Add(TextureDescriptor{ @@ -362,12 +406,23 @@ void TexturePass(Environment& env, IR::Program& program) { .cbuf_offset = cbuf.offset, .secondary_cbuf_index = cbuf.secondary_index, .secondary_cbuf_offset = cbuf.secondary_offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } break; } - inst->SetArg(0, IR::Value{index}); + flags.descriptor_index.Assign(index); + inst->SetFlags(flags); + + if (cbuf.count > 1) { + const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; + IR::IREmitter ir{*texture_inst.block, insert_point}; + const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; + inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); + } else { + inst->SetArg(0, IR::Value{}); + } } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 0f45bdfb6..0f28ae07b 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -67,6 +67,7 @@ struct TextureBufferDescriptor { u32 secondary_cbuf_index; u32 secondary_cbuf_offset; u32 count; + u32 size_shift; }; using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; @@ -76,6 +77,7 @@ struct ImageBufferDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + u32 size_shift; }; using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; @@ -88,6 +90,7 @@ struct TextureDescriptor { u32 secondary_cbuf_index; u32 secondary_cbuf_offset; u32 count; + u32 size_shift; }; using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; @@ -98,6 +101,7 @@ struct ImageDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + u32 size_shift; }; using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index aaf9a735e..dd7d2cc0c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -85,28 +85,30 @@ public: } void Add(const Shader::Info& info, VkShaderStageFlags stage) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); } private: - void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) { + template <typename Descriptors> + void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { + const size_t num{descriptors.size()}; for (size_t i = 0; i < num; ++i) { bindings.push_back({ .binding = binding, .descriptorType = type, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .stageFlags = stage, .pImmutableSamplers = nullptr, }); entries.push_back({ .dstBinding = binding, .dstArrayElement = 0, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .descriptorType = type, .offset = offset, .stride = sizeof(DescriptorUpdateEntry), @@ -126,21 +128,29 @@ private: inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { - image_view_ids += info.texture_buffer_descriptors.size(); - image_view_ids += info.image_buffer_descriptors.size(); + for (const auto& desc : info.texture_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + image_view_ids += desc.count; + } for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{*(samplers++)}; - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(desc.type)}; - update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + for (u32 index = 0; index < desc.count; ++index) { + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + } } for (const auto& desc : info.image_descriptors) { - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - if (desc.is_written) { - texture_cache.MarkModification(image_view.image_id); + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); } - const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; - update_descriptor_queue.AddImage(vk_image_view); } } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 45d837ca4..6e9f66262 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -91,35 +91,41 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; const bool via_header_index{qmd.linked_tsc != 0}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { if (desc.has_secondary) { ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + - desc.secondary_cbuf_offset}; + secondary_offset}; const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; - const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TextureHandle{lhs_raw | rhs_raw, via_header_index}; } } return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - samplers.push_back(sampler->Handle()); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); @@ -130,16 +136,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) { + is_written = desc.is_written; + } + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++texture_buffer_ids; + ++index; } - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); - buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format, is_written); - ++texture_buffer_ids; - ++index; }}; std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 08f00b9ce..b7688aef9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -161,23 +161,26 @@ void GraphicsPipeline::Configure(bool is_indexed) { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - desc.is_written); - ++index; + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); - const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + - desc.secondary_cbuf_offset}; + second_offset}; const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; @@ -187,17 +190,21 @@ void GraphicsPipeline::Configure(bool is_indexed) { return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); } @@ -208,24 +215,30 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v<decltype(desc), + const Shader::ImageBufferDescriptor&>) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++index; + ++texture_buffer_index; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; - buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written); - ++index; - ++texture_buffer_index; }}; const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsTextureBuffers(stage); std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); - texture_buffer_index += info.texture_descriptors.size(); - texture_buffer_index += info.image_descriptors.size(); + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } buffer_cache.UpdateGraphicsBuffers(is_indexed); |