diff options
author | bunnei <bunneidev@gmail.com> | 2021-07-25 20:39:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-25 20:39:04 +0200 |
commit | 98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch) | |
tree | 816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | |
parent | Merge pull request #6699 from lat9nq/common-threads (diff) | |
parent | shader: Support out of bound local memory reads and immediate writes (diff) | |
download | yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.gz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.bz2 yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.lz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.xz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.zst yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.zip |
Diffstat (limited to 'src/shader_recompiler/backend/glasm/emit_glasm_image.cpp')
-rw-r--r-- | src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 850 |
1 files changed, 850 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp new file mode 100644 index 000000000..09e3a9b82 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -0,0 +1,850 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <utility> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +struct ScopedRegister { + ScopedRegister() = default; + ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{®_alloc_}, reg{reg_alloc->AllocReg()} {} + + ~ScopedRegister() { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + } + + ScopedRegister& operator=(ScopedRegister&& rhs) noexcept { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + reg_alloc = std::exchange(rhs.reg_alloc, nullptr); + reg = rhs.reg; + return *this; + } + + ScopedRegister(ScopedRegister&& rhs) noexcept + : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {} + + ScopedRegister& operator=(const ScopedRegister&) = delete; + ScopedRegister(const ScopedRegister&) = delete; + + RegAlloc* reg_alloc{}; + Register reg; +}; + +std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); + } +} + +std::string Image(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index)); + } +} + +std::string_view TextureType(IR::TextureInstInfo info) { + if (info.is_depth) { + switch (info.type) { + case TextureType::Color1D: + return "SHADOW1D"; + case TextureType::ColorArray1D: + return "SHADOWARRAY1D"; + case TextureType::Color2D: + return "SHADOW2D"; + case TextureType::ColorArray2D: + return "SHADOWARRAY2D"; + case TextureType::Color3D: + return "SHADOW3D"; + case TextureType::ColorCube: + return "SHADOWCUBE"; + case TextureType::ColorArrayCube: + return "SHADOWARRAYCUBE"; + case TextureType::Buffer: + return "SHADOWBUFFER"; + } + } else { + switch (info.type) { + case TextureType::Color1D: + return "1D"; + case TextureType::ColorArray1D: + return "ARRAY1D"; + case TextureType::Color2D: + return "2D"; + case TextureType::ColorArray2D: + return "ARRAY2D"; + case TextureType::Color3D: + return "3D"; + case TextureType::ColorCube: + return "CUBE"; + case TextureType::ColorArrayCube: + return "ARRAYCUBE"; + case TextureType::Buffer: + return "BUFFER"; + } + } + throw InvalidArgument("Invalid texture type {}", info.type.Value()); +} + +std::string Offset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return ""; + } + return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); +} + +std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx, + const IR::Value& offset2) { + if (offset2.IsEmpty()) { + return {}; + } else { + return {ctx.reg_alloc, ctx.reg_alloc}; + } +} + +void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1, + const IR::Value& offset2) { + const Register offsets_a{ctx.reg_alloc.Consume(offset1)}; + const Register offsets_b{ctx.reg_alloc.Consume(offset2)}; + // Input swizzle: [XYXY] [XYXY] + // Output swizzle: [XXXX] [YYYY] + ctx.Add("MOV {}.x,{}.x;" + "MOV {}.y,{}.z;" + "MOV {}.z,{}.x;" + "MOV {}.w,{}.z;" + "MOV {}.x,{}.y;" + "MOV {}.y,{}.w;" + "MOV {}.z,{}.y;" + "MOV {}.w,{}.w;", + off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y, + offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); +} + +std::string GradOffset(const IR::Value& offset) { + if (offset.IsImmediate()) { + LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); + return ""; + } + IR::Inst* const vector{offset.InstRecursive()}; + if (!vector->AreAllArgsImmediates()) { + LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); + return ""; + } + switch (vector->NumArgs()) { + case 1: + return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32())); + case 2: + return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()), + static_cast<s32>(vector->Arg(1).U32())); + default: + throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs()); + } +} + +std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) { + if (coord.IsImmediate()) { + ScopedRegister scoped_reg(ctx.reg_alloc); + ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)}); + return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)}; + } + std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; + if (coord.InstRecursive()->HasUses()) { + // Move non-dead coords to a separate register, although this should never happen because + // vectors are only assembled for immediate texture instructions + ctx.Add("MOV.F RC,{};", coord_vec); + coord_vec = "RC"; + } + return {std::move(coord_vec), ScopedRegister{}}; +} + +void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { + if (!sparse_inst) { + return; + } + const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; + ctx.Add("MOV.S {},-1;" + "MOV.S {}(NONRESIDENT),0;", + sparse_ret, sparse_ret); +} + +std::string_view FormatStorage(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return "U"; + case ImageFormat::R8_UINT: + return "U8"; + case ImageFormat::R8_SINT: + return "S8"; + case ImageFormat::R16_UINT: + return "U16"; + case ImageFormat::R16_SINT: + return "S16"; + case ImageFormat::R32_UINT: + return "U32"; + case ImageFormat::R32G32_UINT: + return "U32X2"; + case ImageFormat::R32G32B32A32_UINT: + return "U32X4"; + } + throw InvalidArgument("Invalid image format {}", format); +} + +template <typename T> +void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value, + std::string_view op) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} +} // Anonymous namespace + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, Register bias_lc, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); + } else { + if (info.has_lod_clamp) { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + } + } else { + if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { + ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); + } else { + ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + } + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset) { + // Allocate early to avoid aliases + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.has_lod_clamp) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + bias_lc_vec, texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, + offset_vec); + break; + default: + throw NotImplementedException("Invalid type {} with bias and lod clamp", + info.type.Value()); + } + } else { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{}.x;" + "TXB.F{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + } + } else { + if (info.has_lod_clamp) { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, + texture, type, offset_vec); + } else { + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + } + } else { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F{} {},{},{},{}{};", + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + } else { + ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture, + type, offset_vec); + } + } + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset) { + // Allocate early to avoid aliases + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{},{}{};", + coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TXL.F{} {},{},{},{},{}{};", + staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) { + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const char comp{"xyzw"[info.gather_component]}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type, + offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg, + texture, comp, type); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref) { + // FIXME: This instruction is not working as expected + + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + std::string args; + switch (info.type) { + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArrayCube: + args = fmt::format("{},{}", coord_vec, dref_value); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture, + type); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::Buffer) { + ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else if (ms.type != Type::Void) { + ctx.Add("MOV.S {}.w,{};" + "TXFMS.F{} {},{},{},{}{};", + coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else { + ctx.Add("MOV.S {}.w,{};" + "TXF.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + ScalarS32 lod) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + ScopedRegister dpdx, dpdy; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + if (multi_component) { + // Allocate this early to avoid aliasing other registers + dpdx = ScopedRegister{ctx.reg_alloc}; + dpdy = ScopedRegister{ctx.reg_alloc}; + } + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{GradOffset(offset)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (multi_component) { + ctx.Add("MOV.F {}.x,{}.x;" + "MOV.F {}.y,{}.z;" + "MOV.F {}.x,{}.y;" + "MOV.F {}.y,{}.w;", + dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, + dpdy.reg, derivatives_vec); + if (info.has_lod_clamp) { + const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; + ctx.Add("MOV.F {}.w,{};" + "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", + dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } else { + ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } + } else { + ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, + derivatives_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); +} + +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const auto sparse_inst{PrepareSparse(inst)}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type); + StoreSparse(ctx, sparse_inst); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + Register color) { + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MIN.S32"); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MIN.U32"); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MAX.S32"); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "MAX.U32"); +} + +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32"); +} + +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32"); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "AND.U32"); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "OR.U32"); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "XOR.U32"); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coord, ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32"); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +} // namespace Shader::Backend::GLASM |