summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
diff options
context:
space:
mode:
authorbunnei <bunneidev@gmail.com>2021-07-25 20:39:04 +0200
committerGitHub <noreply@github.com>2021-07-25 20:39:04 +0200
commit98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch)
tree816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
parentMerge pull request #6699 from lat9nq/common-threads (diff)
parentshader: Support out of bound local memory reads and immediate writes (diff)
downloadyuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.gz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.bz2
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.lz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.xz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.zst
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.zip
Diffstat (limited to 'src/shader_recompiler/backend/glasm/emit_glasm_image.cpp')
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_image.cpp850
1 files changed, 850 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
new file mode 100644
index 000000000..09e3a9b82
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -0,0 +1,850 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+struct ScopedRegister {
+ ScopedRegister() = default;
+ ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{&reg_alloc_}, reg{reg_alloc->AllocReg()} {}
+
+ ~ScopedRegister() {
+ if (reg_alloc) {
+ reg_alloc->FreeReg(reg);
+ }
+ }
+
+ ScopedRegister& operator=(ScopedRegister&& rhs) noexcept {
+ if (reg_alloc) {
+ reg_alloc->FreeReg(reg);
+ }
+ reg_alloc = std::exchange(rhs.reg_alloc, nullptr);
+ reg = rhs.reg;
+ return *this;
+ }
+
+ ScopedRegister(ScopedRegister&& rhs) noexcept
+ : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {}
+
+ ScopedRegister& operator=(const ScopedRegister&) = delete;
+ ScopedRegister(const ScopedRegister&) = delete;
+
+ RegAlloc* reg_alloc{};
+ Register reg;
+};
+
+std::string Texture(EmitContext& ctx, IR::TextureInstInfo info,
+ [[maybe_unused]] const IR::Value& index) {
+ // FIXME: indexed reads
+ if (info.type == TextureType::Buffer) {
+ return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index));
+ } else {
+ return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index));
+ }
+}
+
+std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
+ [[maybe_unused]] const IR::Value& index) {
+ // FIXME: indexed reads
+ if (info.type == TextureType::Buffer) {
+ return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index));
+ } else {
+ return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index));
+ }
+}
+
+std::string_view TextureType(IR::TextureInstInfo info) {
+ if (info.is_depth) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ return "SHADOW1D";
+ case TextureType::ColorArray1D:
+ return "SHADOWARRAY1D";
+ case TextureType::Color2D:
+ return "SHADOW2D";
+ case TextureType::ColorArray2D:
+ return "SHADOWARRAY2D";
+ case TextureType::Color3D:
+ return "SHADOW3D";
+ case TextureType::ColorCube:
+ return "SHADOWCUBE";
+ case TextureType::ColorArrayCube:
+ return "SHADOWARRAYCUBE";
+ case TextureType::Buffer:
+ return "SHADOWBUFFER";
+ }
+ } else {
+ switch (info.type) {
+ case TextureType::Color1D:
+ return "1D";
+ case TextureType::ColorArray1D:
+ return "ARRAY1D";
+ case TextureType::Color2D:
+ return "2D";
+ case TextureType::ColorArray2D:
+ return "ARRAY2D";
+ case TextureType::Color3D:
+ return "3D";
+ case TextureType::ColorCube:
+ return "CUBE";
+ case TextureType::ColorArrayCube:
+ return "ARRAYCUBE";
+ case TextureType::Buffer:
+ return "BUFFER";
+ }
+ }
+ throw InvalidArgument("Invalid texture type {}", info.type.Value());
+}
+
+std::string Offset(EmitContext& ctx, const IR::Value& offset) {
+ if (offset.IsEmpty()) {
+ return "";
+ }
+ return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)});
+}
+
+std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx,
+ const IR::Value& offset2) {
+ if (offset2.IsEmpty()) {
+ return {};
+ } else {
+ return {ctx.reg_alloc, ctx.reg_alloc};
+ }
+}
+
+void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1,
+ const IR::Value& offset2) {
+ const Register offsets_a{ctx.reg_alloc.Consume(offset1)};
+ const Register offsets_b{ctx.reg_alloc.Consume(offset2)};
+ // Input swizzle: [XYXY] [XYXY]
+ // Output swizzle: [XXXX] [YYYY]
+ ctx.Add("MOV {}.x,{}.x;"
+ "MOV {}.y,{}.z;"
+ "MOV {}.z,{}.x;"
+ "MOV {}.w,{}.z;"
+ "MOV {}.x,{}.y;"
+ "MOV {}.y,{}.w;"
+ "MOV {}.z,{}.y;"
+ "MOV {}.w,{}.w;",
+ off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y,
+ offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b);
+}
+
+std::string GradOffset(const IR::Value& offset) {
+ if (offset.IsImmediate()) {
+ LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate");
+ return "";
+ }
+ IR::Inst* const vector{offset.InstRecursive()};
+ if (!vector->AreAllArgsImmediates()) {
+ LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate");
+ return "";
+ }
+ switch (vector->NumArgs()) {
+ case 1:
+ return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32()));
+ case 2:
+ return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()),
+ static_cast<s32>(vector->Arg(1).U32()));
+ default:
+ throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs());
+ }
+}
+
+std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) {
+ if (coord.IsImmediate()) {
+ ScopedRegister scoped_reg(ctx.reg_alloc);
+ ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)});
+ return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)};
+ }
+ std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})};
+ if (coord.InstRecursive()->HasUses()) {
+ // Move non-dead coords to a separate register, although this should never happen because
+ // vectors are only assembled for immediate texture instructions
+ ctx.Add("MOV.F RC,{};", coord_vec);
+ coord_vec = "RC";
+ }
+ return {std::move(coord_vec), ScopedRegister{}};
+}
+
+void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
+ if (!sparse_inst) {
+ return;
+ }
+ const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)};
+ ctx.Add("MOV.S {},-1;"
+ "MOV.S {}(NONRESIDENT),0;",
+ sparse_ret, sparse_ret);
+}
+
+std::string_view FormatStorage(ImageFormat format) {
+ switch (format) {
+ case ImageFormat::Typeless:
+ return "U";
+ case ImageFormat::R8_UINT:
+ return "U8";
+ case ImageFormat::R8_SINT:
+ return "S8";
+ case ImageFormat::R16_UINT:
+ return "U16";
+ case ImageFormat::R16_SINT:
+ return "S16";
+ case ImageFormat::R32_UINT:
+ return "U32";
+ case ImageFormat::R32G32_UINT:
+ return "U32X2";
+ case ImageFormat::R32G32B32A32_UINT:
+ return "U32X4";
+ }
+ throw InvalidArgument("Invalid image format {}", format);
+}
+
+template <typename T>
+void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value,
+ std::string_view op) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
+}
+
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+ const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+ if (sparse_inst) {
+ sparse_inst->Invalidate();
+ }
+ return sparse_inst;
+}
+} // Anonymous namespace
+
+void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.has_bias) {
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec,
+ bias_lc, texture, offset_vec);
+ } else {
+ if (info.has_lod_clamp) {
+ ctx.Add("MOV.F {}.w,{}.x;"
+ "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
+ coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type,
+ offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.w,{}.x;"
+ "TXB.F{} {},{},{},{}{};",
+ coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ }
+ } else {
+ if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec,
+ bias_lc, texture, offset_vec);
+ } else {
+ ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture,
+ type, offset_vec);
+ }
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.type == TextureType::ColorArrayCube) {
+ ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture,
+ offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{}{};",
+ coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& bias_lc, const IR::Value& offset) {
+ // Allocate early to avoid aliases
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister staging;
+ if (info.type == TextureType::ColorArrayCube) {
+ staging = ScopedRegister{ctx.reg_alloc};
+ }
+ const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
+ const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.has_bias) {
+ if (info.has_lod_clamp) {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{}.x;"
+ "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};",
+ coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
+ bias_lc_vec, texture, type, offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXB.F.LODCLAMP{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
+ offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {} with bias and lod clamp",
+ info.type.Value());
+ }
+ } else {
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{}.x;"
+ "TXB.F{} {},{},{},{}{};",
+ coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec,
+ texture, type, offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXB.F{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{}.x;"
+ "TXB.F{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ }
+ } else {
+ if (info.has_lod_clamp) {
+ if (info.type != TextureType::ColorArrayCube) {
+ const bool w_swizzle{info.type == TextureType::ColorArray2D ||
+ info.type == TextureType::ColorCube};
+ const char dref_swizzle{w_swizzle ? 'w' : 'z'};
+ ctx.Add("MOV.F {}.{},{};"
+ "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
+ coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec,
+ texture, type, offset_vec);
+ } else {
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{};"
+ "TEX.F.LODCLAMP{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ }
+ } else {
+ if (info.type != TextureType::ColorArrayCube) {
+ const bool w_swizzle{info.type == TextureType::ColorArray2D ||
+ info.type == TextureType::ColorCube};
+ const char dref_swizzle{w_swizzle ? 'w' : 'z'};
+ ctx.Add("MOV.F {}.{},{};"
+ "TEX.F{} {},{},{},{}{};",
+ coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture,
+ type, offset_vec);
+ } else {
+ ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture,
+ type, offset_vec);
+ }
+ }
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& dref,
+ const IR::Value& lod, const IR::Value& offset) {
+ // Allocate early to avoid aliases
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister staging;
+ if (info.type == TextureType::ColorArrayCube) {
+ staging = ScopedRegister{ctx.reg_alloc};
+ }
+ const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
+ const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ switch (info.type) {
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};"
+ "MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{}{};",
+ coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};"
+ "TXL.F{} {},{},{},{},{}{};",
+ coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type,
+ offset_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ ctx.Add("MOV.F {}.x,{};"
+ "MOV.F {}.y,{};"
+ "TXL.F{} {},{},{},{},{}{};",
+ staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec,
+ staging.reg, texture, type, offset_vec);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) {
+ // Allocate offsets early so they don't overwrite any consumed register
+ const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const char comp{"xyzw"[info.gather_component]};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (offset2.IsEmpty()) {
+ const std::string offset_vec{Offset(ctx, offset)};
+ ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type,
+ offset_vec);
+ } else {
+ SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
+ ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg,
+ texture, comp, type);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2,
+ const IR::Value& dref) {
+ // FIXME: This instruction is not working as expected
+
+ // Allocate offsets early so they don't overwrite any consumed register
+ const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ std::string args;
+ switch (info.type) {
+ case TextureType::Color2D:
+ ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value);
+ args = fmt::to_string(coord_vec);
+ break;
+ case TextureType::ColorArray2D:
+ case TextureType::ColorCube:
+ ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value);
+ args = fmt::to_string(coord_vec);
+ break;
+ case TextureType::ColorArrayCube:
+ args = fmt::format("{},{}", coord_vec, dref_value);
+ break;
+ default:
+ throw NotImplementedException("Invalid type {}", info.type.Value());
+ }
+ if (offset2.IsEmpty()) {
+ const std::string offset_vec{Offset(ctx, offset)};
+ ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec);
+ } else {
+ SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2);
+ ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture,
+ type);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{Offset(ctx, offset)};
+ const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (info.type == TextureType::Buffer) {
+ ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ } else if (ms.type != Type::Void) {
+ ctx.Add("MOV.S {}.w,{};"
+ "TXFMS.F{} {},{},{},{}{};",
+ coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ } else {
+ ctx.Add("MOV.S {}.w,{};"
+ "TXF.F{} {},{},{},{}{};",
+ coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ ScalarS32 lod) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string_view type{TextureType(info)};
+ ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type);
+}
+
+void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string_view type{TextureType(info)};
+ ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type);
+}
+
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ const IR::Value& coord, const IR::Value& derivatives,
+ const IR::Value& offset, const IR::Value& lod_clamp) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ ScopedRegister dpdx, dpdy;
+ const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+ if (multi_component) {
+ // Allocate this early to avoid aliasing other registers
+ dpdx = ScopedRegister{ctx.reg_alloc};
+ dpdy = ScopedRegister{ctx.reg_alloc};
+ }
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string texture{Texture(ctx, info, index)};
+ const std::string offset_vec{GradOffset(offset)};
+ const Register coord_vec{ctx.reg_alloc.Consume(coord)};
+ const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ if (multi_component) {
+ ctx.Add("MOV.F {}.x,{}.x;"
+ "MOV.F {}.y,{}.z;"
+ "MOV.F {}.x,{}.y;"
+ "MOV.F {}.y,{}.w;",
+ dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
+ dpdy.reg, derivatives_vec);
+ if (info.has_lod_clamp) {
+ const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
+ ctx.Add("MOV.F {}.w,{};"
+ "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
+ dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+ texture, type, offset_vec);
+ } else {
+ ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
+ texture, type, offset_vec);
+ }
+ } else {
+ ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
+ derivatives_vec, texture, type, offset_vec);
+ }
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const auto sparse_inst{PrepareSparse(inst)};
+ const std::string_view format{FormatStorage(info.image_format)};
+ const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ const Register ret{ctx.reg_alloc.Define(inst)};
+ ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type);
+ StoreSparse(ctx, sparse_inst);
+}
+
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ Register color) {
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const std::string_view format{FormatStorage(info.image_format)};
+ const std::string_view type{TextureType(info)};
+ const std::string image{Image(ctx, info, index)};
+ ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
+}
+
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
+}
+
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MIN.S32");
+}
+
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MIN.U32");
+}
+
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarS32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MAX.S32");
+}
+
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "MAX.U32");
+}
+
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32");
+}
+
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32");
+}
+
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "AND.U32");
+}
+
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "OR.U32");
+}
+
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+ ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "XOR.U32");
+}
+
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+ Register coord, ScalarU32 value) {
+ ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32");
+}
+
+void EmitBindlessImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGather(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGatherDref(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageFetch(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageQueryDimensions(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageQueryLod(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageGradient(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageRead(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageWrite(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicIAdd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicSMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicUMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicSMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicUMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicInc32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicDec32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicAnd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicOr32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicXor32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBindlessImageAtomicExchange32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicIAdd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicSMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicUMin32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicSMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicUMax32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicInc32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicDec32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicAnd32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicOr32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicXor32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+void EmitBoundImageAtomicExchange32(EmitContext&) {
+ throw LogicError("Unreachable instruction");
+}
+
+} // namespace Shader::Backend::GLASM