diff options
Diffstat (limited to 'src/shader_recompiler/frontend')
8 files changed, 277 insertions, 16 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6d41442ee..d6a1d8ec2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { Inst(Opcode::WriteGlobal128, address, vector); } +U32 IREmitter::LoadLocal(const IR::U32& word_offset) { + return Inst<U32>(Opcode::LoadLocal, word_offset); +} + +void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { + Inst(Opcode::WriteLocal, word_offset, value); +} + +Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { + switch (bit_size) { + case 8: + return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); + case 16: + return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); + case 32: + return Inst(Opcode::LoadSharedU32, offset); + case 64: + return Inst(Opcode::LoadSharedU64, offset); + case 128: + return Inst(Opcode::LoadSharedU128, offset); + } + throw InvalidArgument("Invalid bit size {}", bit_size); +} + +void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { + switch (bit_size) { + case 8: + Inst(Opcode::WriteSharedU8, offset, value); + break; + case 16: + Inst(Opcode::WriteSharedU16, offset, value); + break; + case 32: + Inst(Opcode::WriteSharedU32, offset, value); + break; + case 64: + Inst(Opcode::WriteSharedU64, offset, value); + break; + case 128: + Inst(Opcode::WriteSharedU128, offset, value); + break; + default: + throw InvalidArgument("Invalid bit size {}", bit_size); + } +} + U1 IREmitter::GetZeroFromOp(const Value& op) { return Inst<U1>(Opcode::GetZeroFromOp, op); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8d50aa607..842c2bdaf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -99,6 +99,12 @@ public: void WriteGlobal64(const U64& address, const IR::Value& vector); void WriteGlobal128(const U64& address, const IR::Value& vector); + [[nodiscard]] U32 LoadLocal(const U32& word_offset); + void WriteLocal(const U32& word_offset, const U32& value); + + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); + void WriteShared(int bit_size, const U32& offset, const Value& value); + [[nodiscard]] U1 GetZeroFromOp(const Value& op); [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index be8eb4d4c..52a5e5034 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteStorage32: case Opcode::WriteStorage64: case Opcode::WriteStorage128: + case Opcode::WriteLocal: + case Opcode::WriteSharedU8: + case Opcode::WriteSharedU16: + case Opcode::WriteSharedU32: + case Opcode::WriteSharedU64: + case Opcode::WriteSharedU128: return true; default: return false; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5d7462d76..c75658328 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32, OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) +// Local memory operations +OPCODE(LoadLocal, U32, U32, ) +OPCODE(WriteLocal, Void, U32, U32, ) + +// Shared memory operations +OPCODE(LoadSharedU8, U32, U32, ) +OPCODE(LoadSharedS8, U32, U32, ) +OPCODE(LoadSharedU16, U32, U32, ) +OPCODE(LoadSharedS16, U32, U32, ) +OPCODE(LoadSharedU32, U32, U32, ) +OPCODE(LoadSharedU64, U32x2, U32, ) +OPCODE(LoadSharedU128, U32x4, U32, ) +OPCODE(WriteSharedU8, Void, U32, U32, ) +OPCODE(WriteSharedU16, Void, U32, U32, ) +OPCODE(WriteSharedU32, Void, U32, U32, ) +OPCODE(WriteSharedU64, Void, U32, U32x2, ) +OPCODE(WriteSharedU128, Void, U32, U32x4, ) + // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 0162e919c..3a37b3ab9 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -21,6 +21,8 @@ struct Program { Info info; Stage stage{}; std::array<u32, 3> workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a914a91f4..7b08f11b0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo program.blocks = VisitAST(inst_pool, block_pool, env, cfg); program.post_order_blocks = PostOrder(program.blocks); program.stage = env.ShaderStage(); + program.local_memory_size = env.LocalMemorySize(); if (program.stage == Stage::Compute) { program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); } RemoveUnreachableBlocks(program); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..68963c8ea --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -0,0 +1,197 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +IR::U32 Offset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<20, 24, u64> absolute_offset; + BitField<20, 24, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); + } else { + const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +std::pair<int, bool> GetSize(u64 insn) { + union { + u64 raw; + BitField<48, 3, Size> size; + } const encoding{insn}; + + const Size nnn = encoding.size; + switch (encoding.size) { + case Size::U8: + return {8, false}; + case Size::S8: + return {8, true}; + case Size::U16: + return {16, false}; + case Size::S16: + return {16, true}; + case Size::B32: + return {32, false}; + case Size::B64: + return {64, false}; + case Size::B128: + return {128, false}; + default: + throw NotImplementedException("Invalid size {}", encoding.size.Value()); + } +} + +IR::Reg Reg(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> reg; + } const encoding{insn}; + + return encoding.reg; +} + +IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, ir.LoadLocal(word_offset)); + for (int i = 1; i < bit_size / 32; ++i) { + X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + } + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, bit_size / 32)) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; + ir.WriteShared(128, offset, vector); + break; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 409216640..b62d8ee2a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDL(u64) { - ThrowNotImplemented(Opcode::LDL); -} - -void TranslatorVisitor::LDS(u64) { - ThrowNotImplemented(Opcode::LDS); -} - void TranslatorVisitor::LEPC(u64) { ThrowNotImplemented(Opcode::LEPC); } @@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) { ThrowNotImplemented(Opcode::ST); } -void TranslatorVisitor::STL(u64) { - ThrowNotImplemented(Opcode::STL); -} - void TranslatorVisitor::STP(u64) { ThrowNotImplemented(Opcode::STP); } -void TranslatorVisitor::STS(u64) { - ThrowNotImplemented(Opcode::STS); -} - void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } |