diff options
author | FernandoS27 <fsahmkow27@gmail.com> | 2021-04-17 11:56:45 +0200 |
---|---|---|
committer | ameerj <52414509+ameerj@users.noreply.github.com> | 2021-07-23 03:51:28 +0200 |
commit | 04c459fc8d99b41fa8a03c49523599e9bf797f9d (patch) | |
tree | 199934b3ef85a5affbe9ef115e9b0a1085851b36 /src/shader_recompiler/ir_opt | |
parent | shader: Implement SR_Y_DIRECTION (diff) | |
download | yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.tar yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.tar.gz yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.tar.bz2 yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.tar.lz yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.tar.xz yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.tar.zst yuzu-04c459fc8d99b41fa8a03c49523599e9bf797f9d.zip |
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r-- | src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 4 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 67 |
2 files changed, 71 insertions, 0 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60b7d3a36..e5688667b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -530,6 +530,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::DPdxFine: + case IR::Opcode::DPdyFine: + info.uses_derivates = true; + break; case IR::Opcode::LoadStorageU8: case IR::Opcode::LoadStorageS8: case IR::Opcode::WriteStorageU8: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ee73b5b60..983fb20ab 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -412,6 +412,71 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser inst.ReplaceUsesWith(*result); } +IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { + if (value.IsImmediate()) { + return value; + } + IR::Inst* const inst{value.InstRecursive()}; + if (inst->GetOpcode() == expected_cast) { + return inst->Arg(0).Resolve(); + } + return value; +} + +void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { + const IR::Value swizzle{inst.Arg(2)}; + if (!swizzle.IsImmediate()) { + return; + } + + const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; + const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; + + if (value_1.IsImmediate()) { + return; + } + + const u32 swizzle_value{swizzle.U32()}; + if (swizzle_value != 0x99 && swizzle_value != 0xA5) { + return; + } + + IR::Inst* const inst2{value_1.InstRecursive()}; + if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { + return; + } + const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; + if (value_2 != value_3) { + return; + } + + const IR::Value index{inst2->Arg(1)}; + const IR::Value clamp{inst2->Arg(2)}; + const IR::Value segmentation_mask{inst2->Arg(3)}; + + if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { + return; + } + + if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { + return; + } + + if (swizzle_value == 0x99) { + // DPdxFine + if (index.U32() == 1) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{value_2})); + } + } else if (swizzle_value == 0xA5) { + // DPdyFine + if (index.U32() == 2) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{value_2})); + } + } +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: @@ -532,6 +597,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::CompositeExtractF16x4: return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, IR::Opcode::CompositeInsertF16x4); + case IR::Opcode::FSwizzleAdd: + return FoldFSwizzleAdd(block, inst); default: break; } |