diff options
author | bunnei <bunneidev@gmail.com> | 2018-04-02 16:04:19 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-02 16:04:19 +0200 |
commit | 3413f1f7ceef0d35f0dc867f26c185643829918b (patch) | |
tree | f16ce5af90c2872ed2f0c709f520d98f66f416d1 | |
parent | Merge pull request #293 from N00byKing/drkthm (diff) | |
parent | GPU: Use the MacroInterpreter class to execute the GPU macros instead of HLEing them. (diff) | |
download | yuzu-3413f1f7ceef0d35f0dc867f26c185643829918b.tar yuzu-3413f1f7ceef0d35f0dc867f26c185643829918b.tar.gz yuzu-3413f1f7ceef0d35f0dc867f26c185643829918b.tar.bz2 yuzu-3413f1f7ceef0d35f0dc867f26c185643829918b.tar.lz yuzu-3413f1f7ceef0d35f0dc867f26c185643829918b.tar.xz yuzu-3413f1f7ceef0d35f0dc867f26c185643829918b.tar.zst yuzu-3413f1f7ceef0d35f0dc867f26c185643829918b.zip |
-rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 113 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 29 | ||||
-rw-r--r-- | src/video_core/macro_interpreter.cpp | 257 | ||||
-rw-r--r-- | src/video_core/macro_interpreter.h | 164 |
5 files changed, 444 insertions, 121 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 841f27d7f..a710c4bc5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -11,6 +11,8 @@ add_library(video_core STATIC engines/maxwell_compute.h gpu.cpp gpu.h + macro_interpreter.cpp + macro_interpreter.h memory_manager.cpp memory_manager.h rasterizer_interface.h diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5359d21a2..124753032 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -19,35 +19,21 @@ namespace Engines { /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; -const std::unordered_map<u32, Maxwell3D::MethodInfo> Maxwell3D::method_handlers = { - {0xE1A, {"BindTextureInfoBuffer", 1, &Maxwell3D::BindTextureInfoBuffer}}, - {0xE24, {"SetShader", 5, &Maxwell3D::SetShader}}, - {0xE2A, {"BindStorageBuffer", 1, &Maxwell3D::BindStorageBuffer}}, -}; - -Maxwell3D::Maxwell3D(MemoryManager& memory_manager) : memory_manager(memory_manager) {} +Maxwell3D::Maxwell3D(MemoryManager& memory_manager) + : memory_manager(memory_manager), macro_interpreter(*this) {} void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) { uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code); } -void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) { - // TODO(Subv): Write an interpreter for the macros uploaded via registers 0x45 and 0x47 - +void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { + auto macro_code = uploaded_macros.find(method); // The requested macro must have been uploaded already. - ASSERT_MSG(uploaded_macros.find(method) != uploaded_macros.end(), "Macro %08X was not uploaded", - method); - - auto itr = method_handlers.find(method); - ASSERT_MSG(itr != method_handlers.end(), "Unhandled method call %08X", method); - - ASSERT(itr->second.arguments == parameters.size()); - - (this->*itr->second.handler)(parameters); + ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method); - // Reset the current macro and its parameters. + // Reset the current macro and execute it. executing_macro = 0; - macro_params.clear(); + macro_interpreter.Execute(macro_code->second, std::move(parameters)); } void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { @@ -77,7 +63,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { // Call the macro when there are no more parameters in the command buffer if (remaining_params == 0) { - CallMacroMethod(executing_macro, macro_params); + CallMacroMethod(executing_macro, std::move(macro_params)); } return; } @@ -193,84 +179,6 @@ void Maxwell3D::DrawArrays() { VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/); } -void Maxwell3D::BindTextureInfoBuffer(const std::vector<u32>& parameters) { - /** - * Parameters description: - * [0] = Shader stage, usually 4 for FragmentShader - */ - - u32 stage = parameters[0]; - - // Perform the same operations as the real macro code. - GPUVAddr address = static_cast<GPUVAddr>(regs.tex_info_buffers.address[stage]) << 8; - u32 size = regs.tex_info_buffers.size[stage]; - - regs.const_buffer.cb_size = size; - regs.const_buffer.cb_address_high = address >> 32; - regs.const_buffer.cb_address_low = address & 0xFFFFFFFF; -} - -void Maxwell3D::SetShader(const std::vector<u32>& parameters) { - /** - * Parameters description: - * [0] = Shader Program. - * [1] = Unknown, presumably the shader id. - * [2] = Offset to the start of the shader, after the 0x30 bytes header. - * [3] = Shader Stage. - * [4] = Const Buffer Address >> 8. - */ - auto shader_program = static_cast<Regs::ShaderProgram>(parameters[0]); - // TODO(Subv): This address is probably an offset from the CODE_ADDRESS register. - GPUVAddr address = parameters[2]; - auto shader_stage = static_cast<Regs::ShaderStage>(parameters[3]); - GPUVAddr cb_address = parameters[4] << 8; - - auto& shader = state.shader_programs[static_cast<size_t>(shader_program)]; - shader.program = shader_program; - shader.stage = shader_stage; - shader.address = address; - - // Perform the same operations as the real macro code. - // TODO(Subv): Early exit if register 0xD1C + shader_program contains the same as params[1]. - auto& shader_regs = regs.shader_config[static_cast<size_t>(shader_program)]; - shader_regs.start_id = address; - // TODO(Subv): Write params[1] to register 0xD1C + shader_program. - // TODO(Subv): Write params[2] to register 0xD22 + shader_program. - - // Note: This value is hardcoded in the macro's code. - static constexpr u32 DefaultCBSize = 0x10000; - regs.const_buffer.cb_size = DefaultCBSize; - regs.const_buffer.cb_address_high = cb_address >> 32; - regs.const_buffer.cb_address_low = cb_address & 0xFFFFFFFF; - - // Write a hardcoded 0x11 to CB_BIND, this binds the current const buffer to buffer c1[] in the - // shader. It's likely that these are the constants for the shader. - regs.cb_bind[static_cast<size_t>(shader_stage)].valid.Assign(1); - regs.cb_bind[static_cast<size_t>(shader_stage)].index.Assign(1); - - ProcessCBBind(shader_stage); -} - -void Maxwell3D::BindStorageBuffer(const std::vector<u32>& parameters) { - /** - * Parameters description: - * [0] = Buffer offset >> 2 - */ - - u32 buffer_offset = parameters[0] << 2; - - // Perform the same operations as the real macro code. - // Note: This value is hardcoded in the macro's code. - static constexpr u32 DefaultCBSize = 0x5F00; - regs.const_buffer.cb_size = DefaultCBSize; - - GPUVAddr address = regs.ssbo_info.BufferAddress(); - regs.const_buffer.cb_address_high = address >> 32; - regs.const_buffer.cb_address_low = address & 0xFFFFFFFF; - - regs.const_buffer.cb_pos = buffer_offset; -} - void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. auto& shader = state.shader_stages[static_cast<size_t>(stage)]; @@ -386,5 +294,10 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt return textures; } +u32 Maxwell3D::GetRegisterValue(u32 method) const { + ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); + return regs.reg_array[method]; +} + } // namespace Engines } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 3066bc606..98b39b2ff 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "video_core/gpu.h" +#include "video_core/macro_interpreter.h" #include "video_core/memory_manager.h" #include "video_core/textures/texture.h" @@ -498,22 +499,18 @@ public: bool enabled; }; - struct ShaderProgramInfo { - Regs::ShaderStage stage; - Regs::ShaderProgram program; - GPUVAddr address; - }; - struct ShaderStageInfo { std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers; }; std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; - std::array<ShaderProgramInfo, Regs::MaxShaderProgram> shader_programs; }; State state{}; + /// Reads a register value located at the input method address + u32 GetRegisterValue(u32 method) const; + /// Write the value to the register identified by method. void WriteReg(u32 method, u32 value, u32 remaining_params); @@ -533,6 +530,9 @@ private: /// Parameters that have been submitted to the macro call so far. std::vector<u32> macro_params; + /// Interpreter for the macro codes uploaded to the GPU. + MacroInterpreter macro_interpreter; + /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -544,7 +544,7 @@ private: * @param method Method to call * @param parameters Arguments to the method call */ - void CallMacroMethod(u32 method, const std::vector<u32>& parameters); + void CallMacroMethod(u32 method, std::vector<u32> parameters); /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); @@ -557,19 +557,6 @@ private: /// Handles a write to the VERTEX_END_GL register, triggering a draw. void DrawArrays(); - - /// Method call handlers - void BindTextureInfoBuffer(const std::vector<u32>& parameters); - void SetShader(const std::vector<u32>& parameters); - void BindStorageBuffer(const std::vector<u32>& parameters); - - struct MethodInfo { - const char* name; - u32 arguments; - void (Maxwell3D::*handler)(const std::vector<u32>& parameters); - }; - - static const std::unordered_map<u32, MethodInfo> method_handlers; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp new file mode 100644 index 000000000..993a67746 --- /dev/null +++ b/src/video_core/macro_interpreter.cpp @@ -0,0 +1,257 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro_interpreter.h" + +namespace Tegra { + +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} + +void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) { + Reset(); + registers[1] = parameters[0]; + this->parameters = std::move(parameters); + + // Execute the code until we hit an exit condition. + bool keep_executing = true; + while (keep_executing) { + keep_executing = Step(code, false); + } + + // Assert the the macro used all the input parameters + ASSERT(next_parameter_index == this->parameters.size()); +} + +void MacroInterpreter::Reset() { + registers = {}; + pc = 0; + delayed_pc = boost::none; + method_address.raw = 0; + parameters.clear(); + // The next parameter index starts at 1, because $r1 already has the value of the first + // parameter. + next_parameter_index = 1; +} + +bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { + u32 base_address = pc; + + Opcode opcode = GetOpcode(code); + pc += 4; + + // Update the program counter if we were delayed + if (delayed_pc != boost::none) { + ASSERT(is_delay_slot); + pc = *delayed_pc; + delayed_pc = boost::none; + } + + switch (opcode.operation) { + case Operation::ALU: { + u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), + GetRegister(opcode.src_b)); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::AddImmediate: { + ProcessResult(opcode.result_operation, opcode.dst, + GetRegister(opcode.src_a) + opcode.immediate); + break; + } + case Operation::ExtractInsert: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); + dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + dst |= src << opcode.bf_dst_bit; + ProcessResult(opcode.result_operation, opcode.dst, dst); + break; + } + case Operation::ExtractShiftLeftImmediate: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::ExtractShiftLeftRegister: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::Read: { + u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Operation::Branch: { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + u32 value = GetRegister(opcode.src_a); + bool taken = EvaluateBranchCondition(opcode.branch_condition, value); + if (taken) { + // Ignore the delay slot if the branch has the annul bit. + if (opcode.branch_annul) { + pc = base_address + (opcode.immediate << 2); + return true; + } + + delayed_pc = base_address + (opcode.immediate << 2); + // Execute one more instruction due to the delay slot. + return Step(code, true); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation %u", + static_cast<u32>(opcode.operation.Value())); + } + + if (opcode.is_exit) { + // Exit has a delay slot, execute the next instruction + // Note: Executing an exit during a branch delay slot will cause the instruction at the + // branch target to be executed before exiting. + Step(code, true); + return false; + } + + return true; +} + +MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const { + ASSERT((pc % sizeof(u32)) == 0); + ASSERT(pc < code.size() * sizeof(u32)); + return {code[pc / sizeof(u32)]}; +} + +u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { + switch (operation) { + case ALUOperation::Add: + return src_a + src_b; + // TODO(Subv): Implement AddWithCarry + case ALUOperation::Subtract: + return src_a - src_b; + // TODO(Subv): Implement SubtractWithBorrow + case ALUOperation::Xor: + return src_a ^ src_b; + case ALUOperation::Or: + return src_a | src_b; + case ALUOperation::And: + return src_a & src_b; + case ALUOperation::AndNot: + return src_a & ~src_b; + case ALUOperation::Nand: + return ~(src_a & src_b); + + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation %u", static_cast<u32>(operation)); + } +} + +void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) { + switch (operation) { + case ResultOperation::IgnoreAndFetch: + // Fetch parameter and ignore result. + SetRegister(reg, FetchParameter()); + break; + case ResultOperation::Move: + // Move result. + SetRegister(reg, result); + break; + case ResultOperation::MoveAndSetMethod: + // Move result and use as Method Address. + SetRegister(reg, result); + SetMethodAddress(result); + break; + case ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, FetchParameter()); + Send(result); + break; + case ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, result); + Send(result); + break; + case ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, FetchParameter()); + SetMethodAddress(result); + break; + case ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, result); + SetMethodAddress(result); + Send(FetchParameter()); + break; + case ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. + SetRegister(reg, result); + SetMethodAddress(result); + Send((result >> 12) & 0b111111); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented result operation %u", static_cast<u32>(operation)); + } +} + +u32 MacroInterpreter::FetchParameter() { + ASSERT(next_parameter_index < parameters.size()); + return parameters[next_parameter_index++]; +} + +u32 MacroInterpreter::GetRegister(u32 register_id) const { + // Register 0 is supposed to always return 0. + if (register_id == 0) + return 0; + + ASSERT(register_id < registers.size()); + return registers[register_id]; +} + +void MacroInterpreter::SetRegister(u32 register_id, u32 value) { + // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero + // register. + if (register_id == 0) + return; + + ASSERT(register_id < registers.size()); + registers[register_id] = value; +} + +void MacroInterpreter::SetMethodAddress(u32 address) { + method_address.raw = address; +} + +void MacroInterpreter::Send(u32 value) { + maxwell3d.WriteReg(method_address.address, value, 0); + // Increment the method address by the method increment. + method_address.address.Assign(method_address.address.Value() + + method_address.increment.Value()); +} + +u32 MacroInterpreter::Read(u32 method) const { + return maxwell3d.GetRegisterValue(method); +} + +bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const { + switch (cond) { + case BranchCondition::Zero: + return value == 0; + case BranchCondition::NotZero: + return value != 0; + } + UNREACHABLE(); +} + +} // namespace Tegra diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h new file mode 100644 index 000000000..a71e359d8 --- /dev/null +++ b/src/video_core/macro_interpreter.h @@ -0,0 +1,164 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <vector> +#include <boost/optional.hpp> +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Tegra { +namespace Engines { +class Maxwell3D; +} + +class MacroInterpreter final { +public: + explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); + + /** + * Executes the macro code with the specified input parameters. + * @param code The macro byte code to execute + * @param parameters The parameters of the macro + */ + void Execute(const std::vector<u32>& code, std::vector<u32> parameters); + +private: + enum class Operation : u32 { + ALU = 0, + AddImmediate = 1, + ExtractInsert = 2, + ExtractShiftLeftImmediate = 3, + ExtractShiftLeftRegister = 4, + Read = 5, + Unused = 6, // This operation doesn't seem to be a valid encoding. + Branch = 7, + }; + + enum class ALUOperation : u32 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + // Operations 4-7 don't seem to be valid encodings. + Xor = 8, + Or = 9, + And = 10, + AndNot = 11, + Nand = 12 + }; + + enum class ResultOperation : u32 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodFetchAndSend = 6, + MoveAndSetMethodSend = 7 + }; + + enum class BranchCondition : u32 { + Zero = 0, + NotZero = 1, + }; + + union Opcode { + u32 raw; + BitField<0, 3, Operation> operation; + BitField<4, 3, ResultOperation> result_operation; + BitField<4, 1, BranchCondition> branch_condition; + BitField<5, 1, u32> + branch_annul; // If set on a branch, then the branch doesn't have a delay slot. + BitField<7, 1, u32> is_exit; + BitField<8, 3, u32> dst; + BitField<11, 3, u32> src_a; + BitField<14, 3, u32> src_b; + // The signed immediate overlaps the second source operand and the alu operation. + BitField<14, 18, s32> immediate; + + BitField<17, 5, ALUOperation> alu_operation; + + // Bitfield instructions data + BitField<17, 5, u32> bf_src_bit; + BitField<22, 5, u32> bf_size; + BitField<27, 5, u32> bf_dst_bit; + + u32 GetBitfieldMask() const { + return (1 << bf_size) - 1; + } + }; + + union MethodAddress { + u32 raw; + BitField<0, 12, u32> address; + BitField<12, 6, u32> increment; + }; + + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * @param code The macro code to execute. + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(const std::vector<u32>& code, bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Opcode GetOpcode(const std::vector<u32>& code) const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); + + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. + u32 FetchParameter(); + + Engines::Maxwell3D& maxwell3d; + + u32 pc; ///< Current program counter + boost::optional<u32> + delayed_pc; ///< Program counter to execute at after the delay slot is executed. + + static constexpr size_t NumMacroRegisters = 8; + + /// General purpose macro registers. + std::array<u32, NumMacroRegisters> registers = {}; + + /// Method address to use for the next Send instruction. + MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::vector<u32> parameters; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; +}; +} // namespace Tegra |