diff options
Diffstat (limited to 'src')
32 files changed, 545 insertions, 197 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index ea9d8f77c..0027888c7 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -134,7 +134,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _ size_t dir_end = full_path.find_last_of("/" // windows needs the : included for something like just "C:" to be considered a directory #ifdef _WIN32 - ":" + "\\:" #endif ); if (std::string::npos == dir_end) diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index ce6c5616d..f239cf0ea 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -193,11 +193,11 @@ void ARM_Unicorn::ExecuteInstructions(int num_instructions) { } Kernel::Thread* thread = Kernel::GetCurrentThread(); SaveContext(thread->context); - if (last_bkpt_hit) { + if (last_bkpt_hit || (num_instructions == 1)) { last_bkpt_hit = false; GDBStub::Break(); + GDBStub::SendTrap(thread, 5); } - GDBStub::SendTrap(thread, 5); } } diff --git a/src/core/file_sys/disk_filesystem.cpp b/src/core/file_sys/disk_filesystem.cpp index 8c6f15bb5..d248c2df4 100644 --- a/src/core/file_sys/disk_filesystem.cpp +++ b/src/core/file_sys/disk_filesystem.cpp @@ -58,11 +58,13 @@ ResultVal<std::unique_ptr<StorageBackend>> Disk_FileSystem::OpenFile(const std:: } ResultCode Disk_FileSystem::DeleteFile(const std::string& path) const { - if (!FileUtil::Exists(path)) { + std::string full_path = base_directory + path; + + if (!FileUtil::Exists(full_path)) { return ERROR_PATH_NOT_FOUND; } - FileUtil::Delete(path); + FileUtil::Delete(full_path); return RESULT_SUCCESS; } diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 938852a1a..6062de13c 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -61,10 +61,16 @@ const u32 SIGTERM = 15; const u32 MSG_WAITALL = 8; #endif -const u32 X30_REGISTER = 30; +const u32 LR_REGISTER = 30; const u32 SP_REGISTER = 31; const u32 PC_REGISTER = 32; const u32 CPSR_REGISTER = 33; +const u32 UC_ARM64_REG_Q0 = 34; +const u32 FPSCR_REGISTER = 66; + +// TODO/WiP - Used while working on support for FPU +const u32 TODO_DUMMY_REG_997 = 997; +const u32 TODO_DUMMY_REG_998 = 998; // For sample XML files see the GDB source /gdb/features // GDB also wants the l character at the start @@ -130,6 +136,8 @@ static const char* target_xml = </flags> <reg name="cpsr" bitsize="32" type="cpsr_flags"/> </feature> + <feature name="org.gnu.gdb.aarch64.fpu"> + </feature> </target> )"; @@ -144,6 +152,7 @@ static u32 latest_signal = 0; static bool memory_break = false; static Kernel::Thread* current_thread = nullptr; +static u32 current_core = 0; // Binding to a port within the reserved ports range (0-1023) requires root permissions, // so default to a port outside of that range. @@ -171,13 +180,34 @@ static std::map<u64, Breakpoint> breakpoints_execute; static std::map<u64, Breakpoint> breakpoints_read; static std::map<u64, Breakpoint> breakpoints_write; +struct Module { + std::string name; + PAddr beg; + PAddr end; +}; + +static std::vector<Module> modules; + +void RegisterModule(std::string name, PAddr beg, PAddr end, bool add_elf_ext) { + Module module; + if (add_elf_ext) { + Common::SplitPath(name, nullptr, &module.name, nullptr); + module.name += ".elf"; + } else { + module.name = std::move(name); + } + module.beg = beg; + module.end = end; + modules.push_back(std::move(module)); +} + static Kernel::Thread* FindThreadById(int id) { - for (int core = 0; core < Core::NUM_CPU_CORES; core++) { - auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList(); - for (auto thread : threads) { + for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { + const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList(); + for (auto& thread : threads) { if (thread->GetThreadId() == id) { - current_thread = thread.get(); - return current_thread; + current_core = core; + return thread.get(); } } } @@ -197,6 +227,8 @@ static u64 RegRead(int id, Kernel::Thread* thread = nullptr) { return thread->context.pc; } else if (id == CPSR_REGISTER) { return thread->context.cpsr; + } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) { + return thread->context.fpu_registers[id - UC_ARM64_REG_Q0][0]; } else { return 0; } @@ -215,6 +247,8 @@ static void RegWrite(int id, u64 val, Kernel::Thread* thread = nullptr) { thread->context.pc = val; } else if (id == CPSR_REGISTER) { thread->context.cpsr = val; + } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) { + thread->context.fpu_registers[id - (CPSR_REGISTER + 1)][0] = val; } } @@ -534,7 +568,11 @@ static void HandleQuery() { SendReply("T0"); } else if (strncmp(query, "Supported", strlen("Supported")) == 0) { // PacketSize needs to be large enough for target xml - SendReply("PacketSize=2000;qXfer:features:read+"); + std::string buffer = "PacketSize=2000;qXfer:features:read+;qXfer:threads:read+"; + if (!modules.empty()) { + buffer += ";qXfer:libraries:read+"; + } + SendReply(buffer.c_str()); } else if (strncmp(query, "Xfer:features:read:target.xml:", strlen("Xfer:features:read:target.xml:")) == 0) { SendReply(target_xml); @@ -543,9 +581,9 @@ static void HandleQuery() { SendReply(buffer.c_str()); } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) { std::string val = "m"; - for (int core = 0; core < Core::NUM_CPU_CORES; core++) { - auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList(); - for (auto thread : threads) { + for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { + const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList(); + for (const auto& thread : threads) { val += fmt::format("{:x}", thread->GetThreadId()); val += ","; } @@ -554,6 +592,31 @@ static void HandleQuery() { SendReply(val.c_str()); } else if (strncmp(query, "sThreadInfo", strlen("sThreadInfo")) == 0) { SendReply("l"); + } else if (strncmp(query, "Xfer:threads:read", strlen("Xfer:threads:read")) == 0) { + std::string buffer; + buffer += "l<?xml version=\"1.0\"?>"; + buffer += "<threads>"; + for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { + const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList(); + for (const auto& thread : threads) { + buffer += + fmt::format(R"*(<thread id="{:x}" core="{:d}" name="Thread {:x}"></thread>)*", + thread->GetThreadId(), core, thread->GetThreadId()); + } + } + buffer += "</threads>"; + SendReply(buffer.c_str()); + } else if (strncmp(query, "Xfer:libraries:read", strlen("Xfer:libraries:read")) == 0) { + std::string buffer; + buffer += "l<?xml version=\"1.0\"?>"; + buffer += "<library-list>"; + for (const auto& module : modules) { + buffer += + fmt::format(R"*("<library name = "{}"><segment address = "0x{:x}"/></library>)*", + module.name, module.beg); + } + buffer += "</library-list>"; + SendReply(buffer.c_str()); } else { SendReply(""); } @@ -561,33 +624,27 @@ static void HandleQuery() { /// Handle set thread command from gdb client. static void HandleSetThread() { - if (memcmp(command_buffer, "Hc", 2) == 0 || memcmp(command_buffer, "Hg", 2) == 0) { - int thread_id = -1; - if (command_buffer[2] != '-') { - thread_id = static_cast<int>(HexToInt( - command_buffer + 2, - command_length - 2 /*strlen(reinterpret_cast<char*>(command_buffer) + 2)*/)); - } - if (thread_id >= 1) { - current_thread = FindThreadById(thread_id); - } - if (!current_thread) { - thread_id = 1; - current_thread = FindThreadById(thread_id); - } - if (current_thread) { - SendReply("OK"); - return; - } + int thread_id = -1; + if (command_buffer[2] != '-') { + thread_id = static_cast<int>(HexToInt(command_buffer + 2, command_length - 2)); + } + if (thread_id >= 1) { + current_thread = FindThreadById(thread_id); + } + if (!current_thread) { + thread_id = 1; + current_thread = FindThreadById(thread_id); + } + if (current_thread) { + SendReply("OK"); + return; } SendReply("E01"); } /// Handle thread alive command from gdb client. static void HandleThreadAlive() { - int thread_id = static_cast<int>( - HexToInt(command_buffer + 1, - command_length - 1 /*strlen(reinterpret_cast<char*>(command_buffer) + 1)*/)); + int thread_id = static_cast<int>(HexToInt(command_buffer + 1, command_length - 1)); if (thread_id == 0) { thread_id = 1; } @@ -610,16 +667,23 @@ static void SendSignal(Kernel::Thread* thread, u32 signal, bool full = true) { latest_signal = signal; + if (!thread) { + full = false; + } + std::string buffer; if (full) { - buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};", latest_signal, PC_REGISTER, - Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER, - Common::swap64(RegRead(SP_REGISTER, thread))); + buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};{:02x}:{:016x}", latest_signal, + PC_REGISTER, Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER, + Common::swap64(RegRead(SP_REGISTER, thread)), LR_REGISTER, + Common::swap64(RegRead(LR_REGISTER, thread))); } else { - buffer = fmt::format("T{:02x};", latest_signal); + buffer = fmt::format("T{:02x}", latest_signal); } - buffer += fmt::format("thread:{:x};", thread->GetThreadId()); + if (thread) { + buffer += fmt::format(";thread:{:x};", thread->GetThreadId()); + } SendReply(buffer.c_str()); } @@ -711,8 +775,12 @@ static void ReadRegister() { LongToGdbHex(reply, RegRead(id, current_thread)); } else if (id == CPSR_REGISTER) { IntToGdbHex(reply, (u32)RegRead(id, current_thread)); + } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) { + LongToGdbHex(reply, RegRead(id, current_thread)); + } else if (id == FPSCR_REGISTER) { + LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread)); } else { - return SendReply("E01"); + LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread)); } SendReply(reinterpret_cast<char*>(reply)); @@ -729,7 +797,7 @@ static void ReadRegisters() { LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread)); } - bufptr += (32 * 16); + bufptr += 32 * 16; LongToGdbHex(bufptr, RegRead(PC_REGISTER, current_thread)); @@ -739,6 +807,16 @@ static void ReadRegisters() { bufptr += 8; + for (int reg = UC_ARM64_REG_Q0; reg <= UC_ARM64_REG_Q0 + 31; reg++) { + LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread)); + } + + bufptr += 32 * 32; + + LongToGdbHex(bufptr, RegRead(TODO_DUMMY_REG_998, current_thread)); + + bufptr += 8; + SendReply(reinterpret_cast<char*>(buffer)); } @@ -759,10 +837,17 @@ static void WriteRegister() { RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); } else if (id == CPSR_REGISTER) { RegWrite(id, GdbHexToInt(buffer_ptr), current_thread); + } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) { + RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); + } else if (id == FPSCR_REGISTER) { + RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread); } else { - return SendReply("E01"); + RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread); } + // Update Unicorn context skipping scheduler, no running threads at this point + Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context); + SendReply("OK"); } @@ -773,18 +858,25 @@ static void WriteRegisters() { if (command_buffer[0] != 'G') return SendReply("E01"); - for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) { + for (int i = 0, reg = 0; reg <= FPSCR_REGISTER; i++, reg++) { if (reg <= SP_REGISTER) { RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread); } else if (reg == PC_REGISTER) { RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread); } else if (reg == CPSR_REGISTER) { RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread); + } else if (reg >= UC_ARM64_REG_Q0 && reg < FPSCR_REGISTER) { + RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread); + } else if (reg == FPSCR_REGISTER) { + RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread); } else { UNIMPLEMENTED(); } } + // Update Unicorn context skipping scheduler, no running threads at this point + Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context); + SendReply("OK"); } @@ -806,6 +898,10 @@ static void ReadMemory() { SendReply("E01"); } + if (addr < Memory::PROCESS_IMAGE_VADDR || addr >= Memory::MAP_REGION_VADDR_END) { + return SendReply("E00"); + } + if (!Memory::IsValidVirtualAddress(addr)) { return SendReply("E00"); } @@ -840,16 +936,18 @@ static void WriteMemory() { } void Break(bool is_memory_break) { - if (!halt_loop) { - halt_loop = true; - send_trap = true; - } + send_trap = true; memory_break = is_memory_break; } /// Tell the CPU that it should perform a single step. static void Step() { + if (command_length > 1) { + RegWrite(PC_REGISTER, GdbHexToLong(command_buffer + 1), current_thread); + // Update Unicorn context skipping scheduler, no running threads at this point + Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context); + } step_loop = true; halt_loop = true; send_trap = true; @@ -1090,6 +1188,8 @@ static void Init(u16 port) { breakpoints_read.clear(); breakpoints_write.clear(); + modules.clear(); + // Start gdb server LOG_INFO(Debug_GDBStub, "Starting GDB server on port {}...", port); @@ -1192,8 +1292,12 @@ void SetCpuStepFlag(bool is_step) { void SendTrap(Kernel::Thread* thread, int trap) { if (send_trap) { + if (!halt_loop || current_thread == thread) { + current_thread = thread; + SendSignal(thread, trap); + } + halt_loop = true; send_trap = false; - SendSignal(thread, trap); } } }; // namespace GDBStub diff --git a/src/core/gdbstub/gdbstub.h b/src/core/gdbstub/gdbstub.h index f2418c9e4..a6b50c26c 100644 --- a/src/core/gdbstub/gdbstub.h +++ b/src/core/gdbstub/gdbstub.h @@ -6,6 +6,7 @@ #pragma once +#include <string> #include "common/common_types.h" #include "core/hle/kernel/thread.h" @@ -51,6 +52,9 @@ bool IsServerEnabled(); /// Returns true if there is an active socket connection. bool IsConnected(); +/// Register module. +void RegisterModule(std::string name, PAddr beg, PAddr end, bool add_elf_ext = true); + /** * Signal to the gdbstub server that it should halt CPU execution. * @@ -80,10 +84,10 @@ BreakpointAddress GetNextBreakpointFromAddress(PAddr addr, GDBStub::BreakpointTy */ bool CheckBreakpoint(PAddr addr, GDBStub::BreakpointType type); -// If set to true, the CPU will halt at the beginning of the next CPU loop. +/// If set to true, the CPU will halt at the beginning of the next CPU loop. bool GetCpuHaltFlag(); -// If set to true and the CPU is halted, the CPU will step one instruction. +/// If set to true and the CPU is halted, the CPU will step one instruction. bool GetCpuStepFlag(); /** diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index 609cdbff2..2532dd450 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -214,8 +214,8 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) { (sizeof(IPC::CommandHeader) + sizeof(IPC::HandleDescriptorHeader)) / sizeof(u32); ASSERT_MSG(!handle_descriptor_header->send_current_pid, "Sending PID is not implemented"); - ASSERT_MSG(copy_objects.size() == handle_descriptor_header->num_handles_to_copy); - ASSERT_MSG(move_objects.size() == handle_descriptor_header->num_handles_to_move); + ASSERT(copy_objects.size() == handle_descriptor_header->num_handles_to_copy); + ASSERT(move_objects.size() == handle_descriptor_header->num_handles_to_move); // We don't make a distinction between copy and move handles when translating since HLE // services don't deal with handles directly. However, the guest applications might check diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 1b4b649d8..8bf273b22 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -27,12 +27,12 @@ public: {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"}, {1, &IAudioOut::StartAudioOut, "StartAudioOut"}, {2, &IAudioOut::StopAudioOut, "StopAudioOut"}, - {3, &IAudioOut::AppendAudioOutBuffer, "AppendAudioOutBuffer"}, + {3, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBuffer"}, {4, &IAudioOut::RegisterBufferEvent, "RegisterBufferEvent"}, - {5, &IAudioOut::GetReleasedAudioOutBuffer, "GetReleasedAudioOutBuffer"}, + {5, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBuffer"}, {6, nullptr, "ContainsAudioOutBuffer"}, - {7, nullptr, "AppendAudioOutBufferAuto"}, - {8, nullptr, "GetReleasedAudioOutBufferAuto"}, + {7, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBufferAuto"}, + {8, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBufferAuto"}, {9, nullptr, "GetAudioOutBufferCount"}, {10, nullptr, "GetAudioOutPlayedSampleCount"}, {11, nullptr, "FlushAudioOutBuffers"}, @@ -96,7 +96,7 @@ private: rb.PushCopyObjects(buffer_event); } - void AppendAudioOutBuffer(Kernel::HLERequestContext& ctx) { + void AppendAudioOutBufferImpl(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); IPC::RequestParser rp{ctx}; @@ -107,7 +107,7 @@ private: rb.Push(RESULT_SUCCESS); } - void GetReleasedAudioOutBuffer(Kernel::HLERequestContext& ctx) { + void GetReleasedAudioOutBufferImpl(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); // TODO(st4rk): This is how libtransistor currently implements the @@ -163,7 +163,7 @@ private: AudioState audio_out_state; }; -void AudOutU::ListAudioOuts(Kernel::HLERequestContext& ctx) { +void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); IPC::RequestParser rp{ctx}; @@ -179,7 +179,7 @@ void AudOutU::ListAudioOuts(Kernel::HLERequestContext& ctx) { rb.Push<u32>(1); } -void AudOutU::OpenAudioOut(Kernel::HLERequestContext& ctx) { +void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); if (!audio_out_interface) { @@ -196,10 +196,10 @@ void AudOutU::OpenAudioOut(Kernel::HLERequestContext& ctx) { } AudOutU::AudOutU() : ServiceFramework("audout:u") { - static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOuts, "ListAudioOuts"}, - {1, &AudOutU::OpenAudioOut, "OpenAudioOut"}, - {2, nullptr, "ListAudioOutsAuto"}, - {3, nullptr, "OpenAudioOutAuto"}}; + static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"}, + {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"}, + {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"}, + {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}}; RegisterHandlers(functions); } diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h index 1f9bb9bcf..847d86aa6 100644 --- a/src/core/hle/service/audio/audout_u.h +++ b/src/core/hle/service/audio/audout_u.h @@ -22,8 +22,8 @@ public: private: std::shared_ptr<IAudioOut> audio_out_interface; - void ListAudioOuts(Kernel::HLERequestContext& ctx); - void OpenAudioOut(Kernel::HLERequestContext& ctx); + void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); + void OpenAudioOutImpl(Kernel::HLERequestContext& ctx); enum class PcmFormat : u32 { Invalid = 0, diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 2da936b27..b7f591c6d 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -47,7 +47,7 @@ public: // Start the audio event CoreTiming::ScheduleEvent(audio_ticks, audio_event); - voice_status_list.reserve(worker_params.voice_count); + voice_status_list.resize(worker_params.voice_count); } ~IAudioRenderer() { CoreTiming::UnscheduleEvent(audio_event, 0); @@ -87,8 +87,6 @@ private: memory_pool[i].state = MemoryPoolStates::Attached; else if (mem_pool_info[i].pool_state == MemoryPoolStates::RequestDetach) memory_pool[i].state = MemoryPoolStates::Detached; - else - memory_pool[i].state = mem_pool_info[i].pool_state; } std::memcpy(output.data() + sizeof(UpdateDataHeader), memory_pool.data(), response_data.memory_pools_size); @@ -183,7 +181,9 @@ private: behavior_size = 0xb0; memory_pools_size = (config.effect_count + (config.voice_count * 4)) * 0x10; voices_size = config.voice_count * 0x10; + voice_resource_size = 0x0; effects_size = config.effect_count * 0x10; + mixes_size = 0x0; sinks_size = config.sink_count * 0x20; performance_manager_size = 0x10; total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 54a151c26..0d951084b 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -148,6 +148,24 @@ private: LOG_DEBUG(Service_NIFM, "called"); } + void IsWirelessCommunicationEnabled(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_NIFM, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u8>(0); + } + void IsEthernetCommunicationEnabled(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_NIFM, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u8>(0); + } + void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_NIFM, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u8>(0); + } }; IGeneralService::IGeneralService() : ServiceFramework("IGeneralService") { @@ -167,11 +185,11 @@ IGeneralService::IGeneralService() : ServiceFramework("IGeneralService") { {14, &IGeneralService::CreateTemporaryNetworkProfile, "CreateTemporaryNetworkProfile"}, {15, nullptr, "GetCurrentIpConfigInfo"}, {16, nullptr, "SetWirelessCommunicationEnabled"}, - {17, nullptr, "IsWirelessCommunicationEnabled"}, + {17, &IGeneralService::IsWirelessCommunicationEnabled, "IsWirelessCommunicationEnabled"}, {18, nullptr, "GetInternetConnectionStatus"}, {19, nullptr, "SetEthernetCommunicationEnabled"}, - {20, nullptr, "IsEthernetCommunicationEnabled"}, - {21, nullptr, "IsAnyInternetRequestAccepted"}, + {20, &IGeneralService::IsEthernetCommunicationEnabled, "IsEthernetCommunicationEnabled"}, + {21, &IGeneralService::IsAnyInternetRequestAccepted, "IsAnyInternetRequestAccepted"}, {22, nullptr, "IsAnyForegroundRequestAccepted"}, {23, nullptr, "PutToSleep"}, {24, nullptr, "WakeUp"}, diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index 32648bdd9..6aa1e2511 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -19,10 +19,9 @@ void BSD::RegisterClient(Kernel::HLERequestContext& ctx) { void BSD::StartMonitoring(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 3}; + IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); - rb.Push<u32>(0); // bsd errno } void BSD::Socket(Kernel::HLERequestContext& ctx) { diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index eb7feb617..5fdb1d289 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -9,6 +9,7 @@ #include "common/logging/log.h" #include "common/string_util.h" #include "core/file_sys/romfs_factory.h" +#include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/service/filesystem/filesystem.h" @@ -133,6 +134,8 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load( next_load_addr = AppLoader_NSO::LoadModule(path, load_addr); if (next_load_addr) { LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr); + // Register module with GDBStub + GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false); } else { next_load_addr = load_addr; } diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp index da064f8e3..0fd930ae2 100644 --- a/src/core/loader/nca.cpp +++ b/src/core/loader/nca.cpp @@ -7,10 +7,12 @@ #include "common/common_funcs.h" #include "common/file_util.h" #include "common/logging/log.h" +#include "common/string_util.h" #include "common/swap.h" #include "core/core.h" #include "core/file_sys/program_metadata.h" #include "core/file_sys/romfs_factory.h" +#include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/service/filesystem/filesystem.h" @@ -259,6 +261,8 @@ ResultStatus AppLoader_NCA::Load(Kernel::SharedPtr<Kernel::Process>& process) { next_load_addr = AppLoader_NSO::LoadModule(module, nca->GetExeFsFile(module), load_addr); if (next_load_addr) { LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr); + // Register module with GDBStub + GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false); } else { next_load_addr = load_addr; } diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 3853cfa1a..4d7c69a22 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -9,6 +9,7 @@ #include "common/logging/log.h" #include "common/swap.h" #include "core/core.h" +#include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/loader/nro.h" @@ -115,6 +116,9 @@ bool AppLoader_NRO::LoadNro(const std::string& path, VAddr load_base) { codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image)); Core::CurrentProcess()->LoadModule(codeset, load_base); + // Register module with GDBStub + GDBStub::RegisterModule(codeset->name, load_base, load_base); + return true; } diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 7f84e4b1b..1c629e21f 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -10,6 +10,7 @@ #include "common/logging/log.h" #include "common/swap.h" #include "core/core.h" +#include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/loader/nso.h" @@ -147,6 +148,9 @@ VAddr AppLoader_NSO::LoadModule(const std::string& name, const std::vector<u8>& codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image)); Core::CurrentProcess()->LoadModule(codeset, load_base); + // Register module with GDBStub + GDBStub::RegisterModule(codeset->name, load_base, load_base); + return load_base + image_size; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 3bca16364..dfbf80abd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -398,27 +398,6 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } -bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const { - // The Vertex stage is always enabled. - if (stage == Regs::ShaderStage::Vertex) - return true; - - switch (stage) { - case Regs::ShaderStage::TesselationControl: - return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationControl)] - .enable != 0; - case Regs::ShaderStage::TesselationEval: - return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationEval)] - .enable != 0; - case Regs::ShaderStage::Geometry: - return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Geometry)].enable != 0; - case Regs::ShaderStage::Fragment: - return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Fragment)].enable != 0; - } - - UNREACHABLE(); -} - void Maxwell3D::ProcessClearBuffers() { ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && regs.clear_buffers.R == regs.clear_buffers.B && diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 5a7cf0107..6f0170ff7 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -379,6 +379,14 @@ public: } }; + bool IsShaderConfigEnabled(size_t index) const { + // The VertexB is always enabled. + if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) { + return true; + } + return shader_config[index].enable != 0; + } + union { struct { INSERT_PADDING_WORDS(0x45); @@ -780,9 +788,6 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const; - /// Returns whether the specified shader stage is enabled or not. - bool IsShaderStageEnabled(Regs::ShaderStage stage) const; - private: std::unordered_map<u32, std::vector<u32>> uploaded_macros; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 2bc1782ad..65fa1495f 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -142,6 +142,7 @@ enum class PredCondition : u64 { GreaterThan = 4, NotEqual = 5, GreaterEqual = 6, + LessThanWithNan = 9, NotEqualWithNan = 13, // TODO(Subv): Other condition types }; @@ -201,6 +202,11 @@ enum class IMinMaxExchange : u64 { XHi = 3, }; +enum class FlowCondition : u64 { + Always = 0xF, + Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -298,6 +304,13 @@ union Instruction { } iadd32i; union { + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<56, 1, u64> negate_a; + BitField<57, 1, u64> abs_b; + } fadd32i; + + union { BitField<20, 8, u64> shift_position; BitField<28, 8, u64> shift_length; BitField<48, 1, u64> negate_b; @@ -309,6 +322,10 @@ union Instruction { } bfe; union { + BitField<0, 5, FlowCondition> cond; + } flow; + + union { BitField<48, 1, u64> negate_b; BitField<49, 1, u64> negate_c; } ffma; @@ -487,6 +504,7 @@ public: FADD_C, FADD_R, FADD_IMM, + FADD32I, FMUL_C, FMUL_R, FMUL_IMM, @@ -679,13 +697,14 @@ private: INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("001100101-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), + INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), + INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ea138d402..eecbc5ff0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -15,6 +15,7 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" +#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" @@ -22,6 +23,7 @@ #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/video_core.h" using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PixelFormat = SurfaceParams::PixelFormat; @@ -181,6 +183,19 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, return {array_ptr, buffer_offset}; } +static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) { + auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); + + // Fetch program code from memory + GLShader::ProgramCode program_code; + auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; + const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; + Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + + return program_code; +} + void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { // Helper function for uploading uniform data const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { @@ -193,26 +208,23 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { }; auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); - ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. u32 current_constbuffer_bindpoint = uniform_buffers.size(); u32 current_texture_bindpoint = 0; - for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; - const auto& stage = index - 1; // Stage indices are 0 - 5 - - const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage)); - // Skip stages that are not enabled - if (!is_enabled) { + if (!gpu.regs.IsShaderConfigEnabled(index)) { continue; } + const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 + GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu.state.shader_stages[stage]); std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); @@ -228,16 +240,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { buffer_ptr += sizeof(GLShader::MaxwellUniformData); buffer_offset += sizeof(GLShader::MaxwellUniformData); - // Fetch program code from memory - GLShader::ProgramCode program_code; - const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; - const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; - Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); - GLShader::ShaderSetup setup{std::move(program_code)}; - + GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; GLShader::ShaderEntries shader_resources; switch (program) { + case Maxwell::ShaderProgram::VertexA: { + // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. + // Conventional HW does not support this, so we combine VertexA and VertexB into one + // stage here. + setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB)); + GLShader::MaxwellVSConfig vs_config{setup}; + shader_resources = + shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + break; + } + case Maxwell::ShaderProgram::VertexB: { GLShader::MaxwellVSConfig vs_config{setup}; shader_resources = @@ -268,6 +285,12 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, current_texture_bindpoint, shader_resources.texture_samplers); + + // When VertexA is enabled, we have dual vertex shaders + if (program == Maxwell::ShaderProgram::VertexA) { + // VertexB was combined with VertexA, so we skip the VertexB iteration + index++; + } } shader_program_manager->UseTrivialGeometryShader(); @@ -301,9 +324,6 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c bool using_depth_fb) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - // Sync the depth test state before configuring the framebuffer surfaces. - SyncDepthTestState(); - // TODO(bunnei): Implement this const bool has_stencil = false; @@ -368,11 +388,20 @@ void RasterizerOpenGL::Clear() { if (regs.clear_buffers.Z) { clear_mask |= GL_DEPTH_BUFFER_BIT; use_depth_fb = true; + + // Always enable the depth write when clearing the depth buffer. The depth write mask is + // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true. + state.depth.test_enabled = true; + state.depth.write_mask = GL_TRUE; + state.depth.test_func = GL_ALWAYS; + state.Apply(); } if (clear_mask == 0) return; + ScopeAcquireGLContext acquire_context; + auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(use_color_fb, use_depth_fb); @@ -399,9 +428,12 @@ void RasterizerOpenGL::DrawArrays() { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + ScopeAcquireGLContext acquire_context; + auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(true, regs.zeta.Address() != 0); + SyncDepthTestState(); SyncBlendState(); SyncCullMode(); @@ -605,9 +637,6 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr auto& gpu = Core::System::GetInstance().GPU(); auto& maxwell3d = gpu.Get3DEngine(); - ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage), - "Attempted to upload constbuffer of disabled shader stage"); - // Reset all buffer draw state for this stage. for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { buffer.bindpoint = 0; @@ -674,9 +703,6 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, auto& gpu = Core::System::GetInstance().GPU(); auto& maxwell3d = gpu.Get3DEngine(); - ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage), - "Attempted to upload textures of disabled shader stage"); - ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), "Exceeded the number of active textures."); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 323ff7408..c171c4c5b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -105,6 +105,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8 // DepthStencil formats {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, @@ -112,6 +113,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, false}, // S8Z24 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, + false}, // Z16 }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { @@ -194,8 +197,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::Z24S8>, + MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::Z16>, }; static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), @@ -215,10 +219,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::ABGR8>, + nullptr, + MortonCopy<false, PixelFormat::G8R8>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32F>, + MortonCopy<false, PixelFormat::Z16>, }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -271,9 +277,10 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { S8Z24 input_pixel{}; Z24S8 output_pixel{}; + const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; for (size_t y = 0; y < height; ++y) { for (size_t x = 0; x < width; ++x) { - const size_t offset{y * width + x}; + const size_t offset{bpp * (y * width + x)}; std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); output_pixel.s8.Assign(input_pixel.s8); output_pixel.z24.Assign(input_pixel.z24); @@ -281,6 +288,19 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { } } } + +static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { + const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)}; + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + const size_t offset{bpp * (y * width + x)}; + const u8 temp{data[offset]}; + data[offset] = data[offset + 1]; + data[offset + 1] = temp; + } + } +} + /** * Helper function to perform software conversion (as needed) when loading a buffer from Switch * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with @@ -301,6 +321,11 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. ConvertS8Z24ToZ24S8(data, width, height); break; + + case PixelFormat::G8R8: + // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8. + ConvertG8R8ToR8G8(data, width, height); + break; } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 1bedae992..718c45ce1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -37,13 +37,15 @@ struct SurfaceParams { DXN1 = 11, // This is also known as BC4 BC7U = 12, ASTC_2D_4X4 = 13, + G8R8 = 14, MaxColorFormat, // DepthStencil formats - Z24S8 = 14, - S8Z24 = 15, - Z32F = 16, + Z24S8 = 15, + S8Z24 = 16, + Z32F = 17, + Z16 = 18, MaxDepthStencilFormat, @@ -95,9 +97,11 @@ struct SurfaceParams { 4, // DXN1 4, // BC7U 4, // ASTC_2D_4X4 + 1, // G8R8 1, // Z24S8 1, // S8Z24 1, // Z32F + 1, // Z16 }}; ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); @@ -123,9 +127,11 @@ struct SurfaceParams { 64, // DXN1 128, // BC7U 32, // ASTC_2D_4X4 + 16, // G8R8 32, // Z24S8 32, // S8Z24 32, // Z32F + 16, // Z16 }}; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -143,6 +149,8 @@ struct SurfaceParams { return PixelFormat::Z24S8; case Tegra::DepthFormat::Z32_FLOAT: return PixelFormat::Z32F; + case Tegra::DepthFormat::Z16_UNORM: + return PixelFormat::Z16; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -181,6 +189,8 @@ struct SurfaceParams { return PixelFormat::A1B5G5R5; case Tegra::Texture::TextureFormat::R8: return PixelFormat::R8; + case Tegra::Texture::TextureFormat::G8R8: + return PixelFormat::G8R8; case Tegra::Texture::TextureFormat::R16_G16_B16_A16: return PixelFormat::RGBA16F; case Tegra::Texture::TextureFormat::BF10GF11RF11: @@ -218,6 +228,8 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::A1B5G5R5; case PixelFormat::R8: return Tegra::Texture::TextureFormat::R8; + case PixelFormat::G8R8: + return Tegra::Texture::TextureFormat::G8R8; case PixelFormat::RGBA16F: return Tegra::Texture::TextureFormat::R16_G16_B16_A16; case PixelFormat::R11FG11FB10F: @@ -249,6 +261,8 @@ struct SurfaceParams { return Tegra::DepthFormat::Z24_S8_UNORM; case PixelFormat::Z32F: return Tegra::DepthFormat::Z32_FLOAT; + case PixelFormat::Z16: + return Tegra::DepthFormat::Z16_UNORM; default: UNREACHABLE(); } @@ -295,6 +309,7 @@ struct SurfaceParams { static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) { switch (format) { + case Tegra::DepthFormat::Z16_UNORM: case Tegra::DepthFormat::S8_Z24_UNORM: case Tegra::DepthFormat::Z24_S8_UNORM: return ComponentType::UNorm; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 5914077e8..5fae95788 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -42,13 +42,14 @@ enum class ExitMethod { struct Subroutine { /// Generates a name suitable for GLSL source code. std::string GetName() const { - return "sub_" + std::to_string(begin) + '_' + std::to_string(end); + return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix; } - u32 begin; ///< Entry point of the subroutine. - u32 end; ///< Return point of the subroutine. - ExitMethod exit_method; ///< Exit method of the subroutine. - std::set<u32> labels; ///< Addresses refereced by JMP instructions. + u32 begin; ///< Entry point of the subroutine. + u32 end; ///< Return point of the subroutine. + const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name + ExitMethod exit_method; ///< Exit method of the subroutine. + std::set<u32> labels; ///< Addresses refereced by JMP instructions. bool operator<(const Subroutine& rhs) const { return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); @@ -58,11 +59,11 @@ struct Subroutine { /// Analyzes shader code and produces a set of subroutines. class ControlFlowAnalyzer { public: - ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) + ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix) : program_code(program_code) { // Recursively finds all subroutines. - const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); + const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix); if (program_main.exit_method != ExitMethod::AlwaysEnd) throw DecompileFail("Program does not always end"); } @@ -77,12 +78,12 @@ private: std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; /// Adds and analyzes a new subroutine if it is not added yet. - const Subroutine& AddSubroutine(u32 begin, u32 end) { - auto iter = subroutines.find(Subroutine{begin, end}); + const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) { + auto iter = subroutines.find(Subroutine{begin, end, suffix}); if (iter != subroutines.end()) return *iter; - Subroutine subroutine{begin, end}; + Subroutine subroutine{begin, end, suffix}; subroutine.exit_method = Scan(begin, end, subroutine.labels); if (subroutine.exit_method == ExitMethod::Undetermined) throw DecompileFail("Recursive function detected"); @@ -191,7 +192,8 @@ public: UnsignedInteger, }; - GLSLRegister(size_t index, ShaderWriter& shader) : index{index}, shader{shader} {} + GLSLRegister(size_t index, ShaderWriter& shader, const std::string& suffix) + : index{index}, shader{shader}, suffix{suffix} {} /// Gets the GLSL type string for a register static std::string GetTypeString(Type type) { @@ -216,7 +218,7 @@ public: /// Returns a GLSL string representing the current state of the register const std::string GetActiveString() { declr_type.insert(active_type); - return GetPrefixString(active_type) + std::to_string(index); + return GetPrefixString(active_type) + std::to_string(index) + '_' + suffix; } /// Returns true if the active type is a float @@ -251,6 +253,7 @@ private: ShaderWriter& shader; Type active_type{Type::Float}; std::set<Type> declr_type; + const std::string& suffix; }; /** @@ -262,8 +265,8 @@ private: class GLSLRegisterManager { public: GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, - const Maxwell3D::Regs::ShaderStage& stage) - : shader{shader}, declarations{declarations}, stage{stage} { + const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix) + : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} { BuildRegisterList(); } @@ -430,12 +433,12 @@ public: } /// Add declarations for registers - void GenerateDeclarations() { + void GenerateDeclarations(const std::string& suffix) { for (const auto& reg : regs) { for (const auto& type : reg.DeclaredTypes()) { declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' + - GLSLRegister::GetPrefixString(type) + - std::to_string(reg.GetIndex()) + " = 0;"); + reg.GetPrefixString(type) + std::to_string(reg.GetIndex()) + + '_' + suffix + " = 0;"); } } declarations.AddNewLine(); @@ -558,7 +561,7 @@ private: /// Build the GLSL register list. void BuildRegisterList() { for (size_t index = 0; index < Register::NumRegisters; ++index) { - regs.emplace_back(index, shader); + regs.emplace_back(index, shader, suffix); } } @@ -620,16 +623,17 @@ private: std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; std::vector<SamplerEntry> used_samplers; const Maxwell3D::Regs::ShaderStage& stage; + const std::string& suffix; }; class GLSLGenerator { public: GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, - u32 main_offset, Maxwell3D::Regs::ShaderStage stage) + u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) : subroutines(subroutines), program_code(program_code), main_offset(main_offset), - stage(stage) { + stage(stage), suffix(suffix) { - Generate(); + Generate(suffix); } std::string GetShaderCode() { @@ -644,7 +648,7 @@ public: private: /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { - auto iter = subroutines.find(Subroutine{begin, end}); + auto iter = subroutines.find(Subroutine{begin, end, suffix}); ASSERT(iter != subroutines.end()); return *iter; } @@ -689,7 +693,7 @@ private: // Can't assign to the constant predicate. ASSERT(pred != static_cast<u64>(Pred::UnusedIndex)); - std::string variable = 'p' + std::to_string(pred); + std::string variable = 'p' + std::to_string(pred) + '_' + suffix; shader.AddLine(variable + " = " + value + ';'); declr_predicates.insert(std::move(variable)); } @@ -707,7 +711,7 @@ private: if (index == static_cast<u64>(Pred::UnusedIndex)) variable = "true"; else - variable = 'p' + std::to_string(index); + variable = 'p' + std::to_string(index) + '_' + suffix; if (negate) { return "!(" + variable + ')'; @@ -728,10 +732,10 @@ private: const std::string& op_a, const std::string& op_b) const { using Tegra::Shader::PredCondition; static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { - {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, - {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, - {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, - {PredCondition::NotEqualWithNan, "!="}, + {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, + {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, + {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, + {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="}, }; const auto& comparison{PredicateComparisonStrings.find(condition)}; @@ -739,7 +743,8 @@ private: "Unknown predicate comparison operation"); std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; - if (condition == PredCondition::NotEqualWithNan) { + if (condition == PredCondition::LessThanWithNan || + condition == PredCondition::NotEqualWithNan) { predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; } @@ -968,6 +973,29 @@ private: regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1); break; } + case OpCode::Id::FADD32I: { + std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + std::string op_b = GetImmediate32(instr); + + if (instr.fadd32i.abs_a) { + op_a = "abs(" + op_a + ')'; + } + + if (instr.fadd32i.negate_a) { + op_a = "-(" + op_a + ')'; + } + + if (instr.fadd32i.abs_b) { + op_b = "abs(" + op_b + ')'; + } + + if (instr.fadd32i.negate_b) { + op_b = "-(" + op_b + ')'; + } + + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); + break; + } } break; } @@ -1616,16 +1644,32 @@ private: shader.AddLine("color.a = " + regs.GetRegisterAsFloat(3) + ';'); } - shader.AddLine("return true;"); - if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, otherwise - // we have to account for the possibility of the condition not being met, so - // continue processing the next instruction. - offset = PROGRAM_END - 1; + switch (instr.flow.cond) { + case Tegra::Shader::FlowCondition::Always: + shader.AddLine("return true;"); + if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { + // If this is an unconditional exit then just end processing here, + // otherwise we have to account for the possibility of the condition + // not being met, so continue processing the next instruction. + offset = PROGRAM_END - 1; + } + break; + + case Tegra::Shader::FlowCondition::Fcsm_Tr: + // TODO(bunnei): What is this used for? If we assume this conditon is not + // satisifed, dual vertex shaders in Farming Simulator make more sense + LOG_CRITICAL(HW_GPU, "Skipping unknown FlowCondition::Fcsm_Tr"); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unhandled flow condition: {}", + static_cast<u32>(instr.flow.cond.Value())); + UNREACHABLE(); } break; } case OpCode::Id::KIL: { + ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); shader.AddLine("discard;"); break; } @@ -1646,8 +1690,9 @@ private: // can ignore this when generating GLSL code. break; } - case OpCode::Id::DEPBAR: - case OpCode::Id::SYNC: { + case OpCode::Id::SYNC: + ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + case OpCode::Id::DEPBAR: { // TODO(Subv): Find out if we actually have to care about these instructions or if // the GLSL compiler takes care of that for us. LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed"); @@ -1687,7 +1732,7 @@ private: return program_counter; } - void Generate() { + void Generate(const std::string& suffix) { // Add declarations for all subroutines for (const auto& subroutine : subroutines) { shader.AddLine("bool " + subroutine.GetName() + "();"); @@ -1695,7 +1740,7 @@ private: shader.AddNewLine(); // Add the main entry point - shader.AddLine("bool exec_shader() {"); + shader.AddLine("bool exec_" + suffix + "() {"); ++shader.scope; CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); --shader.scope; @@ -1758,7 +1803,7 @@ private: /// Add declarations for registers void GenerateDeclarations() { - regs.GenerateDeclarations(); + regs.GenerateDeclarations(suffix); for (const auto& pred : declr_predicates) { declarations.AddLine("bool " + pred + " = false;"); @@ -1771,27 +1816,30 @@ private: const ProgramCode& program_code; const u32 main_offset; Maxwell3D::Regs::ShaderStage stage; + const std::string& suffix; ShaderWriter shader; ShaderWriter declarations; - GLSLRegisterManager regs{shader, declarations, stage}; + GLSLRegisterManager regs{shader, declarations, stage, suffix}; // Declarations std::set<std::string> declr_predicates; }; // namespace Decompiler std::string GetCommonDeclarations() { - std::string declarations = "bool exec_shader();\n"; + std::string declarations; declarations += "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4))); + declarations += '\n'; return declarations; } boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, - Maxwell3D::Regs::ShaderStage stage) { + Maxwell3D::Regs::ShaderStage stage, + const std::string& suffix) { try { - auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); - GLSLGenerator generator(subroutines, program_code, main_offset, stage); + auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines(); + GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix); return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; } catch (const DecompileFail& exception) { LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 382c76b7a..7610dad3a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -20,7 +20,8 @@ using Tegra::Engines::Maxwell3D; std::string GetCommonDeclarations(); boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, - Maxwell3D::Regs::ShaderStage stage); + Maxwell3D::Regs::ShaderStage stage, + const std::string& suffix); } // namespace Decompiler } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index c1e6fac9f..129c777d1 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -17,10 +17,17 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConf std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); + out += "bool exec_vertex();\n"; + + if (setup.IsDualProgram()) { + out += "bool exec_vertex_b();\n"; + } + + ProgramResult program = + Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Vertex, "vertex") + .get_value_or({}); - ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, - Maxwell3D::Regs::ShaderStage::Vertex) - .get_value_or({}); out += R"( out gl_PerVertex { @@ -34,7 +41,14 @@ layout (std140) uniform vs_config { }; void main() { - exec_shader(); + exec_vertex(); +)"; + + if (setup.IsDualProgram()) { + out += " exec_vertex_b();"; + } + + out += R"( // Viewport can be flipped, which is unsupported by glViewport position.xy *= viewport_flip.xy; @@ -44,8 +58,19 @@ void main() { // For now, this is here to bring order in lieu of proper emulation position.w = 1.0; } + )"; + out += program.first; + + if (setup.IsDualProgram()) { + ProgramResult program_b = + Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") + .get_value_or({}); + out += program_b.first; + } + return {out, program.second}; } @@ -53,12 +78,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); + out += "bool exec_fragment();\n"; - ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, - Maxwell3D::Regs::ShaderStage::Fragment) - .get_value_or({}); + ProgramResult program = + Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Fragment, "fragment") + .get_value_or({}); out += R"( - in vec4 position; out vec4 color; @@ -67,7 +93,7 @@ layout (std140) uniform fs_config { }; void main() { - exec_shader(); + exec_fragment(); } )"; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index ed890e0f9..4729ce0fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -115,21 +115,48 @@ struct ShaderEntries { using ProgramResult = std::pair<std::string, ShaderEntries>; struct ShaderSetup { - ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} + ShaderSetup(const ProgramCode& program_code) { + program.code = program_code; + } + + struct { + ProgramCode code; + ProgramCode code_b; // Used for dual vertex shaders + } program; - ProgramCode program_code; bool program_code_hash_dirty = true; u64 GetProgramCodeHash() { if (program_code_hash_dirty) { - program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash = GetNewHash(); program_code_hash_dirty = false; } return program_code_hash; } + /// Used in scenarios where we have a dual vertex shaders + void SetProgramB(const ProgramCode& program_b) { + program.code_b = program_b; + has_program_b = true; + } + + bool IsDualProgram() const { + return has_program_b; + } + private: + u64 GetNewHash() const { + if (has_program_b) { + // Compute hash over dual shader programs + return Common::ComputeHash64(&program, sizeof(program)); + } else { + // Compute hash over a single shader program + return Common::ComputeHash64(&program.code, program.code.size()); + } + } + u64 program_code_hash{}; + bool has_program_b{}; }; struct MaxwellShaderConfigCommon { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 00841e937..1930fa6ef 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -92,11 +92,24 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons return matrix; } +ScopeAcquireGLContext::ScopeAcquireGLContext() { + if (Settings::values.use_multi_core) { + VideoCore::g_emu_window->MakeCurrent(); + } +} +ScopeAcquireGLContext::~ScopeAcquireGLContext() { + if (Settings::values.use_multi_core) { + VideoCore::g_emu_window->DoneCurrent(); + } +} + RendererOpenGL::RendererOpenGL() = default; RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) { + ScopeAcquireGLContext acquire_context; + Core::System::GetInstance().perf_stats.EndSystemFrame(); // Maintain the rasterizer's state as a priority @@ -418,7 +431,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum /// Initialize the renderer bool RendererOpenGL::Init() { - render_window->MakeCurrent(); + ScopeAcquireGLContext acquire_context; if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 21f0d298c..fd0267cf5 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -31,6 +31,13 @@ struct ScreenInfo { TextureInfo texture; }; +/// Helper class to acquire/release OpenGL context within a given scope +class ScopeAcquireGLContext : NonCopyable { +public: + ScopeAcquireGLContext(); + ~ScopeAcquireGLContext(); +}; + class RendererOpenGL : public RendererBase { public: RendererOpenGL(); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index b3937b2fe..be18aa299 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -62,6 +62,7 @@ u32 BytesPerPixel(TextureFormat format) { return 4; case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: + case TextureFormat::G8R8: return 2; case TextureFormat::R8: return 1; @@ -77,6 +78,8 @@ u32 BytesPerPixel(TextureFormat format) { static u32 DepthBytesPerPixel(DepthFormat format) { switch (format) { + case DepthFormat::Z16_UNORM: + return 2; case DepthFormat::S8_Z24_UNORM: case DepthFormat::Z24_S8_UNORM: case DepthFormat::Z32_FLOAT: @@ -110,6 +113,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: case TextureFormat::R8: + case TextureFormat::G8R8: case TextureFormat::R16_G16_B16_A16: case TextureFormat::R32_G32_B32_A32: case TextureFormat::BF10GF11RF11: @@ -133,6 +137,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); switch (format) { + case DepthFormat::Z16_UNORM: case DepthFormat::S8_Z24_UNORM: case DepthFormat::Z24_S8_UNORM: case DepthFormat::Z32_FLOAT: @@ -164,6 +169,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::A1B5G5R5: case TextureFormat::B5G6R5: case TextureFormat::R8: + case TextureFormat::G8R8: case TextureFormat::BF10GF11RF11: case TextureFormat::R32_G32_B32_A32: // TODO(Subv): For the time being just forward the same data without any decoding. diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 833085559..159b2c32b 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -20,7 +20,10 @@ EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} void EmuThread::run() { - render_window->MakeCurrent(); + if (!Settings::values.use_multi_core) { + // Single core mode must acquire OpenGL context for entire emulation session + render_window->MakeCurrent(); + } MicroProfileOnThreadCreate("EmuThread"); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 9ce8d7c27..16812e077 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -374,6 +374,8 @@ bool GMainWindow::LoadROM(const QString& filename) { const Core::System::ResultStatus result{system.Load(render_window, filename.toStdString())}; + render_window->DoneCurrent(); + if (result != Core::System::ResultStatus::Success) { switch (result) { case Core::System::ResultStatus::ErrorGetLoader: @@ -916,6 +918,7 @@ int main(int argc, char* argv[]) { QCoreApplication::setApplicationName("yuzu"); QApplication::setAttribute(Qt::AA_X11InitThreads); + QApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); QApplication app(argc, argv); // Qt changes the locale and causes issues in float conversion using std::to_string() when diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index e6f0bbe8f..ec73f08bd 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -126,7 +126,7 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI); if (render_window == nullptr) { - LOG_CRITICAL(Frontend, "Failed to create SDL2 window! Exiting..."); + LOG_CRITICAL(Frontend, "Failed to create SDL2 window! {}", SDL_GetError()); exit(1); } @@ -137,12 +137,12 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { gl_context = SDL_GL_CreateContext(render_window); if (gl_context == nullptr) { - LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context! Exiting..."); + LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context! {}", SDL_GetError()); exit(1); } if (!gladLoadGLLoader(static_cast<GLADloadproc>(SDL_GL_GetProcAddress))) { - LOG_CRITICAL(Frontend, "Failed to initialize GL functions! Exiting..."); + LOG_CRITICAL(Frontend, "Failed to initialize GL functions! {}", SDL_GetError()); exit(1); } diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 5f67ae4ee..24db1065a 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -22,10 +22,8 @@ #include "yuzu_cmd/config.h" #include "yuzu_cmd/emu_window/emu_window_sdl2.h" -#ifdef _MSC_VER -#include <getopt.h> -#else #include <getopt.h> +#ifndef _MSC_VER #include <unistd.h> #endif @@ -150,6 +148,11 @@ int main(int argc, char** argv) { std::unique_ptr<EmuWindow_SDL2> emu_window{std::make_unique<EmuWindow_SDL2>(fullscreen)}; + if (!Settings::values.use_multi_core) { + // Single core mode must acquire OpenGL context for entire emulation session + emu_window->MakeCurrent(); + } + Core::System& system{Core::System::GetInstance()}; SCOPE_EXIT({ system.Shutdown(); }); |