summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/string_util.cpp2
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp4
-rw-r--r--src/core/file_sys/disk_filesystem.cpp6
-rw-r--r--src/core/gdbstub/gdbstub.cpp194
-rw-r--r--src/core/gdbstub/gdbstub.h8
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp4
-rw-r--r--src/core/hle/service/audio/audout_u.cpp24
-rw-r--r--src/core/hle/service/audio/audout_u.h4
-rw-r--r--src/core/hle/service/audio/audren_u.cpp6
-rw-r--r--src/core/hle/service/nifm/nifm.cpp24
-rw-r--r--src/core/hle/service/sockets/bsd.cpp3
-rw-r--r--src/core/loader/deconstructed_rom_directory.cpp3
-rw-r--r--src/core/loader/nca.cpp4
-rw-r--r--src/core/loader/nro.cpp4
-rw-r--r--src/core/loader/nso.cpp4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp21
-rw-r--r--src/video_core/engines/maxwell_3d.h11
-rw-r--r--src/video_core/engines/shader_bytecode.h21
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp72
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp33
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h21
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp138
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp44
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h33
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp15
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h7
-rw-r--r--src/video_core/textures/decoders.cpp6
-rw-r--r--src/yuzu/bootmanager.cpp5
-rw-r--r--src/yuzu/main.cpp3
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp6
-rw-r--r--src/yuzu_cmd/yuzu.cpp9
32 files changed, 545 insertions, 197 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index ea9d8f77c..0027888c7 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -134,7 +134,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
size_t dir_end = full_path.find_last_of("/"
// windows needs the : included for something like just "C:" to be considered a directory
#ifdef _WIN32
- ":"
+ "\\:"
#endif
);
if (std::string::npos == dir_end)
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index ce6c5616d..f239cf0ea 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -193,11 +193,11 @@ void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
}
Kernel::Thread* thread = Kernel::GetCurrentThread();
SaveContext(thread->context);
- if (last_bkpt_hit) {
+ if (last_bkpt_hit || (num_instructions == 1)) {
last_bkpt_hit = false;
GDBStub::Break();
+ GDBStub::SendTrap(thread, 5);
}
- GDBStub::SendTrap(thread, 5);
}
}
diff --git a/src/core/file_sys/disk_filesystem.cpp b/src/core/file_sys/disk_filesystem.cpp
index 8c6f15bb5..d248c2df4 100644
--- a/src/core/file_sys/disk_filesystem.cpp
+++ b/src/core/file_sys/disk_filesystem.cpp
@@ -58,11 +58,13 @@ ResultVal<std::unique_ptr<StorageBackend>> Disk_FileSystem::OpenFile(const std::
}
ResultCode Disk_FileSystem::DeleteFile(const std::string& path) const {
- if (!FileUtil::Exists(path)) {
+ std::string full_path = base_directory + path;
+
+ if (!FileUtil::Exists(full_path)) {
return ERROR_PATH_NOT_FOUND;
}
- FileUtil::Delete(path);
+ FileUtil::Delete(full_path);
return RESULT_SUCCESS;
}
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 938852a1a..6062de13c 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -61,10 +61,16 @@ const u32 SIGTERM = 15;
const u32 MSG_WAITALL = 8;
#endif
-const u32 X30_REGISTER = 30;
+const u32 LR_REGISTER = 30;
const u32 SP_REGISTER = 31;
const u32 PC_REGISTER = 32;
const u32 CPSR_REGISTER = 33;
+const u32 UC_ARM64_REG_Q0 = 34;
+const u32 FPSCR_REGISTER = 66;
+
+// TODO/WiP - Used while working on support for FPU
+const u32 TODO_DUMMY_REG_997 = 997;
+const u32 TODO_DUMMY_REG_998 = 998;
// For sample XML files see the GDB source /gdb/features
// GDB also wants the l character at the start
@@ -130,6 +136,8 @@ static const char* target_xml =
</flags>
<reg name="cpsr" bitsize="32" type="cpsr_flags"/>
</feature>
+ <feature name="org.gnu.gdb.aarch64.fpu">
+ </feature>
</target>
)";
@@ -144,6 +152,7 @@ static u32 latest_signal = 0;
static bool memory_break = false;
static Kernel::Thread* current_thread = nullptr;
+static u32 current_core = 0;
// Binding to a port within the reserved ports range (0-1023) requires root permissions,
// so default to a port outside of that range.
@@ -171,13 +180,34 @@ static std::map<u64, Breakpoint> breakpoints_execute;
static std::map<u64, Breakpoint> breakpoints_read;
static std::map<u64, Breakpoint> breakpoints_write;
+struct Module {
+ std::string name;
+ PAddr beg;
+ PAddr end;
+};
+
+static std::vector<Module> modules;
+
+void RegisterModule(std::string name, PAddr beg, PAddr end, bool add_elf_ext) {
+ Module module;
+ if (add_elf_ext) {
+ Common::SplitPath(name, nullptr, &module.name, nullptr);
+ module.name += ".elf";
+ } else {
+ module.name = std::move(name);
+ }
+ module.beg = beg;
+ module.end = end;
+ modules.push_back(std::move(module));
+}
+
static Kernel::Thread* FindThreadById(int id) {
- for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
- auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
- for (auto thread : threads) {
+ for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
+ const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
+ for (auto& thread : threads) {
if (thread->GetThreadId() == id) {
- current_thread = thread.get();
- return current_thread;
+ current_core = core;
+ return thread.get();
}
}
}
@@ -197,6 +227,8 @@ static u64 RegRead(int id, Kernel::Thread* thread = nullptr) {
return thread->context.pc;
} else if (id == CPSR_REGISTER) {
return thread->context.cpsr;
+ } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) {
+ return thread->context.fpu_registers[id - UC_ARM64_REG_Q0][0];
} else {
return 0;
}
@@ -215,6 +247,8 @@ static void RegWrite(int id, u64 val, Kernel::Thread* thread = nullptr) {
thread->context.pc = val;
} else if (id == CPSR_REGISTER) {
thread->context.cpsr = val;
+ } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) {
+ thread->context.fpu_registers[id - (CPSR_REGISTER + 1)][0] = val;
}
}
@@ -534,7 +568,11 @@ static void HandleQuery() {
SendReply("T0");
} else if (strncmp(query, "Supported", strlen("Supported")) == 0) {
// PacketSize needs to be large enough for target xml
- SendReply("PacketSize=2000;qXfer:features:read+");
+ std::string buffer = "PacketSize=2000;qXfer:features:read+;qXfer:threads:read+";
+ if (!modules.empty()) {
+ buffer += ";qXfer:libraries:read+";
+ }
+ SendReply(buffer.c_str());
} else if (strncmp(query, "Xfer:features:read:target.xml:",
strlen("Xfer:features:read:target.xml:")) == 0) {
SendReply(target_xml);
@@ -543,9 +581,9 @@ static void HandleQuery() {
SendReply(buffer.c_str());
} else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
std::string val = "m";
- for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
- auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
- for (auto thread : threads) {
+ for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
+ const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
+ for (const auto& thread : threads) {
val += fmt::format("{:x}", thread->GetThreadId());
val += ",";
}
@@ -554,6 +592,31 @@ static void HandleQuery() {
SendReply(val.c_str());
} else if (strncmp(query, "sThreadInfo", strlen("sThreadInfo")) == 0) {
SendReply("l");
+ } else if (strncmp(query, "Xfer:threads:read", strlen("Xfer:threads:read")) == 0) {
+ std::string buffer;
+ buffer += "l<?xml version=\"1.0\"?>";
+ buffer += "<threads>";
+ for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
+ const auto& threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
+ for (const auto& thread : threads) {
+ buffer +=
+ fmt::format(R"*(<thread id="{:x}" core="{:d}" name="Thread {:x}"></thread>)*",
+ thread->GetThreadId(), core, thread->GetThreadId());
+ }
+ }
+ buffer += "</threads>";
+ SendReply(buffer.c_str());
+ } else if (strncmp(query, "Xfer:libraries:read", strlen("Xfer:libraries:read")) == 0) {
+ std::string buffer;
+ buffer += "l<?xml version=\"1.0\"?>";
+ buffer += "<library-list>";
+ for (const auto& module : modules) {
+ buffer +=
+ fmt::format(R"*("<library name = "{}"><segment address = "0x{:x}"/></library>)*",
+ module.name, module.beg);
+ }
+ buffer += "</library-list>";
+ SendReply(buffer.c_str());
} else {
SendReply("");
}
@@ -561,33 +624,27 @@ static void HandleQuery() {
/// Handle set thread command from gdb client.
static void HandleSetThread() {
- if (memcmp(command_buffer, "Hc", 2) == 0 || memcmp(command_buffer, "Hg", 2) == 0) {
- int thread_id = -1;
- if (command_buffer[2] != '-') {
- thread_id = static_cast<int>(HexToInt(
- command_buffer + 2,
- command_length - 2 /*strlen(reinterpret_cast<char*>(command_buffer) + 2)*/));
- }
- if (thread_id >= 1) {
- current_thread = FindThreadById(thread_id);
- }
- if (!current_thread) {
- thread_id = 1;
- current_thread = FindThreadById(thread_id);
- }
- if (current_thread) {
- SendReply("OK");
- return;
- }
+ int thread_id = -1;
+ if (command_buffer[2] != '-') {
+ thread_id = static_cast<int>(HexToInt(command_buffer + 2, command_length - 2));
+ }
+ if (thread_id >= 1) {
+ current_thread = FindThreadById(thread_id);
+ }
+ if (!current_thread) {
+ thread_id = 1;
+ current_thread = FindThreadById(thread_id);
+ }
+ if (current_thread) {
+ SendReply("OK");
+ return;
}
SendReply("E01");
}
/// Handle thread alive command from gdb client.
static void HandleThreadAlive() {
- int thread_id = static_cast<int>(
- HexToInt(command_buffer + 1,
- command_length - 1 /*strlen(reinterpret_cast<char*>(command_buffer) + 1)*/));
+ int thread_id = static_cast<int>(HexToInt(command_buffer + 1, command_length - 1));
if (thread_id == 0) {
thread_id = 1;
}
@@ -610,16 +667,23 @@ static void SendSignal(Kernel::Thread* thread, u32 signal, bool full = true) {
latest_signal = signal;
+ if (!thread) {
+ full = false;
+ }
+
std::string buffer;
if (full) {
- buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};", latest_signal, PC_REGISTER,
- Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER,
- Common::swap64(RegRead(SP_REGISTER, thread)));
+ buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};{:02x}:{:016x}", latest_signal,
+ PC_REGISTER, Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER,
+ Common::swap64(RegRead(SP_REGISTER, thread)), LR_REGISTER,
+ Common::swap64(RegRead(LR_REGISTER, thread)));
} else {
- buffer = fmt::format("T{:02x};", latest_signal);
+ buffer = fmt::format("T{:02x}", latest_signal);
}
- buffer += fmt::format("thread:{:x};", thread->GetThreadId());
+ if (thread) {
+ buffer += fmt::format(";thread:{:x};", thread->GetThreadId());
+ }
SendReply(buffer.c_str());
}
@@ -711,8 +775,12 @@ static void ReadRegister() {
LongToGdbHex(reply, RegRead(id, current_thread));
} else if (id == CPSR_REGISTER) {
IntToGdbHex(reply, (u32)RegRead(id, current_thread));
+ } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) {
+ LongToGdbHex(reply, RegRead(id, current_thread));
+ } else if (id == FPSCR_REGISTER) {
+ LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread));
} else {
- return SendReply("E01");
+ LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread));
}
SendReply(reinterpret_cast<char*>(reply));
@@ -729,7 +797,7 @@ static void ReadRegisters() {
LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread));
}
- bufptr += (32 * 16);
+ bufptr += 32 * 16;
LongToGdbHex(bufptr, RegRead(PC_REGISTER, current_thread));
@@ -739,6 +807,16 @@ static void ReadRegisters() {
bufptr += 8;
+ for (int reg = UC_ARM64_REG_Q0; reg <= UC_ARM64_REG_Q0 + 31; reg++) {
+ LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread));
+ }
+
+ bufptr += 32 * 32;
+
+ LongToGdbHex(bufptr, RegRead(TODO_DUMMY_REG_998, current_thread));
+
+ bufptr += 8;
+
SendReply(reinterpret_cast<char*>(buffer));
}
@@ -759,10 +837,17 @@ static void WriteRegister() {
RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
} else if (id == CPSR_REGISTER) {
RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
+ } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) {
+ RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
+ } else if (id == FPSCR_REGISTER) {
+ RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread);
} else {
- return SendReply("E01");
+ RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread);
}
+ // Update Unicorn context skipping scheduler, no running threads at this point
+ Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context);
+
SendReply("OK");
}
@@ -773,18 +858,25 @@ static void WriteRegisters() {
if (command_buffer[0] != 'G')
return SendReply("E01");
- for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
+ for (int i = 0, reg = 0; reg <= FPSCR_REGISTER; i++, reg++) {
if (reg <= SP_REGISTER) {
RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else if (reg == PC_REGISTER) {
RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else if (reg == CPSR_REGISTER) {
RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
+ } else if (reg >= UC_ARM64_REG_Q0 && reg < FPSCR_REGISTER) {
+ RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
+ } else if (reg == FPSCR_REGISTER) {
+ RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else {
UNIMPLEMENTED();
}
}
+ // Update Unicorn context skipping scheduler, no running threads at this point
+ Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context);
+
SendReply("OK");
}
@@ -806,6 +898,10 @@ static void ReadMemory() {
SendReply("E01");
}
+ if (addr < Memory::PROCESS_IMAGE_VADDR || addr >= Memory::MAP_REGION_VADDR_END) {
+ return SendReply("E00");
+ }
+
if (!Memory::IsValidVirtualAddress(addr)) {
return SendReply("E00");
}
@@ -840,16 +936,18 @@ static void WriteMemory() {
}
void Break(bool is_memory_break) {
- if (!halt_loop) {
- halt_loop = true;
- send_trap = true;
- }
+ send_trap = true;
memory_break = is_memory_break;
}
/// Tell the CPU that it should perform a single step.
static void Step() {
+ if (command_length > 1) {
+ RegWrite(PC_REGISTER, GdbHexToLong(command_buffer + 1), current_thread);
+ // Update Unicorn context skipping scheduler, no running threads at this point
+ Core::System::GetInstance().ArmInterface(current_core).LoadContext(current_thread->context);
+ }
step_loop = true;
halt_loop = true;
send_trap = true;
@@ -1090,6 +1188,8 @@ static void Init(u16 port) {
breakpoints_read.clear();
breakpoints_write.clear();
+ modules.clear();
+
// Start gdb server
LOG_INFO(Debug_GDBStub, "Starting GDB server on port {}...", port);
@@ -1192,8 +1292,12 @@ void SetCpuStepFlag(bool is_step) {
void SendTrap(Kernel::Thread* thread, int trap) {
if (send_trap) {
+ if (!halt_loop || current_thread == thread) {
+ current_thread = thread;
+ SendSignal(thread, trap);
+ }
+ halt_loop = true;
send_trap = false;
- SendSignal(thread, trap);
}
}
}; // namespace GDBStub
diff --git a/src/core/gdbstub/gdbstub.h b/src/core/gdbstub/gdbstub.h
index f2418c9e4..a6b50c26c 100644
--- a/src/core/gdbstub/gdbstub.h
+++ b/src/core/gdbstub/gdbstub.h
@@ -6,6 +6,7 @@
#pragma once
+#include <string>
#include "common/common_types.h"
#include "core/hle/kernel/thread.h"
@@ -51,6 +52,9 @@ bool IsServerEnabled();
/// Returns true if there is an active socket connection.
bool IsConnected();
+/// Register module.
+void RegisterModule(std::string name, PAddr beg, PAddr end, bool add_elf_ext = true);
+
/**
* Signal to the gdbstub server that it should halt CPU execution.
*
@@ -80,10 +84,10 @@ BreakpointAddress GetNextBreakpointFromAddress(PAddr addr, GDBStub::BreakpointTy
*/
bool CheckBreakpoint(PAddr addr, GDBStub::BreakpointType type);
-// If set to true, the CPU will halt at the beginning of the next CPU loop.
+/// If set to true, the CPU will halt at the beginning of the next CPU loop.
bool GetCpuHaltFlag();
-// If set to true and the CPU is halted, the CPU will step one instruction.
+/// If set to true and the CPU is halted, the CPU will step one instruction.
bool GetCpuStepFlag();
/**
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 609cdbff2..2532dd450 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -214,8 +214,8 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
(sizeof(IPC::CommandHeader) + sizeof(IPC::HandleDescriptorHeader)) / sizeof(u32);
ASSERT_MSG(!handle_descriptor_header->send_current_pid, "Sending PID is not implemented");
- ASSERT_MSG(copy_objects.size() == handle_descriptor_header->num_handles_to_copy);
- ASSERT_MSG(move_objects.size() == handle_descriptor_header->num_handles_to_move);
+ ASSERT(copy_objects.size() == handle_descriptor_header->num_handles_to_copy);
+ ASSERT(move_objects.size() == handle_descriptor_header->num_handles_to_move);
// We don't make a distinction between copy and move handles when translating since HLE
// services don't deal with handles directly. However, the guest applications might check
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 1b4b649d8..8bf273b22 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -27,12 +27,12 @@ public:
{0, &IAudioOut::GetAudioOutState, "GetAudioOutState"},
{1, &IAudioOut::StartAudioOut, "StartAudioOut"},
{2, &IAudioOut::StopAudioOut, "StopAudioOut"},
- {3, &IAudioOut::AppendAudioOutBuffer, "AppendAudioOutBuffer"},
+ {3, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBuffer"},
{4, &IAudioOut::RegisterBufferEvent, "RegisterBufferEvent"},
- {5, &IAudioOut::GetReleasedAudioOutBuffer, "GetReleasedAudioOutBuffer"},
+ {5, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBuffer"},
{6, nullptr, "ContainsAudioOutBuffer"},
- {7, nullptr, "AppendAudioOutBufferAuto"},
- {8, nullptr, "GetReleasedAudioOutBufferAuto"},
+ {7, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBufferAuto"},
+ {8, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBufferAuto"},
{9, nullptr, "GetAudioOutBufferCount"},
{10, nullptr, "GetAudioOutPlayedSampleCount"},
{11, nullptr, "FlushAudioOutBuffers"},
@@ -96,7 +96,7 @@ private:
rb.PushCopyObjects(buffer_event);
}
- void AppendAudioOutBuffer(Kernel::HLERequestContext& ctx) {
+ void AppendAudioOutBufferImpl(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
IPC::RequestParser rp{ctx};
@@ -107,7 +107,7 @@ private:
rb.Push(RESULT_SUCCESS);
}
- void GetReleasedAudioOutBuffer(Kernel::HLERequestContext& ctx) {
+ void GetReleasedAudioOutBufferImpl(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
// TODO(st4rk): This is how libtransistor currently implements the
@@ -163,7 +163,7 @@ private:
AudioState audio_out_state;
};
-void AudOutU::ListAudioOuts(Kernel::HLERequestContext& ctx) {
+void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
IPC::RequestParser rp{ctx};
@@ -179,7 +179,7 @@ void AudOutU::ListAudioOuts(Kernel::HLERequestContext& ctx) {
rb.Push<u32>(1);
}
-void AudOutU::OpenAudioOut(Kernel::HLERequestContext& ctx) {
+void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
if (!audio_out_interface) {
@@ -196,10 +196,10 @@ void AudOutU::OpenAudioOut(Kernel::HLERequestContext& ctx) {
}
AudOutU::AudOutU() : ServiceFramework("audout:u") {
- static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOuts, "ListAudioOuts"},
- {1, &AudOutU::OpenAudioOut, "OpenAudioOut"},
- {2, nullptr, "ListAudioOutsAuto"},
- {3, nullptr, "OpenAudioOutAuto"}};
+ static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"},
+ {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"},
+ {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"},
+ {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}};
RegisterHandlers(functions);
}
diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h
index 1f9bb9bcf..847d86aa6 100644
--- a/src/core/hle/service/audio/audout_u.h
+++ b/src/core/hle/service/audio/audout_u.h
@@ -22,8 +22,8 @@ public:
private:
std::shared_ptr<IAudioOut> audio_out_interface;
- void ListAudioOuts(Kernel::HLERequestContext& ctx);
- void OpenAudioOut(Kernel::HLERequestContext& ctx);
+ void ListAudioOutsImpl(Kernel::HLERequestContext& ctx);
+ void OpenAudioOutImpl(Kernel::HLERequestContext& ctx);
enum class PcmFormat : u32 {
Invalid = 0,
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 2da936b27..b7f591c6d 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -47,7 +47,7 @@ public:
// Start the audio event
CoreTiming::ScheduleEvent(audio_ticks, audio_event);
- voice_status_list.reserve(worker_params.voice_count);
+ voice_status_list.resize(worker_params.voice_count);
}
~IAudioRenderer() {
CoreTiming::UnscheduleEvent(audio_event, 0);
@@ -87,8 +87,6 @@ private:
memory_pool[i].state = MemoryPoolStates::Attached;
else if (mem_pool_info[i].pool_state == MemoryPoolStates::RequestDetach)
memory_pool[i].state = MemoryPoolStates::Detached;
- else
- memory_pool[i].state = mem_pool_info[i].pool_state;
}
std::memcpy(output.data() + sizeof(UpdateDataHeader), memory_pool.data(),
response_data.memory_pools_size);
@@ -183,7 +181,9 @@ private:
behavior_size = 0xb0;
memory_pools_size = (config.effect_count + (config.voice_count * 4)) * 0x10;
voices_size = config.voice_count * 0x10;
+ voice_resource_size = 0x0;
effects_size = config.effect_count * 0x10;
+ mixes_size = 0x0;
sinks_size = config.sink_count * 0x20;
performance_manager_size = 0x10;
total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size +
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 54a151c26..0d951084b 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -148,6 +148,24 @@ private:
LOG_DEBUG(Service_NIFM, "called");
}
+ void IsWirelessCommunicationEnabled(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_NIFM, "(STUBBED) called");
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u8>(0);
+ }
+ void IsEthernetCommunicationEnabled(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_NIFM, "(STUBBED) called");
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u8>(0);
+ }
+ void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_NIFM, "(STUBBED) called");
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u8>(0);
+ }
};
IGeneralService::IGeneralService() : ServiceFramework("IGeneralService") {
@@ -167,11 +185,11 @@ IGeneralService::IGeneralService() : ServiceFramework("IGeneralService") {
{14, &IGeneralService::CreateTemporaryNetworkProfile, "CreateTemporaryNetworkProfile"},
{15, nullptr, "GetCurrentIpConfigInfo"},
{16, nullptr, "SetWirelessCommunicationEnabled"},
- {17, nullptr, "IsWirelessCommunicationEnabled"},
+ {17, &IGeneralService::IsWirelessCommunicationEnabled, "IsWirelessCommunicationEnabled"},
{18, nullptr, "GetInternetConnectionStatus"},
{19, nullptr, "SetEthernetCommunicationEnabled"},
- {20, nullptr, "IsEthernetCommunicationEnabled"},
- {21, nullptr, "IsAnyInternetRequestAccepted"},
+ {20, &IGeneralService::IsEthernetCommunicationEnabled, "IsEthernetCommunicationEnabled"},
+ {21, &IGeneralService::IsAnyInternetRequestAccepted, "IsAnyInternetRequestAccepted"},
{22, nullptr, "IsAnyForegroundRequestAccepted"},
{23, nullptr, "PutToSleep"},
{24, nullptr, "WakeUp"},
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index 32648bdd9..6aa1e2511 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -19,10 +19,9 @@ void BSD::RegisterClient(Kernel::HLERequestContext& ctx) {
void BSD::StartMonitoring(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service, "(STUBBED) called");
- IPC::ResponseBuilder rb{ctx, 3};
+ IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
- rb.Push<u32>(0); // bsd errno
}
void BSD::Socket(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index eb7feb617..5fdb1d289 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/file_sys/romfs_factory.h"
+#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/service/filesystem/filesystem.h"
@@ -133,6 +134,8 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(
next_load_addr = AppLoader_NSO::LoadModule(path, load_addr);
if (next_load_addr) {
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
+ // Register module with GDBStub
+ GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false);
} else {
next_load_addr = load_addr;
}
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp
index da064f8e3..0fd930ae2 100644
--- a/src/core/loader/nca.cpp
+++ b/src/core/loader/nca.cpp
@@ -7,10 +7,12 @@
#include "common/common_funcs.h"
#include "common/file_util.h"
#include "common/logging/log.h"
+#include "common/string_util.h"
#include "common/swap.h"
#include "core/core.h"
#include "core/file_sys/program_metadata.h"
#include "core/file_sys/romfs_factory.h"
+#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/service/filesystem/filesystem.h"
@@ -259,6 +261,8 @@ ResultStatus AppLoader_NCA::Load(Kernel::SharedPtr<Kernel::Process>& process) {
next_load_addr = AppLoader_NSO::LoadModule(module, nca->GetExeFsFile(module), load_addr);
if (next_load_addr) {
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
+ // Register module with GDBStub
+ GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false);
} else {
next_load_addr = load_addr;
}
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 3853cfa1a..4d7c69a22 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "common/swap.h"
#include "core/core.h"
+#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/loader/nro.h"
@@ -115,6 +116,9 @@ bool AppLoader_NRO::LoadNro(const std::string& path, VAddr load_base) {
codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image));
Core::CurrentProcess()->LoadModule(codeset, load_base);
+ // Register module with GDBStub
+ GDBStub::RegisterModule(codeset->name, load_base, load_base);
+
return true;
}
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 7f84e4b1b..1c629e21f 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -10,6 +10,7 @@
#include "common/logging/log.h"
#include "common/swap.h"
#include "core/core.h"
+#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/loader/nso.h"
@@ -147,6 +148,9 @@ VAddr AppLoader_NSO::LoadModule(const std::string& name, const std::vector<u8>&
codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image));
Core::CurrentProcess()->LoadModule(codeset, load_base);
+ // Register module with GDBStub
+ GDBStub::RegisterModule(codeset->name, load_base, load_base);
+
return load_base + image_size;
}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 3bca16364..dfbf80abd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -398,27 +398,6 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
-bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
- // The Vertex stage is always enabled.
- if (stage == Regs::ShaderStage::Vertex)
- return true;
-
- switch (stage) {
- case Regs::ShaderStage::TesselationControl:
- return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationControl)]
- .enable != 0;
- case Regs::ShaderStage::TesselationEval:
- return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationEval)]
- .enable != 0;
- case Regs::ShaderStage::Geometry:
- return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Geometry)].enable != 0;
- case Regs::ShaderStage::Fragment:
- return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Fragment)].enable != 0;
- }
-
- UNREACHABLE();
-}
-
void Maxwell3D::ProcessClearBuffers() {
ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
regs.clear_buffers.R == regs.clear_buffers.B &&
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 5a7cf0107..6f0170ff7 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -379,6 +379,14 @@ public:
}
};
+ bool IsShaderConfigEnabled(size_t index) const {
+ // The VertexB is always enabled.
+ if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) {
+ return true;
+ }
+ return shader_config[index].enable != 0;
+ }
+
union {
struct {
INSERT_PADDING_WORDS(0x45);
@@ -780,9 +788,6 @@ public:
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
- /// Returns whether the specified shader stage is enabled or not.
- bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
-
private:
std::unordered_map<u32, std::vector<u32>> uploaded_macros;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 2bc1782ad..65fa1495f 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -142,6 +142,7 @@ enum class PredCondition : u64 {
GreaterThan = 4,
NotEqual = 5,
GreaterEqual = 6,
+ LessThanWithNan = 9,
NotEqualWithNan = 13,
// TODO(Subv): Other condition types
};
@@ -201,6 +202,11 @@ enum class IMinMaxExchange : u64 {
XHi = 3,
};
+enum class FlowCondition : u64 {
+ Always = 0xF,
+ Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -298,6 +304,13 @@ union Instruction {
} iadd32i;
union {
+ BitField<53, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_a;
+ BitField<56, 1, u64> negate_a;
+ BitField<57, 1, u64> abs_b;
+ } fadd32i;
+
+ union {
BitField<20, 8, u64> shift_position;
BitField<28, 8, u64> shift_length;
BitField<48, 1, u64> negate_b;
@@ -309,6 +322,10 @@ union Instruction {
} bfe;
union {
+ BitField<0, 5, FlowCondition> cond;
+ } flow;
+
+ union {
BitField<48, 1, u64> negate_b;
BitField<49, 1, u64> negate_c;
} ffma;
@@ -487,6 +504,7 @@ public:
FADD_C,
FADD_R,
FADD_IMM,
+ FADD32I,
FMUL_C,
FMUL_R,
FMUL_IMM,
@@ -679,13 +697,14 @@ private:
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
- INST("001100101-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
+ INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
+ INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"),
INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ea138d402..eecbc5ff0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -15,6 +15,7 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
+#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
@@ -22,6 +23,7 @@
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/video_core.h"
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = SurfaceParams::PixelFormat;
@@ -181,6 +183,19 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
return {array_ptr, buffer_offset};
}
+static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) {
+ auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
+
+ // Fetch program code from memory
+ GLShader::ProgramCode program_code;
+ auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+ const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
+ const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
+ Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
+
+ return program_code;
+}
+
void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
// Helper function for uploading uniform data
const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
@@ -193,26 +208,23 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
};
auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
- ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = uniform_buffers.size();
u32 current_texture_bindpoint = 0;
- for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
- const auto& stage = index - 1; // Stage indices are 0 - 5
-
- const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage));
-
// Skip stages that are not enabled
- if (!is_enabled) {
+ if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
+ const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
+
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
@@ -228,16 +240,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
buffer_ptr += sizeof(GLShader::MaxwellUniformData);
buffer_offset += sizeof(GLShader::MaxwellUniformData);
- // Fetch program code from memory
- GLShader::ProgramCode program_code;
- const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
- const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
- Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
- GLShader::ShaderSetup setup{std::move(program_code)};
-
+ GLShader::ShaderSetup setup{GetShaderProgramCode(program)};
GLShader::ShaderEntries shader_resources;
switch (program) {
+ case Maxwell::ShaderProgram::VertexA: {
+ // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
+ // Conventional HW does not support this, so we combine VertexA and VertexB into one
+ // stage here.
+ setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB));
+ GLShader::MaxwellVSConfig vs_config{setup};
+ shader_resources =
+ shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
+ break;
+ }
+
case Maxwell::ShaderProgram::VertexB: {
GLShader::MaxwellVSConfig vs_config{setup};
shader_resources =
@@ -268,6 +285,12 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
current_texture_bindpoint =
SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
current_texture_bindpoint, shader_resources.texture_samplers);
+
+ // When VertexA is enabled, we have dual vertex shaders
+ if (program == Maxwell::ShaderProgram::VertexA) {
+ // VertexB was combined with VertexA, so we skip the VertexB iteration
+ index++;
+ }
}
shader_program_manager->UseTrivialGeometryShader();
@@ -301,9 +324,6 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
bool using_depth_fb) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
- // Sync the depth test state before configuring the framebuffer surfaces.
- SyncDepthTestState();
-
// TODO(bunnei): Implement this
const bool has_stencil = false;
@@ -368,11 +388,20 @@ void RasterizerOpenGL::Clear() {
if (regs.clear_buffers.Z) {
clear_mask |= GL_DEPTH_BUFFER_BIT;
use_depth_fb = true;
+
+ // Always enable the depth write when clearing the depth buffer. The depth write mask is
+ // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
+ state.depth.test_enabled = true;
+ state.depth.write_mask = GL_TRUE;
+ state.depth.test_func = GL_ALWAYS;
+ state.Apply();
}
if (clear_mask == 0)
return;
+ ScopeAcquireGLContext acquire_context;
+
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(use_color_fb, use_depth_fb);
@@ -399,9 +428,12 @@ void RasterizerOpenGL::DrawArrays() {
MICROPROFILE_SCOPE(OpenGL_Drawing);
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+ ScopeAcquireGLContext acquire_context;
+
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(true, regs.zeta.Address() != 0);
+ SyncDepthTestState();
SyncBlendState();
SyncCullMode();
@@ -605,9 +637,6 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
auto& gpu = Core::System::GetInstance().GPU();
auto& maxwell3d = gpu.Get3DEngine();
- ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
- "Attempted to upload constbuffer of disabled shader stage");
-
// Reset all buffer draw state for this stage.
for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) {
buffer.bindpoint = 0;
@@ -674,9 +703,6 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
auto& gpu = Core::System::GetInstance().GPU();
auto& maxwell3d = gpu.Get3DEngine();
- ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
- "Attempted to upload textures of disabled shader stage");
-
ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 323ff7408..c171c4c5b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -105,6 +105,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // BC7U
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8
// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
@@ -112,6 +113,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
false}, // S8Z24
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
+ false}, // Z16
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -194,8 +197,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
- MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>,
- MortonCopy<true, PixelFormat::Z32F>,
+ MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::Z24S8>,
+ MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>,
+ MortonCopy<true, PixelFormat::Z16>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -215,10 +219,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
nullptr,
nullptr,
nullptr,
- MortonCopy<false, PixelFormat::ABGR8>,
+ nullptr,
+ MortonCopy<false, PixelFormat::G8R8>,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32F>,
+ MortonCopy<false, PixelFormat::Z16>,
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -271,9 +277,10 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
S8Z24 input_pixel{};
Z24S8 output_pixel{};
+ const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
for (size_t y = 0; y < height; ++y) {
for (size_t x = 0; x < width; ++x) {
- const size_t offset{y * width + x};
+ const size_t offset{bpp * (y * width + x)};
std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
output_pixel.s8.Assign(input_pixel.s8);
output_pixel.z24.Assign(input_pixel.z24);
@@ -281,6 +288,19 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
}
}
}
+
+static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
+ const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)};
+ for (size_t y = 0; y < height; ++y) {
+ for (size_t x = 0; x < width; ++x) {
+ const size_t offset{bpp * (y * width + x)};
+ const u8 temp{data[offset]};
+ data[offset] = data[offset + 1];
+ data[offset + 1] = temp;
+ }
+ }
+}
+
/**
* Helper function to perform software conversion (as needed) when loading a buffer from Switch
* memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
@@ -301,6 +321,11 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
// Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
ConvertS8Z24ToZ24S8(data, width, height);
break;
+
+ case PixelFormat::G8R8:
+ // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8.
+ ConvertG8R8ToR8G8(data, width, height);
+ break;
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 1bedae992..718c45ce1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -37,13 +37,15 @@ struct SurfaceParams {
DXN1 = 11, // This is also known as BC4
BC7U = 12,
ASTC_2D_4X4 = 13,
+ G8R8 = 14,
MaxColorFormat,
// DepthStencil formats
- Z24S8 = 14,
- S8Z24 = 15,
- Z32F = 16,
+ Z24S8 = 15,
+ S8Z24 = 16,
+ Z32F = 17,
+ Z16 = 18,
MaxDepthStencilFormat,
@@ -95,9 +97,11 @@ struct SurfaceParams {
4, // DXN1
4, // BC7U
4, // ASTC_2D_4X4
+ 1, // G8R8
1, // Z24S8
1, // S8Z24
1, // Z32F
+ 1, // Z16
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -123,9 +127,11 @@ struct SurfaceParams {
64, // DXN1
128, // BC7U
32, // ASTC_2D_4X4
+ 16, // G8R8
32, // Z24S8
32, // S8Z24
32, // Z32F
+ 16, // Z16
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -143,6 +149,8 @@ struct SurfaceParams {
return PixelFormat::Z24S8;
case Tegra::DepthFormat::Z32_FLOAT:
return PixelFormat::Z32F;
+ case Tegra::DepthFormat::Z16_UNORM:
+ return PixelFormat::Z16;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -181,6 +189,8 @@ struct SurfaceParams {
return PixelFormat::A1B5G5R5;
case Tegra::Texture::TextureFormat::R8:
return PixelFormat::R8;
+ case Tegra::Texture::TextureFormat::G8R8:
+ return PixelFormat::G8R8;
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
return PixelFormat::RGBA16F;
case Tegra::Texture::TextureFormat::BF10GF11RF11:
@@ -218,6 +228,8 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::A1B5G5R5;
case PixelFormat::R8:
return Tegra::Texture::TextureFormat::R8;
+ case PixelFormat::G8R8:
+ return Tegra::Texture::TextureFormat::G8R8;
case PixelFormat::RGBA16F:
return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
case PixelFormat::R11FG11FB10F:
@@ -249,6 +261,8 @@ struct SurfaceParams {
return Tegra::DepthFormat::Z24_S8_UNORM;
case PixelFormat::Z32F:
return Tegra::DepthFormat::Z32_FLOAT;
+ case PixelFormat::Z16:
+ return Tegra::DepthFormat::Z16_UNORM;
default:
UNREACHABLE();
}
@@ -295,6 +309,7 @@ struct SurfaceParams {
static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
+ case Tegra::DepthFormat::Z16_UNORM:
case Tegra::DepthFormat::S8_Z24_UNORM:
case Tegra::DepthFormat::Z24_S8_UNORM:
return ComponentType::UNorm;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 5914077e8..5fae95788 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -42,13 +42,14 @@ enum class ExitMethod {
struct Subroutine {
/// Generates a name suitable for GLSL source code.
std::string GetName() const {
- return "sub_" + std::to_string(begin) + '_' + std::to_string(end);
+ return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix;
}
- u32 begin; ///< Entry point of the subroutine.
- u32 end; ///< Return point of the subroutine.
- ExitMethod exit_method; ///< Exit method of the subroutine.
- std::set<u32> labels; ///< Addresses refereced by JMP instructions.
+ u32 begin; ///< Entry point of the subroutine.
+ u32 end; ///< Return point of the subroutine.
+ const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name
+ ExitMethod exit_method; ///< Exit method of the subroutine.
+ std::set<u32> labels; ///< Addresses refereced by JMP instructions.
bool operator<(const Subroutine& rhs) const {
return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
@@ -58,11 +59,11 @@ struct Subroutine {
/// Analyzes shader code and produces a set of subroutines.
class ControlFlowAnalyzer {
public:
- ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset)
+ ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix)
: program_code(program_code) {
// Recursively finds all subroutines.
- const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
+ const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix);
if (program_main.exit_method != ExitMethod::AlwaysEnd)
throw DecompileFail("Program does not always end");
}
@@ -77,12 +78,12 @@ private:
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
/// Adds and analyzes a new subroutine if it is not added yet.
- const Subroutine& AddSubroutine(u32 begin, u32 end) {
- auto iter = subroutines.find(Subroutine{begin, end});
+ const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) {
+ auto iter = subroutines.find(Subroutine{begin, end, suffix});
if (iter != subroutines.end())
return *iter;
- Subroutine subroutine{begin, end};
+ Subroutine subroutine{begin, end, suffix};
subroutine.exit_method = Scan(begin, end, subroutine.labels);
if (subroutine.exit_method == ExitMethod::Undetermined)
throw DecompileFail("Recursive function detected");
@@ -191,7 +192,8 @@ public:
UnsignedInteger,
};
- GLSLRegister(size_t index, ShaderWriter& shader) : index{index}, shader{shader} {}
+ GLSLRegister(size_t index, ShaderWriter& shader, const std::string& suffix)
+ : index{index}, shader{shader}, suffix{suffix} {}
/// Gets the GLSL type string for a register
static std::string GetTypeString(Type type) {
@@ -216,7 +218,7 @@ public:
/// Returns a GLSL string representing the current state of the register
const std::string GetActiveString() {
declr_type.insert(active_type);
- return GetPrefixString(active_type) + std::to_string(index);
+ return GetPrefixString(active_type) + std::to_string(index) + '_' + suffix;
}
/// Returns true if the active type is a float
@@ -251,6 +253,7 @@ private:
ShaderWriter& shader;
Type active_type{Type::Float};
std::set<Type> declr_type;
+ const std::string& suffix;
};
/**
@@ -262,8 +265,8 @@ private:
class GLSLRegisterManager {
public:
GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
- const Maxwell3D::Regs::ShaderStage& stage)
- : shader{shader}, declarations{declarations}, stage{stage} {
+ const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix)
+ : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} {
BuildRegisterList();
}
@@ -430,12 +433,12 @@ public:
}
/// Add declarations for registers
- void GenerateDeclarations() {
+ void GenerateDeclarations(const std::string& suffix) {
for (const auto& reg : regs) {
for (const auto& type : reg.DeclaredTypes()) {
declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' +
- GLSLRegister::GetPrefixString(type) +
- std::to_string(reg.GetIndex()) + " = 0;");
+ reg.GetPrefixString(type) + std::to_string(reg.GetIndex()) +
+ '_' + suffix + " = 0;");
}
}
declarations.AddNewLine();
@@ -558,7 +561,7 @@ private:
/// Build the GLSL register list.
void BuildRegisterList() {
for (size_t index = 0; index < Register::NumRegisters; ++index) {
- regs.emplace_back(index, shader);
+ regs.emplace_back(index, shader, suffix);
}
}
@@ -620,16 +623,17 @@ private:
std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
std::vector<SamplerEntry> used_samplers;
const Maxwell3D::Regs::ShaderStage& stage;
+ const std::string& suffix;
};
class GLSLGenerator {
public:
GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
- u32 main_offset, Maxwell3D::Regs::ShaderStage stage)
+ u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
- stage(stage) {
+ stage(stage), suffix(suffix) {
- Generate();
+ Generate(suffix);
}
std::string GetShaderCode() {
@@ -644,7 +648,7 @@ public:
private:
/// Gets the Subroutine object corresponding to the specified address.
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
- auto iter = subroutines.find(Subroutine{begin, end});
+ auto iter = subroutines.find(Subroutine{begin, end, suffix});
ASSERT(iter != subroutines.end());
return *iter;
}
@@ -689,7 +693,7 @@ private:
// Can't assign to the constant predicate.
ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
- std::string variable = 'p' + std::to_string(pred);
+ std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
shader.AddLine(variable + " = " + value + ';');
declr_predicates.insert(std::move(variable));
}
@@ -707,7 +711,7 @@ private:
if (index == static_cast<u64>(Pred::UnusedIndex))
variable = "true";
else
- variable = 'p' + std::to_string(index);
+ variable = 'p' + std::to_string(index) + '_' + suffix;
if (negate) {
return "!(" + variable + ')';
@@ -728,10 +732,10 @@ private:
const std::string& op_a, const std::string& op_b) const {
using Tegra::Shader::PredCondition;
static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
- {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
- {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
- {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
- {PredCondition::NotEqualWithNan, "!="},
+ {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
+ {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
+ {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
+ {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="},
};
const auto& comparison{PredicateComparisonStrings.find(condition)};
@@ -739,7 +743,8 @@ private:
"Unknown predicate comparison operation");
std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
- if (condition == PredCondition::NotEqualWithNan) {
+ if (condition == PredCondition::LessThanWithNan ||
+ condition == PredCondition::NotEqualWithNan) {
predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
}
@@ -968,6 +973,29 @@ private:
regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
break;
}
+ case OpCode::Id::FADD32I: {
+ std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string op_b = GetImmediate32(instr);
+
+ if (instr.fadd32i.abs_a) {
+ op_a = "abs(" + op_a + ')';
+ }
+
+ if (instr.fadd32i.negate_a) {
+ op_a = "-(" + op_a + ')';
+ }
+
+ if (instr.fadd32i.abs_b) {
+ op_b = "abs(" + op_b + ')';
+ }
+
+ if (instr.fadd32i.negate_b) {
+ op_b = "-(" + op_b + ')';
+ }
+
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1);
+ break;
+ }
}
break;
}
@@ -1616,16 +1644,32 @@ private:
shader.AddLine("color.a = " + regs.GetRegisterAsFloat(3) + ';');
}
- shader.AddLine("return true;");
- if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
- // If this is an unconditional exit then just end processing here, otherwise
- // we have to account for the possibility of the condition not being met, so
- // continue processing the next instruction.
- offset = PROGRAM_END - 1;
+ switch (instr.flow.cond) {
+ case Tegra::Shader::FlowCondition::Always:
+ shader.AddLine("return true;");
+ if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
+ // If this is an unconditional exit then just end processing here,
+ // otherwise we have to account for the possibility of the condition
+ // not being met, so continue processing the next instruction.
+ offset = PROGRAM_END - 1;
+ }
+ break;
+
+ case Tegra::Shader::FlowCondition::Fcsm_Tr:
+ // TODO(bunnei): What is this used for? If we assume this conditon is not
+ // satisifed, dual vertex shaders in Farming Simulator make more sense
+ LOG_CRITICAL(HW_GPU, "Skipping unknown FlowCondition::Fcsm_Tr");
+ break;
+
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled flow condition: {}",
+ static_cast<u32>(instr.flow.cond.Value()));
+ UNREACHABLE();
}
break;
}
case OpCode::Id::KIL: {
+ ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
shader.AddLine("discard;");
break;
}
@@ -1646,8 +1690,9 @@ private:
// can ignore this when generating GLSL code.
break;
}
- case OpCode::Id::DEPBAR:
- case OpCode::Id::SYNC: {
+ case OpCode::Id::SYNC:
+ ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+ case OpCode::Id::DEPBAR: {
// TODO(Subv): Find out if we actually have to care about these instructions or if
// the GLSL compiler takes care of that for us.
LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed");
@@ -1687,7 +1732,7 @@ private:
return program_counter;
}
- void Generate() {
+ void Generate(const std::string& suffix) {
// Add declarations for all subroutines
for (const auto& subroutine : subroutines) {
shader.AddLine("bool " + subroutine.GetName() + "();");
@@ -1695,7 +1740,7 @@ private:
shader.AddNewLine();
// Add the main entry point
- shader.AddLine("bool exec_shader() {");
+ shader.AddLine("bool exec_" + suffix + "() {");
++shader.scope;
CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
--shader.scope;
@@ -1758,7 +1803,7 @@ private:
/// Add declarations for registers
void GenerateDeclarations() {
- regs.GenerateDeclarations();
+ regs.GenerateDeclarations(suffix);
for (const auto& pred : declr_predicates) {
declarations.AddLine("bool " + pred + " = false;");
@@ -1771,27 +1816,30 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
Maxwell3D::Regs::ShaderStage stage;
+ const std::string& suffix;
ShaderWriter shader;
ShaderWriter declarations;
- GLSLRegisterManager regs{shader, declarations, stage};
+ GLSLRegisterManager regs{shader, declarations, stage, suffix};
// Declarations
std::set<std::string> declr_predicates;
}; // namespace Decompiler
std::string GetCommonDeclarations() {
- std::string declarations = "bool exec_shader();\n";
+ std::string declarations;
declarations += "#define MAX_CONSTBUFFER_ELEMENTS " +
std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4)));
+ declarations += '\n';
return declarations;
}
boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
- Maxwell3D::Regs::ShaderStage stage) {
+ Maxwell3D::Regs::ShaderStage stage,
+ const std::string& suffix) {
try {
- auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines();
- GLSLGenerator generator(subroutines, program_code, main_offset, stage);
+ auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
+ GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
} catch (const DecompileFail& exception) {
LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 382c76b7a..7610dad3a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -20,7 +20,8 @@ using Tegra::Engines::Maxwell3D;
std::string GetCommonDeclarations();
boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
- Maxwell3D::Regs::ShaderStage stage);
+ Maxwell3D::Regs::ShaderStage stage,
+ const std::string& suffix);
} // namespace Decompiler
} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index c1e6fac9f..129c777d1 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -17,10 +17,17 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConf
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations();
+ out += "bool exec_vertex();\n";
+
+ if (setup.IsDualProgram()) {
+ out += "bool exec_vertex_b();\n";
+ }
+
+ ProgramResult program =
+ Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
+ Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
+ .get_value_or({});
- ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
- Maxwell3D::Regs::ShaderStage::Vertex)
- .get_value_or({});
out += R"(
out gl_PerVertex {
@@ -34,7 +41,14 @@ layout (std140) uniform vs_config {
};
void main() {
- exec_shader();
+ exec_vertex();
+)";
+
+ if (setup.IsDualProgram()) {
+ out += " exec_vertex_b();";
+ }
+
+ out += R"(
// Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy;
@@ -44,8 +58,19 @@ void main() {
// For now, this is here to bring order in lieu of proper emulation
position.w = 1.0;
}
+
)";
+
out += program.first;
+
+ if (setup.IsDualProgram()) {
+ ProgramResult program_b =
+ Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
+ Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
+ .get_value_or({});
+ out += program_b.first;
+ }
+
return {out, program.second};
}
@@ -53,12 +78,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations();
+ out += "bool exec_fragment();\n";
- ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
- Maxwell3D::Regs::ShaderStage::Fragment)
- .get_value_or({});
+ ProgramResult program =
+ Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
+ Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
+ .get_value_or({});
out += R"(
-
in vec4 position;
out vec4 color;
@@ -67,7 +93,7 @@ layout (std140) uniform fs_config {
};
void main() {
- exec_shader();
+ exec_fragment();
}
)";
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index ed890e0f9..4729ce0fc 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -115,21 +115,48 @@ struct ShaderEntries {
using ProgramResult = std::pair<std::string, ShaderEntries>;
struct ShaderSetup {
- ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {}
+ ShaderSetup(const ProgramCode& program_code) {
+ program.code = program_code;
+ }
+
+ struct {
+ ProgramCode code;
+ ProgramCode code_b; // Used for dual vertex shaders
+ } program;
- ProgramCode program_code;
bool program_code_hash_dirty = true;
u64 GetProgramCodeHash() {
if (program_code_hash_dirty) {
- program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
+ program_code_hash = GetNewHash();
program_code_hash_dirty = false;
}
return program_code_hash;
}
+ /// Used in scenarios where we have a dual vertex shaders
+ void SetProgramB(const ProgramCode& program_b) {
+ program.code_b = program_b;
+ has_program_b = true;
+ }
+
+ bool IsDualProgram() const {
+ return has_program_b;
+ }
+
private:
+ u64 GetNewHash() const {
+ if (has_program_b) {
+ // Compute hash over dual shader programs
+ return Common::ComputeHash64(&program, sizeof(program));
+ } else {
+ // Compute hash over a single shader program
+ return Common::ComputeHash64(&program.code, program.code.size());
+ }
+ }
+
u64 program_code_hash{};
+ bool has_program_b{};
};
struct MaxwellShaderConfigCommon {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 00841e937..1930fa6ef 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -92,11 +92,24 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix;
}
+ScopeAcquireGLContext::ScopeAcquireGLContext() {
+ if (Settings::values.use_multi_core) {
+ VideoCore::g_emu_window->MakeCurrent();
+ }
+}
+ScopeAcquireGLContext::~ScopeAcquireGLContext() {
+ if (Settings::values.use_multi_core) {
+ VideoCore::g_emu_window->DoneCurrent();
+ }
+}
+
RendererOpenGL::RendererOpenGL() = default;
RendererOpenGL::~RendererOpenGL() = default;
/// Swap buffers (render frame)
void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) {
+ ScopeAcquireGLContext acquire_context;
+
Core::System::GetInstance().perf_stats.EndSystemFrame();
// Maintain the rasterizer's state as a priority
@@ -418,7 +431,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
/// Initialize the renderer
bool RendererOpenGL::Init() {
- render_window->MakeCurrent();
+ ScopeAcquireGLContext acquire_context;
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 21f0d298c..fd0267cf5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -31,6 +31,13 @@ struct ScreenInfo {
TextureInfo texture;
};
+/// Helper class to acquire/release OpenGL context within a given scope
+class ScopeAcquireGLContext : NonCopyable {
+public:
+ ScopeAcquireGLContext();
+ ~ScopeAcquireGLContext();
+};
+
class RendererOpenGL : public RendererBase {
public:
RendererOpenGL();
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index b3937b2fe..be18aa299 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -62,6 +62,7 @@ u32 BytesPerPixel(TextureFormat format) {
return 4;
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
+ case TextureFormat::G8R8:
return 2;
case TextureFormat::R8:
return 1;
@@ -77,6 +78,8 @@ u32 BytesPerPixel(TextureFormat format) {
static u32 DepthBytesPerPixel(DepthFormat format) {
switch (format) {
+ case DepthFormat::Z16_UNORM:
+ return 2;
case DepthFormat::S8_Z24_UNORM:
case DepthFormat::Z24_S8_UNORM:
case DepthFormat::Z32_FLOAT:
@@ -110,6 +113,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
case TextureFormat::R8:
+ case TextureFormat::G8R8:
case TextureFormat::R16_G16_B16_A16:
case TextureFormat::R32_G32_B32_A32:
case TextureFormat::BF10GF11RF11:
@@ -133,6 +137,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
switch (format) {
+ case DepthFormat::Z16_UNORM:
case DepthFormat::S8_Z24_UNORM:
case DepthFormat::Z24_S8_UNORM:
case DepthFormat::Z32_FLOAT:
@@ -164,6 +169,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
case TextureFormat::R8:
+ case TextureFormat::G8R8:
case TextureFormat::BF10GF11RF11:
case TextureFormat::R32_G32_B32_A32:
// TODO(Subv): For the time being just forward the same data without any decoding.
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 833085559..159b2c32b 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -20,7 +20,10 @@
EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {}
void EmuThread::run() {
- render_window->MakeCurrent();
+ if (!Settings::values.use_multi_core) {
+ // Single core mode must acquire OpenGL context for entire emulation session
+ render_window->MakeCurrent();
+ }
MicroProfileOnThreadCreate("EmuThread");
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 9ce8d7c27..16812e077 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -374,6 +374,8 @@ bool GMainWindow::LoadROM(const QString& filename) {
const Core::System::ResultStatus result{system.Load(render_window, filename.toStdString())};
+ render_window->DoneCurrent();
+
if (result != Core::System::ResultStatus::Success) {
switch (result) {
case Core::System::ResultStatus::ErrorGetLoader:
@@ -916,6 +918,7 @@ int main(int argc, char* argv[]) {
QCoreApplication::setApplicationName("yuzu");
QApplication::setAttribute(Qt::AA_X11InitThreads);
+ QApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity);
QApplication app(argc, argv);
// Qt changes the locale and causes issues in float conversion using std::to_string() when
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index e6f0bbe8f..ec73f08bd 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -126,7 +126,7 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
if (render_window == nullptr) {
- LOG_CRITICAL(Frontend, "Failed to create SDL2 window! Exiting...");
+ LOG_CRITICAL(Frontend, "Failed to create SDL2 window! {}", SDL_GetError());
exit(1);
}
@@ -137,12 +137,12 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
gl_context = SDL_GL_CreateContext(render_window);
if (gl_context == nullptr) {
- LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context! Exiting...");
+ LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context! {}", SDL_GetError());
exit(1);
}
if (!gladLoadGLLoader(static_cast<GLADloadproc>(SDL_GL_GetProcAddress))) {
- LOG_CRITICAL(Frontend, "Failed to initialize GL functions! Exiting...");
+ LOG_CRITICAL(Frontend, "Failed to initialize GL functions! {}", SDL_GetError());
exit(1);
}
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 5f67ae4ee..24db1065a 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -22,10 +22,8 @@
#include "yuzu_cmd/config.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
-#ifdef _MSC_VER
-#include <getopt.h>
-#else
#include <getopt.h>
+#ifndef _MSC_VER
#include <unistd.h>
#endif
@@ -150,6 +148,11 @@ int main(int argc, char** argv) {
std::unique_ptr<EmuWindow_SDL2> emu_window{std::make_unique<EmuWindow_SDL2>(fullscreen)};
+ if (!Settings::values.use_multi_core) {
+ // Single core mode must acquire OpenGL context for entire emulation session
+ emu_window->MakeCurrent();
+ }
+
Core::System& system{Core::System::GetInstance()};
SCOPE_EXIT({ system.Shutdown(); });