61 files changed, 632 insertions, 384 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3cd896a0f..d85f1e9d1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(host_shaders)
+
 add_library(video_core STATIC
     buffer_cache/buffer_block.h
     buffer_cache/buffer_cache.h
@@ -244,6 +246,9 @@ create_target_directory_groups(video_core)
 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad xbyak)
 
+add_dependencies(video_core host_shaders)
+target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
+
 if (ENABLE_VULKAN)
     target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ff10ff40d..6e50661a3 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,13 @@
 
 namespace Tegra::Engines {
 
-Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
+Fermi2D::Fermi2D() = default;
+
+Fermi2D::~Fermi2D() = default;
+
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+    rasterizer = &rasterizer_;
+}
 
 void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
     ASSERT_MSG(method < Regs::NUM_REGS,
@@ -87,7 +93,7 @@ void Fermi2D::HandleSurfaceCopy() {
     copy_config.src_rect = src_rect;
     copy_config.dst_rect = dst_rect;
 
-    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
         UNIMPLEMENTED();
     }
 }
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 8f37d053f..213abfaae 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -34,8 +34,11 @@ namespace Tegra::Engines {
 
 class Fermi2D final : public EngineInterface {
 public:
-    explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
-    ~Fermi2D() = default;
+    explicit Fermi2D();
+    ~Fermi2D();
+
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
 
     /// Write the value to the register identified by method.
     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
@@ -149,7 +152,7 @@ public:
     };
 
 private:
-    VideoCore::RasterizerInterface& rasterizer;
+    VideoCore::RasterizerInterface* rasterizer;
 
     /// Performs the copy from the source surface to the destination surface as configured in the
     /// registers.
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a82b06a38..898370739 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -16,14 +16,15 @@
 
 namespace Tegra::Engines {
 
-KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                             MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
-                                                                                  memory_manager,
-                                                                                  regs.upload} {}
+KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
+    : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
 
 KeplerCompute::~KeplerCompute() = default;
 
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+    rasterizer = &rasterizer_;
+}
+
 void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
     ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid KeplerCompute register, increase the size of the Regs structure");
@@ -104,11 +105,11 @@ SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
 }
 
 VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
-    return rasterizer.AccessGuestDriverProfile();
+    return rasterizer->AccessGuestDriverProfile();
 }
 
 const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
-    return rasterizer.AccessGuestDriverProfile();
+    return rasterizer->AccessGuestDriverProfile();
 }
 
 void KeplerCompute::ProcessLaunch() {
@@ -119,7 +120,7 @@ void KeplerCompute::ProcessLaunch() {
     const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
     LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
 
-    rasterizer.DispatchCompute(code_addr);
+    rasterizer->DispatchCompute(code_addr);
 }
 
 Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index b7f668d88..7f2500aab 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -42,10 +42,12 @@ namespace Tegra::Engines {
 
 class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
 public:
-    explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                           MemoryManager& memory_manager);
+    explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
     ~KeplerCompute();
 
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
     static constexpr std::size_t NumConstBuffers = 8;
 
     struct Regs {
@@ -230,11 +232,6 @@ public:
     const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
 
 private:
-    Core::System& system;
-    VideoCore::RasterizerInterface& rasterizer;
-    MemoryManager& memory_manager;
-    Upload::State upload_state;
-
     void ProcessLaunch();
 
     /// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -242,6 +239,11 @@ private:
 
     /// Retrieves information about a specific TSC entry from the TSC buffer.
     Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
+
+    Core::System& system;
+    MemoryManager& memory_manager;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+    Upload::State upload_state;
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c01436295..33854445f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,14 +22,19 @@ using VideoCore::QueryType;
 /// First register id that is actually a Macro call.
 constexpr u32 MacroRegistersStart = 0xE00;
 
-Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                     MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
-      macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
+Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
+    : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
+      upload_state{memory_manager, regs.upload} {
     dirty.flags.flip();
     InitializeRegisterDefaults();
 }
 
+Maxwell3D::~Maxwell3D() = default;
+
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+    rasterizer = &rasterizer_;
+}
+
 void Maxwell3D::InitializeRegisterDefaults() {
     // Initializes registers to their default values - what games expect them to be at boot. This is
     // for certain registers that may not be explicitly set by games.
@@ -192,7 +197,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
 
     switch (method) {
     case MAXWELL3D_REG_INDEX(wait_for_idle): {
-        rasterizer.WaitForIdle();
+        rasterizer->WaitForIdle();
         break;
     }
     case MAXWELL3D_REG_INDEX(shadow_ram_control): {
@@ -402,7 +407,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
 
     const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
     if (ShouldExecute()) {
-        rasterizer.Draw(is_indexed, true);
+        rasterizer->Draw(is_indexed, true);
     }
 
     // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -465,7 +470,7 @@ void Maxwell3D::ProcessQueryGet() {
     switch (regs.query.query_get.operation) {
     case Regs::QueryOperation::Release:
         if (regs.query.query_get.fence == 1) {
-            rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+            rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
         } else {
             StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
         }
@@ -533,7 +538,7 @@ void Maxwell3D::ProcessQueryCondition() {
 void Maxwell3D::ProcessCounterReset() {
     switch (regs.counter_reset) {
     case Regs::CounterReset::SampleCnt:
-        rasterizer.ResetCounter(QueryType::SamplesPassed);
+        rasterizer->ResetCounter(QueryType::SamplesPassed);
         break;
     default:
         LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
@@ -547,7 +552,7 @@ void Maxwell3D::ProcessSyncPoint() {
     const u32 increment = regs.sync_info.increment.Value();
     [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
     if (increment) {
-        rasterizer.SignalSyncPoint(sync_point);
+        rasterizer->SignalSyncPoint(sync_point);
     }
 }
 
@@ -570,7 +575,7 @@ void Maxwell3D::DrawArrays() {
 
     const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
     if (ShouldExecute()) {
-        rasterizer.Draw(is_indexed, false);
+        rasterizer->Draw(is_indexed, false);
     }
 
     // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -590,8 +595,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
         return 0;
     case Regs::QuerySelect::SamplesPassed:
         // Deferred.
-        rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
-                         system.GPU().GetTicks());
+        rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+                          system.GPU().GetTicks());
         return {};
     default:
         LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
@@ -718,7 +723,7 @@ void Maxwell3D::ProcessClearBuffers() {
            regs.clear_buffers.R == regs.clear_buffers.B &&
            regs.clear_buffers.R == regs.clear_buffers.A);
 
-    rasterizer.Clear();
+    rasterizer->Clear();
 }
 
 u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
@@ -752,11 +757,11 @@ SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
 }
 
 VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
-    return rasterizer.AccessGuestDriverProfile();
+    return rasterizer->AccessGuestDriverProfile();
 }
 
 const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
-    return rasterizer.AccessGuestDriverProfile();
+    return rasterizer->AccessGuestDriverProfile();
 }
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ef1618990..bc289c55d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -51,9 +51,11 @@ namespace Tegra::Engines {
 
 class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
 public:
-    explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                       MemoryManager& memory_manager);
-    ~Maxwell3D() = default;
+    explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
+    ~Maxwell3D();
+
+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
 
     /// Register structure of the Maxwell3D engine.
     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -647,7 +649,7 @@ public:
                     GetX() + GetWidth(),  // right
                     GetY()                // bottom
                 };
-            };
+            }
 
             f32 GetX() const {
                 return std::max(0.0f, translate_x - std::fabs(scale_x));
@@ -1418,12 +1420,12 @@ public:
         return execute_on;
     }
 
-    VideoCore::RasterizerInterface& GetRasterizer() {
-        return rasterizer;
+    VideoCore::RasterizerInterface& Rasterizer() {
+        return *rasterizer;
     }
 
-    const VideoCore::RasterizerInterface& GetRasterizer() const {
-        return rasterizer;
+    const VideoCore::RasterizerInterface& Rasterizer() const {
+        return *rasterizer;
     }
 
     /// Notify a memory write has happened.
@@ -1460,11 +1462,10 @@ private:
     void InitializeRegisterDefaults();
 
     Core::System& system;
-
-    VideoCore::RasterizerInterface& rasterizer;
-
     MemoryManager& memory_manager;
 
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+
     /// Start offsets of each macro in macro_memory
     std::array<u32, 0x80> macro_positions = {};
 
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a2d3d7823..e88290754 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -94,7 +94,8 @@ void MaxwellDMA::CopyPitchToPitch() {
 }
 
 void MaxwellDMA::CopyBlockLinearToPitch() {
-    ASSERT(regs.src_params.block_size.depth == 0);
+    UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
+    UNIMPLEMENTED_IF(regs.src_params.layer != 0);
 
     // Optimized path for micro copies.
     const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
@@ -123,17 +124,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
         write_buffer.resize(dst_size);
     }
 
-    if (Settings::IsGPULevelExtreme()) {
-        memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
-        memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
-    } else {
-        memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
-        memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
-    }
+    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
+    memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
 
     UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
-                     read_buffer.data() + src_layer_size * src_params.layer, write_buffer.data(),
-                     block_height, src_params.origin.x, src_params.origin.y);
+                     block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
+                     read_buffer.data());
 
     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 }
@@ -198,7 +194,6 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
     if (read_buffer.size() < src_size) {
         read_buffer.resize(src_size);
     }
-
     if (write_buffer.size() < dst_size) {
         write_buffer.resize(dst_size);
     }
@@ -212,8 +207,8 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
     }
 
     UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
-                     bytes_per_pixel, read_buffer.data(), write_buffer.data(),
-                     regs.src_params.block_size.height, pos_x, pos_y);
+                     bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
+                     write_buffer.data(), read_buffer.data());
 
     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 }
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 8b2a6a42c..06cc12d5a 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -5,15 +5,10 @@
 #pragma once
 
 #include <algorithm>
-#include <array>
-#include <memory>
 #include <queue>
 
-#include "common/assert.h"
 #include "common/common_types.h"
 #include "core/core.h"
-#include "core/memory.h"
-#include "core/settings.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 512578c8b..acb6e6d46 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -27,21 +27,28 @@ namespace Tegra {
 
 MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
-GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
-    : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
-    auto& rasterizer{renderer->Rasterizer()};
-    memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
-    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
-    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
-    fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
-    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
-    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
-    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
-    shader_notify = std::make_unique<VideoCore::ShaderNotify>();
-}
+GPU::GPU(Core::System& system_, bool is_async_)
+    : system{system_}, dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+      memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
+      maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
+      fermi_2d{std::make_unique<Engines::Fermi2D>()},
+      kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
+      maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
+      kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+      shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
 
 GPU::~GPU() = default;
 
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+    renderer = std::move(renderer_);
+
+    VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
+    memory_manager->BindRasterizer(rasterizer);
+    maxwell_3d->BindRasterizer(rasterizer);
+    fermi_2d->BindRasterizer(rasterizer);
+    kepler_compute->BindRasterizer(rasterizer);
+}
+
 Engines::Maxwell3D& GPU::Maxwell3D() {
     return *maxwell_3d;
 }
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ebfc7b0c7..c7d11deb2 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -142,11 +142,6 @@ class MemoryManager;
 
 class GPU {
 public:
-    explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                 bool is_async);
-
-    virtual ~GPU();
-
     struct MethodCall {
         u32 method{};
         u32 argument{};
@@ -162,6 +157,12 @@ public:
               method_count(method_count) {}
     };
 
+    explicit GPU(Core::System& system, bool is_async);
+    virtual ~GPU();
+
+    /// Binds a renderer to the GPU.
+    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
+
     /// Calls a GPU method.
     void CallMethod(const MethodCall& method_call);
 
@@ -345,8 +346,8 @@ private:
     bool ExecuteMethodOnEngine(u32 method);
 
 protected:
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     Core::System& system;
+    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     std::unique_ptr<VideoCore::RendererBase> renderer;
 
 private:
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 7b855f63e..70a3d5738 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,16 +10,14 @@
 
 namespace VideoCommon {
 
-GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
-                     std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
-    : GPU(system, std::move(renderer_), true), gpu_thread{system},
-      cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
-      gpu_context(std::move(context)) {}
+GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {}
 
 GPUAsynch::~GPUAsynch() = default;
 
 void GPUAsynch::Start() {
-    gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
+    gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+    cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+    cpu_context->MakeCurrent();
 }
 
 void GPUAsynch::ObtainContext() {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 15e9f1d38..f89c855a5 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -20,8 +20,7 @@ namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU asynchronously
 class GPUAsynch final : public Tegra::GPU {
 public:
-    explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                       std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+    explicit GPUAsynch(Core::System& system);
     ~GPUAsynch() override;
 
     void Start() override;
@@ -42,7 +41,6 @@ protected:
 private:
     GPUThread::ThreadManager gpu_thread;
     std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
-    std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index aaeb9811d..1ca47ddef 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,20 +7,18 @@
 
 namespace VideoCommon {
 
-GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                   std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
-    : GPU(system, std::move(renderer), false), context{std::move(context)} {}
+GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {}
 
 GPUSynch::~GPUSynch() = default;
 
 void GPUSynch::Start() {}
 
 void GPUSynch::ObtainContext() {
-    context->MakeCurrent();
+    renderer->Context().MakeCurrent();
 }
 
 void GPUSynch::ReleaseContext() {
-    context->DoneCurrent();
+    renderer->Context().DoneCurrent();
 }
 
 void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 762c20aa5..297258cb1 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -19,8 +19,7 @@ namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU synchronously
 class GPUSynch final : public Tegra::GPU {
 public:
-    explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                      std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+    explicit GPUSynch(Core::System& system);
     ~GPUSynch() override;
 
     void Start() override;
@@ -36,9 +35,6 @@ public:
 protected:
     void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
                              [[maybe_unused]] u32 value) const override {}
-
-private:
-    std::unique_ptr<Core::Frontend::GraphicsContext> context;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
new file mode 100644
index 000000000..aa62363a7
--- /dev/null
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(SHADER_FILES
+    opengl_present.frag
+    opengl_present.vert
+)
+
+set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
+set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
+
+set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
+add_custom_command(
+    OUTPUT
+        ${SHADER_DIR}
+    COMMAND
+        ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR}
+)
+
+set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
+set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
+
+foreach(FILENAME IN ITEMS ${SHADER_FILES})
+    string(REPLACE "." "_" SHADER_NAME ${FILENAME})
+    set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
+    set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
+    add_custom_command(
+        OUTPUT
+            ${HEADER_FILE}
+        COMMAND
+            ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE}
+        MAIN_DEPENDENCY
+            ${SOURCE_FILE}
+        DEPENDS
+            ${HEADER_GENERATOR}
+            ${INPUT_FILE}
+    )
+    set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE})
+endforeach()
+
+add_custom_target(host_shaders
+    DEPENDS
+        ${SHADER_HEADERS}
+    SOURCES
+        ${SHADER_FILES}
+)
diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake
new file mode 100644
index 000000000..368bce0ed
--- /dev/null
+++ b/src/video_core/host_shaders/StringShaderHeader.cmake
@@ -0,0 +1,11 @@
+set(SOURCE_FILE ${CMAKE_ARGV3})
+set(HEADER_FILE ${CMAKE_ARGV4})
+set(INPUT_FILE ${CMAKE_ARGV5})
+
+get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME)
+string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME})
+string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME)
+
+file(READ ${SOURCE_FILE} CONTENTS)
+
+configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY)
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
new file mode 100644
index 000000000..8a4cb024b
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -0,0 +1,10 @@
+#version 430 core
+
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;
+
+layout (binding = 0) uniform sampler2D color_texture;
+
+void main() {
+    color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
+}
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
new file mode 100644
index 000000000..2235d31a4
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -0,0 +1,24 @@
+#version 430 core
+
+out gl_PerVertex {
+    vec4 gl_Position;
+};
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;
+
+// This is a truncated 3x3 matrix for 2D transformations:
+// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
+// The third column performs translation.
+// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
+// implicitly be [0, 0, 1]
+layout (location = 0) uniform mat3x2 modelview_matrix;
+
+void main() {
+    // Multiply input position by the rotscale part of the matrix and then manually translate by
+    // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
+    // to `vec3(vert_position.xy, 1.0)`
+    gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
+    frag_tex_coord = vert_tex_coord;
+}
diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in
new file mode 100644
index 000000000..ccdb0d2a9
--- /dev/null
+++ b/src/video_core/host_shaders/source_shader.h.in
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <string_view>
+
+namespace HostShaders {
+
+constexpr std::string_view @CONTENTS_NAME@ = R"(@CONTENTS@)";
+
+} // namespace HostShaders
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 0c9ff59a4..df00b57df 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -24,7 +24,7 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
     maxwell3d.regs.index_array.first = parameters[4];
 
     if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(true, true);
+        maxwell3d.Rasterizer().Draw(true, true);
     }
     maxwell3d.regs.index_array.count = 0;
     maxwell3d.mme_draw.instance_count = 0;
@@ -42,7 +42,7 @@ void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
     maxwell3d.mme_draw.instance_count = count;
 
     if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(false, true);
+        maxwell3d.Rasterizer().Draw(false, true);
     }
     maxwell3d.regs.vertex_buffer.count = 0;
     maxwell3d.mme_draw.instance_count = 0;
@@ -65,7 +65,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
     maxwell3d.regs.draw.topology.Assign(
         static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
     if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(true, true);
+        maxwell3d.Rasterizer().Draw(true, true);
     }
     maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
     maxwell3d.regs.index_array.count = 0;
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index aa5256419..bd01fd1f2 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -34,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho
         this->parameters = std::make_unique<u32[]>(num_parameters);
     }
     std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
-    this->num_parameters = num_parameters;
 
     // Execute the code until we hit an exit condition.
     bool keep_executing = true;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 844164645..16b2aaa27 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -14,11 +14,15 @@
 
 namespace Tegra {
 
-MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
-    : system{system}, rasterizer{rasterizer}, page_table(page_table_size) {}
+MemoryManager::MemoryManager(Core::System& system_)
+    : system{system_}, page_table(page_table_size) {}
 
 MemoryManager::~MemoryManager() = default;
 
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+    rasterizer = &rasterizer_;
+}
+
 GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
     u64 remaining_size{size};
     for (u64 offset{}; offset < size; offset += page_size) {
@@ -217,7 +221,7 @@ void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::siz
 
             // Flush must happen on the rasterizer interface, such that memory is always synchronous
             // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
-            rasterizer.FlushRegion(src_addr, copy_amount);
+            rasterizer->FlushRegion(src_addr, copy_amount);
             system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
         }
 
@@ -266,7 +270,7 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, s
 
             // Invalidate must happen on the rasterizer interface, such that memory is always
             // synchronous when it is written (even when in asynchronous GPU mode).
-            rasterizer.InvalidateRegion(dest_addr, copy_amount);
+            rasterizer->InvalidateRegion(dest_addr, copy_amount);
             system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
         }
 
@@ -312,10 +316,10 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_add
     WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
 }
 
-bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
+bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     const auto cpu_addr{GpuToCpuAddress(gpu_addr)};
     if (!cpu_addr) {
-        return {};
+        return false;
     }
     const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size};
     return page <= Core::Memory::PAGE_SIZE;
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 681bd9588..53c8d122a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -31,19 +31,19 @@ public:
     constexpr PageEntry(State state) : state{state} {}
     constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
 
-    constexpr bool IsUnmapped() const {
+    [[nodiscard]] constexpr bool IsUnmapped() const {
         return state == State::Unmapped;
     }
 
-    constexpr bool IsAllocated() const {
+    [[nodiscard]] constexpr bool IsAllocated() const {
         return state == State::Allocated;
     }
 
-    constexpr bool IsValid() const {
+    [[nodiscard]] constexpr bool IsValid() const {
         return !IsUnmapped() && !IsAllocated();
     }
 
-    constexpr VAddr ToAddress() const {
+    [[nodiscard]] constexpr VAddr ToAddress() const {
         if (!IsValid()) {
             return {};
         }
@@ -51,7 +51,7 @@ public:
         return static_cast<VAddr>(state) << ShiftBits;
     }
 
-    constexpr PageEntry operator+(u64 offset) {
+    [[nodiscard]] constexpr PageEntry operator+(u64 offset) const {
         // If this is a reserved value, offsets do not apply
         if (!IsValid()) {
             return *this;
@@ -68,19 +68,22 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");
 
 class MemoryManager final {
 public:
-    explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
+    explicit MemoryManager(Core::System& system);
     ~MemoryManager();
 
-    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+    /// Binds a renderer to the memory manager.
+    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
+    [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
 
     template <typename T>
-    T Read(GPUVAddr addr) const;
+    [[nodiscard]] T Read(GPUVAddr addr) const;
 
     template <typename T>
     void Write(GPUVAddr addr, T data);
 
-    u8* GetPointer(GPUVAddr addr);
-    const u8* GetPointer(GPUVAddr addr) const;
+    [[nodiscard]] u8* GetPointer(GPUVAddr addr);
+    [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
 
     /**
      * ReadBlock and WriteBlock are full read and write operations over virtual
@@ -109,24 +112,24 @@ public:
     /**
      * IsGranularRange checks if a gpu region can be simply read with a pointer.
      */
-    bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
+    [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
 
-    GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
-    GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
-    std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
-    GPUVAddr Allocate(std::size_t size, std::size_t align);
+    [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
+    [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
+    [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
+    [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
     void Unmap(GPUVAddr gpu_addr, std::size_t size);
 
 private:
-    PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
+    [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
     void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
     GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
-    std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
+    [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
 
     void TryLockPage(PageEntry page_entry, std::size_t size);
     void TryUnlockPage(PageEntry page_entry, std::size_t size);
 
-    static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
+    [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
         return (gpu_addr >> page_bits) & page_table_mask;
     }
 
@@ -141,7 +144,7 @@ private:
 
     Core::System& system;
 
-    VideoCore::RasterizerInterface& rasterizer;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
 
     std::vector<PageEntry> page_table;
 };
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index dfb06e87e..a93a1732c 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -9,7 +9,9 @@
 
 namespace VideoCore {
 
-RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} {
+RendererBase::RendererBase(Core::Frontend::EmuWindow& window_,
+                           std::unique_ptr<Core::Frontend::GraphicsContext> context_)
+    : render_window{window_}, context{std::move(context_)} {
     RefreshBaseSettings();
 }
 
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d85219b6..649074acd 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -15,7 +15,8 @@
 
 namespace Core::Frontend {
 class EmuWindow;
-}
+class GraphicsContext;
+} // namespace Core::Frontend
 
 namespace VideoCore {
 
@@ -25,14 +26,15 @@ struct RendererSettings {
 
     // Screenshot
     std::atomic<bool> screenshot_requested{false};
-    void* screenshot_bits;
+    void* screenshot_bits{};
     std::function<void()> screenshot_complete_callback;
     Layout::FramebufferLayout screenshot_framebuffer_layout;
 };
 
 class RendererBase : NonCopyable {
 public:
-    explicit RendererBase(Core::Frontend::EmuWindow& window);
+    explicit RendererBase(Core::Frontend::EmuWindow& window,
+                          std::unique_ptr<Core::Frontend::GraphicsContext> context);
     virtual ~RendererBase();
 
     /// Initialize the renderer
@@ -68,6 +70,14 @@ public:
         return *rasterizer;
     }
 
+    Core::Frontend::GraphicsContext& Context() {
+        return *context;
+    }
+
+    const Core::Frontend::GraphicsContext& Context() const {
+        return *context;
+    }
+
     Core::Frontend::EmuWindow& GetRenderWindow() {
         return render_window;
     }
@@ -94,6 +104,7 @@ public:
 protected:
     Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
     std::unique_ptr<RasterizerInterface> rasterizer;
+    std::unique_ptr<Core::Frontend::GraphicsContext> context;
     f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
     int m_current_frame = 0;  ///< Current frame, should be set by the renderer
 
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index ec5421afa..3d2588dd2 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,16 +4,17 @@
 
 #include "common/assert.h"
 
+#include <glad/glad.h>
+
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_fence_manager.h"
 
 namespace OpenGL {
 
-GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
-    : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
+GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {}
 
 GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
-    : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
+    : FenceBase(address, payload, is_stubbed) {}
 
 GLInnerFence::~GLInnerFence() = default;
 
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index c917b3343..1686cf5c8 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -5,7 +5,6 @@
 #pragma once
 
 #include <memory>
-#include <glad/glad.h>
 
 #include "common/common_types.h"
 #include "video_core/fence_manager.h"
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index cb284db77..4af5824cd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -177,15 +177,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
     }
 
     if (device.UseAsynchronousShaders()) {
-        // Max worker threads we should allow
-        constexpr u32 MAX_THREADS = 4;
-        // Deduce how many threads we can use
-        const u32 threads_used = std::thread::hardware_concurrency() / 4;
-        // Always allow at least 1 thread regardless of our settings
-        const auto max_worker_count = std::max(1U, threads_used);
-        // Don't use more than MAX_THREADS
-        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
-        async_shaders.AllocateWorkers(worker_count);
+        async_shaders.AllocateWorkers();
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index a787e27d2..0ebcec427 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <string_view>
 #include <utility>
 #include <glad/glad.h>
 #include "common/common_types.h"
@@ -82,11 +83,13 @@ void OGLSampler::Release() {
     handle = 0;
 }
 
-void OGLShader::Create(const char* source, GLenum type) {
-    if (handle != 0)
+void OGLShader::Create(std::string_view source, GLenum type) {
+    if (handle != 0) {
         return;
-    if (source == nullptr)
+    }
+    if (source.empty()) {
         return;
+    }
 
     MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
     handle = GLShader::LoadShader(source, type);
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index b05cb641c..f48398669 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <string_view>
 #include <utility>
 #include <glad/glad.h>
 #include "common/common_types.h"
@@ -127,7 +128,7 @@ public:
         return *this;
     }
 
-    void Create(const char* source, GLenum type);
+    void Create(std::string_view source, GLenum type);
 
     void Release();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index eb49a36bf..a07d56ef0 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -22,6 +22,7 @@
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_arb_decompiler.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 2dcc2b0eb..40c0877c1 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -73,7 +73,7 @@ ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
 
 ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
 
-bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
+bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
     if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
         return false;
     }
@@ -144,7 +144,7 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
     return true;
 }
 
-bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
+bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
     if (file.WriteObject(static_cast<u32>(type)) != 1 ||
         file.WriteObject(static_cast<u32>(code.size())) != 1 ||
         file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
@@ -214,20 +214,20 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
     // Skip games without title id
     const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
     if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
-        return {};
+        return std::nullopt;
     }
 
-    FileUtil::IOFile file(GetTransferablePath(), "rb");
+    Common::FS::IOFile file(GetTransferablePath(), "rb");
     if (!file.IsOpen()) {
         LOG_INFO(Render_OpenGL, "No transferable shader cache found");
         is_usable = true;
-        return {};
+        return std::nullopt;
     }
 
     u32 version{};
     if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
         LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
-        return {};
+        return std::nullopt;
     }
 
     if (version < NativeVersion) {
@@ -235,12 +235,12 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
         file.Close();
         InvalidateTransferable();
         is_usable = true;
-        return {};
+        return std::nullopt;
     }
     if (version > NativeVersion) {
         LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
                                    "of the emulator, skipping");
-        return {};
+        return std::nullopt;
     }
 
     // Version is valid, load the shaders
@@ -249,7 +249,7 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
         ShaderDiskCacheEntry& entry = entries.emplace_back();
         if (!entry.Load(file)) {
             LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
-            return {};
+            return std::nullopt;
         }
     }
 
@@ -262,7 +262,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled()
         return {};
     }
 
-    FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+    Common::FS::IOFile file(GetPrecompiledPath(), "rb");
     if (!file.IsOpen()) {
         LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
         return {};
@@ -279,7 +279,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled()
 }
 
 std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
-    FileUtil::IOFile& file) {
+    Common::FS::IOFile& file) {
     // Read compressed file from disk and decompress to virtual precompiled cache file
     std::vector<u8> compressed(file.GetSize());
     file.ReadBytes(compressed.data(), compressed.size());
@@ -290,12 +290,12 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
     ShaderCacheVersionHash file_hash{};
     if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
         precompiled_cache_virtual_file_offset = 0;
-        return {};
+        return std::nullopt;
     }
     if (GetShaderCacheVersionHash() != file_hash) {
         LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
         precompiled_cache_virtual_file_offset = 0;
-        return {};
+        return std::nullopt;
     }
 
     std::vector<ShaderDiskCachePrecompiled> entries;
@@ -305,19 +305,20 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
         if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
             !LoadObjectFromPrecompiled(entry.binary_format) ||
             !LoadObjectFromPrecompiled(binary_size)) {
-            return {};
+            return std::nullopt;
         }
 
         entry.binary.resize(binary_size);
         if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
-            return {};
+            return std::nullopt;
         }
     }
-    return entries;
+
+    return std::move(entries);
 }
 
 void ShaderDiskCacheOpenGL::InvalidateTransferable() {
-    if (!FileUtil::Delete(GetTransferablePath())) {
+    if (!Common::FS::Delete(GetTransferablePath())) {
         LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
                   GetTransferablePath());
     }
@@ -328,7 +329,7 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
     // Clear virtaul precompiled cache file
     precompiled_cache_virtual_file.Resize(0);
 
-    if (!FileUtil::Delete(GetPrecompiledPath())) {
+    if (!Common::FS::Delete(GetPrecompiledPath())) {
         LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
     }
 }
@@ -344,7 +345,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
         return;
     }
 
-    FileUtil::IOFile file = AppendTransferableFile();
+    Common::FS::IOFile file = AppendTransferableFile();
     if (!file.IsOpen()) {
         return;
     }
@@ -386,15 +387,15 @@ void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint progra
     }
 }
 
-FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
+Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
     if (!EnsureDirectories()) {
         return {};
     }
 
     const auto transferable_path{GetTransferablePath()};
-    const bool existed = FileUtil::Exists(transferable_path);
+    const bool existed = Common::FS::Exists(transferable_path);
 
-    FileUtil::IOFile file(transferable_path, "ab");
+    Common::FS::IOFile file(transferable_path, "ab");
     if (!file.IsOpen()) {
         LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
         return {};
@@ -426,7 +427,7 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
         Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
 
     const auto precompiled_path{GetPrecompiledPath()};
-    FileUtil::IOFile file(precompiled_path, "wb");
+    Common::FS::IOFile file(precompiled_path, "wb");
 
     if (!file.IsOpen()) {
         LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
@@ -440,24 +441,24 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
 
 bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
     const auto CreateDir = [](const std::string& dir) {
-        if (!FileUtil::CreateDir(dir)) {
+        if (!Common::FS::CreateDir(dir)) {
             LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
             return false;
         }
         return true;
     };
 
-    return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
+    return CreateDir(Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)) &&
            CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
            CreateDir(GetPrecompiledDir());
 }
 
 std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
-    return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+    return Common::FS::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
 }
 
 std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
-    return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+    return Common::FS::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
 }
 
 std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
@@ -469,7 +470,7 @@ std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
 }
 
 std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
-    return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
+    return Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir) + DIR_SEP "opengl";
 }
 
 std::string ShaderDiskCacheOpenGL::GetTitleID() const {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index a79cef0e9..db2bb73bc 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -25,7 +25,7 @@ namespace Core {
 class System;
 }
 
-namespace FileUtil {
+namespace Common::FS {
 class IOFile;
 }
 
@@ -38,9 +38,9 @@ struct ShaderDiskCacheEntry {
     ShaderDiskCacheEntry();
     ~ShaderDiskCacheEntry();
 
-    bool Load(FileUtil::IOFile& file);
+    bool Load(Common::FS::IOFile& file);
 
-    bool Save(FileUtil::IOFile& file) const;
+    bool Save(Common::FS::IOFile& file) const;
 
     bool HasProgramA() const {
         return !code.empty() && !code_b.empty();
@@ -97,10 +97,10 @@ public:
 private:
     /// Loads the transferable cache. Returns empty on failure.
     std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
-        FileUtil::IOFile& file);
+        Common::FS::IOFile& file);
 
     /// Opens current game's transferable file and write it's header if it doesn't exist
-    FileUtil::IOFile AppendTransferableFile() const;
+    Common::FS::IOFile AppendTransferableFile() const;
 
     /// Save precompiled header to precompiled_cache_in_memory
     void SavePrecompiledHeaderToVirtualPrecompiledCache();
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 9e74eda0d..4bf0d6090 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <string_view>
 #include <vector>
 #include <glad/glad.h>
 #include "common/assert.h"
@@ -11,7 +12,8 @@
 namespace OpenGL::GLShader {
 
 namespace {
-const char* GetStageDebugName(GLenum type) {
+
+std::string_view StageDebugName(GLenum type) {
     switch (type) {
     case GL_VERTEX_SHADER:
         return "vertex";
@@ -25,12 +27,17 @@ const char* GetStageDebugName(GLenum type) {
     UNIMPLEMENTED();
     return "unknown";
 }
+
 } // Anonymous namespace
 
-GLuint LoadShader(const char* source, GLenum type) {
-    const char* debug_type = GetStageDebugName(type);
+GLuint LoadShader(std::string_view source, GLenum type) {
+    const std::string_view debug_type = StageDebugName(type);
     const GLuint shader_id = glCreateShader(type);
-    glShaderSource(shader_id, 1, &source, nullptr);
+
+    const GLchar* source_string = source.data();
+    const GLint source_length = static_cast<GLint>(source.size());
+
+    glShaderSource(shader_id, 1, &source_string, &source_length);
     LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
     glCompileShader(shader_id);
 
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 03b7548c2..1b770532e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -38,7 +38,7 @@ void LogShaderSource(T... shaders) {
  * @param source String of the GLSL shader program
  * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
  */
-GLuint LoadShader(const char* source, GLenum type);
+GLuint LoadShader(std::string_view source, GLenum type);
 
 /**
  * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 0a7bc9e2b..f403f388a 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -403,7 +403,7 @@ void CachedSurface::DecorateSurfaceName() {
     LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
 }
 
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
+void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
     LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
 }
 
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index bfc4ddf5d..de8f18489 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -90,7 +90,7 @@ public:
                       Tegra::Texture::SwizzleSource z_source,
                       Tegra::Texture::SwizzleSource w_source);
 
-    void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
+    void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
 
     void MarkAsModified(u64 tick) {
         surface.MarkAsModified(true, tick);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 52e9e8250..b759c2dba 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -21,6 +21,8 @@
 #include "core/perf_stats.h"
 #include "core/settings.h"
 #include "core/telemetry_session.h"
+#include "video_core/host_shaders/opengl_present_frag.h"
+#include "video_core/host_shaders/opengl_present_vert.h"
 #include "video_core/morton.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -44,46 +46,6 @@ struct Frame {
     bool is_srgb{};                   /// Framebuffer is sRGB or RGB
 };
 
-constexpr char VERTEX_SHADER[] = R"(
-#version 430 core
-
-out gl_PerVertex {
-    vec4 gl_Position;
-};
-
-layout (location = 0) in vec2 vert_position;
-layout (location = 1) in vec2 vert_tex_coord;
-layout (location = 0) out vec2 frag_tex_coord;
-
-// This is a truncated 3x3 matrix for 2D transformations:
-// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
-// The third column performs translation.
-// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
-// implicitly be [0, 0, 1]
-layout (location = 0) uniform mat3x2 modelview_matrix;
-
-void main() {
-    // Multiply input position by the rotscale part of the matrix and then manually translate by
-    // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
-    // to `vec3(vert_position.xy, 1.0)`
-    gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
-    frag_tex_coord = vert_tex_coord;
-}
-)";
-
-constexpr char FRAGMENT_SHADER[] = R"(
-#version 430 core
-
-layout (location = 0) in vec2 frag_tex_coord;
-layout (location = 0) out vec4 color;
-
-layout (binding = 0) uniform sampler2D color_texture;
-
-void main() {
-    color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
-}
-)";
-
 constexpr GLint PositionLocation = 0;
 constexpr GLint TexCoordLocation = 1;
 constexpr GLint ModelViewMatrixLocation = 0;
@@ -313,10 +275,11 @@ public:
     }
 };
 
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
-                               Core::Frontend::GraphicsContext& context)
-    : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
-      program_manager{device}, has_debug_tool{HasDebugTool()} {}
+RendererOpenGL::RendererOpenGL(Core::System& system_, Core::Frontend::EmuWindow& emu_window_,
+                               Tegra::GPU& gpu_,
+                               std::unique_ptr<Core::Frontend::GraphicsContext> context_)
+    : RendererBase{emu_window_, std::move(context_)}, system{system_},
+      emu_window{emu_window_}, gpu{gpu_}, program_manager{device}, has_debug_tool{HasDebugTool()} {}
 
 RendererOpenGL::~RendererOpenGL() = default;
 
@@ -384,7 +347,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     if (has_debug_tool) {
         glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
         Present(0);
-        context.SwapBuffers();
+        context->SwapBuffers();
     }
 }
 
@@ -460,10 +423,10 @@ void RendererOpenGL::InitOpenGLObjects() {
 
     // Create shader programs
     OGLShader vertex_shader;
-    vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
+    vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
 
     OGLShader fragment_shader;
-    fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
+    fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
 
     vertex_program.Create(true, false, vertex_shader.handle);
     fragment_program.Create(true, false, fragment_shader.handle);
@@ -509,9 +472,10 @@ void RendererOpenGL::AddTelemetryFields() {
     LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
 
     auto& telemetry_session = system.TelemetrySession();
-    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
-    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
-    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+    constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
+    telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
+    telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
+    telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version);
 }
 
 void RendererOpenGL::CreateRasterizer() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 8b18d32e6..52ea76b7d 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -56,8 +56,9 @@ class FrameMailbox;
 
 class RendererOpenGL final : public VideoCore::RendererBase {
 public:
-    explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
-                            Core::Frontend::GraphicsContext& context);
+    explicit RendererOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
+                            Tegra::GPU& gpu,
+                            std::unique_ptr<Core::Frontend::GraphicsContext> context);
     ~RendererOpenGL() override;
 
     bool Init() override;
@@ -93,9 +94,9 @@ private:
 
     bool Present(int timeout_ms);
 
-    Core::Frontend::EmuWindow& emu_window;
     Core::System& system;
-    Core::Frontend::GraphicsContext& context;
+    Core::Frontend::EmuWindow& emu_window;
+    Tegra::GPU& gpu;
     const Device device;
 
     StateTracker state_tracker{system};
@@ -120,7 +121,7 @@ private:
     std::vector<u8> gl_framebuffer_data;
 
     /// Used for transforming the framebuffer orientation
-    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
+    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
     Common::Rectangle<int> framebuffer_crop_rect;
 
     /// Frame presentation mailbox
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
index 435c8c1b8..5b01020ec 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
+++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
@@ -65,10 +65,10 @@ bool NsightAftermathTracker::Initialize() {
         return false;
     }
 
-    dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash";
+    dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
 
-    (void)FileUtil::DeleteDirRecursively(dump_dir);
-    if (!FileUtil::CreateDir(dump_dir)) {
+    (void)Common::FS::DeleteDirRecursively(dump_dir);
+    if (!Common::FS::CreateDir(dump_dir)) {
         LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
         return false;
     }
@@ -106,7 +106,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
         return;
     }
 
-    FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
+    Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
     if (!file.IsOpen()) {
         LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
         return;
@@ -156,12 +156,12 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
     }();
 
     std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
-    if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
+    if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
         LOG_ERROR(Render_Vulkan, "Failed to write dump file");
         return;
     }
     const std::string_view json_view(json.data(), json.size());
-    if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
+    if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
         LOG_ERROR(Render_Vulkan, "Failed to write JSON");
         return;
     }
@@ -180,7 +180,7 @@ void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_
 
     const std::string path =
         fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
-    FileUtil::IOFile file(path, "wb");
+    Common::FS::IOFile file(path, "wb");
     if (!file.IsOpen()) {
         LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
         return;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 2258479f5..ae46e0444 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -78,7 +78,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
     if (!libvulkan_env || !library.Open(libvulkan_env)) {
         // Use the libvulkan.dylib from the application bundle.
         const std::string filename =
-            FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+            Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
         library.Open(filename.c_str());
     }
 #else
@@ -237,8 +237,10 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
 
 } // Anonymous namespace
 
-RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
-    : RendererBase(window), system{system} {}
+RendererVulkan::RendererVulkan(Core::System& system_, Core::Frontend::EmuWindow& emu_window,
+                               Tegra::GPU& gpu_,
+                               std::unique_ptr<Core::Frontend::GraphicsContext> context)
+    : RendererBase{emu_window, std::move(context)}, system{system_}, gpu{gpu_} {}
 
 RendererVulkan::~RendererVulkan() {
     ShutDown();
@@ -439,7 +441,7 @@ void RendererVulkan::Report() const {
     LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
 
     auto& telemetry_session = system.TelemetrySession();
-    constexpr auto field = Telemetry::FieldType::UserSystem;
+    constexpr auto field = Common::Telemetry::FieldType::UserSystem;
     telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
     telemetry_session.AddField(field, "GPU_Model", model_name);
     telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 522b5bff8..13debbbc0 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -38,7 +38,9 @@ struct VKScreenInfo {
 
 class RendererVulkan final : public VideoCore::RendererBase {
 public:
-    explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
+    explicit RendererVulkan(Core::System& system, Core::Frontend::EmuWindow& emu_window,
+                            Tegra::GPU& gpu,
+                            std::unique_ptr<Core::Frontend::GraphicsContext> context);
     ~RendererVulkan() override;
 
     bool Init() override;
@@ -58,6 +60,7 @@ private:
     void Report() const;
 
     Core::System& system;
+    Tegra::GPU& gpu;
 
     Common::DynamicLibrary library;
     vk::InstanceDispatch dld;
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 0c03e4d83..ebcfaa0e3 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -382,6 +382,8 @@ bool VKDevice::Create() {
 
     graphics_queue = logical.GetQueue(graphics_family);
     present_queue = logical.GetQueue(present_family);
+
+    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
     return true;
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 529744f2d..26a233db1 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -202,6 +202,11 @@ public:
         return reported_extensions;
     }
 
+    /// Returns true if the setting for async shader compilation is enabled.
+    bool UseAsynchronousShaders() const {
+        return use_asynchronous_shaders;
+    }
+
     /// Checks if the physical device is suitable.
     static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
 
@@ -252,6 +257,9 @@ private:
     bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
     bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
 
+    // Asynchronous Graphics Pipeline setting
+    bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
+
     // Telemetry parameters
     std::string vendor_name;                      ///< Device's driver name.
     std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index a02be5487..d7f65d435 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -29,7 +29,7 @@ void InnerFence::Queue() {
     }
     ASSERT(!event);
 
-    event = device.GetLogical().CreateEvent();
+    event = device.GetLogical().CreateNewEvent();
     ticks = scheduler.Ticks();
 
     scheduler.RequestOutsideRenderPassOperationContext();
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index aaf930b90..2e46c6278 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
                                        const GraphicsPipelineCacheKey& key,
                                        vk::Span<VkDescriptorSetLayoutBinding> bindings,
                                        const SPIRVProgram& program)
-    : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
+    : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
       descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
       descriptor_allocator{descriptor_pool, *descriptor_set_layout},
       update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
       descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
                                                                         program)},
-      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
-                                                                             key.renderpass_params,
-                                                                             program)} {}
+      renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
+      pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
 
 VKGraphicsPipeline::~VKGraphicsPipeline() = default;
 
@@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
 
 vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
                                                 const SPIRVProgram& program) const {
-    const auto& state = fixed_state;
+    const auto& state = cache_key.fixed_state;
     const auto& viewport_swizzles = state.viewport_swizzles;
 
     FixedPipelineState::DynamicState dynamic;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index a1d699a6c..58aa35efd 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -19,7 +19,27 @@ namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct GraphicsPipelineCacheKey;
+struct GraphicsPipelineCacheKey {
+    RenderPassParams renderpass_params;
+    u32 padding;
+    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+    FixedPipelineState fixed_state;
+
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    std::size_t Size() const noexcept {
+        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 
 class VKDescriptorPool;
 class VKDevice;
@@ -54,6 +74,10 @@ public:
         return renderpass;
     }
 
+    GraphicsPipelineCacheKey GetCacheKey() const {
+        return cache_key;
+    }
+
 private:
     vk::DescriptorSetLayout CreateDescriptorSetLayout(
         vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@@ -70,7 +94,7 @@ private:
 
     const VKDevice& device;
     VKScheduler& scheduler;
-    const FixedPipelineState fixed_state;
+    const GraphicsPipelineCacheKey cache_key;
     const u64 hash;
 
     vk::DescriptorSetLayout descriptor_set_layout;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 418c62bc4..cfdcdd6ab 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -28,6 +28,7 @@
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
 
 namespace Vulkan {
 
@@ -205,24 +206,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
     return last_shaders = shaders;
 }
 
-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
+    const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
     if (last_graphics_pipeline && last_graphics_key == key) {
-        return *last_graphics_pipeline;
+        return last_graphics_pipeline;
     }
     last_graphics_key = key;
 
+    if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) {
+        std::unique_lock lock{pipeline_cache};
+        const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+        if (is_cache_miss) {
+            system.GPU().ShaderNotify().MarkSharderBuilding();
+            LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+            const auto [program, bindings] = DecompileShaders(key.fixed_state);
+            async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
+                                            update_descriptor_queue, renderpass_cache, bindings,
+                                            program, key);
+        }
+        last_graphics_pipeline = pair->second.get();
+        return last_graphics_pipeline;
+    }
+
     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
     auto& entry = pair->second;
     if (is_cache_miss) {
+        system.GPU().ShaderNotify().MarkSharderBuilding();
         LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-        const auto [program, bindings] = DecompileShaders(key);
+        const auto [program, bindings] = DecompileShaders(key.fixed_state);
         entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                      update_descriptor_queue, renderpass_cache, key,
                                                      bindings, program);
+        system.GPU().ShaderNotify().MarkShaderComplete();
     }
-    return *(last_graphics_pipeline = entry.get());
+    last_graphics_pipeline = entry.get();
+    return last_graphics_pipeline;
 }
 
 VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
@@ -277,6 +297,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
     return *entry;
 }
 
+void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
+    system.GPU().ShaderNotify().MarkShaderComplete();
+    std::unique_lock lock{pipeline_cache};
+    graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
+}
+
 void VKPipelineCache::OnShaderRemoval(Shader* shader) {
     bool finished = false;
     const auto Finish = [&] {
@@ -312,8 +338,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
 }
 
 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
-    const auto& fixed_state = key.fixed_state;
+VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
     auto& memory_manager = system.GPU().MemoryManager();
     const auto& gpu = system.GPU().Maxwell3D();
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0a3fe65fb..c04829e77 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -22,6 +22,7 @@
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
@@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct GraphicsPipelineCacheKey {
-    RenderPassParams renderpass_params;
-    u32 padding;
-    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
-    FixedPipelineState fixed_state;
-
-    std::size_t Hash() const noexcept;
-
-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
-    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    std::size_t Size() const noexcept {
-        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
-    }
-};
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
-
 struct ComputePipelineCacheKey {
     GPUVAddr shader;
     u32 shared_memory_size;
@@ -152,16 +131,19 @@ public:
 
     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 
-    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+    VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+                                            VideoCommon::Shader::AsyncShaders& async_shaders);
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
+    void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
+
 protected:
     void OnShaderRemoval(Shader* shader) final;
 
 private:
     std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
-        const GraphicsPipelineCacheKey& key);
+        const FixedPipelineState& fixed_state);
 
     Core::System& system;
     const VKDevice& device;
@@ -178,6 +160,7 @@ private:
     GraphicsPipelineCacheKey last_graphics_key;
     VKGraphicsPipeline* last_graphics_pipeline = nullptr;
 
+    std::mutex pipeline_cache;
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
         graphics_cache;
     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7500e8244..ff1b52eab 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -14,6 +14,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
+#include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
@@ -400,8 +401,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
       sampler_cache(device),
       fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
-      query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
+      query_cache(system, *this, device, scheduler),
+      wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
+    if (device.UseAsynchronousShaders()) {
+        async_shaders.AllocateWorkers();
+    }
 }
 
 RasterizerVulkan::~RasterizerVulkan() = default;
@@ -413,6 +418,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     query_cache.UpdateCounters();
 
+    SCOPE_EXIT({ system.GPU().TickWork(); });
+
     const auto& gpu = system.GPU().Maxwell3D();
     GraphicsPipelineCacheKey key;
     key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
@@ -439,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
-    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+    auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
+        // Async graphics pipeline was not ready.
+        return;
+    }
+
+    scheduler.BindGraphicsPipeline(pipeline->GetHandle());
 
-    const auto renderpass = pipeline.GetRenderPass();
+    const auto renderpass = pipeline->GetRenderPass();
     const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
     scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
 
@@ -452,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     BeginTransformFeedback();
 
-    const auto pipeline_layout = pipeline.GetLayout();
-    const auto descriptor_set = pipeline.CommitDescriptorSet();
+    const auto pipeline_layout = pipeline->GetLayout();
+    const auto descriptor_set = pipeline->CommitDescriptorSet();
     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
         if (descriptor_set) {
             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
@@ -463,8 +475,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     });
 
     EndTransformFeedback();
-
-    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::Clear() {
@@ -1433,10 +1443,10 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
 }
 
 void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) {
-    if (!state_tracker.TouchPrimitiveTopology()) {
+    const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
+    if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) {
         return;
     }
-    const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
     scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) {
         cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology));
     });
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 923178b0b..f640ba649 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -32,6 +32,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
 
 namespace Core {
 class System;
@@ -136,6 +137,14 @@ public:
                            u32 pixel_stride) override;
     void SetupDirtyFlags() override;
 
+    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+        return async_shaders;
+    }
+
+    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+        return async_shaders;
+    }
+
     /// Maximum supported size that a constbuffer can have in bytes.
     static constexpr std::size_t MaxConstbufferSize = 0x10000;
     static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
@@ -297,6 +306,7 @@ private:
     vk::Buffer default_buffer;
     VKMemoryCommit default_buffer_commit;
     vk::Event wfi_event;
+    VideoCommon::Shader::AsyncShaders async_shaders;
 
     std::array<View, Maxwell::NumRenderTargets> color_attachments;
     View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 9151d9fb1..4bd1009f9 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -42,7 +42,6 @@ Flags MakeInvalidationFlags() {
     flags[DepthWriteEnable] = true;
     flags[DepthCompareOp] = true;
     flags[FrontFace] = true;
-    flags[PrimitiveTopology] = true;
     flags[StencilOp] = true;
     flags[StencilTestEnable] = true;
     return flags;
@@ -112,10 +111,6 @@ void SetupDirtyFrontFace(Tables& tables) {
     table[OFF(screen_y_control)] = FrontFace;
 }
 
-void SetupDirtyPrimitiveTopology(Tables& tables) {
-    tables[0][OFF(draw.topology)] = PrimitiveTopology;
-}
-
 void SetupDirtyStencilOp(Tables& tables) {
     auto& table = tables[0];
     table[OFF(stencil_front_op_fail)] = StencilOp;
@@ -156,13 +151,13 @@ void StateTracker::Initialize() {
     SetupDirtyDepthWriteEnable(tables);
     SetupDirtyDepthCompareOp(tables);
     SetupDirtyFrontFace(tables);
-    SetupDirtyPrimitiveTopology(tables);
     SetupDirtyStencilOp(tables);
     SetupDirtyStencilTestEnable(tables);
 }
 
 void StateTracker::InvalidateCommandBufferState() {
     system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
+    current_topology = INVALID_TOPOLOGY;
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 54ca0d6c6..13a6ce786 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -32,7 +32,6 @@ enum : u8 {
     DepthWriteEnable,
     DepthCompareOp,
     FrontFace,
-    PrimitiveTopology,
     StencilOp,
     StencilTestEnable,
 
@@ -43,6 +42,8 @@ static_assert(Last <= std::numeric_limits<u8>::max());
 } // namespace Dirty
 
 class StateTracker {
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
 public:
     explicit StateTracker(Core::System& system);
 
@@ -102,10 +103,6 @@ public:
         return Exchange(Dirty::FrontFace, false);
     }
 
-    bool TouchPrimitiveTopology() {
-        return Exchange(Dirty::PrimitiveTopology, false);
-    }
-
     bool TouchStencilOp() {
         return Exchange(Dirty::StencilOp, false);
     }
@@ -114,7 +111,15 @@ public:
         return Exchange(Dirty::StencilTestEnable, false);
     }
 
+    bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
+        const bool has_changed = current_topology != new_topology;
+        current_topology = new_topology;
+        return has_changed;
+    }
+
 private:
+    static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
+
     bool Exchange(std::size_t id, bool new_value) const noexcept {
         auto& flags = system.GPU().Maxwell3D().dirty.flags;
         const bool is_dirty = flags[id];
@@ -124,6 +129,7 @@ private:
 
     Core::System& system;
     Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
+    Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 14cac38ea..013865aa4 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
     return ShaderModule(object, handle, *dld);
 }
 
-Event Device::CreateEvent() const {
+Event Device::CreateNewEvent() const {
     static constexpr VkEventCreateInfo ci{
         .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
         .pNext = nullptr,
@@ -786,7 +786,7 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
         VK_SUCCESS) {
         return std::nullopt;
     }
-    return properties;
+    return std::move(properties);
 }
 
 std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 31885ef42..b9d3fedc1 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -721,7 +721,7 @@ public:
 
     ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
 
-    Event CreateEvent() const;
+    Event CreateNewEvent() const;
 
     SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
 
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index b7f66d7ee..f815584f7 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <chrono>
 #include <condition_variable>
 #include <mutex>
 #include <thread>
@@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() {
     KillWorkers();
 }
 
-void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
-    // If we're already have workers queued or don't want to queue workers, ignore
-    if (num_workers == worker_threads.size() || num_workers == 0) {
+void AsyncShaders::AllocateWorkers() {
+    // Max worker threads we should allow
+    constexpr u32 MAX_THREADS = 4;
+    // Deduce how many threads we can use
+    const u32 threads_used = std::thread::hardware_concurrency() / 4;
+    // Always allow at least 1 thread regardless of our settings
+    const auto max_worker_count = std::max(1U, threads_used);
+    // Don't use more than MAX_THREADS
+    const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+
+    // If we already have workers queued, ignore
+    if (num_workers == worker_threads.size()) {
         return;
     }
 
@@ -34,8 +42,8 @@ void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
     // Create workers
     for (std::size_t i = 0; i < num_workers; i++) {
         context_list.push_back(emu_window.CreateSharedContext());
-        worker_threads.push_back(std::move(
-            std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())));
+        worker_threads.push_back(
+            std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()));
     }
 }
 
@@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                      VideoCommon::Shader::CompilerSettings compiler_settings,
                                      const VideoCommon::Shader::Registry& registry,
                                      VAddr cpu_addr) {
-    WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
-                                                    : AsyncShaders::Backend::OpenGL,
-                        device,
-                        shader_type,
-                        uid,
-                        std::move(code),
-                        std::move(code_b),
-                        main_offset,
-                        compiler_settings,
-                        registry,
-                        cpu_addr};
+    WorkerParams params{
+        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+        .device = &device,
+        .shader_type = shader_type,
+        .uid = uid,
+        .code = std::move(code),
+        .code_b = std::move(code_b),
+        .main_offset = main_offset,
+        .compiler_settings = compiler_settings,
+        .registry = registry,
+        .cpu_address = cpu_addr,
+    };
     std::unique_lock lock(queue_mutex);
-    pending_queue.push_back(std::move(params));
+    pending_queue.push(std::move(params));
+    cv.notify_one();
+}
+
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+                                     Vulkan::VKDescriptorPool& descriptor_pool,
+                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                                     Vulkan::VKRenderPassCache& renderpass_cache,
+                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
+                                     Vulkan::SPIRVProgram program,
+                                     Vulkan::GraphicsPipelineCacheKey key) {
+    WorkerParams params{
+        .backend = Backend::Vulkan,
+        .pp_cache = pp_cache,
+        .vk_device = &device,
+        .scheduler = &scheduler,
+        .descriptor_pool = &descriptor_pool,
+        .update_descriptor_queue = &update_descriptor_queue,
+        .renderpass_cache = &renderpass_cache,
+        .bindings = bindings,
+        .program = program,
+        .key = key,
+    };
+
+    std::unique_lock lock(queue_mutex);
+    pending_queue.push(std::move(params));
     cv.notify_one();
 }
 
 void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
-    using namespace std::chrono_literals;
     while (!is_thread_exiting.load(std::memory_order_relaxed)) {
         std::unique_lock lock{queue_mutex};
         cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
@@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         if (pending_queue.empty()) {
             continue;
         }
+
         // Pull work from queue
         WorkerParams work = std::move(pending_queue.front());
-        pending_queue.pop_front();
-
+        pending_queue.pop();
         lock.unlock();
 
-        if (work.backend == AsyncShaders::Backend::OpenGL ||
-            work.backend == AsyncShaders::Backend::GLASM) {
-            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
+        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
             const auto scope = context->Acquire();
             auto program =
-                OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
             Result result{};
             result.backend = work.backend;
             result.cpu_address = work.cpu_address;
@@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
             result.code_b = std::move(work.code_b);
             result.shader_type = work.shader_type;
 
-            if (work.backend == AsyncShaders::Backend::OpenGL) {
+            if (work.backend == Backend::OpenGL) {
                 result.program.opengl = std::move(program->source_program);
-            } else if (work.backend == AsyncShaders::Backend::GLASM) {
+            } else if (work.backend == Backend::GLASM) {
                 result.program.glasm = std::move(program->assembly_program);
             }
 
@@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
                 std::unique_lock complete_lock(completed_mutex);
                 finished_work.push_back(std::move(result));
             }
+        } else if (work.backend == Backend::Vulkan) {
+            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                *work.vk_device, *work.scheduler, *work.descriptor_pool,
+                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+                work.program);
+
+            work.pp_cache->EmplacePipeline(std::move(pipeline));
         }
     }
 }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 2f5ee94ad..d5ae814d5 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -14,6 +14,10 @@
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
 
 namespace Core::Frontend {
 class EmuWindow;
@@ -24,6 +28,10 @@ namespace Tegra {
 class GPU;
 }
 
+namespace Vulkan {
+class VKPipelineCache;
+}
+
 namespace VideoCommon::Shader {
 
 class AsyncShaders {
@@ -31,6 +39,7 @@ public:
     enum class Backend {
         OpenGL,
         GLASM,
+        Vulkan,
     };
 
     struct ResultPrograms {
@@ -52,7 +61,7 @@ public:
     ~AsyncShaders();
 
     /// Start up shader worker threads
-    void AllocateWorkers(std::size_t num_workers);
+    void AllocateWorkers();
 
     /// Clear the shader queue and kill all worker threads
     void FreeWorkers();
@@ -76,6 +85,14 @@ public:
                            VideoCommon::Shader::CompilerSettings compiler_settings,
                            const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
 
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+                           Vulkan::VKScheduler& scheduler,
+                           Vulkan::VKDescriptorPool& descriptor_pool,
+                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                           Vulkan::VKRenderPassCache& renderpass_cache,
+                           std::vector<VkDescriptorSetLayoutBinding> bindings,
+                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
+
 private:
     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
 
@@ -83,16 +100,28 @@ private:
     bool HasWorkQueued();
 
     struct WorkerParams {
-        AsyncShaders::Backend backend;
-        OpenGL::Device device;
+        Backend backend;
+        // For OGL
+        const OpenGL::Device* device;
         Tegra::Engines::ShaderType shader_type;
         u64 uid;
         std::vector<u64> code;
         std::vector<u64> code_b;
         u32 main_offset;
         VideoCommon::Shader::CompilerSettings compiler_settings;
-        VideoCommon::Shader::Registry registry;
+        std::optional<VideoCommon::Shader::Registry> registry;
         VAddr cpu_address;
+
+        // For Vulkan
+        Vulkan::VKPipelineCache* pp_cache;
+        const Vulkan::VKDevice* vk_device;
+        Vulkan::VKScheduler* scheduler;
+        Vulkan::VKDescriptorPool* descriptor_pool;
+        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+        Vulkan::VKRenderPassCache* renderpass_cache;
+        std::vector<VkDescriptorSetLayoutBinding> bindings;
+        Vulkan::SPIRVProgram program;
+        Vulkan::GraphicsPipelineCacheKey key;
     };
 
     std::condition_variable cv;
@@ -101,7 +130,7 @@ private:
     std::atomic<bool> is_thread_exiting{};
     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
     std::vector<std::thread> worker_threads;
-    std::deque<WorkerParams> pending_queue;
+    std::queue<WorkerParams> pending_queue;
     std::vector<AsyncShaders::Result> finished_work;
     Core::Frontend::EmuWindow& emu_window;
 };
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 474ae620a..16d46a018 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -228,24 +228,30 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
     }
 }
 
-void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
-                      u32 block_height_bit, u32 offset_x, u32 offset_y) {
-    const u32 block_height = 1U << block_height_bit;
-    for (u32 line = 0; line < subrect_height; ++line) {
-        const u32 y2 = line + offset_y;
-        const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height +
-                                  ((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
-        const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y];
-        for (u32 x = 0; x < subrect_width; ++x) {
-            const u32 x2 = (x + offset_x) * bytes_per_pixel;
-            const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height;
-            const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X];
-            const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel;
-            u8* dest_line = unswizzled_data + unswizzled_offset;
-            u8* source_addr = swizzled_data + swizzled_offset;
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+                      u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+    const u32 stride = width * bytes_per_pixel;
+    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
+    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
+
+    const u32 block_height_mask = (1U << block_height) - 1;
+    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
+
+    for (u32 line = 0; line < line_count; ++line) {
+        const u32 src_y = line + origin_y;
+        const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
+
+        const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
+        const u32 src_offset_y = (block_y >> block_height) * block_size +
+                                 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+        for (u32 column = 0; column < line_length_in; ++column) {
+            const u32 src_x = (column + origin_x) * bytes_per_pixel;
+            const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+            const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
+            const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
 
-            std::memcpy(dest_line, source_addr, bytes_per_pixel);
+            std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
         }
     }
 }
@@ -261,7 +267,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
     const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
 
     const u32 block_height_mask = (1U << block_height) - 1;
-    const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth));
+    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
 
     for (u32 line = 0; line < line_count; ++line) {
         const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index d6fe35d37..01e156bc8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -48,9 +48,8 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
                     u32 block_height_bit, u32 offset_x, u32 offset_y);
 
 /// Copies a tiled subrectangle into a linear surface.
-void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
-                      u32 offset_x, u32 offset_y);
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
+                      u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input);
 
 /// @brief Swizzles a 2D array of pixels into a 3D texture
 /// @param line_length_in  Number of pixels per line
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 45f360bdd..4e3a092c7 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <memory>
+
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/settings.h"
@@ -16,37 +17,46 @@
 #include "video_core/video_core.h"
 
 namespace {
-std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
-                                                        Core::System& system,
-                                                        Core::Frontend::GraphicsContext& context) {
+
+std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
+    Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+    std::unique_ptr<Core::Frontend::GraphicsContext> context) {
     switch (Settings::values.renderer_backend.GetValue()) {
     case Settings::RendererBackend::OpenGL:
-        return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context);
+        return std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, gpu,
+                                                        std::move(context));
 #ifdef HAS_VULKAN
     case Settings::RendererBackend::Vulkan:
-        return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
+        return std::make_unique<Vulkan::RendererVulkan>(system, emu_window, gpu,
+                                                        std::move(context));
 #endif
     default:
         return nullptr;
     }
 }
+
 } // Anonymous namespace
 
 namespace VideoCore {
 
 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+    std::unique_ptr<Tegra::GPU> gpu;
+    if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+        gpu = std::make_unique<VideoCommon::GPUAsynch>(system);
+    } else {
+        gpu = std::make_unique<VideoCommon::GPUSynch>(system);
+    }
+
     auto context = emu_window.CreateSharedContext();
     const auto scope = context->Acquire();
-    auto renderer = CreateRenderer(emu_window, system, *context);
+
+    auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
     if (!renderer->Init()) {
         return nullptr;
     }
 
-    if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-        return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer),
-                                                        std::move(context));
-    }
-    return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context));
+    gpu->BindRenderer(std::move(renderer));
+    return gpu;
 }
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer) {