34 files changed, 1710 insertions, 528 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6113e17ff..33e507e69 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -44,6 +44,8 @@ add_library(video_core STATIC
     renderer_opengl/gl_shader_cache.h
     renderer_opengl/gl_shader_decompiler.cpp
     renderer_opengl/gl_shader_decompiler.h
+    renderer_opengl/gl_shader_disk_cache.cpp
+    renderer_opengl/gl_shader_disk_cache.h
     renderer_opengl/gl_shader_gen.cpp
     renderer_opengl/gl_shader_gen.h
     renderer_opengl/gl_shader_manager.cpp
@@ -102,4 +104,4 @@ add_library(video_core STATIC
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad)
+target_link_libraries(video_core PRIVATE glad lz4_static)
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 63a958f11..eb9bf1878 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -35,8 +35,10 @@ void DmaPusher::DispatchCalls() {
 bool DmaPusher::Step() {
     if (dma_get != dma_put) {
         // Push buffer non-empty, read a word
-        const CommandHeader command_header{
-            Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))};
+        const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
+        ASSERT_MSG(address, "Invalid GPU address");
+
+        const CommandHeader command_header{Memory::Read32(*address)};
 
         dma_get += sizeof(u32);
 
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 80f70e332..9f1533263 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -42,8 +42,10 @@ void Fermi2D::HandleSurfaceCopy() {
     // TODO(Subv): Only raw copies are implemented.
     ASSERT(regs.operation == Regs::Operation::SrcCopy);
 
-    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
-    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
+    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
+    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
+    ASSERT_MSG(source_cpu, "Invalid source GPU address");
+    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
 
     u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
     u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
@@ -52,22 +54,22 @@ void Fermi2D::HandleSurfaceCopy() {
         // All copies here update the main memory, so mark all rasterizer states as invalid.
         Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 
-        rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
+        rasterizer.FlushRegion(*source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
         // We have to invalidate the destination region to evict any outdated surfaces from the
         // cache. We do this before actually writing the new data because the destination address
         // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(dest_cpu,
+        rasterizer.InvalidateRegion(*dest_cpu,
                                     dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
 
         if (regs.src.linear == regs.dst.linear) {
             // If the input layout and the output layout are the same, just perform a raw copy.
             ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
-            Memory::CopyBlock(dest_cpu, source_cpu,
+            Memory::CopyBlock(*dest_cpu, *source_cpu,
                               src_bytes_per_pixel * regs.dst.width * regs.dst.height);
             return;
         }
-        u8* src_buffer = Memory::GetPointer(source_cpu);
-        u8* dst_buffer = Memory::GetPointer(dest_cpu);
+        u8* src_buffer = Memory::GetPointer(*source_cpu);
+        u8* dst_buffer = Memory::GetPointer(*dest_cpu);
         if (!regs.src.linear && regs.dst.linear) {
             // If the input is tiled and the output is linear, deswizzle the input and copy it over.
             Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4880191fc..5c1029ddf 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -39,16 +39,17 @@ void KeplerMemory::ProcessData(u32 data) {
     ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
     ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
 
-    GPUVAddr address = regs.dest.Address();
-    VAddr dest_address =
-        *memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
+    const GPUVAddr address = regs.dest.Address();
+    const auto dest_address =
+        memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
+    ASSERT_MSG(dest_address, "Invalid GPU address");
 
     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
     // We do this before actually writing the new data because the destination address might contain
     // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(dest_address, sizeof(u32));
+    rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
 
-    Memory::Write32(dest_address, data);
+    Memory::Write32(*dest_address, data);
     Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 
     state.write_offset++;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a388b3944..10eae6a65 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -273,7 +273,8 @@ void Maxwell3D::ProcessQueryGet() {
     GPUVAddr sequence_address = regs.query.QueryAddress();
     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
     // VAddr before writing.
-    std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
+    const auto address = memory_manager.GpuToCpuAddress(sequence_address);
+    ASSERT_MSG(address, "Invalid GPU address");
 
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -386,14 +387,14 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
 
 void Maxwell3D::ProcessCBData(u32 value) {
     // Write the input value to the current const buffer at the current position.
-    GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
+    const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
     ASSERT(buffer_address != 0);
 
     // Don't allow writing past the end of the buffer.
     ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
 
-    std::optional<VAddr> address =
-        memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
+    const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
+    ASSERT_MSG(address, "Invalid GPU address");
 
     Memory::Write32(*address, value);
     dirty_flags.OnMemoryWrite();
@@ -403,10 +404,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
 }
 
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    GPUVAddr tic_base_address = regs.tic.TICAddress();
+    const GPUVAddr tic_base_address = regs.tic.TICAddress();
 
-    GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
+    const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
+    const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
+    ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
 
     Texture::TICEntry tic_entry;
     Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -415,10 +417,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
                "TIC versions other than BlockLinear or Pitch are unimplemented");
 
-    auto r_type = tic_entry.r_type.Value();
-    auto g_type = tic_entry.g_type.Value();
-    auto b_type = tic_entry.b_type.Value();
-    auto a_type = tic_entry.a_type.Value();
+    const auto r_type = tic_entry.r_type.Value();
+    const auto g_type = tic_entry.g_type.Value();
+    const auto b_type = tic_entry.b_type.Value();
+    const auto a_type = tic_entry.a_type.Value();
 
     // TODO(Subv): Different data types for separate components are not supported
     ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
@@ -427,10 +429,11 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
 }
 
 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
-    GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
+    const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
 
-    GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
+    const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
+    const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
+    ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
 
     Texture::TSCEntry tsc_entry;
     Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
@@ -452,8 +455,10 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
 
-        Texture::TextureHandle tex_handle{
-            Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))};
+        const auto address = memory_manager.GpuToCpuAddress(current_texture);
+        ASSERT_MSG(address, "Invalid GPU address");
+
+        const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
 
         Texture::FullTextureInfo tex_info{};
         // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -462,23 +467,16 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
             sizeof(Texture::TextureHandle);
 
         // Load the TIC data.
-        if (tex_handle.tic_id != 0) {
-            tex_info.enabled = true;
-
-            auto tic_entry = GetTICEntry(tex_handle.tic_id);
-            // TODO(Subv): Workaround for BitField's move constructor being deleted.
-            std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-        }
+        auto tic_entry = GetTICEntry(tex_handle.tic_id);
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
 
         // Load the TSC data
-        if (tex_handle.tsc_id != 0) {
-            auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
-            // TODO(Subv): Workaround for BitField's move constructor being deleted.
-            std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-        }
+        auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
 
-        if (tex_info.enabled)
-            textures.push_back(tex_info);
+        textures.push_back(tex_info);
     }
 
     return textures;
@@ -490,31 +488,28 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
     auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
 
-    GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
+    const GPUVAddr tex_info_address =
+        tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
 
     ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
 
-    std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
-    Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+    const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
+    ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
+
+    const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
 
     Texture::FullTextureInfo tex_info{};
     tex_info.index = static_cast<u32>(offset);
 
     // Load the TIC data.
-    if (tex_handle.tic_id != 0) {
-        tex_info.enabled = true;
-
-        auto tic_entry = GetTICEntry(tex_handle.tic_id);
-        // TODO(Subv): Workaround for BitField's move constructor being deleted.
-        std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-    }
+    auto tic_entry = GetTICEntry(tex_handle.tic_id);
+    // TODO(Subv): Workaround for BitField's move constructor being deleted.
+    std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
 
     // Load the TSC data
-    if (tex_handle.tsc_id != 0) {
-        auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
-        // TODO(Subv): Workaround for BitField's move constructor being deleted.
-        std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-    }
+    auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+    // TODO(Subv): Workaround for BitField's move constructor being deleted.
+    std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
 
     return tex_info;
 }
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 06462f570..d6c41a5ae 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -39,8 +39,10 @@ void MaxwellDMA::HandleCopy() {
     const GPUVAddr source = regs.src_address.Address();
     const GPUVAddr dest = regs.dst_address.Address();
 
-    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
-    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
+    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
+    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
+    ASSERT_MSG(source_cpu, "Invalid source GPU address");
+    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
 
     // TODO(Subv): Perform more research and implement all features of this engine.
     ASSERT(regs.exec.enable_swizzle == 0);
@@ -64,7 +66,7 @@ void MaxwellDMA::HandleCopy() {
         // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
         // y_count).
         if (!regs.exec.enable_2d) {
-            Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count);
+            Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
             return;
         }
 
@@ -73,8 +75,8 @@ void MaxwellDMA::HandleCopy() {
         // rectangle. There is no need to manually flush/invalidate the regions because
         // CopyBlock does that for us.
         for (u32 line = 0; line < regs.y_count; ++line) {
-            const VAddr source_line = source_cpu + line * regs.src_pitch;
-            const VAddr dest_line = dest_cpu + line * regs.dst_pitch;
+            const VAddr source_line = *source_cpu + line * regs.src_pitch;
+            const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
             Memory::CopyBlock(dest_line, source_line, regs.x_count);
         }
         return;
@@ -87,12 +89,12 @@ void MaxwellDMA::HandleCopy() {
     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
-        rasterizer.FlushRegion(source_cpu, src_size);
+        rasterizer.FlushRegion(*source_cpu, src_size);
 
         // We have to invalidate the destination region to evict any outdated surfaces from the
         // cache. We do this before actually writing the new data because the destination address
         // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(dest_cpu, dst_size);
+        rasterizer.InvalidateRegion(*dest_cpu, dst_size);
     };
 
     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -105,8 +107,8 @@ void MaxwellDMA::HandleCopy() {
                            copy_size * src_bytes_per_pixel);
 
         Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu,
-                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
+                                  regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
+                                  *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
                                   regs.src_params.pos_y);
     } else {
         ASSERT(regs.dst_params.size_z == 1);
@@ -119,7 +121,7 @@ void MaxwellDMA::HandleCopy() {
 
         // If the input is linear and the output is tiled, swizzle the input and copy it over.
         Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight());
+                                src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
     }
 }
 
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 713b01c9f..269df9437 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -217,9 +217,9 @@ enum class StoreType : u64 {
     Signed8 = 1,
     Unsigned16 = 2,
     Signed16 = 3,
-    Bytes32 = 4,
-    Bytes64 = 5,
-    Bytes128 = 6,
+    Bits32 = 4,
+    Bits64 = 5,
+    Bits128 = 6,
 };
 
 enum class IMinMaxExchange : u64 {
@@ -981,6 +981,10 @@ union Instruction {
             }
             return false;
         }
+
+        bool IsComponentEnabled(std::size_t component) const {
+            return ((1ULL << component) & component_mask) != 0;
+        }
     } txq;
 
     union {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 47247f097..54abe5298 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -154,7 +154,8 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
     const VAddr base_addr{PageSlot(gpu_addr)};
 
     if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
-        base_addr == static_cast<u64>(PageStatus::Unmapped)) {
+        base_addr == static_cast<u64>(PageStatus::Unmapped) ||
+        base_addr == static_cast<u64>(PageStatus::Reserved)) {
         return {};
     }
 
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 4c08bb148..77da135a0 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <functional>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
@@ -61,5 +62,9 @@ public:
 
     /// Increase/decrease the number of object in pages touching the specified region
     virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
+
+    /// Initialize disk-cached resources for the game being emulated
+    virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
+                                   const DiskResourceLoadCallback& callback = {}) {}
 };
 } // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index bd2b30e77..b3062e5ba 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -19,7 +19,8 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
 GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
                                       std::size_t alignment, bool cache) {
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    ASSERT_MSG(cpu_addr, "Invalid GPU address");
 
     // Cache management is a big overhead, so only cache entries with a given size.
     // TODO: Figure out which size is the best for given games.
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index d9ed08437..77d5cedd2 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -46,7 +46,9 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
     auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
 
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    ASSERT_MSG(cpu_addr, "Invalid GPU address");
+
     const u8* source{Memory::GetPointer(*cpu_addr)};
 
     for (u32 primitive = 0; primitive < count / 4; ++primitive) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9f7c837d6..974ca6a20 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -22,6 +22,7 @@
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
@@ -99,8 +100,9 @@ struct FramebufferCacheKey {
     }
 };
 
-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
+                                   ScreenInfo& info)
+    : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
       buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
     // Create sampler objects
     for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
@@ -447,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
     return boost::make_iterator_range(map.equal_range(interval));
 }
 
-void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
     const u64 page_start{addr >> Memory::PAGE_BITS};
     const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
 
@@ -477,6 +479,11 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
         cached_pages.add({pages_interval, delta});
 }
 
+void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
+                                         const VideoCore::DiskResourceLoadCallback& callback) {
+    shader_cache.LoadDiskCache(stop_loading, callback);
+}
+
 std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
     OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
     std::optional<std::size_t> single_color_target) {
@@ -1004,29 +1011,20 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
 
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& entry = entries[bindpoint];
+        const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
         const u32 current_bindpoint = base_bindings.sampler + bindpoint;
-        auto& unit = state.texture_units[current_bindpoint];
-
-        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
-        if (!texture.enabled) {
-            unit.texture = 0;
-            continue;
-        }
 
         texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
 
         Surface surface = res_cache.GetTextureSurface(texture, entry);
         if (surface != nullptr) {
-            unit.texture =
+            state.texture_units[current_bindpoint].texture =
                 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
-            unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
-            unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
-            unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
-            unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
-            unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
+            surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
+                                   texture.tic.w_source);
         } else {
             // Can occur when texture addr is null or its memory is unmapped/invalid
-            unit.texture = 0;
+            state.texture_units[current_bindpoint].texture = 0;
         }
     }
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 7f2bf0f8b..f3b607f4d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <atomic>
 #include <cstddef>
 #include <map>
 #include <memory>
@@ -33,6 +34,10 @@
 #include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"
 
+namespace Core {
+class System;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -45,7 +50,8 @@ struct FramebufferCacheKey;
 
 class RasterizerOpenGL : public VideoCore::RasterizerInterface {
 public:
-    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer, ScreenInfo& info);
+    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
+                              ScreenInfo& info);
     ~RasterizerOpenGL() override;
 
     void DrawArrays() override;
@@ -60,6 +66,8 @@ public:
                            u32 pixel_stride) override;
     bool AccelerateDrawBatch(bool is_indexed) override;
     void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
+    void LoadDiskResources(const std::atomic_bool& stop_loading,
+                           const VideoCore::DiskResourceLoadCallback& callback) override;
 
     /// Maximum supported size that a constbuffer can have in bytes.
     static constexpr std::size_t MaxConstbufferSize = 0x10000;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 50286432d..a79eee03e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -18,7 +18,6 @@
 #include "video_core/morton.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
-#include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/utils.h"
 #include "video_core/surface.h"
 #include "video_core/textures/astc.h"
@@ -44,14 +43,14 @@ struct FormatTuple {
     bool compressed;
 };
 
-static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) {
-    glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
+static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
+    glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
     if (max_mip_level == 1) {
-        glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0);
+        glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
     }
 }
 
@@ -529,55 +528,41 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
 CachedSurface::CachedSurface(const SurfaceParams& params)
     : params(params), gl_target(SurfaceTargetToGL(params.target)),
       cached_size_in_bytes(params.size_in_bytes) {
-    texture.Create();
-    const auto& rect{params.GetRect()};
-
-    // Keep track of previous texture bindings
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-
-    cur_state.texture_units[0].texture = texture.handle;
-    cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
-    cur_state.Apply();
-    glActiveTexture(GL_TEXTURE0);
+    texture.Create(gl_target);
+
+    // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
+    // alternatives. This signals a bug in those functions.
+    const auto width = static_cast<GLsizei>(params.MipWidth(0));
+    const auto height = static_cast<GLsizei>(params.MipHeight(0));
 
     const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
     gl_internal_format = format_tuple.internal_format;
-    gl_is_compressed = format_tuple.compressed;
 
-    if (!format_tuple.compressed) {
-        // Only pre-create the texture for non-compressed textures.
-        switch (params.target) {
-        case SurfaceTarget::Texture1D:
-            glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth());
-            break;
-        case SurfaceTarget::Texture2D:
-        case SurfaceTarget::TextureCubemap:
-            glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
-            break;
-        case SurfaceTarget::Texture3D:
-        case SurfaceTarget::Texture2DArray:
-        case SurfaceTarget::TextureCubeArray:
-            glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
-                           params.depth);
-            break;
-        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
-                         static_cast<u32>(params.target));
-            UNREACHABLE();
-            glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
-                           rect.GetWidth(), rect.GetHeight());
-        }
+    switch (params.target) {
+    case SurfaceTarget::Texture1D:
+        glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width);
+        break;
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::TextureCubemap:
+        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height);
+        break;
+    case SurfaceTarget::Texture3D:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubeArray:
+        glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height, params.depth);
+        break;
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                     static_cast<u32>(params.target));
+        UNREACHABLE();
+        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height);
     }
 
-    ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level);
+    ApplyTextureDefaults(texture.handle, params.max_mip_level);
 
     OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
 
@@ -751,63 +736,50 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
     const auto& rect{params.GetRect(mip_map)};
 
     // Load data from memory to the surface
-    const GLint x0 = static_cast<GLint>(rect.left);
-    const GLint y0 = static_cast<GLint>(rect.bottom);
-    std::size_t buffer_offset =
+    const auto x0 = static_cast<GLint>(rect.left);
+    const auto y0 = static_cast<GLint>(rect.bottom);
+    auto buffer_offset =
         static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
                                  static_cast<std::size_t>(x0)) *
         GetBytesPerPixel(params.pixel_format);
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    const GLuint target_tex = texture.handle;
-    OpenGLState cur_state = OpenGLState::GetCurState();
-
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-    cur_state.texture_units[0].texture = target_tex;
-    cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
-    cur_state.Apply();
 
     // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
     ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
 
-    GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
-    glActiveTexture(GL_TEXTURE0);
+    const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
     if (tuple.compressed) {
         switch (params.target) {
         case SurfaceTarget::Texture2D:
-            glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage2D(
+                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
+                &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture3D:
-            glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)),
-                                   static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage3D(
+                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)),
+                static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
+                &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture2DArray:
         case SurfaceTarget::TextureCubeArray:
-            glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)),
-                                   static_cast<GLsizei>(params.depth), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage3D(
+                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
+                tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::TextureCubemap: {
-            GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
+            const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
             for (std::size_t face = 0; face < params.depth; ++face) {
-                glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
-                                       mip_map, tuple.internal_format,
-                                       static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                       static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
-                                       layer_size, &gl_buffer[mip_map][buffer_offset]);
+                glCompressedTextureSubImage3D(
+                    texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
+                    static_cast<GLsizei>(params.MipWidth(mip_map)),
+                    static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
+                    layer_size, &gl_buffer[mip_map][buffer_offset]);
                 buffer_offset += layer_size;
             }
             break;
@@ -816,46 +788,43 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                          static_cast<u32>(params.target));
             UNREACHABLE();
-            glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
-                                   static_cast<GLsizei>(params.size_in_bytes_gl),
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage2D(
+                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
+                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
         }
     } else {
-
         switch (params.target) {
         case SurfaceTarget::Texture1D:
-            glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0,
-                            static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
+                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture2D:
-            glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                                &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture3D:
-            glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
-                            tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
+                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::Texture2DArray:
         case SurfaceTarget::TextureCubeArray:
-            glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
-                            tuple.type, &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
+                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
             break;
         case SurfaceTarget::TextureCubemap: {
             std::size_t start = buffer_offset;
             for (std::size_t face = 0; face < params.depth; ++face) {
-                glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map,
-                                x0, y0, static_cast<GLsizei>(rect.GetWidth()),
-                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                                &gl_buffer[mip_map][buffer_offset]);
+                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
+                                    static_cast<GLsizei>(rect.GetWidth()),
+                                    static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
+                                    tuple.type, &gl_buffer[mip_map][buffer_offset]);
                 buffer_offset += params.LayerSizeGL(mip_map);
             }
             break;
@@ -864,9 +833,10 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                          static_cast<u32>(params.target));
             UNREACHABLE();
-            glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                                &gl_buffer[mip_map][buffer_offset]);
         }
     }
 
@@ -876,29 +846,18 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
 void CachedSurface::EnsureTextureView() {
     if (texture_view.handle != 0)
         return;
-    // Compressed texture are not being created with immutable storage
-    UNIMPLEMENTED_IF(gl_is_compressed);
 
     const GLenum target{TargetLayer()};
     const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
     constexpr GLuint min_layer = 0;
     constexpr GLuint min_level = 0;
 
-    texture_view.Create();
-    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level,
-                  params.max_mip_level, min_layer, num_layers);
-
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-    cur_state.texture_units[0].texture = texture_view.handle;
-    cur_state.texture_units[0].target = target;
-    cur_state.Apply();
-
-    ApplyTextureDefaults(target, params.max_mip_level);
+    glGenTextures(1, &texture_view.handle); // glTextureView needs a name with no target yet
+    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level,
+                  params.max_mip_level, min_layer, num_layers); // 6 layers for cube views
+    ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
+    glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, // apply cached swizzle
+                         reinterpret_cast<const GLint*>(swizzle.data()));
 }
 
 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
@@ -909,6 +868,25 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
         UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
 }
 
+void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
+                                  Tegra::Texture::SwizzleSource swizzle_y,
+                                  Tegra::Texture::SwizzleSource swizzle_z,
+                                  Tegra::Texture::SwizzleSource swizzle_w) {
+    // Translate the guest swizzle; leave GL untouched when it matches the cached one
+    const std::array<GLenum, 4> requested{
+        MaxwellToGL::SwizzleSource(swizzle_x), MaxwellToGL::SwizzleSource(swizzle_y),
+        MaxwellToGL::SwizzleSource(swizzle_z), MaxwellToGL::SwizzleSource(swizzle_w)};
+    if (requested == swizzle) {
+        return;
+    }
+    swizzle = requested;
+    const auto* const gl_swizzle = reinterpret_cast<const GLint*>(swizzle.data());
+    glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle);
+    if (texture_view.handle != 0) { // an existing view is updated alongside the texture
+        glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle);
+    }
+}
+
 RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
     : RasterizerCache{rasterizer} {
     read_framebuffer.Create();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 8d7d6722c..490b8252e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -382,6 +382,11 @@ public:
     // Upload data in gl_buffer to this surface's texture
     void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
 
+    void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
+                       Tegra::Texture::SwizzleSource swizzle_y,
+                       Tegra::Texture::SwizzleSource swizzle_z,
+                       Tegra::Texture::SwizzleSource swizzle_w);
+
 private:
     void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
 
@@ -393,8 +398,8 @@ private:
     SurfaceParams params{};
     GLenum gl_target{};
     GLenum gl_internal_format{};
-    bool gl_is_compressed{};
     std::size_t cached_size_in_bytes{};
+    std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
 };
 
 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 1da744158..bfe666a73 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -15,12 +15,12 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R
 
 namespace OpenGL {
 
-void OGLTexture::Create() {
+void OGLTexture::Create(GLenum target) {
     if (handle != 0)
         return;
 
     MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenTextures(1, &handle);
+    glCreateTextures(target, 1, &handle); // the texture object is created for `target` up front
 }
 
 void OGLTexture::Release() {
@@ -71,7 +71,8 @@ void OGLShader::Release() {
 }
 
 void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
-                                  const char* frag_shader, bool separable_program) {
+                                  const char* frag_shader, bool separable_program,
+                                  bool hint_retrievable) {
     OGLShader vert, geo, frag;
     if (vert_shader)
         vert.Create(vert_shader, GL_VERTEX_SHADER);
@@ -81,7 +82,7 @@ void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shade
         frag.Create(frag_shader, GL_FRAGMENT_SHADER);
 
     MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    Create(separable_program, vert.handle, geo.handle, frag.handle);
+    Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
 }
 
 void OGLProgram::Release() {
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index e33f1e973..fbb93ee49 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -28,7 +28,7 @@ public:
     }
 
     /// Creates a new internal OpenGL resource and stores the handle
-    void Create();
+    void Create(GLenum target);
 
     /// Deletes the internal OpenGL resource
     void Release();
@@ -101,15 +101,15 @@ public:
     }
 
     template <typename... T>
-    void Create(bool separable_program, T... shaders) {
+    void Create(bool separable_program, bool hint_retrievable, T... shaders) {
         if (handle != 0)
             return;
-        handle = GLShader::LoadProgram(separable_program, shaders...);
+        handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
     }
 
     /// Creates a new internal OpenGL resource and stores the handle
     void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
-                          bool separable_program = false);
+                          bool separable_program = false, bool hint_retrievable = false);
 
     /// Deletes the internal OpenGL resource
     void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 90eda7814..4883e4f62 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -11,6 +11,7 @@
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/utils.h"
 #include "video_core/shader/shader_ir.h"
@@ -19,16 +20,29 @@ namespace OpenGL {
 
 using VideoCommon::Shader::ProgramCode;
 
+// One UBO is always reserved for emulation values
+constexpr u32 RESERVED_UBOS = 1;
+
+struct UnspecializedShader {
+    std::string code; // NOTE(review): presumably the GLSL before specialization - confirm
+    GLShader::ShaderEntries entries;
+    Maxwell::ShaderProgram program_type;
+};
+
+namespace {
+
 /// Gets the address for the specified shader stage program
-static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
     const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
-    return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
-                                               shader_config.offset);
+    const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
+                                                            shader_config.offset);
+    ASSERT_MSG(address, "Invalid GPU address"); // checked before the dereference below
+    return *address;
 }
 
 /// Gets the shader program code from memory for the specified address
-static ProgramCode GetShaderCode(VAddr addr) {
+ProgramCode GetShaderCode(VAddr addr) {
     ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
     Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
     return program_code;
@@ -49,38 +63,196 @@ constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
     }
 }
 
-CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
-    : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
+/// Returns whether the instruction at `offset` is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions, counting from main_offset.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset; // i.e. relative to main_offset
+    return (absolute_offset % SchedPeriod) == 0;
+}
 
-    GLShader::ProgramResult program_result;
+/// Maps a GL primitive mode to its GLSL input layout name, a debug name and its vertex count
+constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
+    switch (primitive_mode) {
+    case GL_POINTS:
+        return {"points", "Points", 1};
+    case GL_LINES:
+    case GL_LINE_STRIP:
+        return {"lines", "Lines", 2};
+    case GL_LINES_ADJACENCY:
+    case GL_LINE_STRIP_ADJACENCY:
+        return {"lines_adjacency", "LinesAdj", 4};
+    case GL_TRIANGLES:
+    case GL_TRIANGLE_STRIP:
+    case GL_TRIANGLE_FAN:
+        return {"triangles", "Triangles", 3};
+    case GL_TRIANGLES_ADJACENCY:
+    case GL_TRIANGLE_STRIP_ADJACENCY:
+        return {"triangles_adjacency", "TrianglesAdj", 6};
+    default:
+        return {"points", "Invalid", 1}; // unknown modes are described as points
+    }
+}
 
-    switch (program_type) {
-    case Maxwell::ShaderProgram::VertexA:
+/// Calculates the size in bytes of a program stream, up to and including its terminating word
+std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+    constexpr std::size_t start_offset = 10; // NOTE(review): presumably skips a header - confirm
+    std::size_t offset = start_offset;
+    std::size_t size = start_offset * sizeof(u64); // the skipped words still count as size
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) { // sched words never end the scan
+            if (instruction == 0 || (instruction >> 52) == 0x50b) {
+                // End on an all-zero word or on Maxwell's "nop" instruction
+                break;
+            }
+        }
+        size += sizeof(u64);
+        offset++;
+    }
+    // Add the terminating word, but never report more bytes than the stream holds
+    return std::min(size + sizeof(u64), program.size() * sizeof(u64));
+}
+
+/// Hashes one program stream, or two when program_type is VertexA
+u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
+                        const ProgramCode& code_b) {
+    u64 unique_identifier =
+        Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
+    if (program_type != Maxwell::ShaderProgram::VertexA) {
+        return unique_identifier; // code_b is not read for any other program type
+    }
+    // VertexA programs include two programs
+    // Hash code_b the same way and fold both hashes into one seed
+    std::size_t seed = 0;
+    boost::hash_combine(seed, unique_identifier);
+
+    const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
+                                                CalculateProgramSize(code_b));
+    boost::hash_combine(seed, identifier_b);
+    return static_cast<u64>(seed);
+}
+
+/// Generates the unspecialized shader for a stage; the result is empty for unhandled stages
+GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type,
+                                      const ProgramCode& program_code,
+                                      const ProgramCode& program_code_b) {
+    GLShader::ShaderSetup setup(program_code);
+    if (program_type == Maxwell::ShaderProgram::VertexA) {
         // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
         // Conventional HW does not support this, so we combine VertexA and VertexB into one
         // stage here.
-        setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
+        setup.SetProgramB(program_code_b);
+    }
+    setup.program.unique_identifier =
+        GetUniqueIdentifier(program_type, program_code, program_code_b);
+    switch (program_type) {
+    case Maxwell::ShaderProgram::VertexA:
     case Maxwell::ShaderProgram::VertexB:
-        CalculateProperties();
-        program_result = GLShader::GenerateVertexShader(setup);
-        break;
+        return GLShader::GenerateVertexShader(setup);
     case Maxwell::ShaderProgram::Geometry:
-        CalculateProperties();
-        program_result = GLShader::GenerateGeometryShader(setup);
-        break;
+        return GLShader::GenerateGeometryShader(setup);
     case Maxwell::ShaderProgram::Fragment:
-        CalculateProperties();
-        program_result = GLShader::GenerateFragmentShader(setup);
-        break;
+        return GLShader::GenerateFragmentShader(setup);
     default:
         LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
         UNREACHABLE();
+        return {};
+    }
+}
+
+CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
+                               Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
+                               GLenum primitive_mode, bool hint_retrievable = false) {
+    std::string source = "#version 430 core\n"; // followed by binding defines, then `code`
+    source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+    // Each resource takes the next free binding of its kind, counted on the local copy
+    for (const auto& cbuf : entries.const_buffers) {
+        source +=
+            fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
+    }
+    for (const auto& gmem : entries.global_memory_entries) {
+        source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
+                              gmem.GetCbufOffset(), base_bindings.gmem++);
+    }
+    for (const auto& sampler : entries.samplers) {
+        source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
+                              base_bindings.sampler++);
+    }
+    // Geometry shaders additionally declare their input topology and vertex count
+    if (program_type == Maxwell::ShaderProgram::Geometry) {
+        const auto [glsl_topology, debug_name, max_vertices] =
+            GetPrimitiveDescription(primitive_mode);
+
+        source += "layout (" + std::string(glsl_topology) + ") in;\n";
+        source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
+    }
+
+    source += code;
+    // Compile the stage and build a separable program from it alone
+    OGLShader shader;
+    shader.Create(source.c_str(), GetShaderType(program_type));
+
+    auto program = std::make_shared<OGLProgram>();
+    program->Create(true, hint_retrievable, shader.handle);
+    return program;
+}
+
+/// Collects the program binary formats reported by the OpenGL driver
+std::set<GLenum> GetSupportedFormats() {
+    GLint format_count{};
+    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &format_count);
+    std::vector<GLint> raw_formats(format_count); // one GLint per reported format
+    glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, raw_formats.data());
+
+    std::set<GLenum> result;
+    for (const GLint raw : raw_formats) {
+        result.insert(static_cast<GLenum>(raw));
+    }
+    return result;
+}
+
+} // namespace
+
+CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                           ShaderDiskCacheOpenGL& disk_cache,
+                           const PrecompiledPrograms& precompiled_programs,
+                           ProgramCode&& program_code, ProgramCode&& program_code_b)
+    : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
+      disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+    // Measure both streams first: they are moved into the raw entry at the end
+    const std::size_t code_size = CalculateProgramSize(program_code);
+    const std::size_t code_size_b =
+        program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
+
+    GLShader::ProgramResult program_result =
+        CreateProgram(program_type, program_code, program_code_b);
+    if (program_result.first.empty()) {
+        // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
         return;
     }
 
     code = program_result.first;
     entries = program_result.second;
     shader_length = entries.shader_length;
+    // Hand the guest code to the disk cache, with lengths converted from bytes to u64 words
+    const ShaderDiskCacheRaw raw(unique_identifier, program_type,
+                                 static_cast<u32>(code_size / sizeof(u64)),
+                                 static_cast<u32>(code_size_b / sizeof(u64)),
+                                 std::move(program_code), std::move(program_code_b));
+    disk_cache.SaveRaw(raw);
+}
+
+CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                           ShaderDiskCacheOpenGL& disk_cache,
+                           const PrecompiledPrograms& precompiled_programs,
+                           GLShader::ProgramResult result)
+    : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
+      disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+    // `result` is taken by value, so both of its halves can be moved out instead of copied
+    code = std::move(result.first);
+    entries = std::move(result.second);
+    shader_length = entries.shader_length;
 }
 
 std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
@@ -92,136 +264,222 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
         const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
         auto& program = entry->second;
         if (is_cache_miss) {
-            std::string source = AllocateBindings(base_bindings);
-            source += code;
+            program = TryLoadProgram(primitive_mode, base_bindings);
+            if (!program) {
+                program =
+                    SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
+                disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
+            }
 
-            OGLShader shader;
-            shader.Create(source.c_str(), GetShaderType(program_type));
-            program.Create(true, shader.handle);
-            LabelGLObject(GL_PROGRAM, program.handle, addr);
+            LabelGLObject(GL_PROGRAM, program->handle, addr);
         }
 
-        handle = program.handle;
+        handle = program->handle;
     }
 
-    // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for
-    // emulation values
-    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
+    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
     base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
     base_bindings.sampler += static_cast<u32>(entries.samplers.size());
 
     return {handle, base_bindings};
 }
 
-std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
-    std::string code = "#version 430 core\n";
-    code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
-
-    for (const auto& cbuf : entries.const_buffers) {
-        code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
-    }
-
-    for (const auto& gmem : entries.global_memory_entries) {
-        code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
-                            gmem.GetCbufOffset(), base_bindings.gmem++);
-    }
-
-    for (const auto& sampler : entries.samplers) {
-        code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
-                            base_bindings.sampler++);
-    }
-
-    return code;
-}
-
 GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
     const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
     auto& programs = entry->second;
 
     switch (primitive_mode) {
     case GL_POINTS:
-        return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
+        return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
     case GL_LINES:
     case GL_LINE_STRIP:
-        return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines");
+        return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
     case GL_LINES_ADJACENCY:
     case GL_LINE_STRIP_ADJACENCY:
-        return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4,
-                                   "ShaderLinesAdjacency");
+        return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
     case GL_TRIANGLES:
     case GL_TRIANGLE_STRIP:
     case GL_TRIANGLE_FAN:
-        return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3,
-                                   "ShaderTriangles");
+        return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
     case GL_TRIANGLES_ADJACENCY:
     case GL_TRIANGLE_STRIP_ADJACENCY:
-        return LazyGeometryProgram(programs.triangles_adjacency, base_bindings,
-                                   "triangles_adjacency", 6, "ShaderTrianglesAdjacency");
+        return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
     default:
         UNREACHABLE_MSG("Unknown primitive mode.");
-        return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
+        return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
     }
 }
 
-GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
-                                         const std::string& glsl_topology, u32 max_vertices,
-                                         const std::string& debug_name) {
-    if (target_program.handle != 0) {
-        return target_program.handle;
+GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
+                                         GLenum primitive_mode) {
+    if (target_program) {
+        return target_program->handle;
+    }
+    const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
+    target_program = TryLoadProgram(primitive_mode, base_bindings);
+    if (!target_program) {
+        target_program =
+            SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
+        disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
     }
-    std::string source = AllocateBindings(base_bindings);
-    source += "layout (" + glsl_topology + ") in;\n";
-    source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
-    source += code;
 
-    OGLShader shader;
-    shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
-    target_program.Create(true, shader.handle);
-    LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
-    return target_program.handle;
+    LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
+    // target_program refers to the caller's slot, so the next call takes the early return
+    return target_program->handle;
 };
 
-static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // sched instructions appear once every 4 instructions.
-    static constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
+CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
+                                           BaseBindings base_bindings) const {
+    // Yields the entry of precompiled_programs for this usage, or a null pointer without one
+    const ShaderDiskCacheUsage usage = GetUsage(primitive_mode, base_bindings);
+    const auto match = precompiled_programs.find(usage);
+    const bool is_known = match != precompiled_programs.end();
+    return is_known ? match->second : CachedProgram{};
 }
 
-static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
-    constexpr std::size_t start_offset = 10;
-    std::size_t offset = start_offset;
-    std::size_t size = start_offset * sizeof(u64);
-    while (offset < program.size()) {
-        const u64 inst = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if (inst == 0 || (inst >> 52) == 0x50b) {
-                break;
+ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
+                                            BaseBindings base_bindings) const {
+    return {unique_identifier, base_bindings, primitive_mode}; // key for disk cache usages
+}
+
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
+    : RasterizerCache{rasterizer}, disk_cache{system} {}
+
+void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
+                                      const VideoCore::DiskResourceLoadCallback& callback) {
+    // Rebuilds the programs recorded in the disk cache. A usage whose raw shader is missing
+    // (e.g. the transferable file was rejected while decompiling) is skipped, not dereferenced.
+    const auto transferable = disk_cache.LoadTransferable();
+    if (!transferable) {
+        return;
+    }
+    const auto& [raws, usages] = *transferable; // bind by reference: no vector copies
+    auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
+
+    const auto supported_formats{GetSupportedFormats()};
+    const auto unspecialized{
+        GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
+    if (stop_loading)
+        return;
+
+    // Build shaders
+    if (callback)
+        callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
+    for (std::size_t i = 0; i < usages.size(); ++i) {
+        if (stop_loading)
+            return;
+
+        const auto& usage{usages[i]};
+        LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
+                 i + 1, usages.size());
+
+        const auto unspec_it = unspecialized.find(usage.unique_identifier);
+        const auto dump_it = unspec_it != unspecialized.end() ? dumps.find(usage) : dumps.end();
+        CachedProgram shader;
+        if (dump_it != dumps.end()) {
+            // If the shader is dumped, attempt to load it from its program binary
+            shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
+            if (!shader) {
+                // Invalidate the precompiled cache if a dumped shader was rejected
+                disk_cache.InvalidatePrecompiled();
+                dumps.clear();
             }
         }
-        size += sizeof(inst);
-        offset++;
+        if (!shader && unspec_it != unspecialized.end()) {
+            const auto& unspec{unspec_it->second};
+            shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
+                                      usage.bindings, usage.primitive, true);
+        }
+        if (shader) {
+            precompiled_programs.insert({usage, std::move(shader)});
+        }
+        if (callback)
+            callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
+    }
+    // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
+    // precompiling them
+
+    for (const auto& usage : usages) {
+        const auto program_it = precompiled_programs.find(usage);
+        if (program_it != precompiled_programs.end() && dumps.find(usage) == dumps.end()) {
+            disk_cache.SaveDump(usage, program_it->second->handle);
+        }
+    }
-    return size;
 }
 
-void CachedShader::CalculateProperties() {
-    setup.program.real_size = CalculateProgramSize(setup.program.code);
-    setup.program.real_size_b = 0;
-    setup.program.unique_identifier = Common::CityHash64(
-        reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size);
-    if (program_type == Maxwell::ShaderProgram::VertexA) {
-        std::size_t seed = 0;
-        boost::hash_combine(seed, setup.program.unique_identifier);
-        setup.program.real_size_b = CalculateProgramSize(setup.program.code_b);
-        const u64 identifier_b = Common::CityHash64(
-            reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b);
-        boost::hash_combine(seed, identifier_b);
-        setup.program.unique_identifier = static_cast<u64>(seed);
+CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
+    const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
+    // Binaries whose format is not listed in supported_formats are rejected up front
+    if (supported_formats.count(dump.binary_format) == 0) {
+        LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
+        return {};
     }
+
+    CachedProgram program = std::make_shared<OGLProgram>();
+    program->handle = glCreateProgram();
+    const auto handle = program->handle;
+    glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+    glProgramBinary(handle, dump.binary_format, dump.binary.data(),
+                    static_cast<GLsizei>(dump.binary.size()));
+    // A binary the driver refuses shows up as a failed link status
+    GLint is_linked = GL_FALSE;
+    glGetProgramiv(handle, GL_LINK_STATUS, &is_linked);
+    if (is_linked != GL_FALSE) {
+        return program;
+    }
+    LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
+    return {};
 }
 
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
+std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
+    const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
+    const std::vector<ShaderDiskCacheRaw>& raws,
+    const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
+    std::unordered_map<u64, UnspecializedShader> unspecialized;
+    // An empty map is returned when loading is stopped or a stored hash does not match
+    if (callback)
+        callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
+
+    for (std::size_t i = 0; i < raws.size(); ++i) {
+        if (stop_loading)
+            return {};
+
+        const auto& raw{raws[i]};
+        const u64 unique_identifier = raw.GetUniqueIdentifier();
+        const u64 calculated_hash =
+            GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
+        if (unique_identifier != calculated_hash) {
+            LOG_ERROR(
+                Render_OpenGL,
+                "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
+                raw.GetUniqueIdentifier(), calculated_hash);
+            disk_cache.InvalidateTransferable();
+            return {};
+        }
+
+        GLShader::ProgramResult result;
+        if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
+            // If it's stored in the precompiled file, avoid decompiling it here
+            const auto& stored_decompiled{it->second};
+            result = {stored_decompiled.code, stored_decompiled.entries};
+        } else {
+            // Otherwise decompile the shader at boot and save the result to the decompiled file
+            result =
+                CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
+            disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
+        }
+
+        precompiled_shaders.insert({unique_identifier, result});
+
+        unspecialized.insert(
+            {raw.GetUniqueIdentifier(),
+             {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
+        // Report completed entries (i + 1) so the last report reaches raws.size()
+        if (callback)
+            callback(VideoCore::LoadCallbackStage::Decompile, i + 1, raws.size());
+    }
+    return unspecialized;
+}
 
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
@@ -235,7 +493,23 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
 
     if (!shader) {
         // No shader found - create a new one
-        shader = std::make_shared<CachedShader>(program_addr, program);
+        ProgramCode program_code = GetShaderCode(program_addr);
+        ProgramCode program_code_b;
+        if (program == Maxwell::ShaderProgram::VertexA) {
+            program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
+        }
+        const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
+
+        const auto found = precompiled_shaders.find(unique_identifier);
+        if (found != precompiled_shaders.end()) {
+            shader =
+                std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
+                                               precompiled_programs, found->second);
+        } else {
+            shader = std::make_shared<CachedShader>(
+                program_addr, unique_identifier, program, disk_cache, precompiled_programs,
+                std::move(program_code), std::move(program_code_b));
+        }
         Register(shader);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 904d15dd0..97eed192f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,40 +5,49 @@
 #pragma once
 
 #include <array>
-#include <map>
 #include <memory>
+#include <set>
 #include <tuple>
+#include <unordered_map>
 
 #include <glad/glad.h>
 
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
 
+namespace Core {
+class System;
+} // namespace Core
+
 namespace OpenGL {
 
 class CachedShader;
 class RasterizerOpenGL;
+struct UnspecializedShader;
 
 using Shader = std::shared_ptr<CachedShader>;
+using CachedProgram = std::shared_ptr<OGLProgram>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-struct BaseBindings {
-    u32 cbuf{};
-    u32 gmem{};
-    u32 sampler{};
-
-    bool operator<(const BaseBindings& rhs) const {
-        return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
-    }
-};
+using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
+using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
 
 class CachedShader final : public RasterizerCacheObject {
 public:
-    CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
+    explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                          ShaderDiskCacheOpenGL& disk_cache,
+                          const PrecompiledPrograms& precompiled_programs,
+                          ProgramCode&& program_code, ProgramCode&& program_code_b);
+
+    explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                          ShaderDiskCacheOpenGL& disk_cache,
+                          const PrecompiledPrograms& precompiled_programs,
+                          GLShader::ProgramResult result);
 
     VAddr GetAddr() const override {
         return addr;
@@ -65,49 +74,67 @@ private:
     // declared by the hardware. Workaround this issue by generating a different shader per input
     // topology class.
     struct GeometryPrograms {
-        OGLProgram points;
-        OGLProgram lines;
-        OGLProgram lines_adjacency;
-        OGLProgram triangles;
-        OGLProgram triangles_adjacency;
+        CachedProgram points;
+        CachedProgram lines;
+        CachedProgram lines_adjacency;
+        CachedProgram triangles;
+        CachedProgram triangles_adjacency;
     };
 
-    std::string AllocateBindings(BaseBindings base_bindings);
-
     GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
 
     /// Generates a geometry shader or returns one that already exists.
-    GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
-                               const std::string& glsl_topology, u32 max_vertices,
-                               const std::string& debug_name);
+    GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
+                               GLenum primitive_mode);
+
+    CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
 
-    void CalculateProperties();
+    ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
 
     VAddr addr{};
-    std::size_t shader_length{};
+    u64 unique_identifier{};
     Maxwell::ShaderProgram program_type{};
-    GLShader::ShaderSetup setup;
+    ShaderDiskCacheOpenGL& disk_cache;
+    const PrecompiledPrograms& precompiled_programs;
+
+    std::size_t shader_length{};
     GLShader::ShaderEntries entries;
 
     std::string code;
 
-    std::map<BaseBindings, OGLProgram> programs;
-    std::map<BaseBindings, GeometryPrograms> geometry_programs;
+    std::unordered_map<BaseBindings, CachedProgram> programs;
+    std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
 
-    std::map<u32, GLuint> cbuf_resource_cache;
-    std::map<u32, GLuint> gmem_resource_cache;
-    std::map<u32, GLint> uniform_cache;
+    std::unordered_map<u32, GLuint> cbuf_resource_cache;
+    std::unordered_map<u32, GLuint> gmem_resource_cache;
+    std::unordered_map<u32, GLint> uniform_cache;
 };
 
 class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
 public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer);
+    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
+
+    /// Loads disk cache for the current game
+    void LoadDiskCache(const std::atomic_bool& stop_loading,
+                       const VideoCore::DiskResourceLoadCallback& callback);
 
     /// Gets the current specified shader stage program
     Shader GetStageProgram(Maxwell::ShaderProgram program);
 
 private:
+    std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
+        const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
+        const std::vector<ShaderDiskCacheRaw>& raws,
+        const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
+
+    CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
+                                             const std::set<GLenum>& supported_formats);
+
     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+    // Disk cache plus the decompiled shaders and built programs that loading it produced
+    ShaderDiskCacheOpenGL disk_cache;
+    PrecompiledShaders precompiled_shaders;
+    PrecompiledPrograms precompiled_programs;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8e3c20090..70e124dc4 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -193,15 +193,14 @@ public:
     ShaderEntries GetShaderEntries() const {
         ShaderEntries entries;
         for (const auto& cbuf : ir.GetConstantBuffers()) {
-            entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
+            entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
                                                cbuf.first);
         }
         for (const auto& sampler : ir.GetSamplers()) {
-            entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
+            entries.samplers.emplace_back(sampler);
         }
         for (const auto& gmem : ir.GetGlobalMemoryBases()) {
-            entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
-                                                       GetGlobalMemoryBlock(gmem));
+            entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
         }
         entries.clip_distances = ir.GetClipDistances();
         entries.shader_length = ir.GetLength();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 0856a1361..72aca4938 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>
@@ -18,56 +19,29 @@ class ShaderIR;
 
 namespace OpenGL::GLShader {
 
+struct ShaderEntries;
+
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ProgramResult = std::pair<std::string, ShaderEntries>;
+using SamplerEntry = VideoCommon::Shader::Sampler;
 
 class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
 public:
-    explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry,
-                              Maxwell::ShaderStage stage, const std::string& name, u32 index)
-        : VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {}
-
-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
+    explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
+        : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}
 
     u32 GetIndex() const {
         return index;
     }
 
 private:
-    std::string name;
-    Maxwell::ShaderStage stage{};
     u32 index{};
 };
 
-class SamplerEntry : public VideoCommon::Shader::Sampler {
-public:
-    explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage,
-                          const std::string& name)
-        : VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {}
-
-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
-
-private:
-    std::string name;
-    Maxwell::ShaderStage stage{};
-};
-
 class GlobalMemoryEntry {
 public:
-    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
-                               std::string name)
-        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
+    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
 
     u32 GetCbufIndex() const {
         return cbuf_index;
@@ -77,19 +51,9 @@ public:
         return cbuf_offset;
     }
 
-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
-
 private:
     u32 cbuf_index{};
     u32 cbuf_offset{};
-    Maxwell::ShaderStage stage{};
-    std::string name;
 };
 
 struct ShaderEntries {
@@ -100,8 +64,6 @@ struct ShaderEntries {
     std::size_t shader_length{};
 };
 
-using ProgramResult = std::pair<std::string, ShaderEntries>;
-
 std::string GetCommonDeclarations();
 
 ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
new file mode 100644
index 000000000..81882822b
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -0,0 +1,656 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstring>
+#include <fmt/format.h>
+#include <lz4.h>
+
+#include "common/assert.h"
+#include "common/common_paths.h"
+#include "common/common_types.h"
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "common/scm_rev.h"
+
+#include "core/core.h"
+#include "core/hle/kernel/process.h"
+#include "core/settings.h"
+
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+
+namespace OpenGL {
+
+using ShaderCacheVersionHash = std::array<u8, 64>;
+
+enum class TransferableEntryKind : u32 {
+    Raw,
+    Usage,
+};
+
+enum class PrecompiledEntryKind : u32 {
+    Decompiled,
+    Dump,
+};
+
+constexpr u32 NativeVersion = 1;
+
+// Make sure these sizes don't change by accident; usages are read from disk as raw bytes
+static_assert(sizeof(BaseBindings) == 12);
+static_assert(sizeof(ShaderDiskCacheUsage) == 24);
+
+namespace {
+
+ShaderCacheVersionHash GetShaderCacheVersionHash() {
+    const char* const version = Common::g_shader_cache_version;
+    ShaderCacheVersionHash hash{}; // zero-filled; bytes past the copied string stay 0
+    std::memcpy(hash.data(), version, std::min(std::strlen(version), hash.size()));
+    return hash;
+}
+
+template <typename T>
+std::vector<u8> CompressData(const T* source, std::size_t source_size) {
+    // LZ4 takes int sizes; anything above its input limit yields an empty result
+    if (source_size > LZ4_MAX_INPUT_SIZE) {
+        return {};
+    }
+    const int input_size = static_cast<int>(source_size);
+    const int capacity = LZ4_compressBound(input_size);
+    std::vector<u8> output(capacity);
+    const auto* const src = reinterpret_cast<const char*>(source);
+    auto* const dst = reinterpret_cast<char*>(output.data());
+    const int written = LZ4_compress_default(src, dst, input_size, capacity);
+    if (written <= 0) {
+        // LZ4 reported a failure
+        return {};
+    }
+    output.resize(written);
+    return output;
+}
+
+std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
+    std::vector<u8> output(uncompressed_size);
+    const auto* const src = reinterpret_cast<const char*>(compressed.data());
+    auto* const dst = reinterpret_cast<char*>(output.data());
+    const int produced = LZ4_decompress_safe(src, dst, static_cast<int>(compressed.size()),
+                                             static_cast<int>(output.size()));
+    // Anything but exactly the expected byte count counts as a failed decompression
+    if (produced != static_cast<int>(uncompressed_size)) {
+        return {};
+    }
+    return output;
+}
+
+} // namespace
+
+ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                                       u32 program_code_size, u32 program_code_size_b,
+                                       ProgramCode program_code, ProgramCode program_code_b)
+    : unique_identifier{unique_identifier}, program_type{program_type},
+      program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
+      program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
+// A default-constructed entry is meant to be filled in through Load()
+ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
+
+ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
+
+bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
+    if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
+        file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
+        return false;
+    }
+    // Both sizes go into the members (no shadowing locals), so a later Save() of this entry
+    // writes as much program code as was loaded.
+    if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) ||
+        file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) {
+        return false;
+    }
+    // Size the buffers first; program B is only read when HasProgramA() holds
+    program_code.resize(program_code_size);
+    program_code_b.resize(program_code_size_b);
+
+    if (file.ReadArray(program_code.data(), program_code_size) != program_code_size)
+        return false;
+
+    if (HasProgramA() &&
+        file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
+        return false;
+    }
+    return true;
+}
+
+bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
+    // Header: identifier, program type and both code sizes; stop at the first failed write
+    const bool header_ok = file.WriteObject(unique_identifier) == 1 &&
+                           file.WriteObject(static_cast<u32>(program_type)) == 1 &&
+                           file.WriteObject(program_code_size) == 1 &&
+                           file.WriteObject(program_code_size_b) == 1;
+    if (!header_ok) {
+        return false;
+    }
+    if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) {
+        return false;
+    }
+    // The second program is only written when HasProgramA() holds
+    return !HasProgramA() ||
+           file.WriteArray(program_code_b.data(), program_code_size_b) == program_code_size_b;
+}
+
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+
+std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
+ShaderDiskCacheOpenGL::LoadTransferable() {
+    // Reads every raw and usage entry of the transferable file. A disabled cache, a game without
+    // title id, a version mismatch or any read error yields an empty optional.
+    const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
+    if (!Settings::values.use_disk_shader_cache || !has_title_id)
+        return {};
+    tried_to_load = true;
+    FileUtil::IOFile file(GetTransferablePath(), "rb");
+    if (!file.IsOpen()) {
+        LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
+                 GetTitleID());
+        return {};
+    }
+
+    u32 version{};
+    if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
+        LOG_ERROR(Render_OpenGL,
+                  "Failed to get transferable cache version for title id={} - skipping",
+                  GetTitleID());
+        return {};
+    }
+
+    if (version < NativeVersion) {
+        LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
+        file.Close();
+        InvalidateTransferable();
+        return {};
+    }
+    if (version > NativeVersion) {
+        LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
+                                   "of the emulator - skipping");
+        return {};
+    }
+
+    // Version is valid, load the shaders
+    std::vector<ShaderDiskCacheRaw> raws;
+    std::vector<ShaderDiskCacheUsage> usages;
+    while (file.Tell() < file.GetSize()) {
+        TransferableEntryKind kind{};
+        if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
+            LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
+            return {};
+        }
+
+        switch (kind) {
+        case TransferableEntryKind::Raw: {
+            ShaderDiskCacheRaw entry;
+            if (!entry.Load(file)) {
+                LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
+                return {};
+            }
+            transferable.insert({entry.GetUniqueIdentifier(), {}});
+            raws.push_back(std::move(entry));
+            break;
+        }
+        case TransferableEntryKind::Usage: {
+            ShaderDiskCacheUsage usage{};
+            if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
+                LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
+                return {};
+            }
+            usages.push_back(std::move(usage));
+            break;
+        }
+        default:
+            LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
+                      static_cast<u32>(kind));
+            return {};
+        }
+    }
+    return std::make_pair(std::move(raws), std::move(usages)); // hand the vectors over, no copy
+}
+
+std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+          std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
+ShaderDiskCacheOpenGL::LoadPrecompiled() {
+    if (!IsUsable())
+        return {}; // Empty maps: the caller sees no precompiled data
+
+    FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+    if (!file.IsOpen()) {
+        LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
+                 GetTitleID());
+        return {};
+    }
+
+    auto result = LoadPrecompiledFile(file); // Non-const so the maps can be moved out below
+    if (!result) {
+        LOG_INFO(Render_OpenGL,
+                 "Failed to load precompiled cache for game with title id={} - removing",
+                 GetTitleID());
+        file.Close(); // Release the handle before the file is deleted
+        InvalidatePrecompiled();
+        return {};
+    }
+    return std::move(*result);
+}
+
+std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+                        std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
+    ShaderCacheVersionHash file_hash{}; // The file starts with the hash of the writing build
+    if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
+        return {};
+    }
+    if (GetShaderCacheVersionHash() != file_hash) {
+        LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
+        return {};
+    }
+
+    std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
+    std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
+    while (file.Tell() < file.GetSize()) { // Tagged entries follow until the end of the file
+        PrecompiledEntryKind kind{};
+        if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
+            return {};
+        }
+
+        switch (kind) {
+        case PrecompiledEntryKind::Decompiled: {
+            u64 unique_identifier{};
+            if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
+                return {};
+
+            auto entry = LoadDecompiledEntry(file); // Non-const: moved into the map below
+            if (!entry)
+                return {};
+            decompiled.insert({unique_identifier, std::move(*entry)});
+            break;
+        }
+        case PrecompiledEntryKind::Dump: {
+            ShaderDiskCacheUsage usage;
+            if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
+                return {};
+
+            ShaderDiskCacheDump dump;
+            if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
+                return {};
+
+            u32 binary_length{}; // Size passed to DecompressData for the program binary
+            u32 compressed_size{}; // Number of payload bytes stored in the file
+            if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
+                file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
+                return {};
+            }
+
+            std::vector<u8> compressed_binary(compressed_size);
+            if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
+                compressed_binary.size()) {
+                return {};
+            }
+
+            dump.binary = DecompressData(compressed_binary, binary_length);
+            if (dump.binary.empty()) {
+                return {};
+            }
+
+            dumps.insert({usage, std::move(dump)}); // Avoids copying the program binary
+            break;
+        }
+        default:
+            return {}; // Unknown entry kind: treat the whole file as invalid
+        }
+    }
+    return {{std::move(decompiled), std::move(dumps)}};
+}
+
+std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
+    FileUtil::IOFile& file) { // Reads fields in the order SaveDecompiledFile writes them
+    u32 code_size{}; // Length used for the decompressed GLSL string
+    u32 compressed_code_size{}; // Number of compressed bytes stored in the file
+    if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
+        file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
+        return {};
+    }
+
+    std::vector<u8> compressed_code(compressed_code_size);
+    if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+        return {};
+    }
+
+    const std::vector<u8> code = DecompressData(compressed_code, code_size);
+    if (code.empty()) {
+        return {}; // An empty result is treated as a decompression failure
+    }
+    ShaderDiskCacheDecompiled entry;
+    entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
+
+    u32 const_buffers_count{}; // Constant buffer entries: max offset, index, indirect flag
+    if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < const_buffers_count; ++i) {
+        u32 max_offset{};
+        u32 index{};
+        u8 is_indirect{};
+        if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
+            return {};
+        }
+        entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
+    }
+
+    u32 samplers_count{}; // Sampler entries: offset, index, type, array and shadow flags
+    if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < samplers_count; ++i) {
+        u64 offset{};
+        u64 index{};
+        u32 type{};
+        u8 is_array{};
+        u8 is_shadow{};
+        if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
+            file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
+            file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
+            file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
+            return {};
+        }
+        entry.entries.samplers.emplace_back(
+            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+            static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
+    }
+
+    u32 global_memory_count{}; // Global memory entries: cbuf index and cbuf offset
+    if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < global_memory_count; ++i) {
+        u32 cbuf_index{};
+        u32 cbuf_offset{};
+        if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
+            return {};
+        }
+        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
+    }
+
+    for (auto& clip_distance : entry.entries.clip_distances) { // One byte per element, no count
+        u8 clip_distance_raw{};
+        if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
+            return {};
+        clip_distance = clip_distance_raw != 0;
+    }
+
+    u64 shader_length{}; // Stored as u64 on disk, narrowed to std::size_t below
+    if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
+        return {};
+    entry.entries.shader_length = static_cast<std::size_t>(shader_length);
+
+    return entry;
+}
+
+bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
+                                               const std::string& code,
+                                               const std::vector<u8>& compressed_code,
+                                               const GLShader::ShaderEntries& entries) {
+    if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
+        file.WriteObject(unique_identifier) != 1 ||
+        file.WriteObject(static_cast<u32>(code.size())) != 1 ||
+        file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
+        file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+        return false; // Entry kind, identifier, both code sizes or the compressed GLSL failed
+    }
+
+    if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
+        return false; // Count prefix of the constant buffer list
+    for (const auto& cbuf : entries.const_buffers) {
+        if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
+            file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
+            file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
+            return false;
+        }
+    }
+
+    if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
+        return false; // Count prefix of the sampler list
+    for (const auto& sampler : entries.samplers) {
+        if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
+            file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
+            file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
+            file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
+            file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
+            return false;
+        }
+    }
+
+    if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
+        return false; // Count prefix of the global memory list
+    for (const auto& gmem : entries.global_memory_entries) {
+        if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
+            file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
+            return false;
+        }
+    }
+
+    for (const bool clip_distance : entries.clip_distances) { // One byte each, no count prefix
+        if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
+            return false;
+    }
+
+    return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1; // 64-bit on disk
+}
+
+void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
+    if (const auto path{GetTransferablePath()}; !FileUtil::Delete(path)) {
+        LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", path);
+    }
+    // The precompiled file is always removed together with the transferable one
+    InvalidatePrecompiled();
+}
+
+void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
+    if (const auto path{GetPrecompiledPath()}; !FileUtil::Delete(path)) {
+        LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", path);
+    }
+}
+
+void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
+    if (!IsUsable())
+        return;
+
+    // Raw entries are stored once per unique identifier; skip shaders that are already known
+    const u64 unique_identifier = entry.GetUniqueIdentifier();
+    if (transferable.count(unique_identifier) != 0)
+        return;
+
+    FileUtil::IOFile file = AppendTransferableFile();
+    if (!file.IsOpen())
+        return;
+    const bool saved = file.WriteObject(TransferableEntryKind::Raw) == 1 && entry.Save(file);
+    if (saved) {
+        transferable.insert({unique_identifier, {}});
+        return;
+    }
+    LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
+    file.Close();
+    InvalidateTransferable();
+}
+
+void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
+    if (!IsUsable())
+        return;
+    // A usage may only be recorded for a shader whose raw entry is already tracked
+    const auto pos = transferable.find(usage.unique_identifier);
+    ASSERT_MSG(pos != transferable.end(), "Saving shader usage without storing raw previously");
+
+    auto& known_usages{pos->second};
+    ASSERT(known_usages.find(usage) == known_usages.end());
+    known_usages.insert(usage);
+
+    FileUtil::IOFile file = AppendTransferableFile();
+    if (!file.IsOpen())
+        return;
+    const bool saved =
+        file.WriteObject(TransferableEntryKind::Usage) == 1 && file.WriteObject(usage) == 1;
+    if (!saved) {
+        LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
+        file.Close();
+        InvalidateTransferable();
+    }
+}
+
+void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
+                                           const GLShader::ShaderEntries& entries) {
+    if (!IsUsable())
+        return;
+    // The GLSL source is stored compressed; an empty result is treated as a failure
+    const std::vector<u8> compressed_code = CompressData(code.data(), code.size());
+    if (compressed_code.empty()) {
+        LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
+                  unique_identifier);
+        return;
+    }
+
+    FileUtil::IOFile file = AppendPrecompiledFile();
+    if (!file.IsOpen())
+        return;
+
+    if (SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries))
+        return;
+    LOG_ERROR(Render_OpenGL,
+              "Failed to save decompiled entry to the precompiled file - removing");
+    file.Close();
+    InvalidatePrecompiled();
+}
+
+void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
+    if (!IsUsable())
+        return;
+
+    GLint binary_length{};
+    glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
+    GLenum binary_format{};
+    std::vector<u8> binary(binary_length);
+    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
+
+    const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
+    if (compressed_binary.empty()) {
+        LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
+                  usage.unique_identifier);
+        return;
+    }
+
+    FileUtil::IOFile file = AppendPrecompiledFile();
+    if (!file.IsOpen())
+        return;
+    // Dump entry layout: kind, usage, binary format, raw length, compressed length, payload
+    const auto payload_size = compressed_binary.size();
+    const bool saved = file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) == 1 &&
+                       file.WriteObject(usage) == 1 &&
+                       file.WriteObject(static_cast<u32>(binary_format)) == 1 &&
+                       file.WriteObject(static_cast<u32>(binary_length)) == 1 &&
+                       file.WriteObject(static_cast<u32>(payload_size)) == 1 &&
+                       file.WriteArray(compressed_binary.data(), payload_size) == payload_size;
+    if (saved)
+        return;
+    LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
+              usage.unique_identifier);
+    file.Close();
+    InvalidatePrecompiled();
+}
+
+bool ShaderDiskCacheOpenGL::IsUsable() const { // Requires a prior LoadTransferable() attempt
+    return Settings::values.use_disk_shader_cache && tried_to_load;
+}
+
+FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
+    if (!EnsureDirectories())
+        return {};
+
+    const auto path{GetTransferablePath()};
+    // Sampled before the file is opened below
+    const bool is_new_file = !FileUtil::Exists(path);
+
+    FileUtil::IOFile file(path, "ab");
+    if (!file.IsOpen()) {
+        LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", path);
+        return {};
+    }
+
+    // Files that are new or still empty get the cache version written first
+    const bool needs_header = is_new_file || file.GetSize() == 0;
+    if (needs_header && file.WriteObject(NativeVersion) != 1) {
+        LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", path);
+        return {};
+    }
+    return file;
+}
+
+FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
+    if (!EnsureDirectories())
+        return {};
+
+    const auto path{GetPrecompiledPath()};
+    const bool is_new_file = !FileUtil::Exists(path);
+
+    FileUtil::IOFile file(path, "ab");
+    if (!file.IsOpen()) {
+        LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", path);
+        return {};
+    }
+    // Only files that are new or still empty get the version hash written first
+    if (!is_new_file && file.GetSize() != 0)
+        return file;
+
+    const auto version_hash{GetShaderCacheVersionHash()};
+    if (file.WriteArray(version_hash.data(), version_hash.size()) != version_hash.size()) {
+        LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
+                  path);
+        return {};
+    }
+    return file;
+}
+
+bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
+    // Ordered from the outermost directory to the innermost ones
+    const std::string directories[] = {FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir),
+                                       GetBaseDir(), GetTransferableDir(), GetPrecompiledDir()};
+
+    for (const std::string& dir : directories) {
+        if (!FileUtil::CreateDir(dir)) {
+            LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
+            return false;
+        }
+    }
+    return true;
+}
+
+std::string ShaderDiskCacheOpenGL::GetTransferablePath() const { // <dir>/<title id>.bin
+    return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+}
+
+std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const { // <dir>/<title id>.bin
+    return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
+}
+
+std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
+    return GetBaseDir() + DIR_SEP "transferable"; // Subdirectory of the OpenGL base directory
+}
+
+std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
+    return GetBaseDir() + DIR_SEP "precompiled"; // Subdirectory of the OpenGL base directory
+}
+
+std::string ShaderDiskCacheOpenGL::GetBaseDir() const { // <user shader dir>/opengl
+    return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
+}
+
+std::string ShaderDiskCacheOpenGL::GetTitleID() const { // Zero-padded to 16 uppercase hex digits
+    return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
new file mode 100644
index 000000000..6be0c0547
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -0,0 +1,245 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+
+namespace Core {
+class System;
+}
+
+namespace FileUtil {
+class IOFile;
+}
+
+namespace OpenGL {
+
+using ProgramCode = std::vector<u64>;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+/// Bindings allocated for an OpenGL shader program; compared member by member
+struct BaseBindings {
+    u32 cbuf{};
+    u32 gmem{};
+    u32 sampler{};
+
+    bool operator==(const BaseBindings& rhs) const {
+        return cbuf == rhs.cbuf && gmem == rhs.gmem && sampler == rhs.sampler;
+    }
+
+    bool operator!=(const BaseBindings& rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+/// Describes how a shader is used. Stored in the cache files as the raw bytes of this struct
+struct ShaderDiskCacheUsage {
+    u64 unique_identifier{};
+    BaseBindings bindings;
+    GLenum primitive{};
+
+    bool operator==(const ShaderDiskCacheUsage& rhs) const {
+        return unique_identifier == rhs.unique_identifier && bindings == rhs.bindings &&
+               primitive == rhs.primitive;
+    }
+
+    bool operator!=(const ShaderDiskCacheUsage& rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+} // namespace OpenGL
+
+namespace std {
+
+template <>
+struct hash<OpenGL::BaseBindings> { // ORs the three members together at bit offsets 0/8/16
+    std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+        return bindings.cbuf | (bindings.gmem << 8) | (bindings.sampler << 16);
+    }
+};
+
+template <>
+struct hash<OpenGL::ShaderDiskCacheUsage> { // XORs identifier, bindings hash and primitive
+    std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
+        return static_cast<std::size_t>(usage.unique_identifier) ^
+               std::hash<OpenGL::BaseBindings>{}(usage.bindings) ^ (usage.primitive << 16);
+    }
+};
+
+} // namespace std
+
+namespace OpenGL {
+
+/// Raw guest shader as stored in the transferable cache: identifier, program type and code
+class ShaderDiskCacheRaw {
+public:
+    explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                                u32 program_code_size, u32 program_code_size_b,
+                                ProgramCode program_code, ProgramCode program_code_b);
+    ShaderDiskCacheRaw();
+    ~ShaderDiskCacheRaw();
+
+    bool Load(FileUtil::IOFile& file); // Returns false when the entry could not be read
+    bool Save(FileUtil::IOFile& file) const; // Returns false when a write fails
+
+    u64 GetUniqueIdentifier() const {
+        return unique_identifier;
+    }
+
+    bool HasProgramA() const { // True only for VertexA, which also carries program_code_b
+        return program_type == Maxwell::ShaderProgram::VertexA;
+    }
+
+    Maxwell::ShaderProgram GetProgramType() const {
+        return program_type;
+    }
+
+    Maxwell::ShaderStage GetProgramStage() const { // Both vertex programs map to Vertex
+        switch (program_type) {
+        case Maxwell::ShaderProgram::VertexA:
+        case Maxwell::ShaderProgram::VertexB:
+            return Maxwell::ShaderStage::Vertex;
+        case Maxwell::ShaderProgram::TesselationControl:
+            return Maxwell::ShaderStage::TesselationControl;
+        case Maxwell::ShaderProgram::TesselationEval:
+            return Maxwell::ShaderStage::TesselationEval;
+        case Maxwell::ShaderProgram::Geometry:
+            return Maxwell::ShaderStage::Geometry;
+        case Maxwell::ShaderProgram::Fragment:
+            return Maxwell::ShaderStage::Fragment;
+        }
+        UNREACHABLE();
+        return {}; // Never flow off the end of a non-void function if UNREACHABLE() returns
+    }
+
+    const ProgramCode& GetProgramCode() const {
+        return program_code;
+    }
+
+    const ProgramCode& GetProgramCodeB() const {
+        return program_code_b;
+    }
+
+private:
+    u64 unique_identifier{};
+    Maxwell::ShaderProgram program_type{};
+    u32 program_code_size{}; // Element count written for program_code
+    u32 program_code_size_b{}; // Element count written for program_code_b
+
+    ProgramCode program_code;
+    ProgramCode program_code_b;
+};
+
+/// Decompiled form of a shader: the generated GLSL code plus the entries loaded alongside it
+struct ShaderDiskCacheDecompiled {
+    std::string code;
+    GLShader::ShaderEntries entries;
+};
+
+/// Contains an OpenGL dumped binary program and the format reported when it was retrieved
+struct ShaderDiskCacheDump {
+    GLenum binary_format{};
+    std::vector<u8> binary;
+};
+
+class ShaderDiskCacheOpenGL {
+public:
+    explicit ShaderDiskCacheOpenGL(Core::System& system);
+
+    /// Loads the transferable cache. Outdated files are removed, unreadable ones are skipped.
+    std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
+    LoadTransferable();
+
+    /// Loads current game's precompiled cache. Invalidates on failure.
+    std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+              std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
+    LoadPrecompiled();
+
+    /// Removes the transferable (and precompiled) cache file.
+    void InvalidateTransferable() const;
+
+    /// Removes the precompiled cache file.
+    void InvalidatePrecompiled() const;
+
+    /// Saves a raw dump to the transferable file. Checks for collisions.
+    void SaveRaw(const ShaderDiskCacheRaw& entry);
+
+    /// Saves shader usage to the transferable file. Does not check for collisions.
+    void SaveUsage(const ShaderDiskCacheUsage& usage);
+
+    /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
+    void SaveDecompiled(u64 unique_identifier, const std::string& code,
+                        const GLShader::ShaderEntries& entries);
+
+    /// Saves a dump entry to the precompiled file. Does not check for collisions.
+    void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
+
+private:
+    /// Loads the precompiled cache from the passed file. Returns empty on failure.
+    std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+                            std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+    LoadPrecompiledFile(FileUtil::IOFile& file);
+
+    /// Loads a decompiled cache entry from the passed file. Returns empty on failure.
+    std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
+
+    /// Saves a decompiled entry to the passed file. Returns true on success.
+    bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
+                            const std::vector<u8>& compressed_code,
+                            const GLShader::ShaderEntries& entries);
+
+    /// Returns if the cache can be used
+    bool IsUsable() const;
+
+    /// Opens current game's transferable file and writes its header if it doesn't exist
+    FileUtil::IOFile AppendTransferableFile() const;
+
+    /// Opens current game's precompiled file and writes its header if it doesn't exist
+    FileUtil::IOFile AppendPrecompiledFile() const;
+
+    /// Create shader disk cache directories. Returns true on success.
+    bool EnsureDirectories() const;
+
+    /// Gets current game's transferable file path
+    std::string GetTransferablePath() const;
+
+    /// Gets current game's precompiled file path
+    std::string GetPrecompiledPath() const;
+
+    /// Get user's transferable directory path
+    std::string GetTransferableDir() const;
+
+    /// Get user's precompiled directory path
+    std::string GetPrecompiledDir() const;
+
+    /// Get user's shader directory path
+    std::string GetBaseDir() const;
+
+    /// Get current game's title id
+    std::string GetTitleID() const;
+
+    // Core system
+    Core::System& system;
+    // Known raw shaders (by unique identifier) and the usages recorded for each of them
+    std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
+    // Set once LoadTransferable() ran with the disk cache enabled; required by IsUsable()
+    bool tried_to_load{};
+};
+
+} // namespace OpenGL
+\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index ac5e6917b..fba8e681b 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -26,12 +26,10 @@ struct ShaderSetup {
         ProgramCode code;
         ProgramCode code_b; // Used for dual vertex shaders
         u64 unique_identifier;
-        std::size_t real_size;
-        std::size_t real_size_b;
     } program;
 
     /// Used in scenarios where we have a dual vertex shaders
-    void SetProgramB(ProgramCode&& program_b) {
+    void SetProgramB(ProgramCode program_b) {
         program.code_b = std::move(program_b);
         has_program_b = true;
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 285594f50..03b7548c2 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -47,7 +47,7 @@ GLuint LoadShader(const char* source, GLenum type);
  * @returns Handle of the newly created OpenGL program object
  */
 template <typename... T>
-GLuint LoadProgram(bool separable_program, T... shaders) {
+GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
     // Link the program
     LOG_DEBUG(Render_OpenGL, "Linking program...");
 
@@ -58,6 +58,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
     if (separable_program) {
         glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
     }
+    if (hint_retrievable) {
+        glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
+    }
 
     glLinkProgram(program_id);
 
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index b7ba59350..81af803bc 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -462,29 +462,35 @@ void OpenGLState::ApplyPolygonOffset() const {
 }
 
 void OpenGLState::ApplyTextures() const {
+    bool has_delta{}; // True once a unit's texture differs from the applied state
+    std::size_t first{}; // Lowest texture unit that changed
+    std::size_t last{}; // Highest texture unit that changed
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
+
     for (std::size_t i = 0; i < std::size(texture_units); ++i) {
         const auto& texture_unit = texture_units[i];
         const auto& cur_state_texture_unit = cur_state.texture_units[i];
+        textures[i] = texture_unit.texture;
 
-        if (texture_unit.texture != cur_state_texture_unit.texture) {
-            glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
-            glBindTexture(texture_unit.target, texture_unit.texture);
-        }
-        // Update the texture swizzle
-        if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r ||
-            texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
-            texture_unit.swizzle.b != cur_state_texture_unit.swizzle.b ||
-            texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
-            std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
-                                         texture_unit.swizzle.b, texture_unit.swizzle.a};
-            glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
+        if (textures[i] != cur_state_texture_unit.texture) {
+            if (!has_delta) {
+                first = i;
+                has_delta = true;
+            }
+            last = i;
         }
     }
+
+    if (has_delta) { // Rebind the dirty range [first, last] with a single call
+        glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
+                       textures.data() + first); // Element i is bound to unit first + i
+    }
 }
 
 void OpenGLState::ApplySamplers() const {
     bool has_delta{};
-    std::size_t first{}, last{};
+    std::size_t first{};
+    std::size_t last{};
     std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
     for (std::size_t i = 0; i < std::size(samplers); ++i) {
         samplers[i] = texture_units[i].sampler;
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index a5a7c0920..9e1eda5b1 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -126,26 +126,14 @@ public:
     struct TextureUnit {
         GLuint texture; // GL_TEXTURE_BINDING_2D
         GLuint sampler; // GL_SAMPLER_BINDING
-        GLenum target;
-        struct {
-            GLint r; // GL_TEXTURE_SWIZZLE_R
-            GLint g; // GL_TEXTURE_SWIZZLE_G
-            GLint b; // GL_TEXTURE_SWIZZLE_B
-            GLint a; // GL_TEXTURE_SWIZZLE_A
-        } swizzle;
 
         void Unbind() {
             texture = 0;
-            swizzle.r = GL_RED;
-            swizzle.g = GL_GREEN;
-            swizzle.b = GL_BLUE;
-            swizzle.a = GL_ALPHA;
         }
 
         void Reset() {
             Unbind();
             sampler = 0;
-            target = GL_TEXTURE_2D;
         }
     };
     std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e37b65b38..6476a9e1a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -98,8 +98,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
     return matrix;
 }
 
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
-    : VideoCore::RendererBase{window} {}
+RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
+    : VideoCore::RendererBase{window}, system{system} {}
 
 RendererOpenGL::~RendererOpenGL() = default;
 
@@ -171,10 +171,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
                                        Memory::GetPointer(framebuffer_addr),
                                        gl_framebuffer_data.data(), true);
 
-        state.texture_units[0].texture = screen_info.texture.resource.handle;
-        state.Apply();
-
-        glActiveTexture(GL_TEXTURE0);
         glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
 
         // Update existing texture
@@ -182,14 +178,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
         //       they differ from the LCD resolution.
         // TODO: Applications could theoretically crash yuzu here by specifying too large
         //       framebuffer sizes. We should make sure that this cannot happen.
-        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
-                        screen_info.texture.gl_format, screen_info.texture.gl_type,
-                        gl_framebuffer_data.data());
+        glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
+                            framebuffer.height, screen_info.texture.gl_format,
+                            screen_info.texture.gl_type, gl_framebuffer_data.data());
 
         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-
-        state.texture_units[0].texture = 0;
-        state.Apply();
     }
 }
 
@@ -199,17 +192,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
  */
 void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                                 const TextureInfo& texture) {
-    state.texture_units[0].texture = texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
-
-    // Update existing texture
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
-
-    state.texture_units[0].texture = 0;
-    state.Apply();
+    const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
+    glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
 }
 
 /**
@@ -249,26 +233,13 @@ void RendererOpenGL::InitOpenGLObjects() {
                               sizeof(ScreenRectVertex));
 
     // Allocate textures for the screen
-    screen_info.texture.resource.Create();
+    screen_info.texture.resource.Create(GL_TEXTURE_2D);
 
-    // Allocation of storage is deferred until the first frame, when we
-    // know the framebuffer size.
-
-    state.texture_units[0].texture = screen_info.texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    const GLuint texture = screen_info.texture.resource.handle;
+    glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
 
     screen_info.display_texture = screen_info.texture.resource.handle;
 
-    state.texture_units[0].texture = 0;
-    state.Apply();
-
     // Clear screen to black
     LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
 }
@@ -279,25 +250,24 @@ void RendererOpenGL::CreateRasterizer() {
     }
     // Initialize sRGB Usage
     OpenGLState::ClearsRGBUsed();
-    rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info);
+    rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
 }
 
 void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
                                                  const Tegra::FramebufferConfig& framebuffer) {
-
     texture.width = framebuffer.width;
     texture.height = framebuffer.height;
 
     GLint internal_format;
     switch (framebuffer.pixel_format) {
     case Tegra::FramebufferConfig::PixelFormat::ABGR8:
-        internal_format = GL_RGBA;
+        internal_format = GL_RGBA8;
         texture.gl_format = GL_RGBA;
         texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
         gl_framebuffer_data.resize(texture.width * texture.height * 4);
         break;
     default:
-        internal_format = GL_RGBA;
+        internal_format = GL_RGBA8;
         texture.gl_format = GL_RGBA;
         texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
         gl_framebuffer_data.resize(texture.width * texture.height * 4);
@@ -306,15 +276,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
         UNREACHABLE();
     }
 
-    state.texture_units[0].texture = texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
-                 texture.gl_format, texture.gl_type, nullptr);
-
-    state.texture_units[0].texture = 0;
-    state.Apply();
+    texture.resource.Release();
+    texture.resource.Create(GL_TEXTURE_2D);
+    glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
 }
 
 void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
@@ -356,7 +320,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
     }};
 
     state.texture_units[0].texture = screen_info.display_texture;
-    state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
     // Workaround brigthness problems in SMO by enabling sRGB in the final output
     // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
     state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 1665018db..7e13e566b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,6 +12,10 @@
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
 
+namespace Core {
+class System;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -41,7 +45,7 @@ struct ScreenInfo {
 
 class RendererOpenGL : public VideoCore::RendererBase {
 public:
-    explicit RendererOpenGL(Core::Frontend::EmuWindow& window);
+    explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
     ~RendererOpenGL() override;
 
     /// Swap buffers (render frame)
@@ -72,6 +76,8 @@ private:
     void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                     const TextureInfo& texture);
 
+    Core::System& system;
+
     OpenGLState state;
 
     // OpenGL object IDs
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 9b579bde1..e006f8138 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -104,19 +104,42 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     }
     case OpCode::Id::LD_L: {
         UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
-                             static_cast<unsigned>(instr.ld_l.unknown.Value()));
-
-        const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
-                                     Immediate(static_cast<s32>(instr.smem_imm)));
-        const Node lmem = GetLocalMemory(index);
+                             static_cast<u32>(instr.ld_l.unknown.Value()));
+
+        const auto GetLmem = [&](s32 offset) {
+            ASSERT(offset % 4 == 0);
+            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+            const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
+                                           immediate_offset);
+            return GetLocalMemory(address);
+        };
 
         switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bytes32:
-            SetRegister(bb, instr.gpr0, lmem);
+        case Tegra::Shader::StoreType::Bits32:
+        case Tegra::Shader::StoreType::Bits64:
+        case Tegra::Shader::StoreType::Bits128: {
+            const u32 count = [&]() {
+                switch (instr.ldst_sl.type.Value()) {
+                case Tegra::Shader::StoreType::Bits32:
+                    return 1;
+                case Tegra::Shader::StoreType::Bits64:
+                    return 2;
+                case Tegra::Shader::StoreType::Bits128:
+                    return 4;
+                default:
+                    UNREACHABLE();
+                    return 0;
+                }
+            }();
+            for (u32 i = 0; i < count; ++i)
+                SetTemporal(bb, i, GetLmem(i * 4));
+            for (u32 i = 0; i < count; ++i)
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
             break;
+        }
         default:
             UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
-                              static_cast<unsigned>(instr.ldst_sl.type.Value()));
+                              static_cast<u32>(instr.ldst_sl.type.Value()));
         }
         break;
     }
@@ -203,12 +226,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
                              static_cast<u32>(instr.st_l.unknown.Value()));
 
-        const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
-                                     Immediate(static_cast<s32>(instr.smem_imm)));
+        // Builds the address node gpr8 + smem_imm + offset; offset must be a multiple of 4.
+        const auto GetLmemAddr = [&](s32 offset) {
+            ASSERT(offset % 4 == 0);
+            const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
+        };
 
+        // Wider stores intentionally fall through: each case writes its own extra words and
+        // then continues into the narrower cases, ending with the word at offset 0.
         switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bytes32:
-            SetLocalMemory(bb, index, GetRegister(instr.gpr0));
+        case Tegra::Shader::StoreType::Bits128:
+            SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
+            SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
+            [[fallthrough]];
+        case Tegra::Shader::StoreType::Bits64:
+            SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
+            [[fallthrough]];
+        case Tegra::Shader::StoreType::Bits32:
+            SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
             break;
         default:
             UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
@@ -325,15 +356,18 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         const auto& sampler =
             GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
 
+        u32 indexer = 0;
         switch (instr.txq.query_type) {
         case Tegra::Shader::TextureQueryType::Dimension: {
             for (u32 element = 0; element < 4; ++element) {
-                MetaTexture meta{sampler, element};
-                const Node value = Operation(OperationCode::F4TextureQueryDimensions,
-                                             std::move(meta), GetRegister(instr.gpr8));
-                SetTemporal(bb, element, value);
+                if (instr.txq.IsComponentEnabled(element)) {
+                    MetaTexture meta{sampler, element};
+                    const Node value = Operation(OperationCode::F4TextureQueryDimensions,
+                                                 std::move(meta), GetRegister(instr.gpr8));
+                    SetTemporal(bb, indexer++, value);
+                }
             }
-            for (u32 i = 0; i < 4; ++i) {
+            for (u32 i = 0; i < indexer; ++i) {
                 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
             }
             break;
@@ -734,4 +768,4 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
     return {coord_count, total_coord_count};
 }
 
-} // namespace VideoCommon::Shader
-\ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 8f97512ee..1d4fbef53 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -236,6 +236,11 @@ private:
 
 class ConstBuffer {
 public:
+    explicit ConstBuffer(u32 max_offset, bool is_indirect)
+        : max_offset{max_offset}, is_indirect{is_indirect} {}
+
+    ConstBuffer() = default;
+
     void MarkAsUsed(u64 offset) {
         max_offset = std::max(max_offset, static_cast<u32>(offset));
     }
@@ -252,6 +257,10 @@ public:
         return max_offset + sizeof(float);
     }
 
+    u32 GetMaxOffset() const {
+        return max_offset;
+    }
+
 private:
     u32 max_offset{};
     bool is_indirect{};
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e7c78bee2..0fc5530f2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -182,7 +182,7 @@ struct TICEntry {
     };
     union {
         BitField<0, 16, u32> height_minus_1;
-        BitField<16, 15, u32> depth_minus_1;
+        BitField<16, 14, u32> depth_minus_1;
     };
     union {
         BitField<6, 13, u32> mip_lod_bias;
@@ -317,7 +317,6 @@ struct FullTextureInfo {
     u32 index;
     TICEntry tic;
     TSCEntry tsc;
-    bool enabled;
 };
 
 /// Returns the number of bytes per pixel of the input texture format.
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 0b8ccdd44..cb82ecf3f 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -11,8 +11,9 @@
 
 namespace VideoCore {
 
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) {
-    return std::make_unique<OpenGL::RendererOpenGL>(emu_window);
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
+                                             Core::System& system) {
+    return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
 }
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 5b373bcb1..3c583f195 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -6,6 +6,10 @@
 
 #include <memory>
 
+namespace Core {
+class System;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -20,7 +24,8 @@ class RendererBase;
  * @note The returned renderer instance is simply allocated. Its Init()
  *       function still needs to be called to fully complete its setup.
  */
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window);
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
+                                             Core::System& system);
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer);