20 files changed, 643 insertions, 364 deletions
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index a3fda1094..8b86ad050 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
         case ThiMethod::SetMethod1:
             LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
                       static_cast<u32>(nvdec_thi_state.method_0));
-            nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
-                                           data);
+            nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
             break;
         default:
             break;
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index d02dc6260..1b4bbc8ac 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) {
     av_free(ptr);
 }
 
-Codec::Codec(GPU& gpu_)
-    : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
+Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
+    : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
       vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
 
 Codec::~Codec() {
@@ -43,46 +43,64 @@ Codec::~Codec() {
     avcodec_close(av_codec_ctx);
 }
 
+// Creates and opens the libavcodec decoder that matches current_codec.
+// `initialized` is set to true only when every step succeeds. On any failure an error is
+// logged and `initialized` stays false, so callers must check it before using av_codec_ctx.
+void Codec::Initialize() {
+    AVCodecID codec{AV_CODEC_ID_NONE};
+    switch (current_codec) {
+    case NvdecCommon::VideoCodec::H264:
+        codec = AV_CODEC_ID_H264;
+        break;
+    case NvdecCommon::VideoCodec::Vp9:
+        codec = AV_CODEC_ID_VP9;
+        break;
+    default:
+        LOG_ERROR(Service_NVDRV, "Unsupported video codec {}", GetCurrentCodecName());
+        return;
+    }
+    av_codec = avcodec_find_decoder(codec);
+    if (av_codec == nullptr) {
+        LOG_ERROR(Service_NVDRV, "avcodec_find_decoder() Failed.");
+        return;
+    }
+    av_codec_ctx = avcodec_alloc_context3(av_codec);
+    if (av_codec_ctx == nullptr) {
+        LOG_ERROR(Service_NVDRV, "avcodec_alloc_context3() Failed.");
+        return;
+    }
+    av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
+
+    // TODO(ameerj): libavcodec gpu hw acceleration
+
+    const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
+    if (av_error < 0) {
+        LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
+        // Release the context (and reset the pointer) so a failed attempt does not leak it.
+        avcodec_free_context(&av_codec_ctx);
+        return;
+    }
+    initialized = true;
+}
+
 void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
     if (current_codec != codec) {
-        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
         current_codec = codec;
+        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
     }
 }
 
-void Codec::StateWrite(u32 offset, u64 arguments) {
-    u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
-    std::memcpy(state_offset, &arguments, sizeof(u64));
-}
-
 void Codec::Decode() {
-    bool is_first_frame = false;
+    const bool is_first_frame = !initialized;
     if (!initialized) {
-        if (current_codec == NvdecCommon::VideoCodec::H264) {
-            av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
-        } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
-            av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
-        } else {
-            LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
-            return;
-        }
-
-        av_codec_ctx = avcodec_alloc_context3(av_codec);
-        av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
-
-        // TODO(ameerj): libavcodec gpu hw acceleration
-
-        const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
-        if (av_error < 0) {
-            LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
-            avcodec_close(av_codec_ctx);
-            return;
-        }
-        initialized = true;
-        is_first_frame = true;
+        Initialize();
+        if (!initialized) {
+            // Initialization failed and was already logged; av_codec_ctx is unusable.
+            return;
+        }
     }
-    bool vp9_hidden_frame = false;
 
+    bool vp9_hidden_frame = false;
     AVPacket packet{};
     av_init_packet(&packet);
     std::vector<u8> frame_data;
@@ -95,7 +97,7 @@ void Codec::Decode() {
     }
 
     packet.data = frame_data.data();
-    packet.size = static_cast<int>(frame_data.size());
+    packet.size = static_cast<s32>(frame_data.size());
 
     avcodec_send_packet(av_codec_ctx, &packet);
 
@@ -127,4 +129,22 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
     return current_codec;
 }
 
+// Returns a printable name for current_codec ("Unknown" for values without a dedicated case).
+std::string_view Codec::GetCurrentCodecName() const {
+    switch (current_codec) {
+    case NvdecCommon::VideoCodec::None:
+        return "None";
+    case NvdecCommon::VideoCodec::H264:
+        return "H264";
+    case NvdecCommon::VideoCodec::Vp8:
+        return "VP8";
+    case NvdecCommon::VideoCodec::H265:
+        return "H265";
+    case NvdecCommon::VideoCodec::Vp9:
+        return "VP9";
+    default:
+        return "Unknown";
+    }
+}
+
 } // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 8a2a6c360..96c823c76 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -34,15 +34,15 @@ class VP9;
 
 class Codec {
 public:
-    explicit Codec(GPU& gpu);
+    explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
     ~Codec();
 
+    /// Initialize the codec; sets the initialized flag on success and returns nothing
+    void Initialize();
+
     /// Sets NVDEC video stream codec
     void SetTargetCodec(NvdecCommon::VideoCodec codec);
 
-    /// Populate NvdecRegisters state with argument value at the provided offset
-    void StateWrite(u32 offset, u64 arguments);
-
     /// Call decoders to construct headers, decode AVFrame with ffmpeg
     void Decode();
 
@@ -51,6 +51,8 @@ public:
 
     /// Returns the value of current_codec
     [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+    /// Return name of the current codec
+    [[nodiscard]] std::string_view GetCurrentCodecName() const;
 
 private:
     bool initialized{};
@@ -60,10 +62,10 @@ private:
     AVCodecContext* av_codec_ctx{nullptr};
 
     GPU& gpu;
+    const NvdecCommon::NvdecRegisters& state;
     std::unique_ptr<Decoder::H264> h264_decoder;
     std::unique_ptr<Decoder::VP9> vp9_decoder;
 
-    NvdecCommon::NvdecRegisters state{};
     std::queue<AVFramePtr> av_frames{};
 };
 
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index fea6aed98..5fb6d45ee 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -45,134 +45,129 @@ H264::~H264() = default;
 
 const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
                                                 bool is_first_frame) {
-    H264DecoderContext context{};
+    H264DecoderContext context;
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
 
-    const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
+    const s64 frame_number = context.h264_parameter_set.frame_number.Value();
     if (!is_first_frame && frame_number != 0) {
-        frame.resize(context.frame_data_size);
-
+        frame.resize(context.stream_len);
         gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
-    } else {
-        /// Encode header
-        H264BitWriter writer{};
-        writer.WriteU(1, 24);
-        writer.WriteU(0, 1);
-        writer.WriteU(3, 2);
-        writer.WriteU(7, 5);
-        writer.WriteU(100, 8);
-        writer.WriteU(0, 8);
-        writer.WriteU(31, 8);
-        writer.WriteUe(0);
-        const auto chroma_format_idc =
-            static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
-        writer.WriteUe(chroma_format_idc);
-        if (chroma_format_idc == 3) {
-            writer.WriteBit(false);
-        }
-
-        writer.WriteUe(0);
-        writer.WriteUe(0);
-        writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
-        writer.WriteBit(false); // Scaling matrix present flag
-
-        const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
-        writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
-        writer.WriteUe(order_cnt_type);
-        if (order_cnt_type == 0) {
-            writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
-        } else if (order_cnt_type == 1) {
-            writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
-
-            writer.WriteSe(0);
-            writer.WriteSe(0);
-            writer.WriteUe(0);
-        }
-
-        const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
-                               (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+        return frame;
+    }
 
-        writer.WriteUe(16);
+    // Encode header
+    H264BitWriter writer{};
+    writer.WriteU(1, 24);
+    writer.WriteU(0, 1);
+    writer.WriteU(3, 2);
+    writer.WriteU(7, 5);
+    writer.WriteU(100, 8);
+    writer.WriteU(0, 8);
+    writer.WriteU(31, 8);
+    writer.WriteUe(0);
+    const u32 chroma_format_idc =
+        static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
+    writer.WriteUe(chroma_format_idc);
+    if (chroma_format_idc == 3) {
         writer.WriteBit(false);
-        writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
-        writer.WriteUe(pic_height - 1);
-        writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
-
-        if (!context.h264_parameter_set.frame_mbs_only_flag) {
-            writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
-        }
+    }
 
-        writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
-        writer.WriteBit(false); // Frame cropping flag
-        writer.WriteBit(false); // VUI parameter present flag
+    writer.WriteUe(0);
+    writer.WriteUe(0);
+    writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
+    writer.WriteBit(false); // Scaling matrix present flag
 
-        writer.End();
+    writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
 
-        // H264 PPS
-        writer.WriteU(1, 24);
-        writer.WriteU(0, 1);
-        writer.WriteU(3, 2);
-        writer.WriteU(8, 5);
+    const auto order_cnt_type =
+        static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
+    writer.WriteUe(order_cnt_type);
+    if (order_cnt_type == 0) {
+        writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
+    } else if (order_cnt_type == 1) {
+        writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
 
+        writer.WriteSe(0);
+        writer.WriteSe(0);
         writer.WriteUe(0);
-        writer.WriteUe(0);
+    }
 
-        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
-        writer.WriteBit(false);
-        writer.WriteUe(0);
-        writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
-        writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
-        writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
-        writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
-        s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
-        pic_init_qp = (pic_init_qp << 26) >> 26;
-        writer.WriteSe(pic_init_qp);
-        writer.WriteSe(0);
-        s32 chroma_qp_index_offset =
-            static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
-        chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
+    const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
+                           (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+
+    writer.WriteUe(16);
+    writer.WriteBit(false);
+    writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
+    writer.WriteUe(pic_height - 1);
+    writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
 
-        writer.WriteSe(chroma_qp_index_offset);
-        writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
-        writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
-        writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
-        writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+    if (!context.h264_parameter_set.frame_mbs_only_flag) {
+        writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
+    }
 
+    writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
+    writer.WriteBit(false); // Frame cropping flag
+    writer.WriteBit(false); // VUI parameter present flag
+
+    writer.End();
+
+    // H264 PPS
+    writer.WriteU(1, 24);
+    writer.WriteU(0, 1);
+    writer.WriteU(3, 2);
+    writer.WriteU(8, 5);
+
+    writer.WriteUe(0);
+    writer.WriteUe(0);
+
+    writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
+    writer.WriteBit(false);
+    writer.WriteUe(0);
+    writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
+    writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
+    writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
+    writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
+    s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
+    writer.WriteSe(pic_init_qp);
+    writer.WriteSe(0);
+    s32 chroma_qp_index_offset =
+        static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
+
+    writer.WriteSe(chroma_qp_index_offset);
+    writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
+    writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
+    writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
+    writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+
+    writer.WriteBit(true);
+
+    for (s32 index = 0; index < 6; index++) {
         writer.WriteBit(true);
+        std::span<const u8> matrix{context.weight_scale};
+        writer.WriteScalingList(matrix, index * 16, 16);
+    }
 
-        for (s32 index = 0; index < 6; index++) {
+    if (context.h264_parameter_set.transform_8x8_mode_flag) {
+        for (s32 index = 0; index < 2; index++) {
             writer.WriteBit(true);
-            const auto matrix_x4 =
-                std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
-            writer.WriteScalingList(matrix_x4, index * 16, 16);
-        }
-
-        if (context.h264_parameter_set.transform_8x8_mode_flag) {
-            for (s32 index = 0; index < 2; index++) {
-                writer.WriteBit(true);
-                const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
-                                                       context.scaling_matrix_8.end());
-
-                writer.WriteScalingList(matrix_x8, index * 64, 64);
-            }
+            std::span<const u8> matrix{context.weight_scale_8x8};
+            writer.WriteScalingList(matrix, index * 64, 64);
         }
+    }
 
-        s32 chroma_qp_index_offset2 =
-            static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
-        chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
+    s32 chroma_qp_index_offset2 =
+        static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
 
-        writer.WriteSe(chroma_qp_index_offset2);
+    writer.WriteSe(chroma_qp_index_offset2);
 
-        writer.End();
+    writer.End();
 
-        const auto& encoded_header = writer.GetByteArray();
-        frame.resize(encoded_header.size() + context.frame_data_size);
-        std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
+    const auto& encoded_header = writer.GetByteArray();
+    frame.resize(encoded_header.size() + context.stream_len);
+    std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
 
-        gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
-                                      frame.data() + encoded_header.size(),
-                                      context.frame_data_size);
-    }
+    gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+                                  frame.data() + encoded_header.size(), context.stream_len);
 
     return frame;
 }
@@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) {
     WriteBits(state ? 1 : 0, 1);
 }
 
-void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
+void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
     std::vector<u8> scan(count);
     if (count == 16) {
         std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 0f3a1d9f3..bfe84a472 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -20,7 +20,9 @@
 
 #pragma once
 
+#include <span>
 #include <vector>
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/command_classes/nvdec_common.h"
@@ -48,7 +50,7 @@ public:
 
     /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
     /// Writes the scaling matrices of the sream
-    void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
+    void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
 
     /// Return the bitstream as a vector.
     [[nodiscard]] std::vector<u8>& GetByteArray();
@@ -78,40 +80,110 @@ public:
         const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
 
 private:
+    std::vector<u8> frame;
+    GPU& gpu;
+
     struct H264ParameterSet {
-        u32 log2_max_pic_order_cnt{};
-        u32 delta_pic_order_always_zero_flag{};
-        u32 frame_mbs_only_flag{};
-        u32 pic_width_in_mbs{};
-        u32 pic_height_in_map_units{};
-        INSERT_PADDING_WORDS(1);
-        u32 entropy_coding_mode_flag{};
-        u32 bottom_field_pic_order_flag{};
-        u32 num_refidx_l0_default_active{};
-        u32 num_refidx_l1_default_active{};
-        u32 deblocking_filter_control_flag{};
-        u32 redundant_pic_count_flag{};
-        u32 transform_8x8_mode_flag{};
-        INSERT_PADDING_WORDS(9);
-        u64 flags{};
-        u32 frame_number{};
-        u32 frame_number2{};
+        s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
+        s32 delta_pic_order_always_zero_flag;  ///< 0x04
+        s32 frame_mbs_only_flag;               ///< 0x08
+        u32 pic_width_in_mbs;                  ///< 0x0C
+        u32 frame_height_in_map_units;         ///< 0x10
+        union {                                ///< 0x14
+            BitField<0, 2, u32> tile_format;
+            BitField<2, 3, u32> gob_height;
+        };
+        u32 entropy_coding_mode_flag;               ///< 0x18
+        s32 pic_order_present_flag;                 ///< 0x1C
+        s32 num_refidx_l0_default_active;           ///< 0x20
+        s32 num_refidx_l1_default_active;           ///< 0x24
+        s32 deblocking_filter_control_present_flag; ///< 0x28
+        s32 redundant_pic_cnt_present_flag;         ///< 0x2C
+        u32 transform_8x8_mode_flag;                ///< 0x30
+        u32 pitch_luma;                             ///< 0x34
+        u32 pitch_chroma;                           ///< 0x38
+        u32 luma_top_offset;                        ///< 0x3C
+        u32 luma_bot_offset;                        ///< 0x40
+        u32 luma_frame_offset;                      ///< 0x44
+        u32 chroma_top_offset;                      ///< 0x48
+        u32 chroma_bot_offset;                      ///< 0x4C
+        u32 chroma_frame_offset;                    ///< 0x50
+        u32 hist_buffer_size;                       ///< 0x54
+        union {                                     ///< 0x58
+            union {
+                BitField<0, 1, u64> mbaff_frame;
+                BitField<1, 1, u64> direct_8x8_inference;
+                BitField<2, 1, u64> weighted_pred;
+                BitField<3, 1, u64> constrained_intra_pred;
+                BitField<4, 1, u64> ref_pic;
+                BitField<5, 1, u64> field_pic;
+                BitField<6, 1, u64> bottom_field;
+                BitField<7, 1, u64> second_field;
+            } flags;
+            BitField<8, 4, u64> log2_max_frame_num_minus4;
+            BitField<12, 2, u64> chroma_format_idc;
+            BitField<14, 2, u64> pic_order_cnt_type;
+            BitField<16, 6, s64> pic_init_qp_minus26;
+            BitField<22, 5, s64> chroma_qp_index_offset;
+            BitField<27, 5, s64> second_chroma_qp_index_offset;
+            BitField<32, 2, u64> weighted_bipred_idc;
+            BitField<34, 7, u64> curr_pic_idx;
+            BitField<41, 5, u64> curr_col_idx;
+            BitField<46, 16, u64> frame_number;
+            BitField<62, 1, u64> frame_surfaces;
+            BitField<63, 1, u64> output_memory_layout;
+        };
     };
-    static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
+    static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
 
     struct H264DecoderContext {
-        INSERT_PADDING_BYTES(0x48);
-        u32 frame_data_size{};
-        INSERT_PADDING_BYTES(0xc);
-        H264ParameterSet h264_parameter_set{};
-        INSERT_PADDING_BYTES(0x100);
-        std::array<u8, 0x60> scaling_matrix_4;
-        std::array<u8, 0x80> scaling_matrix_8;
+        INSERT_PADDING_WORDS_NOINIT(18);       ///< 0x0000
+        u32 stream_len;                        ///< 0x0048
+        INSERT_PADDING_WORDS_NOINIT(3);        ///< 0x004C
+        H264ParameterSet h264_parameter_set;   ///< 0x0058
+        INSERT_PADDING_WORDS_NOINIT(66);       ///< 0x00B8
+        std::array<u8, 0x60> weight_scale;     ///< 0x01C0
+        std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
     };
-    static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
-
-    std::vector<u8> frame;
-    GPU& gpu;
+    static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
+
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(H264ParameterSet, field_name) == position,                              \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
+    ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
+    ASSERT_POSITION(frame_mbs_only_flag, 0x08);
+    ASSERT_POSITION(pic_width_in_mbs, 0x0C);
+    ASSERT_POSITION(frame_height_in_map_units, 0x10);
+    ASSERT_POSITION(tile_format, 0x14);
+    ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
+    ASSERT_POSITION(pic_order_present_flag, 0x1C);
+    ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
+    ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
+    ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
+    ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
+    ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
+    ASSERT_POSITION(pitch_luma, 0x34);
+    ASSERT_POSITION(pitch_chroma, 0x38);
+    ASSERT_POSITION(luma_top_offset, 0x3C);
+    ASSERT_POSITION(luma_bot_offset, 0x40);
+    ASSERT_POSITION(luma_frame_offset, 0x44);
+    ASSERT_POSITION(chroma_top_offset, 0x48);
+    ASSERT_POSITION(chroma_bot_offset, 0x4C);
+    ASSERT_POSITION(chroma_frame_offset, 0x50);
+    ASSERT_POSITION(hist_buffer_size, 0x54);
+    ASSERT_POSITION(flags, 0x58);
+#undef ASSERT_POSITION
+
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(H264DecoderContext, field_name) == position,                            \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_POSITION(stream_len, 0x48);
+    ASSERT_POSITION(h264_parameter_set, 0x58);
+    ASSERT_POSITION(weight_scale, 0x1C0);
+#undef ASSERT_POSITION
 };
 
 } // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 29bb31418..902bc2a98 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
 }
 
 Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
-    PictureInfo picture_info{};
+    PictureInfo picture_info;
     gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
     Vp9PictureInfo vp9_info = picture_info.Convert();
 
@@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
 }
 
 void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
-    EntropyProbs entropy{};
+    EntropyProbs entropy;
     gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
     entropy.Convert(dst);
 }
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 139501a1c..2da14f3ca 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -15,10 +15,10 @@ class GPU;
 
 namespace Decoder {
 struct Vp9FrameDimensions {
-    s16 width{};
-    s16 height{};
-    s16 luma_pitch{};
-    s16 chroma_pitch{};
+    s16 width;
+    s16 height;
+    s16 luma_pitch;
+    s16 chroma_pitch;
 };
 static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
 
@@ -49,87 +49,87 @@ enum class TxMode {
 };
 
 struct Segmentation {
-    u8 enabled{};
-    u8 update_map{};
-    u8 temporal_update{};
-    u8 abs_delta{};
-    std::array<u32, 8> feature_mask{};
-    std::array<std::array<s16, 4>, 8> feature_data{};
+    u8 enabled;
+    u8 update_map;
+    u8 temporal_update;
+    u8 abs_delta;
+    std::array<u32, 8> feature_mask;
+    std::array<std::array<s16, 4>, 8> feature_data;
 };
 static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
 
 struct LoopFilter {
-    u8 mode_ref_delta_enabled{};
-    std::array<s8, 4> ref_deltas{};
-    std::array<s8, 2> mode_deltas{};
+    u8 mode_ref_delta_enabled;
+    std::array<s8, 4> ref_deltas;
+    std::array<s8, 2> mode_deltas;
 };
 static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
 
 struct Vp9EntropyProbs {
-    std::array<u8, 36> y_mode_prob{};
-    std::array<u8, 64> partition_prob{};
-    std::array<u8, 1728> coef_probs{};
-    std::array<u8, 8> switchable_interp_prob{};
-    std::array<u8, 28> inter_mode_prob{};
-    std::array<u8, 4> intra_inter_prob{};
-    std::array<u8, 5> comp_inter_prob{};
-    std::array<u8, 10> single_ref_prob{};
-    std::array<u8, 5> comp_ref_prob{};
-    std::array<u8, 6> tx_32x32_prob{};
-    std::array<u8, 4> tx_16x16_prob{};
-    std::array<u8, 2> tx_8x8_prob{};
-    std::array<u8, 3> skip_probs{};
-    std::array<u8, 3> joints{};
-    std::array<u8, 2> sign{};
-    std::array<u8, 20> classes{};
-    std::array<u8, 2> class_0{};
-    std::array<u8, 20> prob_bits{};
-    std::array<u8, 12> class_0_fr{};
-    std::array<u8, 6> fr{};
-    std::array<u8, 2> class_0_hp{};
-    std::array<u8, 2> high_precision{};
+    std::array<u8, 36> y_mode_prob;           ///< 0x0000
+    std::array<u8, 64> partition_prob;        ///< 0x0024
+    std::array<u8, 1728> coef_probs;          ///< 0x0064
+    std::array<u8, 8> switchable_interp_prob; ///< 0x0724
+    std::array<u8, 28> inter_mode_prob;       ///< 0x072C
+    std::array<u8, 4> intra_inter_prob;       ///< 0x0748
+    std::array<u8, 5> comp_inter_prob;        ///< 0x074C
+    std::array<u8, 10> single_ref_prob;       ///< 0x0751
+    std::array<u8, 5> comp_ref_prob;          ///< 0x075B
+    std::array<u8, 6> tx_32x32_prob;          ///< 0x0760
+    std::array<u8, 4> tx_16x16_prob;          ///< 0x0766
+    std::array<u8, 2> tx_8x8_prob;            ///< 0x076A
+    std::array<u8, 3> skip_probs;             ///< 0x076C
+    std::array<u8, 3> joints;                 ///< 0x076F
+    std::array<u8, 2> sign;                   ///< 0x0772
+    std::array<u8, 20> classes;               ///< 0x0774
+    std::array<u8, 2> class_0;                ///< 0x0788
+    std::array<u8, 20> prob_bits;             ///< 0x078A
+    std::array<u8, 12> class_0_fr;            ///< 0x079E
+    std::array<u8, 6> fr;                     ///< 0x07AA
+    std::array<u8, 2> class_0_hp;             ///< 0x07B0
+    std::array<u8, 2> high_precision;         ///< 0x07B2
 };
 static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
 
 struct Vp9PictureInfo {
-    bool is_key_frame{};
-    bool intra_only{};
-    bool last_frame_was_key{};
-    bool frame_size_changed{};
-    bool error_resilient_mode{};
-    bool last_frame_shown{};
-    bool show_frame{};
-    std::array<s8, 4> ref_frame_sign_bias{};
-    s32 base_q_index{};
-    s32 y_dc_delta_q{};
-    s32 uv_dc_delta_q{};
-    s32 uv_ac_delta_q{};
-    bool lossless{};
-    s32 transform_mode{};
-    bool allow_high_precision_mv{};
-    s32 interp_filter{};
-    s32 reference_mode{};
-    s8 comp_fixed_ref{};
-    std::array<s8, 2> comp_var_ref{};
-    s32 log2_tile_cols{};
-    s32 log2_tile_rows{};
-    bool segment_enabled{};
-    bool segment_map_update{};
-    bool segment_map_temporal_update{};
-    s32 segment_abs_delta{};
-    std::array<u32, 8> segment_feature_enable{};
-    std::array<std::array<s16, 4>, 8> segment_feature_data{};
-    bool mode_ref_delta_enabled{};
-    bool use_prev_in_find_mv_refs{};
-    std::array<s8, 4> ref_deltas{};
-    std::array<s8, 2> mode_deltas{};
-    Vp9EntropyProbs entropy{};
-    Vp9FrameDimensions frame_size{};
-    u8 first_level{};
-    u8 sharpness_level{};
-    u32 bitstream_size{};
-    std::array<u64, 4> frame_offsets{};
-    std::array<bool, 4> refresh_frame{};
+    bool is_key_frame;
+    bool intra_only;
+    bool last_frame_was_key;
+    bool frame_size_changed;
+    bool error_resilient_mode;
+    bool last_frame_shown;
+    bool show_frame;
+    std::array<s8, 4> ref_frame_sign_bias;
+    s32 base_q_index;
+    s32 y_dc_delta_q;
+    s32 uv_dc_delta_q;
+    s32 uv_ac_delta_q;
+    bool lossless;
+    s32 transform_mode;
+    bool allow_high_precision_mv;
+    s32 interp_filter;
+    s32 reference_mode;
+    s8 comp_fixed_ref;
+    std::array<s8, 2> comp_var_ref;
+    s32 log2_tile_cols;
+    s32 log2_tile_rows;
+    bool segment_enabled;
+    bool segment_map_update;
+    bool segment_map_temporal_update;
+    s32 segment_abs_delta;
+    std::array<u32, 8> segment_feature_enable;
+    std::array<std::array<s16, 4>, 8> segment_feature_data;
+    bool mode_ref_delta_enabled;
+    bool use_prev_in_find_mv_refs;
+    std::array<s8, 4> ref_deltas;
+    std::array<s8, 2> mode_deltas;
+    Vp9EntropyProbs entropy;
+    Vp9FrameDimensions frame_size;
+    u8 first_level;
+    u8 sharpness_level;
+    u32 bitstream_size;
+    std::array<u64, 4> frame_offsets;
+    std::array<bool, 4> refresh_frame;
 };
 
 struct Vp9FrameContainer {
@@ -138,35 +138,35 @@ struct Vp9FrameContainer {
 };
 
 struct PictureInfo {
-    INSERT_PADDING_WORDS(12);
-    u32 bitstream_size{};
-    INSERT_PADDING_WORDS(5);
-    Vp9FrameDimensions last_frame_size{};
-    Vp9FrameDimensions golden_frame_size{};
-    Vp9FrameDimensions alt_frame_size{};
-    Vp9FrameDimensions current_frame_size{};
-    u32 vp9_flags{};
-    std::array<s8, 4> ref_frame_sign_bias{};
-    u8 first_level{};
-    u8 sharpness_level{};
-    u8 base_q_index{};
-    u8 y_dc_delta_q{};
-    u8 uv_ac_delta_q{};
-    u8 uv_dc_delta_q{};
-    u8 lossless{};
-    u8 tx_mode{};
-    u8 allow_high_precision_mv{};
-    u8 interp_filter{};
-    u8 reference_mode{};
-    s8 comp_fixed_ref{};
-    std::array<s8, 2> comp_var_ref{};
-    u8 log2_tile_cols{};
-    u8 log2_tile_rows{};
-    Segmentation segmentation{};
-    LoopFilter loop_filter{};
-    INSERT_PADDING_BYTES(5);
-    u32 surface_params{};
-    INSERT_PADDING_WORDS(3);
+    INSERT_PADDING_WORDS_NOINIT(12);       ///< 0x00
+    u32 bitstream_size;                    ///< 0x30
+    INSERT_PADDING_WORDS_NOINIT(5);        ///< 0x34
+    Vp9FrameDimensions last_frame_size;    ///< 0x48
+    Vp9FrameDimensions golden_frame_size;  ///< 0x50
+    Vp9FrameDimensions alt_frame_size;     ///< 0x58
+    Vp9FrameDimensions current_frame_size; ///< 0x60
+    u32 vp9_flags;                         ///< 0x68
+    std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
+    u8 first_level;                        ///< 0x70
+    u8 sharpness_level;                    ///< 0x71
+    u8 base_q_index;                       ///< 0x72
+    u8 y_dc_delta_q;                       ///< 0x73
+    u8 uv_ac_delta_q;                      ///< 0x74
+    u8 uv_dc_delta_q;                      ///< 0x75
+    u8 lossless;                           ///< 0x76
+    u8 tx_mode;                            ///< 0x77
+    u8 allow_high_precision_mv;            ///< 0x78
+    u8 interp_filter;                      ///< 0x79
+    u8 reference_mode;                     ///< 0x7A
+    s8 comp_fixed_ref;                     ///< 0x7B
+    std::array<s8, 2> comp_var_ref;        ///< 0x7C
+    u8 log2_tile_cols;                     ///< 0x7E
+    u8 log2_tile_rows;                     ///< 0x7F
+    Segmentation segmentation;             ///< 0x80
+    LoopFilter loop_filter;                ///< 0xE4
+    INSERT_PADDING_BYTES_NOINIT(5);        ///< 0xEB
+    u32 surface_params;                    ///< 0xF0
+    INSERT_PADDING_WORDS_NOINIT(3);        ///< 0xF4
 
     [[nodiscard]] Vp9PictureInfo Convert() const {
         return {
@@ -176,6 +176,7 @@ struct PictureInfo {
             .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
             .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
             .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
+            .show_frame = false,
             .ref_frame_sign_bias = ref_frame_sign_bias,
             .base_q_index = base_q_index,
             .y_dc_delta_q = y_dc_delta_q,
@@ -204,45 +205,48 @@ struct PictureInfo {
                                         !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
             .ref_deltas = loop_filter.ref_deltas,
             .mode_deltas = loop_filter.mode_deltas,
+            .entropy{},
             .frame_size = current_frame_size,
             .first_level = first_level,
             .sharpness_level = sharpness_level,
             .bitstream_size = bitstream_size,
+            .frame_offsets{},
+            .refresh_frame{},
         };
     }
 };
 static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
 
 struct EntropyProbs {
-    INSERT_PADDING_BYTES(1024);
-    std::array<u8, 28> inter_mode_prob{};
-    std::array<u8, 4> intra_inter_prob{};
-    INSERT_PADDING_BYTES(80);
-    std::array<u8, 2> tx_8x8_prob{};
-    std::array<u8, 4> tx_16x16_prob{};
-    std::array<u8, 6> tx_32x32_prob{};
-    std::array<u8, 4> y_mode_prob_e8{};
-    std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
-    INSERT_PADDING_BYTES(64);
-    std::array<u8, 64> partition_prob{};
-    INSERT_PADDING_BYTES(10);
-    std::array<u8, 8> switchable_interp_prob{};
-    std::array<u8, 5> comp_inter_prob{};
-    std::array<u8, 3> skip_probs{};
-    INSERT_PADDING_BYTES(1);
-    std::array<u8, 3> joints{};
-    std::array<u8, 2> sign{};
-    std::array<u8, 2> class_0{};
-    std::array<u8, 6> fr{};
-    std::array<u8, 2> class_0_hp{};
-    std::array<u8, 2> high_precision{};
-    std::array<u8, 20> classes{};
-    std::array<u8, 12> class_0_fr{};
-    std::array<u8, 20> pred_bits{};
-    std::array<u8, 10> single_ref_prob{};
-    std::array<u8, 5> comp_ref_prob{};
-    INSERT_PADDING_BYTES(17);
-    std::array<u8, 2304> coef_probs{};
+    INSERT_PADDING_BYTES_NOINIT(1024);                 ///< 0x0000
+    std::array<u8, 28> inter_mode_prob;                ///< 0x0400
+    std::array<u8, 4> intra_inter_prob;                ///< 0x041C
+    INSERT_PADDING_BYTES_NOINIT(80);                   ///< 0x0420
+    std::array<u8, 2> tx_8x8_prob;                     ///< 0x0470
+    std::array<u8, 4> tx_16x16_prob;                   ///< 0x0472
+    std::array<u8, 6> tx_32x32_prob;                   ///< 0x0476
+    std::array<u8, 4> y_mode_prob_e8;                  ///< 0x047C
+    std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
+    INSERT_PADDING_BYTES_NOINIT(64);                   ///< 0x04A0
+    std::array<u8, 64> partition_prob;                 ///< 0x04E0
+    INSERT_PADDING_BYTES_NOINIT(10);                   ///< 0x0520
+    std::array<u8, 8> switchable_interp_prob;          ///< 0x052A
+    std::array<u8, 5> comp_inter_prob;                 ///< 0x0532
+    std::array<u8, 3> skip_probs;                      ///< 0x0537
+    INSERT_PADDING_BYTES_NOINIT(1);                    ///< 0x053A
+    std::array<u8, 3> joints;                          ///< 0x053B
+    std::array<u8, 2> sign;                            ///< 0x053E
+    std::array<u8, 2> class_0;                         ///< 0x0540
+    std::array<u8, 6> fr;                              ///< 0x0542
+    std::array<u8, 2> class_0_hp;                      ///< 0x0548
+    std::array<u8, 2> high_precision;                  ///< 0x054A
+    std::array<u8, 20> classes;                        ///< 0x054C
+    std::array<u8, 12> class_0_fr;                     ///< 0x0560
+    std::array<u8, 20> pred_bits;                      ///< 0x056C
+    std::array<u8, 10> single_ref_prob;                ///< 0x0580
+    std::array<u8, 5> comp_ref_prob;                   ///< 0x058A
+    INSERT_PADDING_BYTES_NOINIT(17);                   ///< 0x058F
+    std::array<u8, 2304> coef_probs;                   ///< 0x05A0
 
     void Convert(Vp9EntropyProbs& fc) {
         fc.inter_mode_prob = inter_mode_prob;
@@ -293,10 +297,45 @@ struct RefPoolElement {
 };
 
 struct FrameContexts {
-    s64 from{};
-    bool adapted{};
-    Vp9EntropyProbs probs{};
+    s64 from;              // No default member initializer: value-initialize or assign before reading.
+    bool adapted;          // Likewise has no default member initializer.
+    Vp9EntropyProbs probs; // Likewise has no default member initializer.
 };
 
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(Vp9EntropyProbs, field_name) == position,                               \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(partition_prob, 0x0024);
+ASSERT_POSITION(switchable_interp_prob, 0x0724);
+ASSERT_POSITION(sign, 0x0772);
+ASSERT_POSITION(class_0_fr, 0x079E);
+ASSERT_POSITION(high_precision, 0x07B2);
+#undef ASSERT_POSITION // Re-defined just below to check PictureInfo offsets instead.
+
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(PictureInfo, field_name) == position,                                   \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(bitstream_size, 0x30);
+ASSERT_POSITION(last_frame_size, 0x48);
+ASSERT_POSITION(first_level, 0x70);
+ASSERT_POSITION(segmentation, 0x80);
+ASSERT_POSITION(loop_filter, 0xE4);
+ASSERT_POSITION(surface_params, 0xF0);
+#undef ASSERT_POSITION // Re-defined just below to check EntropyProbs offsets instead.
+
+#define ASSERT_POSITION(field_name, position)                                                      \
+    static_assert(offsetof(EntropyProbs, field_name) == position,                                  \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_POSITION(inter_mode_prob, 0x400);
+ASSERT_POSITION(tx_8x8_prob, 0x470);
+ASSERT_POSITION(partition_prob, 0x4E0);
+ASSERT_POSITION(class_0, 0x540);
+ASSERT_POSITION(class_0_fr, 0x560);
+ASSERT_POSITION(coef_probs, 0x5A0);
+#undef ASSERT_POSITION // Keeps the helper macro from leaking past these layout checks.
+
 }; // namespace Decoder
 }; // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index e4f919afd..b5e3b70fc 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -8,22 +8,29 @@
 
 namespace Tegra {
 
-Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
+// Index of an NvdecRegisters field within reg_array (each register slot is one u64).
+#define NVDEC_REG_INDEX(field_name)                                                                \
+    (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
+
+Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
 
 Nvdec::~Nvdec() = default;
 
-void Nvdec::ProcessMethod(Method method, u32 argument) {
-    if (method == Method::SetVideoCodec) {
-        codec->StateWrite(static_cast<u32>(method), argument);
-    } else {
-        codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
-    }
+void Nvdec::ProcessMethod(u32 method, u32 argument) {
+    // `method` indexes reg_array directly, so ignore writes that fall outside the register
+    // file instead of storing past the end of the array.
+    if (method >= state.reg_array.size()) {
+        return;
+    }
+    // NOTE(review): every register, including set_codec_id, is now stored shifted left by 8;
+    // confirm nothing reads state.set_codec_id or control_params expecting the raw value.
+    state.reg_array[method] = static_cast<u64>(argument) << 8;
 
     switch (method) {
-    case Method::SetVideoCodec:
+    case NVDEC_REG_INDEX(set_codec_id):
         codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
         break;
-    case Method::Execute:
+    case NVDEC_REG_INDEX(execute):
         Execute();
         break;
     }
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e66be80b8..6e1da0b04 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -14,16 +14,11 @@ class GPU;
 
 class Nvdec {
 public:
-    enum class Method : u32 {
-        SetVideoCodec = 0x80,
-        Execute = 0xc0,
-    };
-
     explicit Nvdec(GPU& gpu);
     ~Nvdec();
 
     /// Writes the method into the state, Invoke Execute() if encountered
-    void ProcessMethod(Method method, u32 argument);
+    void ProcessMethod(u32 method, u32 argument);
 
     /// Return most recently decoded frame
     [[nodiscard]] AVFramePtr GetFrame();
@@ -33,6 +28,7 @@ private:
     void Execute();
 
     GPU& gpu;
+    NvdecCommon::NvdecRegisters state;
     std::unique_ptr<Codec> codec;
 };
 } // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
index 01b5e086d..6a24e00a0 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -4,40 +4,13 @@
 
 #pragma once
 
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 
 namespace Tegra::NvdecCommon {
 
-struct NvdecRegisters {
-    INSERT_PADDING_WORDS(256);
-    u64 set_codec_id{};
-    INSERT_PADDING_WORDS(254);
-    u64 set_platform_id{};
-    u64 picture_info_offset{};
-    u64 frame_bitstream_offset{};
-    u64 frame_number{};
-    u64 h264_slice_data_offsets{};
-    u64 h264_mv_dump_offset{};
-    INSERT_PADDING_WORDS(6);
-    u64 frame_stats_offset{};
-    u64 h264_last_surface_luma_offset{};
-    u64 h264_last_surface_chroma_offset{};
-    std::array<u64, 17> surface_luma_offset{};
-    std::array<u64, 17> surface_chroma_offset{};
-    INSERT_PADDING_WORDS(132);
-    u64 vp9_entropy_probs_offset{};
-    u64 vp9_backward_updates_offset{};
-    u64 vp9_last_frame_segmap_offset{};
-    u64 vp9_curr_frame_segmap_offset{};
-    INSERT_PADDING_WORDS(2);
-    u64 vp9_last_frame_mvs_offset{};
-    u64 vp9_curr_frame_mvs_offset{};
-    INSERT_PADDING_WORDS(2);
-};
-static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
-
-enum class VideoCodec : u32 {
+enum class VideoCodec : u64 {
     None = 0x0,
     H264 = 0x3,
     Vp8 = 0x5,
@@ -45,4 +18,76 @@ enum class VideoCodec : u32 {
     Vp9 = 0x9,
 };
 
+// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
+// doubling the sizes here is compensating for that.
+struct NvdecRegisters {
+    static constexpr std::size_t NUM_REGS = 0x178; ///< u64 slots; 0x178 * 8 == 0xBC0 bytes
+
+    union {
+        struct {
+            INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
+            VideoCodec set_codec_id;          ///< 0x0400
+            INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
+            u64 execute;                      ///< 0x0600
+            INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
+            struct {                          ///< 0x0800
+                union {
+                    BitField<0, 4, VideoCodec> codec; ///< 4 bits wide so that Vp9 (0x9) fits
+                    BitField<4, 1, u64> gp_timer_on;
+                    BitField<13, 1, u64> mb_timer_on;
+                    BitField<14, 1, u64> intra_frame_pslc;
+                    BitField<17, 1, u64> all_intra_frame;
+                };
+            } control_params;
+            u64 picture_info_offset;                   ///< 0x0808
+            u64 frame_bitstream_offset;                ///< 0x0810
+            u64 frame_number;                          ///< 0x0818
+            u64 h264_slice_data_offsets;               ///< 0x0820
+            u64 h264_mv_dump_offset;                   ///< 0x0828
+            INSERT_PADDING_WORDS_NOINIT(6);            ///< 0x0830
+            u64 frame_stats_offset;                    ///< 0x0848
+            u64 h264_last_surface_luma_offset;         ///< 0x0850
+            u64 h264_last_surface_chroma_offset;       ///< 0x0858
+            std::array<u64, 17> surface_luma_offset;   ///< 0x0860
+            std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
+            INSERT_PADDING_WORDS_NOINIT(132);          ///< 0x0970
+            u64 vp9_entropy_probs_offset;              ///< 0x0B80
+            u64 vp9_backward_updates_offset;           ///< 0x0B88
+            u64 vp9_last_frame_segmap_offset;          ///< 0x0B90
+            u64 vp9_curr_frame_segmap_offset;          ///< 0x0B98
+            INSERT_PADDING_WORDS_NOINIT(2);            ///< 0x0BA0
+            u64 vp9_last_frame_mvs_offset;             ///< 0x0BA8
+            u64 vp9_curr_frame_mvs_offset;             ///< 0x0BB0
+            INSERT_PADDING_WORDS_NOINIT(2);            ///< 0x0BB8
+        };
+        std::array<u64, NUM_REGS> reg_array; ///< Flat view of the same storage, one u64 per slot
+    };
+};
+static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(NvdecRegisters, field_name) == (position) * sizeof(u64),                \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(set_codec_id, 0x80);
+ASSERT_REG_POSITION(execute, 0xC0);
+ASSERT_REG_POSITION(control_params, 0x100);
+ASSERT_REG_POSITION(picture_info_offset, 0x101);
+ASSERT_REG_POSITION(frame_bitstream_offset, 0x102);
+ASSERT_REG_POSITION(frame_number, 0x103);
+ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
+ASSERT_REG_POSITION(frame_stats_offset, 0x109);
+ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
+ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
+ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
+ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
+ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
+ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
+ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
+ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
+ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
+ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
+
+#undef ASSERT_REG_POSITION // Positions above are u64 slot indices, not byte offsets.
+
 } // namespace Tegra::NvdecCommon
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 2208e1922..c9cff7450 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,7 +18,10 @@ set(SHADER_FILES
     vulkan_uint8.comp
 )
 
-find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
+find_program(GLSLANGVALIDATOR "glslangValidator")  # No REQUIRED keyword; checked manually below.
+if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND")  # find_program's not-found value
+    message(FATAL_ERROR "Required program `glslangValidator` not found.")
+endif()
 
 set(GLSL_FLAGS "")
 set(QUIET_FLAG "--quiet")
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 320ee8d30..63d8ad42a 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -42,6 +42,8 @@ public:
 
     [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
 
+    [[nodiscard]] virtual std::string GetDeviceVendor() const = 0;
+
     // Getter/setter functions:
     // ------------------------
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3f4532ca7..3b00614e7 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -202,13 +202,13 @@ Device::Device() {
         LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
         throw std::runtime_error{"Insufficient version"};
     }
-    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+    vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
     const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
     const std::vector extensions = GetExtensions();
 
-    const bool is_nvidia = vendor == "NVIDIA Corporation";
-    const bool is_amd = vendor == "ATI Technologies Inc.";
-    const bool is_intel = vendor == "Intel";
+    const bool is_nvidia = vendor_name == "NVIDIA Corporation";
+    const bool is_amd = vendor_name == "ATI Technologies Inc.";
+    const bool is_intel = vendor_name == "Intel";
 
 #ifdef __unix__
     const bool is_linux = true;
@@ -275,6 +275,43 @@ Device::Device() {
     }
 }
 
+std::string Device::GetVendorName() const {
+    // Pairs a raw vendor string with the short label reported for it.
+    struct VendorAlias {
+        const char* reported;
+        const char* label;
+    };
+    static constexpr VendorAlias aliases[] = {
+        {"NVIDIA Corporation", "NVIDIA"},
+        {"ATI Technologies Inc.", "AMD"},
+        // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris.
+        // Simply return `INTEL` for those as well as the Windows driver.
+        {"Intel", "INTEL"},
+        {"Intel Open Source Technology Center", "I965"},
+        {"Mesa Project", "I915"},
+        // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return
+        // MESA instead of one of those driver names.
+        {"Mesa/X.org", "MESA"},
+        {"AMD", "RADEONSI"},
+        {"nouveau", "NOUVEAU"},
+        {"X.Org", "R600"},
+        {"Collabora Ltd", "ZINK"},
+        {"Intel Corporation", "OPENSWR"},
+        {"Microsoft Corporation", "D3D12"},
+        // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default
+        // strategy would have returned `NVIDIA` here for this driver, the same result as the
+        // proprietary driver.
+        {"NVIDIA", "TEGRA"},
+    };
+    for (const VendorAlias& alias : aliases) {
+        if (vendor_name == alias.reported) {
+            return alias.label;
+        }
+    }
+    // Unrecognized vendor strings are passed through unchanged.
+    return vendor_name;
+}
+
 Device::Device(std::nullptr_t) {
     max_uniform_buffers.fill(std::numeric_limits<u32>::max());
     uniform_buffer_alignment = 4;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index f24bd0c7b..2c2b13767 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -22,6 +22,8 @@ public:
     explicit Device();
     explicit Device(std::nullptr_t);
 
+    [[nodiscard]] std::string GetVendorName() const;
+
     u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
         return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
     }
@@ -130,6 +132,7 @@ private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
 
+    std::string vendor_name;
     std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
     std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
     size_t uniform_buffer_alignment{};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index e892bd9ba..ff0f03e99 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -342,6 +342,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
 [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
                                         VideoCommon::SubresourceLayers subresource, GLenum target) {
     switch (target) {
+    case GL_TEXTURE_1D:
+        return CopyOrigin{
+            .level = static_cast<GLint>(subresource.base_level),
+            .x = static_cast<GLint>(offset.x),
+            .y = static_cast<GLint>(0),
+            .z = static_cast<GLint>(0),
+        };
+    case GL_TEXTURE_1D_ARRAY:
+        return CopyOrigin{
+            .level = static_cast<GLint>(subresource.base_level),
+            .x = static_cast<GLint>(offset.x),
+            .y = static_cast<GLint>(0),
+            .z = static_cast<GLint>(subresource.base_layer),
+        };
     case GL_TEXTURE_2D_ARRAY:
     case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
         return CopyOrigin{
@@ -367,6 +381,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
                                         VideoCommon::SubresourceLayers dst_subresource,
                                         GLenum target) {
     switch (target) {
+    case GL_TEXTURE_1D:
+        return CopyRegion{
+            .width = static_cast<GLsizei>(extent.width),
+            .height = static_cast<GLsizei>(1),
+            .depth = static_cast<GLsizei>(1),
+        };
+    case GL_TEXTURE_1D_ARRAY:
+        return CopyRegion{
+            .width = static_cast<GLsizei>(extent.width),
+            .height = static_cast<GLsizei>(1),
+            .depth = static_cast<GLsizei>(dst_subresource.num_layers),
+        };
     case GL_TEXTURE_2D_ARRAY:
     case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
         return CopyRegion{
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cc19a110f..0b66f8332 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -70,6 +70,10 @@ public:
         return &rasterizer;
     }
 
+    [[nodiscard]] std::string GetDeviceVendor() const override {
+        return device.GetVendorName(); // OpenGL backend: label comes from the GL device's vendor.
+    }
+
 private:
     /// Initializes the OpenGL state and creates persistent objects.
     void InitOpenGLObjects();
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 72071316c..d7d17e110 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -47,6 +47,10 @@ public:
         return &rasterizer;
     }
 
+    [[nodiscard]] std::string GetDeviceVendor() const override {
+        return device.GetDriverName(); // Vulkan backend: label comes from the device's driver name.
+    }
+
 private:
     void Report() const;
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7cfd02b6..d8dbd3824 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1057,9 +1057,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     std::vector<ImageId> right_aliased_ids;
     std::vector<ImageId> bad_overlap_ids;
     ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
-        if (info.type != overlap.info.type) {
-            return;
-        }
         if (info.type == ImageType::Linear) {
             if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
                 // Alias linear images with the same pitch
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 23814afd2..f214510da 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -532,6 +532,30 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
     return (supported_usage & wanted_usage) == wanted_usage;
 }
 
+std::string Device::GetDriverName() const {
+    // Short labels for the driver IDs that have a dedicated name.
+    struct DriverLabel {
+        VkDriverId id;
+        const char* label;
+    };
+    static constexpr DriverLabel known_drivers[] = {
+        {VK_DRIVER_ID_AMD_PROPRIETARY, "AMD"},
+        {VK_DRIVER_ID_AMD_OPEN_SOURCE, "AMDVLK"},
+        {VK_DRIVER_ID_MESA_RADV, "RADV"},
+        {VK_DRIVER_ID_NVIDIA_PROPRIETARY, "NVIDIA"},
+        {VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, "INTEL"},
+        {VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA, "ANV"},
+        {VK_DRIVER_ID_MESA_LLVMPIPE, "LAVAPIPE"},
+    };
+    for (const DriverLabel& entry : known_drivers) {
+        if (entry.id == driver_id) {
+            return entry.label;
+        }
+    }
+    // Any other driver ID falls back to the stored vendor name.
+    return vendor_name;
+}
+
 void Device::CheckSuitability(bool requires_swapchain) const {
     std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
     bool has_swapchain = false;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 88b298196..96c0f8c60 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -45,6 +45,9 @@ public:
     /// Reports a shader to Nsight Aftermath.
     void SaveShader(const std::vector<u32>& spirv) const;
 
+    /// Returns the name of the VkDriverId reported from Vulkan.
+    std::string GetDriverName() const;
+
     /// Returns the dispatch loader with direct function pointers of the device.
     const vk::DeviceDispatch& GetDispatchLoader() const {
         return dld;