diff options
Diffstat (limited to 'src/video_core')
20 files changed, 643 insertions, 364 deletions
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index a3fda1094..8b86ad050 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast<u32>(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), - data); + nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); break; default: break; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index d02dc6260..1b4bbc8ac 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) { av_free(ptr); } -Codec::Codec(GPU& gpu_) - : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} Codec::~Codec() { @@ -43,46 +43,48 @@ Codec::~Codec() { avcodec_close(av_codec_ctx); } +void Codec::Initialize() { + AVCodecID codec{AV_CODEC_ID_NONE}; + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + codec = AV_CODEC_ID_H264; + break; + case NvdecCommon::VideoCodec::Vp9: + codec = AV_CODEC_ID_VP9; + break; + default: + return; + } + av_codec = avcodec_find_decoder(codec); + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + return; +} + void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { if (current_codec != codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); current_codec = codec; + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); } } -void Codec::StateWrite(u32 offset, u64 arguments) { - u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); - std::memcpy(state_offset, &arguments, sizeof(u64)); -} - void Codec::Decode() { - bool is_first_frame = false; + const bool is_first_frame = !initialized; if (!initialized) { - if (current_codec == NvdecCommon::VideoCodec::H264) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); - } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); - } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); - return; - } - - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - - // TODO(ameerj): libavcodec gpu hw acceleration - - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - return; - } - initialized = true; - is_first_frame = true; + Initialize(); } - bool vp9_hidden_frame = false; + bool vp9_hidden_frame = false; AVPacket packet{}; av_init_packet(&packet); std::vector<u8> frame_data; @@ -95,7 +97,7 @@ void Codec::Decode() { } packet.data = frame_data.data(); - packet.size = static_cast<int>(frame_data.size()); + packet.size = static_cast<s32>(frame_data.size()); avcodec_send_packet(av_codec_ctx, &packet); @@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { return current_codec; } +std::string_view Codec::GetCurrentCodecName() const { + switch (current_codec) { + case NvdecCommon::VideoCodec::None: + return "None"; + case NvdecCommon::VideoCodec::H264: + return "H264"; + case NvdecCommon::VideoCodec::Vp8: + return "VP8"; + case NvdecCommon::VideoCodec::H265: + return "H265"; + case NvdecCommon::VideoCodec::Vp9: + return "VP9"; + default: + return "Unknown"; + } +}; + } // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 8a2a6c360..96c823c76 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -34,15 +34,15 @@ class VP9; class Codec { public: - explicit Codec(GPU& gpu); + explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); ~Codec(); + /// Initialize the codec, returning success or failure + void Initialize(); + /// Sets NVDEC video stream codec void SetTargetCodec(NvdecCommon::VideoCodec codec); - /// Populate NvdecRegisters state with argument value at the provided offset - void StateWrite(u32 offset, u64 arguments); - /// Call decoders to construct headers, decode AVFrame with ffmpeg void Decode(); @@ -51,6 +51,8 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec + [[nodiscard]] std::string_view GetCurrentCodecName() const; private: bool initialized{}; @@ -60,10 +62,10 @@ private: AVCodecContext* av_codec_ctx{nullptr}; GPU& gpu; + const NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder; - NvdecCommon::NvdecRegisters state{}; std::queue<AVFramePtr> av_frames{}; }; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index fea6aed98..5fb6d45ee 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -45,134 +45,129 @@ H264::~H264() = default; const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, bool is_first_frame) { - H264DecoderContext context{}; + H264DecoderContext context; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); - const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); + const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { - frame.resize(context.frame_data_size); - + frame.resize(context.stream_len); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); - } else { - /// Encode header - H264BitWriter writer{}; - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(7, 5); - writer.WriteU(100, 8); - writer.WriteU(0, 8); - writer.WriteU(31, 8); - writer.WriteUe(0); - const auto chroma_format_idc = - static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); - writer.WriteUe(chroma_format_idc); - if (chroma_format_idc == 3) { - writer.WriteBit(false); - } - - writer.WriteUe(0); - writer.WriteUe(0); - writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag - writer.WriteBit(false); // Scaling matrix present flag - - const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); - writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); - writer.WriteUe(order_cnt_type); - if (order_cnt_type == 0) { - writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); - } else if (order_cnt_type == 1) { - writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); - - writer.WriteSe(0); - writer.WriteSe(0); - writer.WriteUe(0); - } - - const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / - (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + return frame; + } - writer.WriteUe(16); + // Encode header + H264BitWriter writer{}; + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + const u32 chroma_format_idc = + static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); + writer.WriteUe(chroma_format_idc); + if (chroma_format_idc == 3) { writer.WriteBit(false); - writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); - writer.WriteUe(pic_height - 1); - writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - - if (!context.h264_parameter_set.frame_mbs_only_flag) { - writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); - } + } - writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); - writer.WriteBit(false); // Frame cropping flag - writer.WriteBit(false); // VUI parameter present flag + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag + writer.WriteBit(false); // Scaling matrix present flag - writer.End(); + writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); - // H264 PPS - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(8, 5); + const auto order_cnt_type = + static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); + writer.WriteUe(order_cnt_type); + if (order_cnt_type == 0) { + writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); + } else if (order_cnt_type == 1) { + writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); + writer.WriteSe(0); + writer.WriteSe(0); writer.WriteUe(0); - writer.WriteUe(0); + } - writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); - writer.WriteBit(false); - writer.WriteUe(0); - writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); - writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); - writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); - writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); - s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); - pic_init_qp = (pic_init_qp << 26) >> 26; - writer.WriteSe(pic_init_qp); - writer.WriteSe(0); - s32 chroma_qp_index_offset = - static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); - chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; + const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / + (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); + writer.WriteUe(pic_height - 1); + writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - writer.WriteSe(chroma_qp_index_offset); - writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); - writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); - writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); - writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + if (!context.h264_parameter_set.frame_mbs_only_flag) { + writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); + } + writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // H264 PPS + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); + writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); + writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); + writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); + s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); + writer.WriteSe(pic_init_qp); + writer.WriteSe(0); + s32 chroma_qp_index_offset = + static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); + + writer.WriteSe(chroma_qp_index_offset); + writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); + writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + + writer.WriteBit(true); + + for (s32 index = 0; index < 6; index++) { writer.WriteBit(true); + std::span<const u8> matrix{context.weight_scale}; + writer.WriteScalingList(matrix, index * 16, 16); + } - for (s32 index = 0; index < 6; index++) { + if (context.h264_parameter_set.transform_8x8_mode_flag) { + for (s32 index = 0; index < 2; index++) { writer.WriteBit(true); - const auto matrix_x4 = - std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); - writer.WriteScalingList(matrix_x4, index * 16, 16); - } - - if (context.h264_parameter_set.transform_8x8_mode_flag) { - for (s32 index = 0; index < 2; index++) { - writer.WriteBit(true); - const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), - context.scaling_matrix_8.end()); - - writer.WriteScalingList(matrix_x8, index * 64, 64); - } + std::span<const u8> matrix{context.weight_scale_8x8}; + writer.WriteScalingList(matrix, index * 64, 64); } + } - s32 chroma_qp_index_offset2 = - static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); - chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; + s32 chroma_qp_index_offset2 = + static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); - writer.WriteSe(chroma_qp_index_offset2); + writer.WriteSe(chroma_qp_index_offset2); - writer.End(); + writer.End(); - const auto& encoded_header = writer.GetByteArray(); - frame.resize(encoded_header.size() + context.frame_data_size); - std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); + const auto& encoded_header = writer.GetByteArray(); + frame.resize(encoded_header.size() + context.stream_len); + std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); - gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, - frame.data() + encoded_header.size(), - context.frame_data_size); - } + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, + frame.data() + encoded_header.size(), context.stream_len); return frame; } @@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 1 : 0, 1); } -void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { +void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { std::vector<u8> scan(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 0f3a1d9f3..bfe84a472 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -20,7 +20,9 @@ #pragma once +#include <span> #include <vector> +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -48,7 +50,7 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); + void WriteScalingList(std::span<const u8> list, s32 start, s32 count); /// Return the bitstream as a vector. [[nodiscard]] std::vector<u8>& GetByteArray(); @@ -78,40 +80,110 @@ public: const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); private: + std::vector<u8> frame; + GPU& gpu; + struct H264ParameterSet { - u32 log2_max_pic_order_cnt{}; - u32 delta_pic_order_always_zero_flag{}; - u32 frame_mbs_only_flag{}; - u32 pic_width_in_mbs{}; - u32 pic_height_in_map_units{}; - INSERT_PADDING_WORDS(1); - u32 entropy_coding_mode_flag{}; - u32 bottom_field_pic_order_flag{}; - u32 num_refidx_l0_default_active{}; - u32 num_refidx_l1_default_active{}; - u32 deblocking_filter_control_flag{}; - u32 redundant_pic_count_flag{}; - u32 transform_8x8_mode_flag{}; - INSERT_PADDING_WORDS(9); - u64 flags{}; - u32 frame_number{}; - u32 frame_number2{}; + s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 + s32 delta_pic_order_always_zero_flag; ///< 0x04 + s32 frame_mbs_only_flag; ///< 0x08 + u32 pic_width_in_mbs; ///< 0x0C + u32 frame_height_in_map_units; ///< 0x10 + union { ///< 0x14 + BitField<0, 2, u32> tile_format; + BitField<2, 3, u32> gob_height; + }; + u32 entropy_coding_mode_flag; ///< 0x18 + s32 pic_order_present_flag; ///< 0x1C + s32 num_refidx_l0_default_active; ///< 0x20 + s32 num_refidx_l1_default_active; ///< 0x24 + s32 deblocking_filter_control_present_flag; ///< 0x28 + s32 redundant_pic_cnt_present_flag; ///< 0x2C + u32 transform_8x8_mode_flag; ///< 0x30 + u32 pitch_luma; ///< 0x34 + u32 pitch_chroma; ///< 0x38 + u32 luma_top_offset; ///< 0x3C + u32 luma_bot_offset; ///< 0x40 + u32 luma_frame_offset; ///< 0x44 + u32 chroma_top_offset; ///< 0x48 + u32 chroma_bot_offset; ///< 0x4C + u32 chroma_frame_offset; ///< 0x50 + u32 hist_buffer_size; ///< 0x54 + union { ///< 0x58 + union { + BitField<0, 1, u64> mbaff_frame; + BitField<1, 1, u64> direct_8x8_inference; + BitField<2, 1, u64> weighted_pred; + BitField<3, 1, u64> constrained_intra_pred; + BitField<4, 1, u64> ref_pic; + BitField<5, 1, u64> field_pic; + BitField<6, 1, u64> bottom_field; + BitField<7, 1, u64> second_field; + } flags; + BitField<8, 4, u64> log2_max_frame_num_minus4; + BitField<12, 2, u64> chroma_format_idc; + BitField<14, 2, u64> pic_order_cnt_type; + BitField<16, 6, s64> pic_init_qp_minus26; + BitField<22, 5, s64> chroma_qp_index_offset; + BitField<27, 5, s64> second_chroma_qp_index_offset; + BitField<32, 2, u64> weighted_bipred_idc; + BitField<34, 7, u64> curr_pic_idx; + BitField<41, 5, u64> curr_col_idx; + BitField<46, 16, u64> frame_number; + BitField<62, 1, u64> frame_surfaces; + BitField<63, 1, u64> output_memory_layout; + }; }; - static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); + static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); struct H264DecoderContext { - INSERT_PADDING_BYTES(0x48); - u32 frame_data_size{}; - INSERT_PADDING_BYTES(0xc); - H264ParameterSet h264_parameter_set{}; - INSERT_PADDING_BYTES(0x100); - std::array<u8, 0x60> scaling_matrix_4; - std::array<u8, 0x80> scaling_matrix_8; + INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000 + u32 stream_len; ///< 0x0048 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C + H264ParameterSet h264_parameter_set; ///< 0x0058 + INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8 + std::array<u8, 0x60> weight_scale; ///< 0x01C0 + std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220 }; - static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); - - std::vector<u8> frame; - GPU& gpu; + static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264ParameterSet, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); + ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); + ASSERT_POSITION(frame_mbs_only_flag, 0x08); + ASSERT_POSITION(pic_width_in_mbs, 0x0C); + ASSERT_POSITION(frame_height_in_map_units, 0x10); + ASSERT_POSITION(tile_format, 0x14); + ASSERT_POSITION(entropy_coding_mode_flag, 0x18); + ASSERT_POSITION(pic_order_present_flag, 0x1C); + ASSERT_POSITION(num_refidx_l0_default_active, 0x20); + ASSERT_POSITION(num_refidx_l1_default_active, 0x24); + ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); + ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); + ASSERT_POSITION(transform_8x8_mode_flag, 0x30); + ASSERT_POSITION(pitch_luma, 0x34); + ASSERT_POSITION(pitch_chroma, 0x38); + ASSERT_POSITION(luma_top_offset, 0x3C); + ASSERT_POSITION(luma_bot_offset, 0x40); + ASSERT_POSITION(luma_frame_offset, 0x44); + ASSERT_POSITION(chroma_top_offset, 0x48); + ASSERT_POSITION(chroma_bot_offset, 0x4C); + ASSERT_POSITION(chroma_frame_offset, 0x50); + ASSERT_POSITION(hist_buffer_size, 0x54); + ASSERT_POSITION(flags, 0x58); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264DecoderContext, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(stream_len, 0x48); + ASSERT_POSITION(h264_parameter_set, 0x58); + ASSERT_POSITION(weight_scale, 0x1C0); +#undef ASSERT_POSITION }; } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 29bb31418..902bc2a98 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { - PictureInfo picture_info{}; + PictureInfo picture_info; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = picture_info.Convert(); @@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) } void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { - EntropyProbs entropy{}; + EntropyProbs entropy; gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); entropy.Convert(dst); } diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 139501a1c..2da14f3ca 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -15,10 +15,10 @@ class GPU; namespace Decoder { struct Vp9FrameDimensions { - s16 width{}; - s16 height{}; - s16 luma_pitch{}; - s16 chroma_pitch{}; + s16 width; + s16 height; + s16 luma_pitch; + s16 chroma_pitch; }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); @@ -49,87 +49,87 @@ enum class TxMode { }; struct Segmentation { - u8 enabled{}; - u8 update_map{}; - u8 temporal_update{}; - u8 abs_delta{}; - std::array<u32, 8> feature_mask{}; - std::array<std::array<s16, 4>, 8> feature_data{}; + u8 enabled; + u8 update_map; + u8 temporal_update; + u8 abs_delta; + std::array<u32, 8> feature_mask; + std::array<std::array<s16, 4>, 8> feature_data; }; static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); struct LoopFilter { - u8 mode_ref_delta_enabled{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; + u8 mode_ref_delta_enabled; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; }; static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); struct Vp9EntropyProbs { - std::array<u8, 36> y_mode_prob{}; - std::array<u8, 64> partition_prob{}; - std::array<u8, 1728> coef_probs{}; - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 3> skip_probs{}; - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 20> classes{}; - std::array<u8, 2> class_0{}; - std::array<u8, 20> prob_bits{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; + std::array<u8, 36> y_mode_prob; ///< 0x0000 + std::array<u8, 64> partition_prob; ///< 0x0024 + std::array<u8, 1728> coef_probs; ///< 0x0064 + std::array<u8, 8> switchable_interp_prob; ///< 0x0724 + std::array<u8, 28> inter_mode_prob; ///< 0x072C + std::array<u8, 4> intra_inter_prob; ///< 0x0748 + std::array<u8, 5> comp_inter_prob; ///< 0x074C + std::array<u8, 10> single_ref_prob; ///< 0x0751 + std::array<u8, 5> comp_ref_prob; ///< 0x075B + std::array<u8, 6> tx_32x32_prob; ///< 0x0760 + std::array<u8, 4> tx_16x16_prob; ///< 0x0766 + std::array<u8, 2> tx_8x8_prob; ///< 0x076A + std::array<u8, 3> skip_probs; ///< 0x076C + std::array<u8, 3> joints; ///< 0x076F + std::array<u8, 2> sign; ///< 0x0772 + std::array<u8, 20> classes; ///< 0x0774 + std::array<u8, 2> class_0; ///< 0x0788 + std::array<u8, 20> prob_bits; ///< 0x078A + std::array<u8, 12> class_0_fr; ///< 0x079E + std::array<u8, 6> fr; ///< 0x07AA + std::array<u8, 2> class_0_hp; ///< 0x07B0 + std::array<u8, 2> high_precision; ///< 0x07B2 }; static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame{}; - bool intra_only{}; - bool last_frame_was_key{}; - bool frame_size_changed{}; - bool error_resilient_mode{}; - bool last_frame_shown{}; - bool show_frame{}; - std::array<s8, 4> ref_frame_sign_bias{}; - s32 base_q_index{}; - s32 y_dc_delta_q{}; - s32 uv_dc_delta_q{}; - s32 uv_ac_delta_q{}; - bool lossless{}; - s32 transform_mode{}; - bool allow_high_precision_mv{}; - s32 interp_filter{}; - s32 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - s32 log2_tile_cols{}; - s32 log2_tile_rows{}; - bool segment_enabled{}; - bool segment_map_update{}; - bool segment_map_temporal_update{}; - s32 segment_abs_delta{}; - std::array<u32, 8> segment_feature_enable{}; - std::array<std::array<s16, 4>, 8> segment_feature_data{}; - bool mode_ref_delta_enabled{}; - bool use_prev_in_find_mv_refs{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; - Vp9EntropyProbs entropy{}; - Vp9FrameDimensions frame_size{}; - u8 first_level{}; - u8 sharpness_level{}; - u32 bitstream_size{}; - std::array<u64, 4> frame_offsets{}; - std::array<bool, 4> refresh_frame{}; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool frame_size_changed; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + std::array<s8, 4> ref_frame_sign_bias; + s32 base_q_index; + s32 y_dc_delta_q; + s32 uv_dc_delta_q; + s32 uv_ac_delta_q; + bool lossless; + s32 transform_mode; + bool allow_high_precision_mv; + s32 interp_filter; + s32 reference_mode; + s8 comp_fixed_ref; + std::array<s8, 2> comp_var_ref; + s32 log2_tile_cols; + s32 log2_tile_rows; + bool segment_enabled; + bool segment_map_update; + bool segment_map_temporal_update; + s32 segment_abs_delta; + std::array<u32, 8> segment_feature_enable; + std::array<std::array<s16, 4>, 8> segment_feature_data; + bool mode_ref_delta_enabled; + bool use_prev_in_find_mv_refs; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; + Vp9EntropyProbs entropy; + Vp9FrameDimensions frame_size; + u8 first_level; + u8 sharpness_level; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; + std::array<bool, 4> refresh_frame; }; struct Vp9FrameContainer { @@ -138,35 +138,35 @@ struct Vp9FrameContainer { }; struct PictureInfo { - INSERT_PADDING_WORDS(12); - u32 bitstream_size{}; - INSERT_PADDING_WORDS(5); - Vp9FrameDimensions last_frame_size{}; - Vp9FrameDimensions golden_frame_size{}; - Vp9FrameDimensions alt_frame_size{}; - Vp9FrameDimensions current_frame_size{}; - u32 vp9_flags{}; - std::array<s8, 4> ref_frame_sign_bias{}; - u8 first_level{}; - u8 sharpness_level{}; - u8 base_q_index{}; - u8 y_dc_delta_q{}; - u8 uv_ac_delta_q{}; - u8 uv_dc_delta_q{}; - u8 lossless{}; - u8 tx_mode{}; - u8 allow_high_precision_mv{}; - u8 interp_filter{}; - u8 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - u8 log2_tile_cols{}; - u8 log2_tile_rows{}; - Segmentation segmentation{}; - LoopFilter loop_filter{}; - INSERT_PADDING_BYTES(5); - u32 surface_params{}; - INSERT_PADDING_WORDS(3); + INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00 + u32 bitstream_size; ///< 0x30 + INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34 + Vp9FrameDimensions last_frame_size; ///< 0x48 + Vp9FrameDimensions golden_frame_size; ///< 0x50 + Vp9FrameDimensions alt_frame_size; ///< 0x58 + Vp9FrameDimensions current_frame_size; ///< 0x60 + u32 vp9_flags; ///< 0x68 + std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C + u8 first_level; ///< 0x70 + u8 sharpness_level; ///< 0x71 + u8 base_q_index; ///< 0x72 + u8 y_dc_delta_q; ///< 0x73 + u8 uv_ac_delta_q; ///< 0x74 + u8 uv_dc_delta_q; ///< 0x75 + u8 lossless; ///< 0x76 + u8 tx_mode; ///< 0x77 + u8 allow_high_precision_mv; ///< 0x78 + u8 interp_filter; ///< 0x79 + u8 reference_mode; ///< 0x7A + s8 comp_fixed_ref; ///< 0x7B + std::array<s8, 2> comp_var_ref; ///< 0x7C + u8 log2_tile_cols; ///< 0x7E + u8 log2_tile_rows; ///< 0x7F + Segmentation segmentation; ///< 0x80 + LoopFilter loop_filter; ///< 0xE4 + INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB + u32 surface_params; ///< 0xF0 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 [[nodiscard]] Vp9PictureInfo Convert() const { return { @@ -176,6 +176,7 @@ struct PictureInfo { .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, + .show_frame = false, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, @@ -204,45 +205,48 @@ struct PictureInfo { !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, + .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, .bitstream_size = bitstream_size, + .frame_offsets{}, + .refresh_frame{}, }; } }; static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); struct EntropyProbs { - INSERT_PADDING_BYTES(1024); - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - INSERT_PADDING_BYTES(80); - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> y_mode_prob_e8{}; - std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; - INSERT_PADDING_BYTES(64); - std::array<u8, 64> partition_prob{}; - INSERT_PADDING_BYTES(10); - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 3> skip_probs{}; - INSERT_PADDING_BYTES(1); - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 2> class_0{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; - std::array<u8, 20> classes{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 20> pred_bits{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - INSERT_PADDING_BYTES(17); - std::array<u8, 2304> coef_probs{}; + INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 + std::array<u8, 28> inter_mode_prob; ///< 0x0400 + std::array<u8, 4> intra_inter_prob; ///< 0x041C + INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 + std::array<u8, 2> tx_8x8_prob; ///< 0x0470 + std::array<u8, 4> tx_16x16_prob; ///< 0x0472 + std::array<u8, 6> tx_32x32_prob; ///< 0x0476 + std::array<u8, 4> y_mode_prob_e8; ///< 0x047C + std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480 + INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0 + std::array<u8, 64> partition_prob; ///< 0x04E0 + INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520 + std::array<u8, 8> switchable_interp_prob; ///< 0x052A + std::array<u8, 5> comp_inter_prob; ///< 0x0532 + std::array<u8, 3> skip_probs; ///< 0x0537 + INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A + std::array<u8, 3> joints; ///< 0x053B + std::array<u8, 2> sign; ///< 0x053E + std::array<u8, 2> class_0; ///< 0x0540 + std::array<u8, 6> fr; ///< 0x0542 + std::array<u8, 2> class_0_hp; ///< 0x0548 + std::array<u8, 2> high_precision; ///< 0x054A + std::array<u8, 20> classes; ///< 0x054C + std::array<u8, 12> class_0_fr; ///< 0x0560 + std::array<u8, 20> pred_bits; ///< 0x056C + std::array<u8, 10> single_ref_prob; ///< 0x0580 + std::array<u8, 5> comp_ref_prob; ///< 0x058A + INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F + std::array<u8, 2304> coef_probs; ///< 0x05A0 void Convert(Vp9EntropyProbs& fc) { fc.inter_mode_prob = inter_mode_prob; @@ -293,10 +297,45 @@ struct RefPoolElement { }; struct FrameContexts { - s64 from{}; - bool adapted{}; - Vp9EntropyProbs probs{}; + s64 from; + bool adapted; + Vp9EntropyProbs probs; }; +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(partition_prob, 0x0024); +ASSERT_POSITION(switchable_interp_prob, 0x0724); +ASSERT_POSITION(sign, 0x0772); +ASSERT_POSITION(class_0_fr, 0x079E); +ASSERT_POSITION(high_precision, 0x07B2); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(PictureInfo, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(bitstream_size, 0x30); +ASSERT_POSITION(last_frame_size, 0x48); +ASSERT_POSITION(first_level, 0x70); +ASSERT_POSITION(segmentation, 0x80); +ASSERT_POSITION(loop_filter, 0xE4); +ASSERT_POSITION(surface_params, 0xF0); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(inter_mode_prob, 0x400); +ASSERT_POSITION(tx_8x8_prob, 0x470); +ASSERT_POSITION(partition_prob, 0x4E0); +ASSERT_POSITION(class_0, 0x540); +ASSERT_POSITION(class_0_fr, 0x560); +ASSERT_POSITION(coef_probs, 0x5A0); +#undef ASSERT_POSITION + }; // namespace Decoder }; // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index e4f919afd..b5e3b70fc 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -8,22 +8,21 @@ namespace Tegra { -Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} +#define NVDEC_REG_INDEX(field_name) \ + (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {} Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Method method, u32 argument) { - if (method == Method::SetVideoCodec) { - codec->StateWrite(static_cast<u32>(method), argument); - } else { - codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8); - } +void Nvdec::ProcessMethod(u32 method, u32 argument) { + state.reg_array[method] = static_cast<u64>(argument) << 8; switch (method) { - case Method::SetVideoCodec: + case NVDEC_REG_INDEX(set_codec_id): codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); break; - case Method::Execute: + case NVDEC_REG_INDEX(execute): Execute(); break; } diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index e66be80b8..6e1da0b04 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -14,16 +14,11 @@ class GPU; class Nvdec { public: - enum class Method : u32 { - SetVideoCodec = 0x80, - Execute = 0xc0, - }; - explicit Nvdec(GPU& gpu); ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, u32 argument); + void ProcessMethod(u32 method, u32 argument); /// Return most recently decoded frame [[nodiscard]] AVFramePtr GetFrame(); @@ -33,6 +28,7 @@ private: void Execute(); GPU& gpu; + NvdecCommon::NvdecRegisters state; std::unique_ptr<Codec> codec; }; } // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 01b5e086d..6a24e00a0 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h @@ -4,40 +4,13 @@ #pragma once +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" namespace Tegra::NvdecCommon { -struct NvdecRegisters { - INSERT_PADDING_WORDS(256); - u64 set_codec_id{}; - INSERT_PADDING_WORDS(254); - u64 set_platform_id{}; - u64 picture_info_offset{}; - u64 frame_bitstream_offset{}; - u64 frame_number{}; - u64 h264_slice_data_offsets{}; - u64 h264_mv_dump_offset{}; - INSERT_PADDING_WORDS(6); - u64 frame_stats_offset{}; - u64 h264_last_surface_luma_offset{}; - u64 h264_last_surface_chroma_offset{}; - std::array<u64, 17> surface_luma_offset{}; - std::array<u64, 17> surface_chroma_offset{}; - INSERT_PADDING_WORDS(132); - u64 vp9_entropy_probs_offset{}; - u64 vp9_backward_updates_offset{}; - u64 vp9_last_frame_segmap_offset{}; - u64 vp9_curr_frame_segmap_offset{}; - INSERT_PADDING_WORDS(2); - u64 vp9_last_frame_mvs_offset{}; - u64 vp9_curr_frame_mvs_offset{}; - INSERT_PADDING_WORDS(2); -}; -static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); - -enum class VideoCodec : u32 { +enum class VideoCodec : u64 { None = 0x0, H264 = 0x3, Vp8 = 0x5, @@ -45,4 +18,76 @@ enum class VideoCodec : u32 { Vp9 = 0x9, }; +// NVDEC should use a 32-bit address space, but is mapped to 64-bit, +// doubling the sizes here is compensating for that. +struct NvdecRegisters { + static constexpr std::size_t NUM_REGS = 0x178; + + union { + struct { + INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 + VideoCodec set_codec_id; ///< 0x0400 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 + u64 execute; ///< 0x0600 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 + struct { ///< 0x0800 + union { + BitField<0, 3, VideoCodec> codec; + BitField<4, 1, u64> gp_timer_on; + BitField<13, 1, u64> mb_timer_on; + BitField<14, 1, u64> intra_frame_pslc; + BitField<17, 1, u64> all_intra_frame; + }; + } control_params; + u64 picture_info_offset; ///< 0x0808 + u64 frame_bitstream_offset; ///< 0x0810 + u64 frame_number; ///< 0x0818 + u64 h264_slice_data_offsets; ///< 0x0820 + u64 h264_mv_dump_offset; ///< 0x0828 + INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 + u64 frame_stats_offset; ///< 0x0848 + u64 h264_last_surface_luma_offset; ///< 0x0850 + u64 h264_last_surface_chroma_offset; ///< 0x0858 + std::array<u64, 17> surface_luma_offset; ///< 0x0860 + std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 + INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970 + u64 vp9_entropy_probs_offset; ///< 0x0B80 + u64 vp9_backward_updates_offset; ///< 0x0B88 + u64 vp9_last_frame_segmap_offset; ///< 0x0B90 + u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 + u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 + u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 + }; + std::array<u64, NUM_REGS> reg_array; + }; +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(set_codec_id, 0x80); +ASSERT_REG_POSITION(execute, 0xC0); +ASSERT_REG_POSITION(control_params, 0x100); +ASSERT_REG_POSITION(picture_info_offset, 0x101); +ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); +ASSERT_REG_POSITION(frame_number, 0x103); +ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); +ASSERT_REG_POSITION(frame_stats_offset, 0x109); +ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); +ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); +ASSERT_REG_POSITION(surface_luma_offset, 0x10C); +ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); +ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); +ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); +ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); +ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); +ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); +ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); + +#undef ASSERT_REG_POSITION + } // namespace Tegra::NvdecCommon diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 2208e1922..c9cff7450 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -18,7 +18,10 @@ set(SHADER_FILES vulkan_uint8.comp ) -find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) +find_program(GLSLANGVALIDATOR "glslangValidator") +if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND") + message(FATAL_ERROR "Required program `glslangValidator` not found.") +endif() set(GLSL_FLAGS "") set(QUIET_FLAG "--quiet") diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 320ee8d30..63d8ad42a 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -42,6 +42,8 @@ public: [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; + [[nodiscard]] virtual std::string GetDeviceVendor() const = 0; + // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3f4532ca7..3b00614e7 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -202,13 +202,13 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); throw std::runtime_error{"Insufficient version"}; } - const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); + vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); - const bool is_nvidia = vendor == "NVIDIA Corporation"; - const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_intel = vendor == "Intel"; + const bool is_nvidia = vendor_name == "NVIDIA Corporation"; + const bool is_amd = vendor_name == "ATI Technologies Inc."; + const bool is_intel = vendor_name == "Intel"; #ifdef __unix__ const bool is_linux = true; @@ -275,6 +275,56 @@ Device::Device() { } } +std::string Device::GetVendorName() const { + if (vendor_name == "NVIDIA Corporation") { + return "NVIDIA"; + } + if (vendor_name == "ATI Technologies Inc.") { + return "AMD"; + } + if (vendor_name == "Intel") { + // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris. + // Simply return `INTEL` for those as well as the Windows driver. + return "INTEL"; + } + if (vendor_name == "Intel Open Source Technology Center") { + return "I965"; + } + if (vendor_name == "Mesa Project") { + return "I915"; + } + if (vendor_name == "Mesa/X.org") { + // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return + // MESA instead of one of those driver names. + return "MESA"; + } + if (vendor_name == "AMD") { + return "RADEONSI"; + } + if (vendor_name == "nouveau") { + return "NOUVEAU"; + } + if (vendor_name == "X.Org") { + return "R600"; + } + if (vendor_name == "Collabora Ltd") { + return "ZINK"; + } + if (vendor_name == "Intel Corporation") { + return "OPENSWR"; + } + if (vendor_name == "Microsoft Corporation") { + return "D3D12"; + } + if (vendor_name == "NVIDIA") { + // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default + // strategy would have returned `NVIDIA` here for this driver, the same result as the + // proprietary driver. + return "TEGRA"; + } + return vendor_name; +} + Device::Device(std::nullptr_t) { max_uniform_buffers.fill(std::numeric_limits<u32>::max()); uniform_buffer_alignment = 4; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index f24bd0c7b..2c2b13767 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -22,6 +22,8 @@ public: explicit Device(); explicit Device(std::nullptr_t); + [[nodiscard]] std::string GetVendorName() const; + u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; } @@ -130,6 +132,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); + std::string vendor_name; std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; size_t uniform_buffer_alignment{}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e892bd9ba..ff0f03e99 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -342,6 +342,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, VideoCommon::SubresourceLayers subresource, GLenum target) { switch (target) { + case GL_TEXTURE_1D: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(0), + .z = static_cast<GLint>(0), + }; + case GL_TEXTURE_1D_ARRAY: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(0), + .z = static_cast<GLint>(subresource.base_layer), + }; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return CopyOrigin{ @@ -367,6 +381,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 VideoCommon::SubresourceLayers dst_subresource, GLenum target) { switch (target) { + case GL_TEXTURE_1D: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(1), + .depth = static_cast<GLsizei>(1), + }; + case GL_TEXTURE_1D_ARRAY: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(1), + .depth = static_cast<GLsizei>(dst_subresource.num_layers), + }; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return CopyRegion{ diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cc19a110f..0b66f8332 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -70,6 +70,10 @@ public: return &rasterizer; } + [[nodiscard]] std::string GetDeviceVendor() const override { + return device.GetVendorName(); + } + private: /// Initializes the OpenGL state and creates persistent objects. void InitOpenGLObjects(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 72071316c..d7d17e110 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -47,6 +47,10 @@ public: return &rasterizer; } + [[nodiscard]] std::string GetDeviceVendor() const override { + return device.GetDriverName(); + } + private: void Report() const; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c7cfd02b6..d8dbd3824 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1057,9 +1057,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector<ImageId> right_aliased_ids; std::vector<ImageId> bad_overlap_ids; ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { - if (info.type != overlap.info.type) { - return; - } if (info.type == ImageType::Linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 23814afd2..f214510da 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -532,6 +532,27 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want return (supported_usage & wanted_usage) == wanted_usage; } +std::string Device::GetDriverName() const { + switch (driver_id) { + case VK_DRIVER_ID_AMD_PROPRIETARY: + return "AMD"; + case VK_DRIVER_ID_AMD_OPEN_SOURCE: + return "AMDVLK"; + case VK_DRIVER_ID_MESA_RADV: + return "RADV"; + case VK_DRIVER_ID_NVIDIA_PROPRIETARY: + return "NVIDIA"; + case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS: + return "INTEL"; + case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA: + return "ANV"; + case VK_DRIVER_ID_MESA_LLVMPIPE: + return "LAVAPIPE"; + default: + return vendor_name; + } +} + void Device::CheckSuitability(bool requires_swapchain) const { std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; bool has_swapchain = false; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 88b298196..96c0f8c60 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -45,6 +45,9 @@ public: /// Reports a shader to Nsight Aftermath. void SaveShader(const std::vector<u32>& spirv) const; + /// Returns the name of the VkDriverId reported from Vulkan. + std::string GetDriverName() const; + /// Returns the dispatch loader with direct function pointers of the device. const vk::DeviceDispatch& GetDispatchLoader() const { return dld; |