diff options
Diffstat (limited to 'src')
42 files changed, 933 insertions, 495 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ed94e5d4e..65a4922ea 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -55,6 +55,7 @@ if (MSVC) /we4018 # 'expression': signed/unsigned mismatch /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled /we4101 # 'identifier': unreferenced local variable + /we4189 # 'identifier': local variable is initialized but not referenced /we4265 # 'class': class has virtual functions, but destructor is not virtual /we4267 # 'var': conversion from 'size_t' to 'type', possible loss of data /we4305 # 'context': truncation from 'type1' to 'type2' diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 80ffddb10..ccd5ca6cc 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -12,6 +12,7 @@ #include "audio_core/voice_context.h" #include "common/logging/log.h" #include "common/settings.h" +#include "core/core_timing.h" #include "core/memory.h" namespace { @@ -68,7 +69,9 @@ namespace { } // namespace namespace AudioCore { -AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_, +constexpr s32 NUM_BUFFERS = 2; + +AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_, AudioCommon::AudioRendererParameter params, Stream::ReleaseCallback&& release_callback, std::size_t instance_number) @@ -77,7 +80,8 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory sink_context(params.sink_count), splitter_context(), voices(params.voice_count), memory{memory_}, command_generator(worker_params, voice_context, mix_context, splitter_context, effect_context, - memory) { + memory), + core_timing{core_timing_} { behavior_info.SetUserRevision(params.revision); splitter_context.Initialize(behavior_info, params.splitter_count, params.num_splitter_send_channels); @@ -86,16 +90,27 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory stream = audio_out->OpenStream( core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS, fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback)); - audio_out->StartStream(stream); - - QueueMixedBuffer(0); - QueueMixedBuffer(1); - QueueMixedBuffer(2); - QueueMixedBuffer(3); + process_event = Core::Timing::CreateEvent( + fmt::format("AudioRenderer-Instance{}-Process", instance_number), + [this](std::uintptr_t, std::chrono::nanoseconds) { ReleaseAndQueueBuffers(); }); + for (s32 i = 0; i < NUM_BUFFERS; ++i) { + QueueMixedBuffer(i); + } } AudioRenderer::~AudioRenderer() = default; +ResultCode AudioRenderer::Start() { + audio_out->StartStream(stream); + ReleaseAndQueueBuffers(); + return ResultSuccess; +} + +ResultCode AudioRenderer::Stop() { + audio_out->StopStream(stream); + return ResultSuccess; +} + u32 AudioRenderer::GetSampleRate() const { return worker_params.sample_rate; } @@ -114,7 +129,7 @@ Stream::State AudioRenderer::GetStreamState() const { ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params, std::vector<u8>& output_params) { - + std::scoped_lock lock{mutex}; InfoUpdater info_updater{input_params, output_params, behavior_info}; if (!info_updater.UpdateBehaviorInfo(behavior_info)) { @@ -194,9 +209,6 @@ ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_param LOG_ERROR(Audio, "Audio buffers were not consumed!"); return AudioCommon::Audren::ERR_INVALID_PARAMETERS; } - - ReleaseAndQueueBuffers(); - return ResultSuccess; } @@ -220,10 +232,8 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { command_generator.PostCommand(); // Base sample size std::size_t BUFFER_SIZE{worker_params.sample_count}; - // Samples - std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels()); - // Make sure to clear our samples - std::memset(buffer.data(), 0, buffer.size() * sizeof(s16)); + // Samples, making sure to clear + std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels(), 0); if (sink_context.InUse()) { const auto stream_channel_count = stream->GetNumChannels(); @@ -315,10 +325,24 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { } void AudioRenderer::ReleaseAndQueueBuffers() { - const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)}; - for (const auto& tag : released_buffers) { - QueueMixedBuffer(tag); + if (!stream->IsPlaying()) { + return; } + + { + std::scoped_lock lock{mutex}; + const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)}; + for (const auto& tag : released_buffers) { + QueueMixedBuffer(tag); + } + } + + const f32 sample_rate = static_cast<f32>(GetSampleRate()); + const f32 sample_count = static_cast<f32>(GetSampleCount()); + const f32 consume_rate = sample_rate / (sample_count * (sample_count / 240)); + const s32 ms = (1000 / static_cast<s32>(consume_rate)) - 1; + const std::chrono::milliseconds next_event_time(std::max(ms / NUM_BUFFERS, 1)); + core_timing.ScheduleEvent(next_event_time, process_event, {}); } } // namespace AudioCore diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 18567f618..88fdd13dd 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -6,6 +6,7 @@ #include <array> #include <memory> +#include <mutex> #include <vector> #include "audio_core/behavior_info.h" @@ -45,6 +46,8 @@ public: [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params, std::vector<u8>& output_params); + [[nodiscard]] ResultCode Start(); + [[nodiscard]] ResultCode Stop(); void QueueMixedBuffer(Buffer::Tag tag); void ReleaseAndQueueBuffers(); [[nodiscard]] u32 GetSampleRate() const; @@ -68,6 +71,9 @@ private: Core::Memory::Memory& memory; CommandGenerator command_generator; std::size_t elapsed_frame_count{}; + Core::Timing::CoreTiming& core_timing; + std::shared_ptr<Core::Timing::EventType> process_event; + std::mutex mutex; }; } // namespace AudioCore diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp index 437cc5ccd..27437f1ea 100644 --- a/src/audio_core/command_generator.cpp +++ b/src/audio_core/command_generator.cpp @@ -795,7 +795,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta state.lowpass_1 = 0.0f; } else { const auto a = 1.0f - hf_gain; - const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference / + const auto b = 2.0f * (2.0f - hf_gain * CosD(256.0f * info.hf_reference / static_cast<f32>(info.sample_rate))); const auto c = std::sqrt(b * b - 4.0f * a * a); @@ -843,7 +843,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta } const auto max_early_delay = state.early_delay_line.GetMaxDelay(); - const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f); + const auto reflection_time = 1000.0f * (0.9998f * info.reverb_delay + 0.02f); for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) { const auto length = AudioCommon::CalculateDelaySamples( sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]); @@ -1004,7 +1004,8 @@ void CommandGenerator::GenerateFinalMixCommand() { } s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, - s32 sample_count, s32 channel, std::size_t mix_offset) { + s32 sample_start_offset, s32 sample_end_offset, s32 sample_count, + s32 channel, std::size_t mix_offset) { const auto& in_params = voice_info.GetInParams(); const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; if (wave_buffer.buffer_address == 0) { @@ -1013,14 +1014,12 @@ s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_s if (wave_buffer.buffer_size == 0) { return 0; } - if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) { + if (sample_end_offset < sample_start_offset) { return 0; } - const auto samples_remaining = - (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset; + const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset; const auto start_offset = - ((wave_buffer.start_sample_offset + dsp_state.offset) * in_params.channel_count) * - sizeof(s16); + ((dsp_state.offset + sample_start_offset) * in_params.channel_count) * sizeof(s16); const auto buffer_pos = wave_buffer.buffer_address + start_offset; const auto samples_processed = std::min(sample_count, samples_remaining); @@ -1044,8 +1043,8 @@ s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_s } s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, - s32 sample_count, [[maybe_unused]] s32 channel, - std::size_t mix_offset) { + s32 sample_start_offset, s32 sample_end_offset, s32 sample_count, + [[maybe_unused]] s32 channel, std::size_t mix_offset) { const auto& in_params = voice_info.GetInParams(); const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; if (wave_buffer.buffer_address == 0) { @@ -1054,7 +1053,7 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s if (wave_buffer.buffer_size == 0) { return 0; } - if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) { + if (sample_end_offset < sample_start_offset) { return 0; } @@ -1079,10 +1078,9 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s s32 coef1 = coeffs[idx * 2]; s32 coef2 = coeffs[idx * 2 + 1]; - const auto samples_remaining = - (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset; + const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset; const auto samples_processed = std::min(sample_count, samples_remaining); - const auto sample_pos = wave_buffer.start_sample_offset + dsp_state.offset; + const auto sample_pos = dsp_state.offset + sample_start_offset; const auto samples_remaining_in_frame = sample_pos % SAMPLES_PER_FRAME; auto position_in_frame = ((sample_pos / SAMPLES_PER_FRAME) * NIBBLES_PER_SAMPLE) + @@ -1210,9 +1208,8 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o } std::size_t temp_mix_offset{}; - bool is_buffer_completed{false}; auto samples_remaining = sample_count; - while (samples_remaining > 0 && !is_buffer_completed) { + while (samples_remaining > 0) { const auto samples_to_output = std::min(samples_remaining, min_required_samples); const auto samples_to_read = (samples_to_output * resample_rate + dsp_state.fraction) >> 15; @@ -1229,24 +1226,38 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; // No more data can be read if (!dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index]) { - is_buffer_completed = true; break; } if (in_params.sample_format == SampleFormat::Adpcm && dsp_state.offset == 0 && wave_buffer.context_address != 0 && wave_buffer.context_size != 0) { - // TODO(ogniK): ADPCM loop context + memory.ReadBlock(wave_buffer.context_address, &dsp_state.context, + sizeof(ADPCMContext)); + } + + s32 samples_offset_start; + s32 samples_offset_end; + if (dsp_state.loop_count > 0 && wave_buffer.loop_start_sample != 0 && + wave_buffer.loop_end_sample != 0 && + wave_buffer.loop_start_sample <= wave_buffer.loop_end_sample) { + samples_offset_start = wave_buffer.loop_start_sample; + samples_offset_end = wave_buffer.loop_end_sample; + } else { + samples_offset_start = wave_buffer.start_sample_offset; + samples_offset_end = wave_buffer.end_sample_offset; } s32 samples_decoded{0}; switch (in_params.sample_format) { case SampleFormat::Pcm16: - samples_decoded = DecodePcm16(voice_info, dsp_state, samples_to_read - samples_read, - channel, temp_mix_offset); + samples_decoded = + DecodePcm16(voice_info, dsp_state, samples_offset_start, samples_offset_end, + samples_to_read - samples_read, channel, temp_mix_offset); break; case SampleFormat::Adpcm: - samples_decoded = DecodeAdpcm(voice_info, dsp_state, samples_to_read - samples_read, - channel, temp_mix_offset); + samples_decoded = + DecodeAdpcm(voice_info, dsp_state, samples_offset_start, samples_offset_end, + samples_to_read - samples_read, channel, temp_mix_offset); break; default: UNREACHABLE_MSG("Unimplemented sample format={}", in_params.sample_format); @@ -1257,15 +1268,19 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o dsp_state.offset += samples_decoded; dsp_state.played_sample_count += samples_decoded; - if (dsp_state.offset >= - (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) || + if (dsp_state.offset >= (samples_offset_end - samples_offset_start) || samples_decoded == 0) { // Reset our sample offset dsp_state.offset = 0; if (wave_buffer.is_looping) { - if (samples_decoded == 0) { + dsp_state.loop_count++; + if (wave_buffer.loop_count > 0 && + (dsp_state.loop_count > wave_buffer.loop_count || samples_decoded == 0)) { // End of our buffer - is_buffer_completed = true; + voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer); + } + + if (samples_decoded == 0) { break; } @@ -1273,15 +1288,8 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o dsp_state.played_sample_count = 0; } } else { - // Update our wave buffer states - dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false; - dsp_state.wave_buffer_consumed++; - dsp_state.wave_buffer_index = - (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS; - if (wave_buffer.end_of_stream) { - dsp_state.played_sample_count = 0; - } + voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer); } } } diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h index 2ebb755b0..673e4fbef 100644 --- a/src/audio_core/command_generator.h +++ b/src/audio_core/command_generator.h @@ -86,10 +86,10 @@ private: std::vector<u8>& work_buffer); void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear); // DSP Code - s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count, - s32 channel, std::size_t mix_offset); - s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count, - s32 channel, std::size_t mix_offset); + s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset, + s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); + s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset, + s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state, s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id); diff --git a/src/audio_core/info_updater.cpp b/src/audio_core/info_updater.cpp index 4a5b1b4ab..9b4ca1851 100644 --- a/src/audio_core/info_updater.cpp +++ b/src/audio_core/info_updater.cpp @@ -189,9 +189,6 @@ bool InfoUpdater::UpdateVoices(VoiceContext& voice_context, if (voice_in_params.is_new) { // Default our values for our voice voice_info.Initialize(); - if (channel_count == 0 || channel_count > AudioCommon::MAX_CHANNEL_COUNT) { - continue; - } // Zero out our voice states for (std::size_t channel = 0; channel < channel_count; channel++) { diff --git a/src/audio_core/voice_context.cpp b/src/audio_core/voice_context.cpp index 867b8fc6b..d8c954b60 100644 --- a/src/audio_core/voice_context.cpp +++ b/src/audio_core/voice_context.cpp @@ -66,7 +66,7 @@ void ServerVoiceInfo::Initialize() { in_params.last_volume = 0.0f; in_params.biquad_filter.fill({}); in_params.wave_buffer_count = 0; - in_params.wave_bufffer_head = 0; + in_params.wave_buffer_head = 0; in_params.mix_id = AudioCommon::NO_MIX; in_params.splitter_info_id = AudioCommon::NO_SPLITTER; in_params.additional_params_address = 0; @@ -75,7 +75,7 @@ void ServerVoiceInfo::Initialize() { out_params.played_sample_count = 0; out_params.wave_buffer_consumed = 0; in_params.voice_drop_flag = false; - in_params.buffer_mapped = false; + in_params.buffer_mapped = true; in_params.wave_buffer_flush_request_count = 0; in_params.was_biquad_filter_enabled.fill(false); @@ -126,7 +126,7 @@ void ServerVoiceInfo::UpdateParameters(const VoiceInfo::InParams& voice_in, in_params.volume = voice_in.volume; in_params.biquad_filter = voice_in.biquad_filter; in_params.wave_buffer_count = voice_in.wave_buffer_count; - in_params.wave_bufffer_head = voice_in.wave_buffer_head; + in_params.wave_buffer_head = voice_in.wave_buffer_head; if (behavior_info.IsFlushVoiceWaveBuffersSupported()) { const auto in_request_count = in_params.wave_buffer_flush_request_count; const auto voice_request_count = voice_in.wave_buffer_flush_request_count; @@ -185,14 +185,16 @@ void ServerVoiceInfo::UpdateWaveBuffers( wave_buffer.buffer_size = 0; wave_buffer.context_address = 0; wave_buffer.context_size = 0; + wave_buffer.loop_start_sample = 0; + wave_buffer.loop_end_sample = 0; wave_buffer.sent_to_dsp = true; } // Mark all our wave buffers as invalid for (std::size_t channel = 0; channel < static_cast<std::size_t>(in_params.channel_count); channel++) { - for (auto& is_valid : voice_states[channel]->is_wave_buffer_valid) { - is_valid = false; + for (std::size_t i = 0; i < AudioCommon::MAX_WAVE_BUFFERS; ++i) { + voice_states[channel]->is_wave_buffer_valid[i] = false; } } } @@ -211,7 +213,7 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer, const WaveBuffer& in_wave_buffer, SampleFormat sample_format, bool is_buffer_valid, [[maybe_unused]] BehaviorInfo& behavior_info) { - if (!is_buffer_valid && out_wavebuffer.sent_to_dsp) { + if (!is_buffer_valid && out_wavebuffer.sent_to_dsp && out_wavebuffer.buffer_address != 0) { out_wavebuffer.buffer_address = 0; out_wavebuffer.buffer_size = 0; } @@ -219,11 +221,40 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer, if (!in_wave_buffer.sent_to_server || !in_params.buffer_mapped) { // Validate sample offset sizings if (sample_format == SampleFormat::Pcm16) { - const auto buffer_size = in_wave_buffer.buffer_size; - if (in_wave_buffer.start_sample_offset < 0 || in_wave_buffer.end_sample_offset < 0 || - (buffer_size < (sizeof(s16) * in_wave_buffer.start_sample_offset)) || - (buffer_size < (sizeof(s16) * in_wave_buffer.end_sample_offset))) { + const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size); + const s64 start = sizeof(s16) * in_wave_buffer.start_sample_offset; + const s64 end = sizeof(s16) * in_wave_buffer.end_sample_offset; + if (0 > start || start > buffer_size || 0 > end || end > buffer_size) { // TODO(ogniK): Write error info + LOG_ERROR(Audio, + "PCM16 wavebuffer has an invalid size. Buffer has size 0x{:08X}, but " + "offsets were " + "{:08X} - 0x{:08X}", + buffer_size, sizeof(s16) * in_wave_buffer.start_sample_offset, + sizeof(s16) * in_wave_buffer.end_sample_offset); + return; + } + } else if (sample_format == SampleFormat::Adpcm) { + const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size); + const s64 start_frames = in_wave_buffer.start_sample_offset / 14; + const s64 start_extra = in_wave_buffer.start_sample_offset % 14 == 0 + ? 0 + : (in_wave_buffer.start_sample_offset % 14) / 2 + 1 + + (in_wave_buffer.start_sample_offset % 2); + const s64 start = start_frames * 8 + start_extra; + const s64 end_frames = in_wave_buffer.end_sample_offset / 14; + const s64 end_extra = in_wave_buffer.end_sample_offset % 14 == 0 + ? 0 + : (in_wave_buffer.end_sample_offset % 14) / 2 + 1 + + (in_wave_buffer.end_sample_offset % 2); + const s64 end = end_frames * 8 + end_extra; + if (in_wave_buffer.start_sample_offset < 0 || start > buffer_size || + in_wave_buffer.end_sample_offset < 0 || end > buffer_size) { + LOG_ERROR(Audio, + "ADPMC wavebuffer has an invalid size. Buffer has size 0x{:08X}, but " + "offsets were " + "{:08X} - 0x{:08X}", + in_wave_buffer.buffer_size, start, end); return; } } @@ -239,29 +270,34 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer, out_wavebuffer.buffer_size = in_wave_buffer.buffer_size; out_wavebuffer.context_address = in_wave_buffer.context_address; out_wavebuffer.context_size = in_wave_buffer.context_size; + out_wavebuffer.loop_start_sample = in_wave_buffer.loop_start_sample; + out_wavebuffer.loop_end_sample = in_wave_buffer.loop_end_sample; in_params.buffer_mapped = in_wave_buffer.buffer_address != 0 && in_wave_buffer.buffer_size != 0; // TODO(ogniK): Pool mapper attachment // TODO(ogniK): IsAdpcmLoopContextBugFixed + if (sample_format == SampleFormat::Adpcm && in_wave_buffer.context_address != 0 && + in_wave_buffer.context_size != 0 && behavior_info.IsAdpcmLoopContextBugFixed()) { + } else { + out_wavebuffer.context_address = 0; + out_wavebuffer.context_size = 0; + } } } void ServerVoiceInfo::WriteOutStatus( VoiceInfo::OutParams& voice_out, VoiceInfo::InParams& voice_in, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& voice_states) { - if (voice_in.is_new) { + if (voice_in.is_new || in_params.is_new) { in_params.is_new = true; voice_out.wave_buffer_consumed = 0; voice_out.played_sample_count = 0; voice_out.voice_dropped = false; - } else if (!in_params.is_new) { - voice_out.wave_buffer_consumed = voice_states[0]->wave_buffer_consumed; - voice_out.played_sample_count = voice_states[0]->played_sample_count; - voice_out.voice_dropped = in_params.voice_drop_flag; } else { - voice_out.wave_buffer_consumed = 0; - voice_out.played_sample_count = 0; - voice_out.voice_dropped = false; + const auto& state = voice_states[0]; + voice_out.wave_buffer_consumed = state->wave_buffer_consumed; + voice_out.played_sample_count = state->played_sample_count; + voice_out.voice_dropped = state->voice_dropped; } } @@ -283,7 +319,8 @@ ServerVoiceInfo::OutParams& ServerVoiceInfo::GetOutParams() { bool ServerVoiceInfo::ShouldSkip() const { // TODO(ogniK): Handle unmapped wave buffers or parameters - return !in_params.in_use || (in_params.wave_buffer_count == 0) || in_params.voice_drop_flag; + return !in_params.in_use || in_params.wave_buffer_count == 0 || !in_params.buffer_mapped || + in_params.voice_drop_flag; } bool ServerVoiceInfo::UpdateForCommandGeneration(VoiceContext& voice_context) { @@ -381,7 +418,7 @@ bool ServerVoiceInfo::UpdateParametersForCommandGeneration( void ServerVoiceInfo::FlushWaveBuffers( u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states, s32 channel_count) { - auto wave_head = in_params.wave_bufffer_head; + auto wave_head = in_params.wave_buffer_head; for (u8 i = 0; i < flush_count; i++) { in_params.wave_buffer[wave_head].sent_to_dsp = true; @@ -401,6 +438,17 @@ bool ServerVoiceInfo::HasValidWaveBuffer(const VoiceState* state) const { return std::find(valid_wb.begin(), valid_wb.end(), true) != valid_wb.end(); } +void ServerVoiceInfo::SetWaveBufferCompleted(VoiceState& dsp_state, + const ServerWaveBuffer& wave_buffer) { + dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false; + dsp_state.wave_buffer_consumed++; + dsp_state.wave_buffer_index = (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS; + dsp_state.loop_count = 0; + if (wave_buffer.end_of_stream) { + dsp_state.played_sample_count = 0; + } +} + VoiceContext::VoiceContext(std::size_t voice_count_) : voice_count{voice_count_} { for (std::size_t i = 0; i < voice_count; i++) { voice_channel_resources.emplace_back(static_cast<s32>(i)); diff --git a/src/audio_core/voice_context.h b/src/audio_core/voice_context.h index 70359cadb..e1050897b 100644 --- a/src/audio_core/voice_context.h +++ b/src/audio_core/voice_context.h @@ -60,10 +60,12 @@ struct WaveBuffer { u8 is_looping{}; u8 end_of_stream{}; u8 sent_to_server{}; - INSERT_PADDING_BYTES(5); + INSERT_PADDING_BYTES(1); + s32 loop_count{}; u64 context_address{}; u64 context_size{}; - INSERT_PADDING_BYTES(8); + u32 loop_start_sample{}; + u32 loop_end_sample{}; }; static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer is an invalid size"); @@ -76,6 +78,9 @@ struct ServerWaveBuffer { bool end_of_stream{}; VAddr context_address{}; std::size_t context_size{}; + s32 loop_count{}; + u32 loop_start_sample{}; + u32 loop_end_sample{}; bool sent_to_dsp{true}; }; @@ -108,6 +113,7 @@ struct VoiceState { u32 external_context_size; bool is_external_context_used; bool voice_dropped; + s32 loop_count; }; class VoiceChannelResource { @@ -206,7 +212,7 @@ public: float last_volume{}; std::array<BiquadFilterParameter, AudioCommon::MAX_BIQUAD_FILTERS> biquad_filter{}; s32 wave_buffer_count{}; - s16 wave_bufffer_head{}; + s16 wave_buffer_head{}; INSERT_PADDING_BYTES(2); BehaviorFlags behavior_flags{}; VAddr additional_params_address{}; @@ -252,6 +258,7 @@ public: void FlushWaveBuffers(u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states, s32 channel_count); + void SetWaveBufferCompleted(VoiceState& dsp_state, const ServerWaveBuffer& wave_buffer); private: std::vector<s16> stored_samples; diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index 53b8b7ca0..7c0950bb0 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp @@ -345,8 +345,10 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList( static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type, const Service::FileSystem::FileSystemController& fs_controller) { const auto load_dir = fs_controller.GetModificationLoadRoot(title_id); + const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); if ((type != ContentRecordType::Program && type != ContentRecordType::Data) || - load_dir == nullptr || load_dir->GetSize() <= 0) { + ((load_dir == nullptr || load_dir->GetSize() <= 0) && + (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) { return; } @@ -356,7 +358,10 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t } const auto& disabled = Settings::values.disabled_addons[title_id]; - auto patch_dirs = load_dir->GetSubdirectories(); + std::vector<VirtualDir> patch_dirs = load_dir->GetSubdirectories(); + if (std::find(disabled.cbegin(), disabled.cend(), "SDMC") == disabled.cend()) { + patch_dirs.push_back(sdmc_load_dir); + } std::sort(patch_dirs.begin(), patch_dirs.end(), [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); }); @@ -402,7 +407,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t } VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type, - VirtualFile update_raw) const { + VirtualFile update_raw, bool apply_layeredfs) const { const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}", title_id, static_cast<u8>(type)); @@ -442,7 +447,9 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content } // LayeredFS - ApplyLayeredFS(romfs, title_id, type, fs_controller); + if (apply_layeredfs) { + ApplyLayeredFS(romfs, title_id, type, fs_controller); + } return romfs; } @@ -524,6 +531,15 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u } } + // SDMC mod directory (RomFS LayeredFS) + const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); + if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0 && + IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "romfs"))) { + const auto mod_disabled = + std::find(disabled.begin(), disabled.end(), "SDMC") != disabled.end(); + out.insert_or_assign(mod_disabled ? "[D] SDMC" : "SDMC", "LayeredFS"); + } + // DLC const auto dlc_entries = content_provider.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data); diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h index fb1853035..3be871f35 100644 --- a/src/core/file_sys/patch_manager.h +++ b/src/core/file_sys/patch_manager.h @@ -64,7 +64,8 @@ public: // - LayeredFS [[nodiscard]] VirtualFile PatchRomFS(VirtualFile base, u64 ivfc_offset, ContentRecordType type = ContentRecordType::Program, - VirtualFile update_raw = nullptr) const; + VirtualFile update_raw = nullptr, + bool apply_layeredfs = true) const; // Returns a vector of pairs between patch names and patch versions. // i.e. Update 3.2.2 will return {"Update", "3.2.2"} diff --git a/src/core/file_sys/sdmc_factory.cpp b/src/core/file_sys/sdmc_factory.cpp index cb56d8f2d..e5c72cd4d 100644 --- a/src/core/file_sys/sdmc_factory.cpp +++ b/src/core/file_sys/sdmc_factory.cpp @@ -12,23 +12,32 @@ namespace FileSys { constexpr u64 SDMC_TOTAL_SIZE = 0x10000000000; // 1 TiB -SDMCFactory::SDMCFactory(VirtualDir dir_) - : dir(std::move(dir_)), contents(std::make_unique<RegisteredCache>( - GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/registered"), - [](const VirtualFile& file, const NcaID& id) { - return NAX{file, id}.GetDecrypted(); - })), +SDMCFactory::SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_) + : sd_dir(std::move(sd_dir_)), sd_mod_dir(std::move(sd_mod_dir_)), + contents(std::make_unique<RegisteredCache>( + GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/registered"), + [](const VirtualFile& file, const NcaID& id) { + return NAX{file, id}.GetDecrypted(); + })), placeholder(std::make_unique<PlaceholderCache>( - GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/placehld"))) {} + GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/placehld"))) {} SDMCFactory::~SDMCFactory() = default; ResultVal<VirtualDir> SDMCFactory::Open() const { - return MakeResult<VirtualDir>(dir); + return MakeResult<VirtualDir>(sd_dir); +} + +VirtualDir SDMCFactory::GetSDMCModificationLoadRoot(u64 title_id) const { + // LayeredFS doesn't work on updates and title id-less homebrew + if (title_id == 0 || (title_id & 0xFFF) == 0x800) { + return nullptr; + } + return GetOrCreateDirectoryRelative(sd_mod_dir, fmt::format("/{:016X}", title_id)); } VirtualDir SDMCFactory::GetSDMCContentDirectory() const { - return GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents"); + return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents"); } RegisteredCache* SDMCFactory::GetSDMCContents() const { @@ -40,11 +49,11 @@ PlaceholderCache* SDMCFactory::GetSDMCPlaceholder() const { } VirtualDir SDMCFactory::GetImageDirectory() const { - return GetOrCreateDirectoryRelative(dir, "/Nintendo/Album"); + return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Album"); } u64 SDMCFactory::GetSDMCFreeSpace() const { - return GetSDMCTotalSpace() - dir->GetSize(); + return GetSDMCTotalSpace() - sd_dir->GetSize(); } u64 SDMCFactory::GetSDMCTotalSpace() const { diff --git a/src/core/file_sys/sdmc_factory.h b/src/core/file_sys/sdmc_factory.h index 2bb92ba93..3a3d11f3a 100644 --- a/src/core/file_sys/sdmc_factory.h +++ b/src/core/file_sys/sdmc_factory.h @@ -16,11 +16,12 @@ class PlaceholderCache; /// File system interface to the SDCard archive class SDMCFactory { public: - explicit SDMCFactory(VirtualDir dir); + explicit SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_); ~SDMCFactory(); ResultVal<VirtualDir> Open() const; + VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const; VirtualDir GetSDMCContentDirectory() const; RegisteredCache* GetSDMCContents() const; @@ -32,7 +33,8 @@ public: u64 GetSDMCTotalSpace() const; private: - VirtualDir dir; + VirtualDir sd_dir; + VirtualDir sd_mod_dir; std::unique_ptr<RegisteredCache> contents; std::unique_ptr<PlaceholderCache> placeholder; diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 800feba6e..7583d68b2 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -96,7 +96,7 @@ private: void RequestUpdateImpl(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Audio, "(STUBBED) called"); - std::vector<u8> output_params(ctx.GetWriteBufferSize()); + std::vector<u8> output_params(ctx.GetWriteBufferSize(), 0); auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer(), output_params); if (result.IsSuccess()) { @@ -110,17 +110,19 @@ private: void Start(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 2}; + const auto result = renderer->Start(); - rb.Push(ResultSuccess); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(result); } void Stop(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 2}; + const auto result = renderer->Stop(); - rb.Push(ResultSuccess); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(result); } void QuerySystemEvent(Kernel::HLERequestContext& ctx) { diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index 3c16fe6c7..4a9b13e45 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp @@ -703,6 +703,16 @@ FileSys::VirtualDir FileSystemController::GetModificationLoadRoot(u64 title_id) return bis_factory->GetModificationLoadRoot(title_id); } +FileSys::VirtualDir FileSystemController::GetSDMCModificationLoadRoot(u64 title_id) const { + LOG_TRACE(Service_FS, "Opening SDMC mod load root for tid={:016X}", title_id); + + if (sdmc_factory == nullptr) { + return nullptr; + } + + return sdmc_factory->GetSDMCModificationLoadRoot(title_id); +} + FileSys::VirtualDir FileSystemController::GetModificationDumpRoot(u64 title_id) const { LOG_TRACE(Service_FS, "Opening mod dump root for tid={:016X}", title_id); @@ -733,20 +743,23 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove } using YuzuPath = Common::FS::YuzuPath; + const auto sdmc_dir_path = Common::FS::GetYuzuPath(YuzuPath::SDMCDir); + const auto sdmc_load_dir_path = sdmc_dir_path / "atmosphere/contents"; const auto rw_mode = FileSys::Mode::ReadWrite; auto nand_directory = vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::NANDDir), rw_mode); - auto sd_directory = - vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::SDMCDir), rw_mode); + auto sd_directory = vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_dir_path), rw_mode); auto load_directory = vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::LoadDir), FileSys::Mode::Read); + auto sd_load_directory = + vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_load_dir_path), FileSys::Mode::Read); auto dump_directory = vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::DumpDir), rw_mode); if (bis_factory == nullptr) { - bis_factory = - std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory); + bis_factory = std::make_unique<FileSys::BISFactory>( + nand_directory, std::move(load_directory), std::move(dump_directory)); system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SysNAND, bis_factory->GetSystemNANDContents()); system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::UserNAND, @@ -759,7 +772,8 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove } if (sdmc_factory == nullptr) { - sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory)); + sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory), + std::move(sd_load_directory)); system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC, sdmc_factory->GetSDMCContents()); } diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h index b6b1b9220..d387af3cb 100644 --- a/src/core/hle/service/filesystem/filesystem.h +++ b/src/core/hle/service/filesystem/filesystem.h @@ -115,6 +115,7 @@ public: FileSys::VirtualDir GetContentDirectory(ContentStorageId id) const; FileSys::VirtualDir GetImageDirectory(ImageDirectoryId id) const; + FileSys::VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const; FileSys::VirtualDir GetModificationLoadRoot(u64 title_id) const; FileSys::VirtualDir GetModificationDumpRoot(u64 title_id) const; diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp index 114aff31c..869d2763f 100644 --- a/src/core/hle/service/mii/manager.cpp +++ b/src/core/hle/service/mii/manager.cpp @@ -20,6 +20,7 @@ namespace { constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4}; +constexpr std::size_t BaseMiiCount{2}; constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()}; constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'}; @@ -415,7 +416,7 @@ u32 MiiManager::GetCount(SourceFlag source_flag) const { count += 0; } if ((source_flag & SourceFlag::Default) != SourceFlag::None) { - count += DefaultMiiCount; + count += (DefaultMiiCount - BaseMiiCount); } return static_cast<u32>(count); } @@ -445,7 +446,7 @@ ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_ return MakeResult(std::move(result)); } - for (std::size_t index = 0; index < DefaultMiiCount; index++) { + for (std::size_t index = BaseMiiCount; index < DefaultMiiCount; index++) { result.emplace_back(BuildDefault(index), Source::Default); } diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp index 100138d11..2fafd077f 100755 --- a/src/input_common/analog_from_button.cpp +++ b/src/input_common/analog_from_button.cpp @@ -27,6 +27,7 @@ public: down->SetCallback(callbacks); left->SetCallback(callbacks); right->SetCallback(callbacks); + modifier->SetCallback(callbacks); } bool IsAngleGreater(float old_angle, float new_angle) const { diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index a3fda1094..8b86ad050 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast<u32>(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), - data); + nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); break; default: break; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index d02dc6260..1b4bbc8ac 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) { av_free(ptr); } -Codec::Codec(GPU& gpu_) - : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} Codec::~Codec() { @@ -43,46 +43,48 @@ Codec::~Codec() { avcodec_close(av_codec_ctx); } +void Codec::Initialize() { + AVCodecID codec{AV_CODEC_ID_NONE}; + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + codec = AV_CODEC_ID_H264; + break; + case NvdecCommon::VideoCodec::Vp9: + codec = AV_CODEC_ID_VP9; + break; + default: + return; + } + av_codec = avcodec_find_decoder(codec); + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + return; +} + void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { if (current_codec != codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); current_codec = codec; + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); } } -void Codec::StateWrite(u32 offset, u64 arguments) { - u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); - std::memcpy(state_offset, &arguments, sizeof(u64)); -} - void Codec::Decode() { - bool is_first_frame = false; + const bool is_first_frame = !initialized; if (!initialized) { - if (current_codec == NvdecCommon::VideoCodec::H264) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); - } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); - } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); - return; - } - - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - - // TODO(ameerj): libavcodec gpu hw acceleration - - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - return; - } - initialized = true; - is_first_frame = true; + Initialize(); } - bool vp9_hidden_frame = false; + bool vp9_hidden_frame = false; AVPacket packet{}; av_init_packet(&packet); std::vector<u8> frame_data; @@ -95,7 +97,7 @@ void Codec::Decode() { } packet.data = frame_data.data(); - packet.size = static_cast<int>(frame_data.size()); + packet.size = static_cast<s32>(frame_data.size()); avcodec_send_packet(av_codec_ctx, &packet); @@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { return current_codec; } +std::string_view Codec::GetCurrentCodecName() const { + switch (current_codec) { + case NvdecCommon::VideoCodec::None: + return "None"; + case NvdecCommon::VideoCodec::H264: + return "H264"; + case NvdecCommon::VideoCodec::Vp8: + return "VP8"; + case NvdecCommon::VideoCodec::H265: + return "H265"; + case NvdecCommon::VideoCodec::Vp9: + return "VP9"; + default: + return "Unknown"; + } +}; + } // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 8a2a6c360..96c823c76 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -34,15 +34,15 @@ class VP9; class Codec { public: - explicit Codec(GPU& gpu); + explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); ~Codec(); + /// Initialize the codec, returning success or failure + void Initialize(); + /// Sets NVDEC video stream codec void SetTargetCodec(NvdecCommon::VideoCodec codec); - /// Populate NvdecRegisters state with argument value at the provided offset - void StateWrite(u32 offset, u64 arguments); - /// Call decoders to construct headers, decode AVFrame with ffmpeg void Decode(); @@ -51,6 +51,8 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec + [[nodiscard]] std::string_view GetCurrentCodecName() const; private: bool initialized{}; @@ -60,10 +62,10 @@ private: AVCodecContext* av_codec_ctx{nullptr}; GPU& gpu; + const NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder; - NvdecCommon::NvdecRegisters state{}; std::queue<AVFramePtr> av_frames{}; }; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index fea6aed98..5fb6d45ee 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -45,134 +45,129 @@ H264::~H264() = default; const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, bool is_first_frame) { - H264DecoderContext context{}; + H264DecoderContext context; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); - const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); + const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { - frame.resize(context.frame_data_size); - + frame.resize(context.stream_len); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); - } else { - /// Encode header - H264BitWriter writer{}; - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(7, 5); - writer.WriteU(100, 8); - writer.WriteU(0, 8); - writer.WriteU(31, 8); - writer.WriteUe(0); - const auto chroma_format_idc = - static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); - writer.WriteUe(chroma_format_idc); - if (chroma_format_idc == 3) { - writer.WriteBit(false); - } - - writer.WriteUe(0); - writer.WriteUe(0); - writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag - writer.WriteBit(false); // Scaling matrix present flag - - const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); - writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); - writer.WriteUe(order_cnt_type); - if (order_cnt_type == 0) { - writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); - } else if (order_cnt_type == 1) { - writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); - - writer.WriteSe(0); - writer.WriteSe(0); - writer.WriteUe(0); - } - - const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / - (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + return frame; + } - writer.WriteUe(16); + // Encode header + H264BitWriter writer{}; + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + const u32 chroma_format_idc = + static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); + writer.WriteUe(chroma_format_idc); + if (chroma_format_idc == 3) { writer.WriteBit(false); - writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); - writer.WriteUe(pic_height - 1); - writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - - if (!context.h264_parameter_set.frame_mbs_only_flag) { - writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); - } + } - writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); - writer.WriteBit(false); // Frame cropping flag - writer.WriteBit(false); // VUI parameter present flag + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag + writer.WriteBit(false); // Scaling matrix present flag - writer.End(); + writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); - // H264 PPS - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(8, 5); + const auto order_cnt_type = + static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); + writer.WriteUe(order_cnt_type); + if (order_cnt_type == 0) { + writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); + } else if (order_cnt_type == 1) { + writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); + writer.WriteSe(0); + writer.WriteSe(0); writer.WriteUe(0); - writer.WriteUe(0); + } - writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); - writer.WriteBit(false); - writer.WriteUe(0); - writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); - writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); - writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); - writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); - s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); - pic_init_qp = (pic_init_qp << 26) >> 26; - writer.WriteSe(pic_init_qp); - writer.WriteSe(0); - s32 chroma_qp_index_offset = - static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); - chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; + const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / + (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); + writer.WriteUe(pic_height - 1); + writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - writer.WriteSe(chroma_qp_index_offset); - writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); - writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); - writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); - writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + if (!context.h264_parameter_set.frame_mbs_only_flag) { + writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); + } + writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // H264 PPS + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); + writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); + writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); + writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); + s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); + writer.WriteSe(pic_init_qp); + writer.WriteSe(0); + s32 chroma_qp_index_offset = + static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); + + writer.WriteSe(chroma_qp_index_offset); + writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); + writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + + writer.WriteBit(true); + + for (s32 index = 0; index < 6; index++) { writer.WriteBit(true); + std::span<const u8> matrix{context.weight_scale}; + writer.WriteScalingList(matrix, index * 16, 16); + } - for (s32 index = 0; index < 6; index++) { + if (context.h264_parameter_set.transform_8x8_mode_flag) { + for (s32 index = 0; index < 2; index++) { writer.WriteBit(true); - const auto matrix_x4 = - std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); - writer.WriteScalingList(matrix_x4, index * 16, 16); - } - - if (context.h264_parameter_set.transform_8x8_mode_flag) { - for (s32 index = 0; index < 2; index++) { - writer.WriteBit(true); - const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), - context.scaling_matrix_8.end()); - - writer.WriteScalingList(matrix_x8, index * 64, 64); - } + std::span<const u8> matrix{context.weight_scale_8x8}; + writer.WriteScalingList(matrix, index * 64, 64); } + } - s32 chroma_qp_index_offset2 = - static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); - chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; + s32 chroma_qp_index_offset2 = + static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); - writer.WriteSe(chroma_qp_index_offset2); + writer.WriteSe(chroma_qp_index_offset2); - writer.End(); + writer.End(); - const auto& encoded_header = writer.GetByteArray(); - frame.resize(encoded_header.size() + context.frame_data_size); - std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); + const auto& encoded_header = writer.GetByteArray(); + frame.resize(encoded_header.size() + context.stream_len); + std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); - gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, - frame.data() + encoded_header.size(), - context.frame_data_size); - } + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, + frame.data() + encoded_header.size(), context.stream_len); return frame; } @@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 1 : 0, 1); } -void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { +void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { std::vector<u8> scan(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 0f3a1d9f3..bfe84a472 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -20,7 +20,9 @@ #pragma once +#include <span> #include <vector> +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -48,7 +50,7 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); + void WriteScalingList(std::span<const u8> list, s32 start, s32 count); /// Return the bitstream as a vector. [[nodiscard]] std::vector<u8>& GetByteArray(); @@ -78,40 +80,110 @@ public: const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); private: + std::vector<u8> frame; + GPU& gpu; + struct H264ParameterSet { - u32 log2_max_pic_order_cnt{}; - u32 delta_pic_order_always_zero_flag{}; - u32 frame_mbs_only_flag{}; - u32 pic_width_in_mbs{}; - u32 pic_height_in_map_units{}; - INSERT_PADDING_WORDS(1); - u32 entropy_coding_mode_flag{}; - u32 bottom_field_pic_order_flag{}; - u32 num_refidx_l0_default_active{}; - u32 num_refidx_l1_default_active{}; - u32 deblocking_filter_control_flag{}; - u32 redundant_pic_count_flag{}; - u32 transform_8x8_mode_flag{}; - INSERT_PADDING_WORDS(9); - u64 flags{}; - u32 frame_number{}; - u32 frame_number2{}; + s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 + s32 delta_pic_order_always_zero_flag; ///< 0x04 + s32 frame_mbs_only_flag; ///< 0x08 + u32 pic_width_in_mbs; ///< 0x0C + u32 frame_height_in_map_units; ///< 0x10 + union { ///< 0x14 + BitField<0, 2, u32> tile_format; + BitField<2, 3, u32> gob_height; + }; + u32 entropy_coding_mode_flag; ///< 0x18 + s32 pic_order_present_flag; ///< 0x1C + s32 num_refidx_l0_default_active; ///< 0x20 + s32 num_refidx_l1_default_active; ///< 0x24 + s32 deblocking_filter_control_present_flag; ///< 0x28 + s32 redundant_pic_cnt_present_flag; ///< 0x2C + u32 transform_8x8_mode_flag; ///< 0x30 + u32 pitch_luma; ///< 0x34 + u32 pitch_chroma; ///< 0x38 + u32 luma_top_offset; ///< 0x3C + u32 luma_bot_offset; ///< 0x40 + u32 luma_frame_offset; ///< 0x44 + u32 chroma_top_offset; ///< 0x48 + u32 chroma_bot_offset; ///< 0x4C + u32 chroma_frame_offset; ///< 0x50 + u32 hist_buffer_size; ///< 0x54 + union { ///< 0x58 + union { + BitField<0, 1, u64> mbaff_frame; + BitField<1, 1, u64> direct_8x8_inference; + BitField<2, 1, u64> weighted_pred; + BitField<3, 1, u64> constrained_intra_pred; + BitField<4, 1, u64> ref_pic; + BitField<5, 1, u64> field_pic; + BitField<6, 1, u64> bottom_field; + BitField<7, 1, u64> second_field; + } flags; + BitField<8, 4, u64> log2_max_frame_num_minus4; + BitField<12, 2, u64> chroma_format_idc; + BitField<14, 2, u64> pic_order_cnt_type; + BitField<16, 6, s64> pic_init_qp_minus26; + BitField<22, 5, s64> chroma_qp_index_offset; + BitField<27, 5, s64> second_chroma_qp_index_offset; + BitField<32, 2, u64> weighted_bipred_idc; + BitField<34, 7, u64> curr_pic_idx; + BitField<41, 5, u64> curr_col_idx; + BitField<46, 16, u64> frame_number; + BitField<62, 1, u64> frame_surfaces; + BitField<63, 1, u64> output_memory_layout; + }; }; - static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); + static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); struct H264DecoderContext { - INSERT_PADDING_BYTES(0x48); - u32 frame_data_size{}; - INSERT_PADDING_BYTES(0xc); - H264ParameterSet h264_parameter_set{}; - INSERT_PADDING_BYTES(0x100); - std::array<u8, 0x60> scaling_matrix_4; - std::array<u8, 0x80> scaling_matrix_8; + INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000 + u32 stream_len; ///< 0x0048 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C + H264ParameterSet h264_parameter_set; ///< 0x0058 + INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8 + std::array<u8, 0x60> weight_scale; ///< 0x01C0 + std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220 }; - static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); - - std::vector<u8> frame; - GPU& gpu; + static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264ParameterSet, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); + ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); + ASSERT_POSITION(frame_mbs_only_flag, 0x08); + ASSERT_POSITION(pic_width_in_mbs, 0x0C); + ASSERT_POSITION(frame_height_in_map_units, 0x10); + ASSERT_POSITION(tile_format, 0x14); + ASSERT_POSITION(entropy_coding_mode_flag, 0x18); + ASSERT_POSITION(pic_order_present_flag, 0x1C); + ASSERT_POSITION(num_refidx_l0_default_active, 0x20); + ASSERT_POSITION(num_refidx_l1_default_active, 0x24); + ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); + ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); + ASSERT_POSITION(transform_8x8_mode_flag, 0x30); + ASSERT_POSITION(pitch_luma, 0x34); + ASSERT_POSITION(pitch_chroma, 0x38); + ASSERT_POSITION(luma_top_offset, 0x3C); + ASSERT_POSITION(luma_bot_offset, 0x40); + ASSERT_POSITION(luma_frame_offset, 0x44); + ASSERT_POSITION(chroma_top_offset, 0x48); + ASSERT_POSITION(chroma_bot_offset, 0x4C); + ASSERT_POSITION(chroma_frame_offset, 0x50); + ASSERT_POSITION(hist_buffer_size, 0x54); + ASSERT_POSITION(flags, 0x58); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264DecoderContext, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(stream_len, 0x48); + ASSERT_POSITION(h264_parameter_set, 0x58); + ASSERT_POSITION(weight_scale, 0x1C0); +#undef ASSERT_POSITION }; } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 29bb31418..902bc2a98 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { - PictureInfo picture_info{}; + PictureInfo picture_info; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = picture_info.Convert(); @@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) } void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { - EntropyProbs entropy{}; + EntropyProbs entropy; gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); entropy.Convert(dst); } diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 139501a1c..2da14f3ca 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -15,10 +15,10 @@ class GPU; namespace Decoder { struct Vp9FrameDimensions { - s16 width{}; - s16 height{}; - s16 luma_pitch{}; - s16 chroma_pitch{}; + s16 width; + s16 height; + s16 luma_pitch; + s16 chroma_pitch; }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); @@ -49,87 +49,87 @@ enum class TxMode { }; struct Segmentation { - u8 enabled{}; - u8 update_map{}; - u8 temporal_update{}; - u8 abs_delta{}; - std::array<u32, 8> feature_mask{}; - std::array<std::array<s16, 4>, 8> feature_data{}; + u8 enabled; + u8 update_map; + u8 temporal_update; + u8 abs_delta; + std::array<u32, 8> feature_mask; + std::array<std::array<s16, 4>, 8> feature_data; }; static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); struct LoopFilter { - u8 mode_ref_delta_enabled{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; + u8 mode_ref_delta_enabled; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; }; static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); struct Vp9EntropyProbs { - std::array<u8, 36> y_mode_prob{}; - std::array<u8, 64> partition_prob{}; - std::array<u8, 1728> coef_probs{}; - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 3> skip_probs{}; - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 20> classes{}; - std::array<u8, 2> class_0{}; - std::array<u8, 20> prob_bits{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; + std::array<u8, 36> y_mode_prob; ///< 0x0000 + std::array<u8, 64> partition_prob; ///< 0x0024 + std::array<u8, 1728> coef_probs; ///< 0x0064 + std::array<u8, 8> switchable_interp_prob; ///< 0x0724 + std::array<u8, 28> inter_mode_prob; ///< 0x072C + std::array<u8, 4> intra_inter_prob; ///< 0x0748 + std::array<u8, 5> comp_inter_prob; ///< 0x074C + std::array<u8, 10> single_ref_prob; ///< 0x0751 + std::array<u8, 5> comp_ref_prob; ///< 0x075B + std::array<u8, 6> tx_32x32_prob; ///< 0x0760 + std::array<u8, 4> tx_16x16_prob; ///< 0x0766 + std::array<u8, 2> tx_8x8_prob; ///< 0x076A + std::array<u8, 3> skip_probs; ///< 0x076C + std::array<u8, 3> joints; ///< 0x076F + std::array<u8, 2> sign; ///< 0x0772 + std::array<u8, 20> classes; ///< 0x0774 + std::array<u8, 2> class_0; ///< 0x0788 + std::array<u8, 20> prob_bits; ///< 0x078A + std::array<u8, 12> class_0_fr; ///< 0x079E + std::array<u8, 6> fr; ///< 0x07AA + std::array<u8, 2> class_0_hp; ///< 0x07B0 + std::array<u8, 2> high_precision; ///< 0x07B2 }; static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame{}; - bool intra_only{}; - bool last_frame_was_key{}; - bool frame_size_changed{}; - bool error_resilient_mode{}; - bool last_frame_shown{}; - bool show_frame{}; - std::array<s8, 4> ref_frame_sign_bias{}; - s32 base_q_index{}; - s32 y_dc_delta_q{}; - s32 uv_dc_delta_q{}; - s32 uv_ac_delta_q{}; - bool lossless{}; - s32 transform_mode{}; - bool allow_high_precision_mv{}; - s32 interp_filter{}; - s32 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - s32 log2_tile_cols{}; - s32 log2_tile_rows{}; - bool segment_enabled{}; - bool segment_map_update{}; - bool segment_map_temporal_update{}; - s32 segment_abs_delta{}; - std::array<u32, 8> segment_feature_enable{}; - std::array<std::array<s16, 4>, 8> segment_feature_data{}; - bool mode_ref_delta_enabled{}; - bool use_prev_in_find_mv_refs{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; - Vp9EntropyProbs entropy{}; - Vp9FrameDimensions frame_size{}; - u8 first_level{}; - u8 sharpness_level{}; - u32 bitstream_size{}; - std::array<u64, 4> frame_offsets{}; - std::array<bool, 4> refresh_frame{}; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool frame_size_changed; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + std::array<s8, 4> ref_frame_sign_bias; + s32 base_q_index; + s32 y_dc_delta_q; + s32 uv_dc_delta_q; + s32 uv_ac_delta_q; + bool lossless; + s32 transform_mode; + bool allow_high_precision_mv; + s32 interp_filter; + s32 reference_mode; + s8 comp_fixed_ref; + std::array<s8, 2> comp_var_ref; + s32 log2_tile_cols; + s32 log2_tile_rows; + bool segment_enabled; + bool segment_map_update; + bool segment_map_temporal_update; + s32 segment_abs_delta; + std::array<u32, 8> segment_feature_enable; + std::array<std::array<s16, 4>, 8> segment_feature_data; + bool mode_ref_delta_enabled; + bool use_prev_in_find_mv_refs; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; + Vp9EntropyProbs entropy; + Vp9FrameDimensions frame_size; + u8 first_level; + u8 sharpness_level; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; + std::array<bool, 4> refresh_frame; }; struct Vp9FrameContainer { @@ -138,35 +138,35 @@ struct Vp9FrameContainer { }; struct PictureInfo { - INSERT_PADDING_WORDS(12); - u32 bitstream_size{}; - INSERT_PADDING_WORDS(5); - Vp9FrameDimensions last_frame_size{}; - Vp9FrameDimensions golden_frame_size{}; - Vp9FrameDimensions alt_frame_size{}; - Vp9FrameDimensions current_frame_size{}; - u32 vp9_flags{}; - std::array<s8, 4> ref_frame_sign_bias{}; - u8 first_level{}; - u8 sharpness_level{}; - u8 base_q_index{}; - u8 y_dc_delta_q{}; - u8 uv_ac_delta_q{}; - u8 uv_dc_delta_q{}; - u8 lossless{}; - u8 tx_mode{}; - u8 allow_high_precision_mv{}; - u8 interp_filter{}; - u8 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - u8 log2_tile_cols{}; - u8 log2_tile_rows{}; - Segmentation segmentation{}; - LoopFilter loop_filter{}; - INSERT_PADDING_BYTES(5); - u32 surface_params{}; - INSERT_PADDING_WORDS(3); + INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00 + u32 bitstream_size; ///< 0x30 + INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34 + Vp9FrameDimensions last_frame_size; ///< 0x48 + Vp9FrameDimensions golden_frame_size; ///< 0x50 + Vp9FrameDimensions alt_frame_size; ///< 0x58 + Vp9FrameDimensions current_frame_size; ///< 0x60 + u32 vp9_flags; ///< 0x68 + std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C + u8 first_level; ///< 0x70 + u8 sharpness_level; ///< 0x71 + u8 base_q_index; ///< 0x72 + u8 y_dc_delta_q; ///< 0x73 + u8 uv_ac_delta_q; ///< 0x74 + u8 uv_dc_delta_q; ///< 0x75 + u8 lossless; ///< 0x76 + u8 tx_mode; ///< 0x77 + u8 allow_high_precision_mv; ///< 0x78 + u8 interp_filter; ///< 0x79 + u8 reference_mode; ///< 0x7A + s8 comp_fixed_ref; ///< 0x7B + std::array<s8, 2> comp_var_ref; ///< 0x7C + u8 log2_tile_cols; ///< 0x7E + u8 log2_tile_rows; ///< 0x7F + Segmentation segmentation; ///< 0x80 + LoopFilter loop_filter; ///< 0xE4 + INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB + u32 surface_params; ///< 0xF0 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 [[nodiscard]] Vp9PictureInfo Convert() const { return { @@ -176,6 +176,7 @@ struct PictureInfo { .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, + .show_frame = false, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, @@ -204,45 +205,48 @@ struct PictureInfo { !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, + .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, .bitstream_size = bitstream_size, + .frame_offsets{}, + .refresh_frame{}, }; } }; static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); struct EntropyProbs { - INSERT_PADDING_BYTES(1024); - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - INSERT_PADDING_BYTES(80); - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> y_mode_prob_e8{}; - std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; - INSERT_PADDING_BYTES(64); - std::array<u8, 64> partition_prob{}; - INSERT_PADDING_BYTES(10); - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 3> skip_probs{}; - INSERT_PADDING_BYTES(1); - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 2> class_0{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; - std::array<u8, 20> classes{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 20> pred_bits{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - INSERT_PADDING_BYTES(17); - std::array<u8, 2304> coef_probs{}; + INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 + std::array<u8, 28> inter_mode_prob; ///< 0x0400 + std::array<u8, 4> intra_inter_prob; ///< 0x041C + INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 + std::array<u8, 2> tx_8x8_prob; ///< 0x0470 + std::array<u8, 4> tx_16x16_prob; ///< 0x0472 + std::array<u8, 6> tx_32x32_prob; ///< 0x0476 + std::array<u8, 4> y_mode_prob_e8; ///< 0x047C + std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480 + INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0 + std::array<u8, 64> partition_prob; ///< 0x04E0 + INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520 + std::array<u8, 8> switchable_interp_prob; ///< 0x052A + std::array<u8, 5> comp_inter_prob; ///< 0x0532 + std::array<u8, 3> skip_probs; ///< 0x0537 + INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A + std::array<u8, 3> joints; ///< 0x053B + std::array<u8, 2> sign; ///< 0x053E + std::array<u8, 2> class_0; ///< 0x0540 + std::array<u8, 6> fr; ///< 0x0542 + std::array<u8, 2> class_0_hp; ///< 0x0548 + std::array<u8, 2> high_precision; ///< 0x054A + std::array<u8, 20> classes; ///< 0x054C + std::array<u8, 12> class_0_fr; ///< 0x0560 + std::array<u8, 20> pred_bits; ///< 0x056C + std::array<u8, 10> single_ref_prob; ///< 0x0580 + std::array<u8, 5> comp_ref_prob; ///< 0x058A + INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F + std::array<u8, 2304> coef_probs; ///< 0x05A0 void Convert(Vp9EntropyProbs& fc) { fc.inter_mode_prob = inter_mode_prob; @@ -293,10 +297,45 @@ struct RefPoolElement { }; struct FrameContexts { - s64 from{}; - bool adapted{}; - Vp9EntropyProbs probs{}; + s64 from; + bool adapted; + Vp9EntropyProbs probs; }; +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(partition_prob, 0x0024); +ASSERT_POSITION(switchable_interp_prob, 0x0724); +ASSERT_POSITION(sign, 0x0772); +ASSERT_POSITION(class_0_fr, 0x079E); +ASSERT_POSITION(high_precision, 0x07B2); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(PictureInfo, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(bitstream_size, 0x30); +ASSERT_POSITION(last_frame_size, 0x48); +ASSERT_POSITION(first_level, 0x70); +ASSERT_POSITION(segmentation, 0x80); +ASSERT_POSITION(loop_filter, 0xE4); +ASSERT_POSITION(surface_params, 0xF0); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(inter_mode_prob, 0x400); +ASSERT_POSITION(tx_8x8_prob, 0x470); +ASSERT_POSITION(partition_prob, 0x4E0); +ASSERT_POSITION(class_0, 0x540); +ASSERT_POSITION(class_0_fr, 0x560); +ASSERT_POSITION(coef_probs, 0x5A0); +#undef ASSERT_POSITION + }; // namespace Decoder }; // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index e4f919afd..b5e3b70fc 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -8,22 +8,21 @@ namespace Tegra { -Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} +#define NVDEC_REG_INDEX(field_name) \ + (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {} Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Method method, u32 argument) { - if (method == Method::SetVideoCodec) { - codec->StateWrite(static_cast<u32>(method), argument); - } else { - codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8); - } +void Nvdec::ProcessMethod(u32 method, u32 argument) { + state.reg_array[method] = static_cast<u64>(argument) << 8; switch (method) { - case Method::SetVideoCodec: + case NVDEC_REG_INDEX(set_codec_id): codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); break; - case Method::Execute: + case NVDEC_REG_INDEX(execute): Execute(); break; } diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index e66be80b8..6e1da0b04 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -14,16 +14,11 @@ class GPU; class Nvdec { public: - enum class Method : u32 { - SetVideoCodec = 0x80, - Execute = 0xc0, - }; - explicit Nvdec(GPU& gpu); ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, u32 argument); + void ProcessMethod(u32 method, u32 argument); /// Return most recently decoded frame [[nodiscard]] AVFramePtr GetFrame(); @@ -33,6 +28,7 @@ private: void Execute(); GPU& gpu; + NvdecCommon::NvdecRegisters state; std::unique_ptr<Codec> codec; }; } // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 01b5e086d..6a24e00a0 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h @@ -4,40 +4,13 @@ #pragma once +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" namespace Tegra::NvdecCommon { -struct NvdecRegisters { - INSERT_PADDING_WORDS(256); - u64 set_codec_id{}; - INSERT_PADDING_WORDS(254); - u64 set_platform_id{}; - u64 picture_info_offset{}; - u64 frame_bitstream_offset{}; - u64 frame_number{}; - u64 h264_slice_data_offsets{}; - u64 h264_mv_dump_offset{}; - INSERT_PADDING_WORDS(6); - u64 frame_stats_offset{}; - u64 h264_last_surface_luma_offset{}; - u64 h264_last_surface_chroma_offset{}; - std::array<u64, 17> surface_luma_offset{}; - std::array<u64, 17> surface_chroma_offset{}; - INSERT_PADDING_WORDS(132); - u64 vp9_entropy_probs_offset{}; - u64 vp9_backward_updates_offset{}; - u64 vp9_last_frame_segmap_offset{}; - u64 vp9_curr_frame_segmap_offset{}; - INSERT_PADDING_WORDS(2); - u64 vp9_last_frame_mvs_offset{}; - u64 vp9_curr_frame_mvs_offset{}; - INSERT_PADDING_WORDS(2); -}; -static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); - -enum class VideoCodec : u32 { +enum class VideoCodec : u64 { None = 0x0, H264 = 0x3, Vp8 = 0x5, @@ -45,4 +18,76 @@ enum class VideoCodec : u32 { Vp9 = 0x9, }; +// NVDEC should use a 32-bit address space, but is mapped to 64-bit, +// doubling the sizes here is compensating for that. +struct NvdecRegisters { + static constexpr std::size_t NUM_REGS = 0x178; + + union { + struct { + INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 + VideoCodec set_codec_id; ///< 0x0400 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 + u64 execute; ///< 0x0600 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 + struct { ///< 0x0800 + union { + BitField<0, 3, VideoCodec> codec; + BitField<4, 1, u64> gp_timer_on; + BitField<13, 1, u64> mb_timer_on; + BitField<14, 1, u64> intra_frame_pslc; + BitField<17, 1, u64> all_intra_frame; + }; + } control_params; + u64 picture_info_offset; ///< 0x0808 + u64 frame_bitstream_offset; ///< 0x0810 + u64 frame_number; ///< 0x0818 + u64 h264_slice_data_offsets; ///< 0x0820 + u64 h264_mv_dump_offset; ///< 0x0828 + INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 + u64 frame_stats_offset; ///< 0x0848 + u64 h264_last_surface_luma_offset; ///< 0x0850 + u64 h264_last_surface_chroma_offset; ///< 0x0858 + std::array<u64, 17> surface_luma_offset; ///< 0x0860 + std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 + INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970 + u64 vp9_entropy_probs_offset; ///< 0x0B80 + u64 vp9_backward_updates_offset; ///< 0x0B88 + u64 vp9_last_frame_segmap_offset; ///< 0x0B90 + u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 + u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 + u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 + }; + std::array<u64, NUM_REGS> reg_array; + }; +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(set_codec_id, 0x80); +ASSERT_REG_POSITION(execute, 0xC0); +ASSERT_REG_POSITION(control_params, 0x100); +ASSERT_REG_POSITION(picture_info_offset, 0x101); +ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); +ASSERT_REG_POSITION(frame_number, 0x103); +ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); +ASSERT_REG_POSITION(frame_stats_offset, 0x109); +ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); +ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); +ASSERT_REG_POSITION(surface_luma_offset, 0x10C); +ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); +ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); +ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); +ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); +ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); +ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); +ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); + +#undef ASSERT_REG_POSITION + } // namespace Tegra::NvdecCommon diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 2208e1922..c9cff7450 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -18,7 +18,10 @@ set(SHADER_FILES vulkan_uint8.comp ) -find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) +find_program(GLSLANGVALIDATOR "glslangValidator") +if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND") + message(FATAL_ERROR "Required program `glslangValidator` not found.") +endif() set(GLSL_FLAGS "") set(QUIET_FLAG "--quiet") diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 320ee8d30..63d8ad42a 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -42,6 +42,8 @@ public: [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; + [[nodiscard]] virtual std::string GetDeviceVendor() const = 0; + // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3f4532ca7..3b00614e7 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -202,13 +202,13 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); throw std::runtime_error{"Insufficient version"}; } - const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); + vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); - const bool is_nvidia = vendor == "NVIDIA Corporation"; - const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_intel = vendor == "Intel"; + const bool is_nvidia = vendor_name == "NVIDIA Corporation"; + const bool is_amd = vendor_name == "ATI Technologies Inc."; + const bool is_intel = vendor_name == "Intel"; #ifdef __unix__ const bool is_linux = true; @@ -275,6 +275,56 @@ Device::Device() { } } +std::string Device::GetVendorName() const { + if (vendor_name == "NVIDIA Corporation") { + return "NVIDIA"; + } + if (vendor_name == "ATI Technologies Inc.") { + return "AMD"; + } + if (vendor_name == "Intel") { + // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris. + // Simply return `INTEL` for those as well as the Windows driver. + return "INTEL"; + } + if (vendor_name == "Intel Open Source Technology Center") { + return "I965"; + } + if (vendor_name == "Mesa Project") { + return "I915"; + } + if (vendor_name == "Mesa/X.org") { + // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return + // MESA instead of one of those driver names. + return "MESA"; + } + if (vendor_name == "AMD") { + return "RADEONSI"; + } + if (vendor_name == "nouveau") { + return "NOUVEAU"; + } + if (vendor_name == "X.Org") { + return "R600"; + } + if (vendor_name == "Collabora Ltd") { + return "ZINK"; + } + if (vendor_name == "Intel Corporation") { + return "OPENSWR"; + } + if (vendor_name == "Microsoft Corporation") { + return "D3D12"; + } + if (vendor_name == "NVIDIA") { + // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default + // strategy would have returned `NVIDIA` here for this driver, the same result as the + // proprietary driver. + return "TEGRA"; + } + return vendor_name; +} + Device::Device(std::nullptr_t) { max_uniform_buffers.fill(std::numeric_limits<u32>::max()); uniform_buffer_alignment = 4; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index f24bd0c7b..2c2b13767 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -22,6 +22,8 @@ public: explicit Device(); explicit Device(std::nullptr_t); + [[nodiscard]] std::string GetVendorName() const; + u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; } @@ -130,6 +132,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); + std::string vendor_name; std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; size_t uniform_buffer_alignment{}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e892bd9ba..ff0f03e99 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -342,6 +342,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, VideoCommon::SubresourceLayers subresource, GLenum target) { switch (target) { + case GL_TEXTURE_1D: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(0), + .z = static_cast<GLint>(0), + }; + case GL_TEXTURE_1D_ARRAY: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(0), + .z = static_cast<GLint>(subresource.base_layer), + }; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return CopyOrigin{ @@ -367,6 +381,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 VideoCommon::SubresourceLayers dst_subresource, GLenum target) { switch (target) { + case GL_TEXTURE_1D: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(1), + .depth = static_cast<GLsizei>(1), + }; + case GL_TEXTURE_1D_ARRAY: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(1), + .depth = static_cast<GLsizei>(dst_subresource.num_layers), + }; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return CopyRegion{ diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cc19a110f..0b66f8332 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -70,6 +70,10 @@ public: return &rasterizer; } + [[nodiscard]] std::string GetDeviceVendor() const override { + return device.GetVendorName(); + } + private: /// Initializes the OpenGL state and creates persistent objects. void InitOpenGLObjects(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 72071316c..d7d17e110 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -47,6 +47,10 @@ public: return &rasterizer; } + [[nodiscard]] std::string GetDeviceVendor() const override { + return device.GetDriverName(); + } + private: void Report() const; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c7cfd02b6..d8dbd3824 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1057,9 +1057,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector<ImageId> right_aliased_ids; std::vector<ImageId> bad_overlap_ids; ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { - if (info.type != overlap.info.type) { - return; - } if (info.type == ImageType::Linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 23814afd2..f214510da 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -532,6 +532,27 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want return (supported_usage & wanted_usage) == wanted_usage; } +std::string Device::GetDriverName() const { + switch (driver_id) { + case VK_DRIVER_ID_AMD_PROPRIETARY: + return "AMD"; + case VK_DRIVER_ID_AMD_OPEN_SOURCE: + return "AMDVLK"; + case VK_DRIVER_ID_MESA_RADV: + return "RADV"; + case VK_DRIVER_ID_NVIDIA_PROPRIETARY: + return "NVIDIA"; + case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS: + return "INTEL"; + case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA: + return "ANV"; + case VK_DRIVER_ID_MESA_LLVMPIPE: + return "LAVAPIPE"; + default: + return vendor_name; + } +} + void Device::CheckSuitability(bool requires_swapchain) const { std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; bool has_swapchain = false; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 88b298196..96c0f8c60 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -45,6 +45,9 @@ public: /// Reports a shader to Nsight Aftermath. void SaveShader(const std::vector<u32>& spirv) const; + /// Returns the name of the VkDriverId reported from Vulkan. + std::string GetDriverName() const; + /// Returns the dispatch loader with direct function pointers of the device. const vk::DeviceDispatch& GetDispatchLoader() const { return dld; diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index efdc6aa50..7a6f84d96 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -143,24 +143,25 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) { } void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); ev->accept(); } void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); ev->accept(); } void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); ev->accept(); } void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, + ev->angleDelta().y() / 120); ev->accept(); } diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index da956c99b..e44907be8 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -521,7 +521,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); remove_menu->addSeparator(); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); - QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS")); + QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); + QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); + QAction* dump_romfs_sdmc = dump_romfs_menu->addAction(tr("Dump RomFS to SDMC")); QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard")); QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry")); context_menu.addSeparator(); @@ -570,8 +572,12 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); }); - connect(dump_romfs, &QAction::triggered, - [this, program_id, path]() { emit DumpRomFSRequested(program_id, path); }); + connect(dump_romfs, &QAction::triggered, [this, program_id, path]() { + emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal); + }); + connect(dump_romfs_sdmc, &QAction::triggered, [this, program_id, path]() { + emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::SDMC); + }); connect(copy_tid, &QAction::triggered, [this, program_id]() { emit CopyTIDRequested(program_id); }); connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() { diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index b630e34ff..50402da51 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -45,6 +45,11 @@ enum class GameListRemoveTarget { CustomConfiguration, }; +enum class DumpRomFSTarget { + Normal, + SDMC, +}; + enum class InstalledEntryType { Game, Update, @@ -92,7 +97,7 @@ signals: void RemoveInstalledEntryRequested(u64 program_id, InstalledEntryType type); void RemoveFileRequested(u64 program_id, GameListRemoveTarget target, const std::string& game_path); - void DumpRomFSRequested(u64 program_id, const std::string& game_path); + void DumpRomFSRequested(u64 program_id, const std::string& game_path, DumpRomFSTarget target); void CopyTIDRequested(u64 program_id); void NavigateToGamedbEntryRequested(u64 program_id, const CompatibilityList& compatibility_list); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 20f65d233..f462cd072 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -104,6 +104,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "input_common/main.h" #include "util/overlay_dialog.h" #include "video_core/gpu.h" +#include "video_core/renderer_base.h" #include "video_core/shader_notify.h" #include "yuzu/about_dialog.h" #include "yuzu/bootmanager.h" @@ -1426,7 +1427,8 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S const auto instruction_set_suffix = is_64bit ? " (64-bit)" : " (32-bit)"; title_name += instruction_set_suffix; LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version); - UpdateWindowTitle(title_name, title_version); + const auto gpu_vendor = system.GPU().Renderer().GetDeviceVendor(); + UpdateWindowTitle(title_name, title_version, gpu_vendor); loading_screen->Prepare(system.GetAppLoader()); loading_screen->show(); @@ -1880,7 +1882,8 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g } } -void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path) { +void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path, + DumpRomFSTarget target) { const auto failed = [this] { QMessageBox::warning(this, tr("RomFS Extraction Failed!"), tr("There was an error copying the RomFS files or the user " @@ -1908,7 +1911,10 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa return; } - const auto dump_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir); + const auto dump_dir = + target == DumpRomFSTarget::Normal + ? Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir) + : Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "atmosphere" / "contents"; const auto romfs_dir = fmt::format("{:016X}/romfs", *romfs_title_id); const auto path = Common::FS::PathToUTF8String(dump_dir / romfs_dir); @@ -1918,7 +1924,8 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa if (*romfs_title_id == program_id) { const u64 ivfc_offset = loader->ReadRomFSIVFCOffset(); const FileSys::PatchManager pm{program_id, system.GetFileSystemController(), installed}; - romfs = pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program); + romfs = + pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program, nullptr, false); } else { romfs = installed.GetEntry(*romfs_title_id, FileSys::ContentRecordType::Data)->GetRomFS(); } @@ -2855,8 +2862,8 @@ void GMainWindow::MigrateConfigFiles() { } } -void GMainWindow::UpdateWindowTitle(const std::string& title_name, - const std::string& title_version) { +void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version, + std::string_view gpu_vendor) { const auto branch_name = std::string(Common::g_scm_branch); const auto description = std::string(Common::g_scm_desc); const auto build_id = std::string(Common::g_build_id); @@ -2869,7 +2876,8 @@ void GMainWindow::UpdateWindowTitle(const std::string& title_name, if (title_name.empty()) { setWindowTitle(QString::fromStdString(window_title)); } else { - const auto run_title = fmt::format("{} | {} | {}", window_title, title_name, title_version); + const auto run_title = + fmt::format("{} | {} | {} | {}", window_title, title_name, title_version, gpu_vendor); setWindowTitle(QString::fromStdString(run_title)); } } diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 11f152cbe..45c8310e1 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -34,6 +34,7 @@ class QProgressDialog; class WaitTreeWidget; enum class GameListOpenTarget; enum class GameListRemoveTarget; +enum class DumpRomFSTarget; enum class InstalledEntryType; class GameListPlaceholder; @@ -244,7 +245,7 @@ private slots: void OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryType type); void OnGameListRemoveFile(u64 program_id, GameListRemoveTarget target, const std::string& game_path); - void OnGameListDumpRomFS(u64 program_id, const std::string& game_path); + void OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target); void OnGameListCopyTID(u64 program_id); void OnGameListNavigateToGamedbEntry(u64 program_id, const CompatibilityList& compatibility_list); @@ -287,8 +288,8 @@ private: InstallResult InstallNSPXCI(const QString& filename); InstallResult InstallNCA(const QString& filename); void MigrateConfigFiles(); - void UpdateWindowTitle(const std::string& title_name = {}, - const std::string& title_version = {}); + void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, + std::string_view gpu_vendor = {}); void UpdateStatusBar(); void UpdateStatusButtons(); void UpdateUISettings(); |