diff options
36 files changed, 613 insertions, 224 deletions
diff --git a/externals/dynarmic b/externals/dynarmic -Subproject a1cbea7948372989218a4e6159a95998d65876a +Subproject befe547d5631024a70d81d2ccee808bbfcb3854 diff --git a/src/audio_core/renderer/command/resample/upsample.cpp b/src/audio_core/renderer/command/resample/upsample.cpp index 6c3ff31f7..5f7db12ca 100644 --- a/src/audio_core/renderer/command/resample/upsample.cpp +++ b/src/audio_core/renderer/command/resample/upsample.cpp @@ -20,25 +20,25 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, const u32 target_sample_count, const u32 source_sample_count, UpsamplerState* state) { constexpr u32 WindowSize = 10; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow1{ - 51.93359375f, -18.80078125f, 9.73046875f, -5.33203125f, 2.84375f, - -1.41015625f, 0.62109375f, -0.2265625f, 0.0625f, -0.00390625f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc1{ + 0.95376587f, -0.12872314f, 0.060028076f, -0.032470703f, 0.017669678f, + -0.009124756f, 0.004272461f, -0.001739502f, 0.000579834f, -0.000091552734f, }; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow2{ - 105.35546875f, -24.52734375f, 11.9609375f, -6.515625f, 3.52734375f, - -1.796875f, 0.828125f, -0.32421875f, 0.1015625f, -0.015625f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc2{ + 0.8230896f, -0.19161987f, 0.093444824f, -0.05090332f, 0.027557373f, + -0.014038086f, 0.0064697266f, -0.002532959f, 0.00079345703f, -0.00012207031f, }; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow3{ - 122.08203125f, -16.47656250f, 7.68359375f, -4.15625000f, 2.26171875f, - -1.16796875f, 0.54687500f, -0.22265625f, 0.07421875f, -0.01171875f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc3{ + 0.6298828f, -0.19274902f, 0.09725952f, -0.05319214f, 0.028625488f, + -0.014373779f, 0.006500244f, -0.0024719238f, 0.0007324219f, -0.000091552734f, }; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow4{ - 23.73437500f, -9.62109375f, 5.07812500f, -2.78125000f, 1.46875000f, - -0.71484375f, 0.30859375f, -0.10546875f, 0.02734375f, 0.00000000f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc4{ + 0.4057312f, -0.1468811f, 0.07601929f, -0.041656494f, 0.022216797f, + -0.011016846f, 0.004852295f, -0.0017700195f, 0.00048828125f, -0.000030517578f, }; - constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow5{ - 80.62500000f, -24.67187500f, 12.44921875f, -6.80859375f, 3.66406250f, - -1.83984375f, 0.83203125f, -0.31640625f, 0.09375000f, -0.01171875f, + constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc5{ + 0.1854248f, -0.075164795f, 0.03967285f, -0.021728516f, 0.011474609f, + -0.005584717f, 0.0024108887f, -0.0008239746f, 0.00021362305f, 0.0f, }; if (!state->initialized) { @@ -91,52 +91,31 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize); }; - auto calculate_sample = [&state](std::span<const Common::FixedPoint<24, 8>> coeffs1, - std::span<const Common::FixedPoint<24, 8>> coeffs2) -> s32 { + auto calculate_sample = [&state](std::span<const Common::FixedPoint<17, 15>> coeffs1, + std::span<const Common::FixedPoint<17, 15>> coeffs2) -> s32 { auto output_index{state->history_output_index}; - auto start_pos{output_index - state->history_start_index + 1U}; - auto end_pos{10U}; + u64 result{0}; - if (start_pos < 10) { - end_pos = start_pos; - } - - u64 prev_contrib{0}; - u32 coeff_index{0}; - for (; coeff_index < end_pos; coeff_index++, output_index--) { - prev_contrib += static_cast<u64>(state->history[output_index].to_raw()) * - coeffs1[coeff_index].to_raw(); - } + for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) { + result += static_cast<u64>(state->history[output_index].to_raw()) * + coeffs1[coeff_index].to_raw(); - auto end_index{state->history_end_index}; - for (; start_pos < 9; start_pos++, coeff_index++, end_index--) { - prev_contrib += static_cast<u64>(state->history[end_index].to_raw()) * - coeffs1[coeff_index].to_raw(); + output_index = output_index == state->history_start_index ? state->history_end_index + : output_index - 1; } output_index = static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize); - start_pos = state->history_end_index - output_index + 1U; - end_pos = 10U; - if (start_pos < 10) { - end_pos = start_pos; - } - - u64 next_contrib{0}; - coeff_index = 0; - for (; coeff_index < end_pos; coeff_index++, output_index++) { - next_contrib += static_cast<u64>(state->history[output_index].to_raw()) * - coeffs2[coeff_index].to_raw(); - } + for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) { + result += static_cast<u64>(state->history[output_index].to_raw()) * + coeffs2[coeff_index].to_raw(); - auto start_index{state->history_start_index}; - for (; start_pos < 9; start_pos++, start_index++, coeff_index++) { - next_contrib += static_cast<u64>(state->history[start_index].to_raw()) * - coeffs2[coeff_index].to_raw(); + output_index = output_index == state->history_end_index ? state->history_start_index + : output_index + 1; } - return static_cast<s32>(((prev_contrib >> 15) + (next_contrib >> 15)) >> 8); + return static_cast<s32>(result >> (8 + 15)); }; switch (state->ratio.to_int_floor()) { @@ -150,23 +129,23 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, break; case 1: - output[write_index] = calculate_sample(SincWindow3, SincWindow4); + output[write_index] = calculate_sample(WindowedSinc1, WindowedSinc5); break; case 2: - output[write_index] = calculate_sample(SincWindow2, SincWindow1); + output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4); break; case 3: - output[write_index] = calculate_sample(SincWindow5, SincWindow5); + output[write_index] = calculate_sample(WindowedSinc3, WindowedSinc3); break; case 4: - output[write_index] = calculate_sample(SincWindow1, SincWindow2); + output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2); break; case 5: - output[write_index] = calculate_sample(SincWindow4, SincWindow3); + output[write_index] = calculate_sample(WindowedSinc5, WindowedSinc1); break; } state->sample_index = static_cast<u8>((state->sample_index + 1) % 6); @@ -183,11 +162,11 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, break; case 1: - output[write_index] = calculate_sample(SincWindow2, SincWindow1); + output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4); break; case 2: - output[write_index] = calculate_sample(SincWindow1, SincWindow2); + output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2); break; } state->sample_index = static_cast<u8>((state->sample_index + 1) % 3); @@ -204,12 +183,12 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input, break; case 1: - output[write_index] = calculate_sample(SincWindow1, SincWindow2); + output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2); break; case 2: increment(); - output[write_index] = calculate_sample(SincWindow2, SincWindow1); + output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4); break; } state->sample_index = static_cast<u8>((state->sample_index + 1) % 3); diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 1638b79f5..b1a2aa8b2 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -129,6 +129,10 @@ void UpdateRescalingInfo() { info.up_scale = 1; info.down_shift = 0; break; + case ResolutionSetup::Res3_2X: + info.up_scale = 3; + info.down_shift = 1; + break; case ResolutionSetup::Res2X: info.up_scale = 2; info.down_shift = 0; @@ -149,6 +153,14 @@ void UpdateRescalingInfo() { info.up_scale = 6; info.down_shift = 0; break; + case ResolutionSetup::Res7X: + info.up_scale = 7; + info.down_shift = 0; + break; + case ResolutionSetup::Res8X: + info.up_scale = 8; + info.down_shift = 0; + break; default: ASSERT(false); info.up_scale = 1; diff --git a/src/common/settings.h b/src/common/settings.h index a457e3f23..80b2eeabc 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -56,11 +56,14 @@ enum class ResolutionSetup : u32 { Res1_2X = 0, Res3_4X = 1, Res1X = 2, - Res2X = 3, - Res3X = 4, - Res4X = 5, - Res5X = 6, - Res6X = 7, + Res3_2X = 3, + Res2X = 4, + Res3X = 5, + Res4X = 6, + Res5X = 7, + Res6X = 8, + Res7X = 9, + Res8X = 10, }; enum class ScalingFilter : u32 { diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 0e7b5f943..6bac6722f 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -142,16 +142,24 @@ void CoreTiming::ScheduleLoopingEvent(std::chrono::nanoseconds start_time, } void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, - std::uintptr_t user_data) { - std::scoped_lock scope{basic_lock}; - const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { - return e.type.lock().get() == event_type.get() && e.user_data == user_data; - }); - - // Removing random items breaks the invariant so we have to re-establish it. - if (itr != event_queue.end()) { - event_queue.erase(itr, event_queue.end()); - std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + std::uintptr_t user_data, bool wait) { + { + std::scoped_lock lk{basic_lock}; + const auto itr = + std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { + return e.type.lock().get() == event_type.get() && e.user_data == user_data; + }); + + // Removing random items breaks the invariant so we have to re-establish it. + if (itr != event_queue.end()) { + event_queue.erase(itr, event_queue.end()); + std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); + } + } + + // Force any in-progress events to finish + if (wait) { + std::scoped_lock lk{advance_lock}; } } @@ -190,20 +198,6 @@ u64 CoreTiming::GetClockTicks() const { return CpuCyclesToClockCycles(ticks); } -void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) { - std::scoped_lock lock{basic_lock}; - - const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { - return e.type.lock().get() == event_type.get(); - }); - - // Removing random items breaks the invariant so we have to re-establish it. - if (itr != event_queue.end()) { - event_queue.erase(itr, event_queue.end()); - std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>()); - } -} - std::optional<s64> CoreTiming::Advance() { std::scoped_lock lock{advance_lock, basic_lock}; global_timer = GetGlobalTimeNs().count(); diff --git a/src/core/core_timing.h b/src/core/core_timing.h index b5925193c..da366637b 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -98,10 +98,13 @@ public: const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data = 0, bool absolute_time = false); - void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data); + void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data, + bool wait = true); - /// We only permit one event of each type in the queue at a time. - void RemoveEvent(const std::shared_ptr<EventType>& event_type); + void UnscheduleEventWithoutWait(const std::shared_ptr<EventType>& event_type, + std::uintptr_t user_data) { + UnscheduleEvent(event_type, user_data, false); + } void AddTicks(u64 ticks_to_add); diff --git a/src/core/hle/kernel/k_hardware_timer.cpp b/src/core/hle/kernel/k_hardware_timer.cpp index 6bba79ea0..4dcd53821 100644 --- a/src/core/hle/kernel/k_hardware_timer.cpp +++ b/src/core/hle/kernel/k_hardware_timer.cpp @@ -18,7 +18,8 @@ void KHardwareTimer::Initialize() { } void KHardwareTimer::Finalize() { - this->DisableInterrupt(); + m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this)); + m_wakeup_time = std::numeric_limits<s64>::max(); m_event_type.reset(); } @@ -59,7 +60,8 @@ void KHardwareTimer::EnableInterrupt(s64 wakeup_time) { } void KHardwareTimer::DisableInterrupt() { - m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this)); + m_kernel.System().CoreTiming().UnscheduleEventWithoutWait(m_event_type, + reinterpret_cast<uintptr_t>(this)); m_wakeup_time = std::numeric_limits<s64>::max(); } diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index d1cbadde4..f4416f5b2 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -312,8 +312,6 @@ void NVFlinger::Compose() { } s64 NVFlinger::GetNextTicks() const { - static constexpr s64 max_hertz = 120LL; - const auto& settings = Settings::values; auto speed_scale = 1.f; if (settings.use_multi_core.GetValue()) { @@ -327,9 +325,11 @@ s64 NVFlinger::GetNextTicks() const { } } - const auto next_ticks = ((1000000000 * (1LL << swap_interval)) / max_hertz); + // As an extension, treat nonpositive swap interval as framerate multiplier. + const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval) + : 60.f / static_cast<f32>(swap_interval); - return static_cast<s64>(speed_scale * static_cast<float>(next_ticks)); + return static_cast<s64>(speed_scale * (1000000000.f / effective_fps)); } } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 9b22397db..3828cf272 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -133,7 +133,7 @@ private: /// layers. u32 next_buffer_queue_id = 1; - u32 swap_interval = 1; + s32 swap_interval = 1; /// Event that handles screen composition. std::shared_ptr<Core::Timing::EventType> multi_composition_event; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a1e41faff..4e605fae4 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -383,6 +383,10 @@ struct Memory::Impl { return; } + if (Settings::IsFastmemEnabled()) { + system.DeviceMemory().buffer.Protect(vaddr, size, !debug, !debug); + } + // Iterate over a contiguous CPU address space, marking/unmarking the region. // The region is at a granularity of CPU pages. diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f617665de..b474eb363 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,8 @@ add_library(video_core STATIC renderer_null/null_rasterizer.h renderer_null/renderer_null.cpp renderer_null/renderer_null.h + renderer_opengl/blit_image.cpp + renderer_opengl/blit_image.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_compute_pipeline.cpp diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 2437121ce..1d22d25f1 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) { LOG_WARNING(HW_GPU, "(STUBBED) called"); break; } + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): { + DrawTexture(); + break; + } default: break; } @@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) { ProcessDraw(true, 1); } +void DrawManager::DrawTexture() { + const auto& regs{maxwell3d->regs}; + draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f; + draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f; + const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f; + const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f; + const bool lower_left{regs.window_origin.mode != + Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft}; + if (lower_left) { + draw_texture_state.dst_y0 -= dst_height; + } + draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; + draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; + draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f; + draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f; + draw_texture_state.src_x1 = + (static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width + + draw_texture_state.src_x0; + draw_texture_state.src_y1 = + (static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height + + draw_texture_state.src_y0; + draw_texture_state.src_sampler = regs.draw_texture.src_sampler; + draw_texture_state.src_texture = regs.draw_texture.src_texture; + + maxwell3d->rasterizer->DrawTexture(); +} + void DrawManager::UpdateTopology() { const auto& regs{maxwell3d->regs}; switch (regs.primitive_topology_control) { diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 58d1b2d59..7c22c49f1 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -32,6 +32,19 @@ public: std::vector<u8> inline_index_draw_indexes; }; + struct DrawTextureState { + f32 dst_x0; + f32 dst_y0; + f32 dst_x1; + f32 dst_y1; + f32 src_x0; + f32 src_y0; + f32 src_x1; + f32 src_y1; + u32 src_sampler; + u32 src_texture; + }; + struct IndirectParams { bool is_indexed; bool include_count; @@ -64,6 +77,10 @@ public: return draw_state; } + const DrawTextureState& GetDrawTextureState() const { + return draw_texture_state; + } + IndirectParams& GetIndirectParams() { return indirect_state; } @@ -81,6 +98,8 @@ private: void DrawIndexSmall(u32 argument); + void DrawTexture(); + void UpdateTopology(); void ProcessDraw(bool draw_indexed, u32 instance_count); @@ -89,6 +108,7 @@ private: Maxwell3D* maxwell3d{}; State draw_state{}; + DrawTextureState draw_texture_state{}; IndirectParams indirect_state{}; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 97f547789..ae9da6290 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) { case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): case MAXWELL3D_REG_INDEX(vertex_array_instance_first): case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): case MAXWELL3D_REG_INDEX(wait_for_idle): case MAXWELL3D_REG_INDEX(shadow_ram_control): case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): @@ -467,7 +468,7 @@ void Maxwell3D::ProcessMacroBind(u32 data) { } void Maxwell3D::ProcessFirmwareCall4() { - LOG_WARNING(HW_GPU, "(STUBBED) called"); + LOG_DEBUG(HW_GPU, "(STUBBED) called"); // Firmware call 4 is a blob that changes some registers depending on its parameters. // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1. diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0b2fd2928..c89969bb4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1599,6 +1599,20 @@ public: }; static_assert(sizeof(TIRModulationCoeff) == 0x4); + struct DrawTexture { + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 dx_du; + s64 dy_dv; + u32 src_sampler; + u32 src_texture; + s32 src_x0; + s32 src_y0; + }; + static_assert(sizeof(DrawTexture) == 0x30); + struct ReduceColorThreshold { union { BitField<0, 8, u32> all_hit_once; @@ -2751,7 +2765,7 @@ public: u32 reserved_sw_method2; ///< 0x102C std::array<TIRModulationCoeff, 5> tir_modulation_coeff; ///< 0x1030 std::array<u32, 15> spare_nop; ///< 0x1044 - INSERT_PADDING_BYTES_NOINIT(0x30); + DrawTexture draw_texture; ///< 0x1080 std::array<u32, 7> reserved_sw_method3_to_7; ///< 0x10B0 ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC std::array<u32, 4> reserved_sw_method10_to_13; ///< 0x10D0 diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index f275b2aa9..e968ae220 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,6 +11,7 @@ set(GLSL_INCLUDES set(SHADER_FILES astc_decoder.comp + blit_color_float.frag block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag @@ -36,7 +37,6 @@ set(SHADER_FILES smaa_blending_weight_calculation.frag smaa_neighborhood_blending.vert smaa_neighborhood_blending.frag - vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag vulkan_fidelityfx_fsr_easu_fp16.comp vulkan_fidelityfx_fsr_easu_fp32.comp diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/blit_color_float.frag index c0c832296..c0c832296 100644 --- a/src/video_core/host_shaders/vulkan_blit_color_float.frag +++ b/src/video_core/host_shaders/blit_color_float.frag diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert index 2c976b19f..d16d98995 100644 --- a/src/video_core/host_shaders/full_screen_triangle.vert +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -4,13 +4,20 @@ #version 450 #ifdef VULKAN +#define VERTEX_ID gl_VertexIndex #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { #define END_PUSH_CONSTANTS }; #define UNIFORM(n) +#define FLIPY 1 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define VERTEX_ID gl_VertexID #define BEGIN_PUSH_CONSTANTS #define END_PUSH_CONSTANTS +#define FLIPY -1 #define UNIFORM(n) layout (location = n) uniform +out gl_PerVertex { + vec4 gl_Position; +}; #endif BEGIN_PUSH_CONSTANTS @@ -21,8 +28,8 @@ END_PUSH_CONSTANTS layout(location = 0) out vec2 texcoord; void main() { - float x = float((gl_VertexIndex & 1) << 2); - float y = float((gl_VertexIndex & 2) << 1); - gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + float x = float((VERTEX_ID & 1) << 2); + float y = float((VERTEX_ID & 2) << 1); + gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0); texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); } diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1735b6164..33e2610bc 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,6 +47,9 @@ public: /// Dispatches an indirect draw invocation virtual void DrawIndirect() {} + /// Dispatches an draw texture invocation + virtual void DrawTexture() = 0; + /// Clear the current framebuffer virtual void Clear(u32 layer_count) = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 2c11345d7..2b5c7defa 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp RasterizerNull::~RasterizerNull() = default; void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} +void RasterizerNull::DrawTexture() {} void RasterizerNull::Clear(u32 layer_count) {} void RasterizerNull::DispatchCompute() {} void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 2112aa70e..51f896e43 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -31,6 +31,7 @@ public: ~RasterizerNull() override; void Draw(bool is_indexed, u32 instance_count) override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp new file mode 100644 index 000000000..9a560a73b --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.cpp @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <algorithm> + +#include "video_core/host_shaders/blit_color_float_frag.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" +#include "video_core/renderer_opengl/blit_image.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { + +BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_) + : program_manager(program_manager_), + full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)), + blit_color_to_color_frag( + CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthRangeIndexed(0, 0.0, 0.0); + + program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle); + glProgramUniform2f(full_screen_vert.handle, 0, + static_cast<float>(src_region.end.x - src_region.start.x) / + static_cast<float>(src_size.width), + static_cast<float>(src_region.end.y - src_region.start.y) / + static_cast<float>(src_size.height)); + glProgramUniform2f(full_screen_vert.handle, 1, + static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), + static_cast<float>(src_region.start.y) / + static_cast<float>(src_size.height)); + glViewport(std::min(dst_region.start.x, dst_region.end.x), + std::min(dst_region.start.y, dst_region.end.y), + std::abs(dst_region.end.x - dst_region.start.x), + std::abs(dst_region.end.y - dst_region.start.y)); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer); + glBindSampler(0, src_sampler); + glBindTextureUnit(0, src_image_view); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h new file mode 100644 index 000000000..5a2b12d16 --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <glad/glad.h> + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; +using VideoCommon::Region2D; + +class ProgramManager; +class Framebuffer; +class ImageView; + +class BlitImageHelper { +public: + explicit BlitImageHelper(ProgramManager& program_manager); + ~BlitImageHelper(); + + void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size); + +private: + ProgramManager& program_manager; + + OGLProgram full_screen_vert; + OGLProgram blit_color_to_color_frag; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index cee5c3247..22ed16ebf 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; + has_draw_texture = GLAD_GL_NV_draw_texture; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; need_fastmath_off = is_nvidia; can_report_memory = GLAD_GL_NVX_gpu_memory_info; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2a72d84be..3ff8cad83 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -4,6 +4,8 @@ #pragma once #include <cstddef> +#include <string> + #include "common/common_types.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/stage.h" @@ -146,6 +148,10 @@ public: return has_sparse_texture_2; } + bool HasDrawTexture() const { + return has_draw_texture; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -216,6 +222,7 @@ private: bool has_shader_int64{}; bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; + bool has_draw_texture{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; bool has_cbuf_ftou_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 181857d9c..7bced675c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), + blit_image(program_manager_) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -320,6 +321,47 @@ void RasterizerOpenGL::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerOpenGL::DrawTexture() { + MICROPROFILE_SCOPE(OpenGL_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + SyncState(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + + if (device.HasDrawTexture()) { + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0, + draw_texture_state.dst_y0, draw_texture_state.dst_x1, + draw_texture_state.dst_y1, 0, + draw_texture_state.src_x0 / static_cast<float>(texture.size.width), + draw_texture_state.src_y0 / static_cast<float>(texture.size.height), + draw_texture_state.src_x1 / static_cast<float>(texture.size.width), + draw_texture_state.src_y1 / static_cast<float>(texture.size.height)); + } else { + Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), + .y = static_cast<s32>(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), + .y = static_cast<s32>(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), + .y = static_cast<s32>(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), + .y = static_cast<s32>(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(), + sampler->Handle(), dst_region, src_region, texture.size); + } + + ++num_queued_commands; +} + void RasterizerOpenGL::DispatchCompute() { gpu_memory->FlushCaching(); ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index be4f76c18..0c45832ae 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -16,6 +16,7 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/blit_image.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" @@ -70,6 +71,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -224,6 +226,8 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; + BlitImageHelper blit_image; + boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index d9c29d8b7..98841ae65 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,2 +1,123 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, +}; + +ProgramManager::ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } +} + +void ProgramManager::BindComputeProgram(GLuint program) { + glUseProgram(program); + is_compute_bound = true; +} + +void ProgramManager::BindComputeAssemblyProgram(GLuint program) { + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + UnbindPipeline(); +} + +void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { + static constexpr std::array<GLenum, 5> stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); +} + +void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } + } + BindPipeline(); +} + +void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, + u32 stage_mask) { + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + UnbindPipeline(); +} + +void ProgramManager::RestoreGuestCompute() {} + +void ProgramManager::BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); +} + +void ProgramManager::UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); +} + +void ProgramManager::UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index a84f5aeb3..07ffab77f 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,8 +6,6 @@ #include <array> #include <span> -#include <glad/glad.h> - #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -16,121 +14,28 @@ namespace OpenGL { class ProgramManager { static constexpr size_t NUM_STAGES = 5; - static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ - GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, - GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, - }; - public: - explicit ProgramManager(const Device& device) { - glCreateProgramPipelines(1, &pipeline.handle); - if (device.UseAssemblyShaders()) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } - } - - void BindComputeProgram(GLuint program) { - glUseProgram(program); - is_compute_bound = true; - } - - void BindComputeAssemblyProgram(GLuint program) { - if (current_assembly_compute_program != program) { - current_assembly_compute_program = program; - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } - UnbindPipeline(); - } - - void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { - static constexpr std::array<GLenum, 5> stage_enums{ - GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, - GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, - }; - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_programs[stage] != programs[stage].handle) { - current_programs[stage] = programs[stage].handle; - glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); - } - } - BindPipeline(); - } - - void BindPresentPrograms(GLuint vertex, GLuint fragment) { - if (current_programs[0] != vertex) { - current_programs[0] = vertex; - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); - } - if (current_programs[4] != fragment) { - current_programs[4] = fragment; - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); - } - glUseProgramStages( - pipeline.handle, - GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); - current_programs[1] = 0; - current_programs[2] = 0; - current_programs[3] = 0; - - if (current_stage_mask != 0) { - current_stage_mask = 0; - for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { - glDisable(program_type); - } - } - BindPipeline(); - } + explicit ProgramManager(const Device& device); + + void BindComputeProgram(GLuint program); + + void BindComputeAssemblyProgram(GLuint program); + + void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs); + + void BindPresentPrograms(GLuint vertex, GLuint fragment); void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, - u32 stage_mask) { - const u32 changed_mask = current_stage_mask ^ stage_mask; - current_stage_mask = stage_mask; - - if (changed_mask != 0) { - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (((changed_mask >> stage) & 1) != 0) { - if (((stage_mask >> stage) & 1) != 0) { - glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); - } else { - glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); - } - } - } - } - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_programs[stage] != programs[stage].handle) { - current_programs[stage] = programs[stage].handle; - glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); - } - } - UnbindPipeline(); - } - - void RestoreGuestCompute() {} + u32 stage_mask); + + void RestoreGuestCompute(); private: - void BindPipeline() { - if (!is_pipeline_bound) { - is_pipeline_bound = true; - glBindProgramPipeline(pipeline.handle); - } - UnbindCompute(); - } - - void UnbindPipeline() { - if (is_pipeline_bound) { - is_pipeline_bound = false; - glBindProgramPipeline(0); - } - UnbindCompute(); - } - - void UnbindCompute() { - if (is_compute_bound) { - is_compute_bound = false; - glUseProgram(0); - } - } + void BindPipeline(); + + void UnbindPipeline(); + + void UnbindCompute(); OGLPipeline pipeline; bool is_pipeline_bound{}; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 3f2b139e0..dd00d3edf 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,13 +4,13 @@ #include <algorithm> #include "common/settings.h" +#include "video_core/host_shaders/blit_color_float_frag_spv.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" -#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" @@ -303,7 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri } void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, - const Region2D& src_region) { + const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { const VkOffset2D offset{ .x = std::min(dst_region.start.x, dst_region.end.x), .y = std::min(dst_region.start.y, dst_region.end.y), @@ -325,12 +325,15 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi .offset = offset, .extent = extent, }; - const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x); - const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y); + const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) / + static_cast<float>(src_size.width); + const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) / + static_cast<float>(src_size.height); const PushConstants push_constants{ .tex_scale = {scale_x, scale_y}, - .tex_offset = {static_cast<float>(src_region.start.x), - static_cast<float>(src_region.start.y)}, + .tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), + static_cast<float>(src_region.start.y) / + static_cast<float>(src_size.height)}, }; cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); @@ -347,6 +350,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) { .height = is_rescaled ? resolution.ScaleUp(height) : height, }; } + +void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, + VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { + constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; + const VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = flags, + .dstAccessMask = flags, + .oldLayout = source_layout, + .newLayout = target_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, barrier); +} + +void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { + const VkRenderPass render_pass = framebuffer->RenderPass(); + const VkFramebuffer framebuffer_handle = framebuffer->Handle(); + const VkExtent2D render_area = framebuffer->RenderArea(); + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = render_pass, + .framebuffer = framebuffer_handle, + .renderArea{ + .offset{}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); +} } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, @@ -365,7 +413,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(two_textures_set_layout.address()))), full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), - blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)), blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), @@ -404,6 +452,32 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView scheduler.InvalidateState(); } +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy, + }; + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region, + src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL); + BeginRenderPass(cmdbuf, dst_framebuffer); + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region, src_size); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + }); +} + void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 5df679fb4..be8a9a2f6 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -10,6 +10,8 @@ namespace Vulkan { +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; using VideoCommon::Region2D; class Device; @@ -36,6 +38,10 @@ public: Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); + void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size); + void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b75b8eec6..86ef0daeb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -266,6 +266,35 @@ void RasterizerVulkan::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerVulkan::DrawTexture() { + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + UpdateDynamicStates(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), + .y = static_cast<s32>(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), + .y = static_cast<s32>(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), + .y = static_cast<s32>(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), + .y = static_cast<s32>(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), + texture.ImageHandle(), sampler->Handle(), dst_region, src_region, + texture.size); +} + void RasterizerVulkan::Clear(u32 layer_count) { MICROPROFILE_SCOPE(Vulkan_Clearing); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 472cc64d9..a0508b57c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -66,6 +66,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 87152c8e9..1b01990a4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -149,6 +149,13 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { } template <class P> +typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept { + const auto image_view_id = VisitImageView(channel_state->graphics_image_table, + channel_state->graphics_image_view_ids, index); + return slot_image_views[image_view_id]; +} + +template <class P> void TextureCache<P>::MarkModification(ImageId id) noexcept { MarkModification(slot_images[id]); } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4eea1f609..485eaabaa 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -129,6 +129,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Get the imageview from the graphics descriptor table in the specified index + [[nodiscard]] ImageView& GetImageView(u32 index) noexcept; + /// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index aa02cc63c..bb9910a53 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -366,6 +366,11 @@ </item> <item> <property name="text"> + <string>1.5X (1080p/1620p) [EXPERIMENTAL]</string> + </property> + </item> + <item> + <property name="text"> <string>2X (1440p/2160p)</string> </property> </item> @@ -389,6 +394,16 @@ <string>6X (4320p/6480p)</string> </property> </item> + <item> + <property name="text"> + <string>7X (5040p/7560p)</string> + </property> + </item> + <item> + <property name="text"> + <string>8X (5760p/8640p)</string> + </property> + </item> </widget> </item> </layout> |