diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 31 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 13 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 83 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.h | 18 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_resource_pool.cpp | 14 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_resource_pool.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_state_tracker.cpp | 34 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_state_tracker.h | 5 | ||||
-rw-r--r-- | src/video_core/shader/async_shaders.cpp | 1 |
13 files changed, 152 insertions, 71 deletions
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 31eb54123..12434db67 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -763,6 +763,37 @@ void Image::DownloadMemory(ImageBufferMap& map, } } +GLuint Image::StorageHandle() noexcept { + switch (info.format) { + case PixelFormat::A8B8G8R8_SRGB: + case PixelFormat::B8G8R8A8_SRGB: + case PixelFormat::BC1_RGBA_SRGB: + case PixelFormat::BC2_SRGB: + case PixelFormat::BC3_SRGB: + case PixelFormat::BC7_SRGB: + case PixelFormat::ASTC_2D_4X4_SRGB: + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_8X5_SRGB: + case PixelFormat::ASTC_2D_5X4_SRGB: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5_SRGB: + if (store_view.handle != 0) { + return store_view.handle; + } + store_view.Create(); + glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0, + info.resources.levels, 0, info.resources.layers); + return store_view.handle; + default: + return texture.handle; + } +} + void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) { // Compressed formats don't have a pixel format or type const bool is_compressed = gl_format == GL_NONE; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 874cf54f4..a6172f009 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -145,6 +145,8 @@ public: void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); + GLuint StorageHandle() noexcept; + GLuint Handle() const noexcept { return texture.handle; } @@ -155,8 +157,8 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLTextureView store_view; OGLBuffer buffer; + OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 1b58e8617..31ec68505 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -93,7 +93,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, glUniform1ui(7, params.block_height_mask); glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, GL_WRITE_ONLY, store_format); glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } @@ -134,7 +134,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, glUniform1ui(9, params.block_depth_mask); glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, GL_WRITE_ONLY, store_format); glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); } @@ -164,7 +164,8 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, glUniform2i(LOC_DESTINATION, 0, 0); glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); glUniform1ui(LOC_PITCH, pitch); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, + format); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; const size_t input_offset = swizzle.buffer_offset + map.offset; @@ -195,9 +196,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); - glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, - GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); - glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), + glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), + copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5be6dabd9..362278f01 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -12,14 +12,15 @@ #include "common/cityhash.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { namespace { -constexpr std::size_t POINT = 0; -constexpr std::size_t LINE = 1; -constexpr std::size_t POLYGON = 2; +constexpr size_t POINT = 0; +constexpr size_t LINE = 1; +constexpr size_t POLYGON = 2; constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POINT, // Points LINE, // Lines @@ -40,10 +41,14 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { } // Anonymous namespace -void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { - const std::array enabled_lut = {regs.polygon_offset_point_enable, - regs.polygon_offset_line_enable, - regs.polygon_offset_fill_enable}; +void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, + bool has_extended_dynamic_state) { + const Maxwell& regs = maxwell3d.regs; + const std::array enabled_lut{ + regs.polygon_offset_point_enable, + regs.polygon_offset_line_enable, + regs.polygon_offset_fill_enable, + }; const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); raw1 = 0; @@ -64,45 +69,53 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta raw2 = 0; const auto test_func = - regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; + regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); point_size = Common::BitCast<u32>(regs.point_size); - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - binding_divisors[index] = - regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; + if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { + maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + } } - - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast<u32>(input.type.Value())); - attribute.size.Assign(static_cast<u32>(input.size.Value())); - attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0); + if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { + maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast<u32>(input.type.Value())); + attribute.size.Assign(static_cast<u32>(input.size.Value())); + } } - - for (std::size_t index = 0; index < std::size(attachments); ++index) { - attachments[index].Fill(regs, index); + if (maxwell3d.dirty.flags[Dirty::Blending]) { + maxwell3d.dirty.flags[Dirty::Blending] = false; + for (size_t index = 0; index < attachments.size(); ++index) { + attachments[index].Refresh(regs, index); + } + } + if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) { + maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false; + const auto& transform = regs.viewport_transform; + std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) { + return static_cast<u16>(viewport.swizzle.raw); + }); } - - const auto& transform = regs.viewport_transform; - std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(), - [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); }); - if (!has_extended_dynamic_state) { no_extended_dynamic_state.Assign(1); - dynamic_state.Fill(regs); + dynamic_state.Refresh(regs); } } -void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { +void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; raw = 0; @@ -141,7 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size enable.Assign(1); } -void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { +void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { // Flip front face @@ -178,9 +191,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { }); } -std::size_t FixedPipelineState::Hash() const noexcept { +size_t FixedPipelineState::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); - return static_cast<std::size_t>(hash); + return static_cast<size_t>(hash); } bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 465a55fdb..a0eb83a68 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -58,7 +58,7 @@ struct FixedPipelineState { BitField<30, 1, u32> enable; }; - void Fill(const Maxwell& regs, std::size_t index); + void Refresh(const Maxwell& regs, size_t index); constexpr std::array<bool, 4> Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; @@ -96,8 +96,6 @@ struct FixedPipelineState { BitField<6, 14, u32> offset; BitField<20, 3, u32> type; BitField<23, 6, u32> size; - // Not really an element of a vertex attribute, but it can be packed here - BitField<29, 1, u32> binding_index_enabled; constexpr Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); @@ -108,7 +106,7 @@ struct FixedPipelineState { } }; - template <std::size_t Position> + template <size_t Position> union StencilFace { BitField<Position + 0, 3, u32> action_stencil_fail; BitField<Position + 3, 3, u32> action_depth_fail; @@ -152,7 +150,7 @@ struct FixedPipelineState { // Vertex stride is a 12 bits value, we have 4 bits to spare per element std::array<u16, Maxwell::NumVertexArrays> vertex_strides; - void Fill(const Maxwell& regs); + void Refresh(const Maxwell& regs); Maxwell::ComparisonOp DepthTestFunc() const noexcept { return UnpackComparisonOp(depth_test_func); @@ -199,9 +197,9 @@ struct FixedPipelineState { std::array<u16, Maxwell::NumViewports> viewport_swizzles; DynamicState dynamic_state; - void Fill(const Maxwell& regs, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const FixedPipelineState& rhs) const noexcept; @@ -209,8 +207,8 @@ struct FixedPipelineState { return !operator==(rhs); } - std::size_t Size() const noexcept { - const std::size_t total_size = sizeof *this; + size_t Size() const noexcept { + const size_t total_size = sizeof *this; return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); } }; @@ -224,7 +222,7 @@ namespace std { template <> struct hash<Vulkan::FixedPipelineState> { - std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept { + size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept { return k.Hash(); } }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d50dca604..fc6dd83eb 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -221,9 +221,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, std::vector<VkVertexInputBindingDescription> vertex_bindings; std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (state.attributes[index].binding_index_enabled == 0) { - continue; - } const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 684d4e3a6..dfd38f575 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -267,8 +267,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - GraphicsPipelineCacheKey key; - key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); + graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; @@ -276,14 +275,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { texture_cache.UpdateRenderTargets(false); const auto shaders = pipeline_cache.GetShaders(); - key.shaders = GetShaderAddresses(shaders); + graphics_key.shaders = GetShaderAddresses(shaders); + SetupShaderDescriptors(shaders, is_indexed); const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - key.renderpass = framebuffer->RenderPass(); + graphics_key.renderpass = framebuffer->RenderPass(); - auto* const pipeline = - pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); + VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( + graphics_key, framebuffer->NumColorBuffers(), async_shaders); if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { // Async graphics pipeline was not ready. return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7fc6741da..acea1ba2d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -20,6 +20,7 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -173,6 +174,8 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; + GraphicsPipelineCacheKey graphics_key; + TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index ee274ac59..a8bf7bda8 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -17,21 +17,21 @@ ResourcePool::~ResourcePool() = default; size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results master_semaphore.Refresh(); - - const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> { + const u64 gpu_tick = master_semaphore.KnownGpuTick(); + const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { for (size_t iterator = begin; iterator < end; ++iterator) { - if (master_semaphore.IsFree(ticks[iterator])) { + if (gpu_tick >= ticks[iterator]) { ticks[iterator] = master_semaphore.CurrentTick(); return iterator; } } - return {}; + return std::nullopt; }; // Try to find a free resource from the hinted position to the end. - auto found = search(free_iterator, ticks.size()); + std::optional<size_t> found = search(hint_iterator, ticks.size()); if (!found) { // Search from beginning to the hinted position. - found = search(0, free_iterator); + found = search(0, hint_iterator); if (!found) { // Both searches failed, the pool is full; handle it. const size_t free_resource = ManageOverflow(); @@ -41,7 +41,7 @@ size_t ResourcePool::CommitResource() { } } // Free iterator is hinted to the resource after the one that's been commited. - free_iterator = (*found + 1) % ticks.size(); + hint_iterator = (*found + 1) % ticks.size(); return *found; } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index a018c7ec2..9d0bb3b4d 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -36,7 +36,7 @@ private: MasterSemaphore& master_semaphore; size_t grow_step = 0; ///< Number of new resources created after an overflow - size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found + size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector<u64> ticks; ///< Ticks for each resource }; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index e81fad007..956f86845 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -18,9 +18,7 @@ #define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace Vulkan { - namespace { - using namespace Dirty; using namespace VideoCommon::Dirty; using Tegra::Engines::Maxwell3D; @@ -128,6 +126,34 @@ void SetupDirtyStencilTestEnable(Tables& tables) { tables[0][OFF(stencil_enable)] = StencilTestEnable; } +void SetupDirtyBlending(Tables& tables) { + tables[0][OFF(color_mask_common)] = Blending; + tables[0][OFF(independent_blend_enable)] = Blending; + FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending); + FillBlock(tables[0], OFF(blend), NUM(blend), Blending); + FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); +} + +void SetupDirtyInstanceDivisors(Tables& tables) { + static constexpr size_t divisor_offset = 3; + for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { + tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; + tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = + InstanceDivisors; + } +} + +void SetupDirtyVertexAttributes(Tables& tables) { + FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); +} + +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; + } +} } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) @@ -148,6 +174,10 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyFrontFace(tables); SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); + SetupDirtyBlending(tables); + SetupDirtyInstanceDivisors(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyViewportSwizzles(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index c335d2bdf..84e918a71 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -35,6 +35,11 @@ enum : u8 { StencilOp, StencilTestEnable, + Blending, + InstanceDivisors, + VertexAttributes, + ViewportSwizzles, + Last }; static_assert(Last <= std::numeric_limits<u8>::max()); diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 3b40db9bc..02adcf9c7 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -64,6 +64,7 @@ void AsyncShaders::FreeWorkers() { void AsyncShaders::KillWorkers() { is_thread_exiting.store(true); + cv.notify_all(); for (auto& thread : worker_threads) { thread.detach(); } |