summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
author ReinUsesLisp <reinuseslisp@airmail.cc> 2021-05-26 23:32:59 +0200
committer ameerj <52414509+ameerj@users.noreply.github.com> 2021-07-23 03:51:33 +0200
commit adb591a757ccb289634920d51cf519b515ca32b6 (patch)
tree 987c5cd0a346e69633791ad5ec355b6104ab036e /src/video_core
parent glasm: Implement Y direction (diff)
download yuzu-adb591a757ccb289634920d51cf519b515ca32b6.tar
yuzu-adb591a757ccb289634920d51cf519b515ca32b6.tar.gz
yuzu-adb591a757ccb289634920d51cf519b515ca32b6.tar.bz2
yuzu-adb591a757ccb289634920d51cf519b515ca32b6.tar.lz
yuzu-adb591a757ccb289634920d51cf519b515ca32b6.tar.xz
yuzu-adb591a757ccb289634920d51cf519b515ca32b6.tar.zst
yuzu-adb591a757ccb289634920d51cf519b515ca32b6.zip
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 26
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_compute_pipeline.cpp 42
-rw-r--r-- src/video_core/renderer_opengl/gl_compute_pipeline.h 12
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 18
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp 19
-rw-r--r-- src/video_core/renderer_opengl/gl_graphics_pipeline.h 12
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 13
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 30
11 files changed, 120 insertions, 67 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2d0ef1307..334ed470f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
- if (use_assembly_shaders) {
+ if (use_storage_buffers) {
+ const GLuint base_binding = graphics_base_storage_bindings[stage];
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ } else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
- } else {
- const GLuint base_binding = graphics_base_storage_bindings[stage];
- const GLuint binding = base_binding + binding_index;
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
- static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
- if (use_assembly_shaders) {
+ if (use_storage_buffers) {
+ if (size != 0) {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+ }
+ } else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
- } else if (size == 0) {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
- } else {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
- static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 4986c65fd..bc16abafb 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -147,6 +147,10 @@ public:
image_handles = image_handles_;
}
+ void SetEnableStorageBuffers(bool use_storage_buffers_) {
+ use_storage_buffers = use_storage_buffers_;
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -160,6 +164,8 @@ private:
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
+ bool use_storage_buffers = false;
+
u32 max_attributes = 0;
std::array<GLuint, 5> graphics_base_uniform_bindings{};
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 700ebd8b8..5cf5f97a9 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -17,6 +17,15 @@ using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 16;
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+ u32 num{};
+ for (const auto& desc : range) {
+ num += desc.count;
+ }
+ return num;
+}
+
size_t ComputePipelineKey::Hash() const noexcept {
return static_cast<size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
@@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
return std::memcmp(this, &rhs, sizeof *this) == 0;
}
-ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
- Tegra::MemoryManager& gpu_memory_,
+ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::KeplerCompute& kepler_compute_,
ProgramManager& program_manager_, const Shader::Info& info_,
OGLProgram source_program_, OGLAssemblyProgram assembly_program_)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_},
source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} {
- for (const auto& desc : info.texture_buffer_descriptors) {
- num_texture_buffers += desc.count;
- }
- for (const auto& desc : info.image_buffer_descriptors) {
- num_image_buffers += desc.count;
- }
- u32 num_textures = num_texture_buffers;
- for (const auto& desc : info.texture_descriptors) {
- num_textures += desc.count;
- }
+
+ num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
+ num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+
+ const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
ASSERT(num_textures <= MAX_TEXTURES);
- u32 num_images = num_image_buffers;
- for (const auto& desc : info.image_descriptors) {
- num_images += desc.count;
- }
+ const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
ASSERT(num_images <= MAX_IMAGES);
+
+ const bool is_glasm{assembly_program.handle != 0};
+ const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
+ use_storage_buffers =
+ !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
+ writes_global_memory = !use_storage_buffers &&
+ std::ranges::any_of(info.storage_buffers_descriptors,
+ [](const auto& desc) { return desc.is_written; });
}
void ComputePipeline::Configure() {
@@ -150,6 +159,7 @@ void ComputePipeline::Configure() {
buffer_cache.UpdateComputeBuffers();
+ buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
buffer_cache.BindHostComputeBuffers();
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index e3b94e2f3..dd6b62ef2 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -28,6 +28,7 @@ struct Info;
namespace OpenGL {
+class Device;
class ProgramManager;
struct ComputePipelineKey {
@@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
class ComputePipeline {
public:
- explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
- Tegra::MemoryManager& gpu_memory_,
+ explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::KeplerCompute& kepler_compute_,
ProgramManager& program_manager_, const Shader::Info& info_,
OGLProgram source_program_, OGLAssemblyProgram assembly_program_);
void Configure();
+ [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+ return writes_global_memory;
+ }
+
private:
TextureCache& texture_cache;
BufferCache& buffer_cache;
@@ -70,6 +75,9 @@ private:
u32 num_texture_buffers{};
u32 num_image_buffers{};
+
+ bool use_storage_buffers{};
+ bool writes_global_memory{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 18bbc4c1f..01da2bb57 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -135,13 +135,13 @@ Device::Device() {
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
-
max_uniform_buffers = BuildMaxUniformBuffers();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
+ max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -236,22 +236,6 @@ std::string Device::GetVendorName() const {
return vendor_name;
}
-Device::Device(std::nullptr_t) {
- max_uniform_buffers.fill(std::numeric_limits<u32>::max());
- uniform_buffer_alignment = 4;
- shader_storage_alignment = 4;
- max_vertex_attributes = 16;
- max_varyings = 15;
- max_compute_shared_memory_size = 0x10000;
- has_warp_intrinsics = true;
- has_shader_ballot = true;
- has_vertex_viewport_layer = true;
- has_image_load_formatted = true;
- has_texture_shadow_lod = true;
- has_variable_aoffi = true;
- has_depth_buffer_float = true;
-}
-
bool Device::TestVariableAoffi() {
return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 152a3acd3..d67f5693c 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -13,7 +13,6 @@ namespace OpenGL {
class Device {
public:
explicit Device();
- explicit Device(std::nullptr_t);
[[nodiscard]] std::string GetVendorName() const;
@@ -41,6 +40,10 @@ public:
return max_compute_shared_memory_size;
}
+ u32 GetMaxGLASMStorageBufferBlocks() const {
+ return max_glasm_storage_buffer_blocks;
+ }
+
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
@@ -124,6 +127,7 @@ private:
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
+ u32 max_glasm_storage_buffer_blocks{};
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 32df35202..19d85c482 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 8;
template <typename Range>
-u32 AccumulateCount(Range&& range) {
+u32 AccumulateCount(const Range& range) {
u32 num{};
for (const auto& desc : range) {
num += desc.count;
@@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc
return std::memcmp(this, &rhs, Size()) == 0;
}
-GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
- Tegra::MemoryManager& gpu_memory_,
+GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::Maxwell3D& maxwell3d_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
OGLProgram program_,
@@ -90,6 +90,7 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu
}
u32 num_textures{};
u32 num_images{};
+ u32 num_storage_buffers{};
for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
const auto& info{stage_infos[stage]};
if (stage < 4) {
@@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu
num_textures += AccumulateCount(info.texture_descriptors);
num_images += AccumulateCount(info.image_descriptors);
+ num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
+
+ writes_global_memory |= std::ranges::any_of(
+ info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
}
ASSERT(num_textures <= MAX_TEXTURES);
ASSERT(num_images <= MAX_IMAGES);
- if (assembly_programs[0].handle != 0 && xfb_state) {
+ const bool assembly_shaders{assembly_programs[0].handle != 0};
+ use_storage_buffers =
+ !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+ writes_global_memory &= !use_storage_buffers;
+
+ if (assembly_shaders && xfb_state) {
GenerateTransformFeedbackState(*xfb_state);
}
}
@@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) {
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
+ buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
const auto& regs{maxwell3d.regs};
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 62f700cf5..c1113e180 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -20,6 +20,7 @@
namespace OpenGL {
+class Device;
class ProgramManager;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -60,8 +61,8 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
class GraphicsPipeline {
public:
- explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
- Tegra::MemoryManager& gpu_memory_,
+ explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::Maxwell3D& maxwell3d_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
OGLProgram program_,
@@ -77,6 +78,10 @@ public:
}
}
+ [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+ return writes_global_memory;
+ }
+
private:
void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state);
@@ -99,6 +104,9 @@ private:
std::array<u32, 5> num_texture_buffers{};
std::array<u32, 5> num_image_buffers{};
+ bool use_storage_buffers{};
+ bool writes_global_memory{};
+
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
GLsizei num_xfb_attribs{};
GLsizei num_xfb_strides{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eec01e8c2..5d4e80364 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
EndTransformFeedback();
++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute() {
- ComputePipeline* const program{shader_cache.CurrentComputePipeline()};
- if (!program) {
+ ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
+ if (!pipeline) {
return;
}
- program->Configure();
+ pipeline->Configure();
const auto& qmd{kepler_compute.launch_description};
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() {
// Make sure memory stored from the previous GL command stream is visible
// This is only needed on assembly shaders where we write to GPU memory with raw pointers
- // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used
- // and prefer using NV_shader_storage_buffer_object when possible
- if (Settings::values.use_assembly_shaders.GetValue()) {
+ if (has_written_global_memory) {
+ has_written_global_memory = false;
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
}
glFlush();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index afd43b2ee..d0397b745 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -225,7 +225,8 @@ private:
std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Resetted on flush.
- std::size_t num_queued_commands = 0;
+ size_t num_queued_commands = 0;
+ bool has_written_global_memory = false;
u32 last_clip_distance_mask = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3aa5ac31d..287f497b5 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) {
}
Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
- const Shader::IR::Program& program) {
+ const Shader::IR::Program& program,
+ bool glasm_use_storage_buffers) {
Shader::RuntimeInfo info;
switch (program.stage) {
case Shader::Stage::TessellationEval:
@@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
info.input_topology = Shader::InputTopology::TrianglesAdjacency;
break;
}
+ info.glasm_use_storage_buffers = glasm_use_storage_buffers;
return info;
}
@@ -435,7 +437,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs,
bool build_in_parallel) {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
- size_t env_index{0};
+ size_t env_index{};
+ u32 total_storage_buffers{};
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
if (key.unique_hashes[index] == 0) {
@@ -447,7 +450,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
+
+ for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
+ total_storage_buffers += desc.count;
+ }
}
+ const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
+ const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
+
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
OGLProgram source_program;
@@ -466,7 +476,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
const size_t stage_index{index - 1};
infos[stage_index] = &program.info;
- const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)};
+ const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)};
if (device.UseAssemblyShaders()) {
const std::string code{EmitGLASM(profile, runtime_info, program, binding)};
assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index));
@@ -479,7 +489,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
LinkProgram(source_program.handle);
}
return std::make_unique<GraphicsPipeline>(
- texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
+ device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
std::move(source_program), std::move(assembly_programs), infos,
key.xfb_enabled != 0 ? &key.xfb_state : nullptr);
}
@@ -508,10 +518,18 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
+
+ u32 num_storage_buffers{};
+ for (const auto& desc : program.info.storage_buffers_descriptors) {
+ num_storage_buffers += desc.count;
+ }
+ Shader::RuntimeInfo info;
+ info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+
OGLAssemblyProgram asm_program;
OGLProgram source_program;
if (device.UseAssemblyShaders()) {
- const std::string code{EmitGLASM(profile, program)};
+ const std::string code{EmitGLASM(profile, info, program)};
asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
} else {
const std::vector<u32> code{EmitSPIRV(profile, program)};
@@ -519,7 +537,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
AddShader(GL_COMPUTE_SHADER, source_program.handle, code);
LinkProgram(source_program.handle);
}
- return std::make_unique<ComputePipeline>(texture_cache, buffer_cache, gpu_memory,
+ return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
kepler_compute, program_manager, program.info,
std::move(source_program), std::move(asm_program));
}