summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_context.cpp 12
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp 43
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 1
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 2
5 files changed, 43 insertions, 20 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 5456d4e5b..c6325e55f 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
void EmitContext::SetupExtensions(std::string&) {
header += "#extension GL_ARB_separate_shader_objects : enable\n";
- header += "#extension GL_ARB_sparse_texture2 : enable\n";
- header += "#extension GL_EXT_texture_shadow_lod : enable\n";
- // header += "#extension GL_ARB_texture_cube_map_array : enable\n";
+ if (stage != Stage::Compute) {
+ // TODO: track this usage
+ header += "#extension GL_ARB_sparse_texture2 : enable\n";
+ header += "#extension GL_EXT_texture_shadow_lod : enable\n";
+ }
if (info.uses_int64) {
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
}
@@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) {
info.uses_subgroup_shuffles || info.uses_fswzadd) {
header += "#extension GL_ARB_shader_ballot : enable\n";
header += "#extension GL_ARB_shader_group_vote : enable\n";
+ header += "#extension GL_KHR_shader_subgroup_basic : enable\n";
+ if (!info.uses_int64) {
+ header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
+ }
}
}
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index e462c977c..8a018acb5 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in
}
void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
- ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
- // TODO:
- // if (ctx.profile.warp_size_potentially_larger_than_guest) {
- // }
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
+ ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
+ }
}
void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
- ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
- // TODO:
- // if (ctx.profile.warp_size_potentially_larger_than_guest) {
- // }
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
+ ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
+ }
}
void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
- ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
- // TODO:
- // if (ctx.profile.warp_size_potentially_larger_than_guest) {
- // }
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+ } else {
+ const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
+ const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
+ const auto value{fmt::format("({}^{})", ballot, active_mask)};
+ ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
+ }
}
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
- ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
- // TODO:
- // if (ctx.profile.warp_size_potentially_larger_than_guest) {
- // }
+ if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+ ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
+ } else {
+ ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred);
+ }
}
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 071133781..20ea42cff 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -160,6 +160,7 @@ Device::Device() {
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
+ warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 9b9402c29..ff0ff2b08 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -128,6 +128,10 @@ public:
return has_amd_shader_half_float;
}
+ bool IsWarpSizePotentiallyLargerThanGuest() const {
+ return warp_size_potentially_larger_than_guest;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -161,6 +165,7 @@ private:
bool has_depth_buffer_float{};
bool has_nv_gpu_shader_5{};
bool has_amd_shader_half_float{};
+ bool warp_size_potentially_larger_than_guest{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 8a052851b..cd11ff653 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
.support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
- .warp_size_potentially_larger_than_guest = true,
+ .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
.lower_left_origin_mode = true,
.need_declared_frag_colors = true,