summaryrefslogtreecommitdiffstats
path: root/src/video_core/host_shaders
diff options
context:
space:
mode:
authorAmeer J <52414509+ameerj@users.noreply.github.com>2023-08-03 00:15:59 +0200
committerAmeer J <52414509+ameerj@users.noreply.github.com>2023-08-06 20:54:58 +0200
commit8ce158bce6912b2263f1724e6c09d8b517ef18c3 (patch)
treeded2c5ccef3c5def9b4ce03b510c5da8e5ea48a9 /src/video_core/host_shaders
parentvulkan dims specialization (diff)
downloadyuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar
yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.gz
yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.bz2
yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.lz
yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.xz
yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.zst
yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.zip
Diffstat (limited to 'src/video_core/host_shaders')
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt57
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp42
-rw-r--r--src/video_core/host_shaders/astc_decoder_spv_includes.h20
3 files changed, 16 insertions, 103 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 20e8388ee..e61d9af80 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -13,11 +13,6 @@ set(GLSL_INCLUDES
${FIDELITYFX_FILES}
)
-set(ASTC_INCLUDES
- # astc_decoder_glsl_includes.h
- astc_decoder_spv_includes.h
-)
-
set(SHADER_FILES
astc_decoder.comp
blit_color_float.frag
@@ -100,60 +95,9 @@ if (NOT GLSLANG_ERROR STREQUAL "")
set(QUIET_FLAG "")
endif()
-macro(ASTC_GEN)
- # paired list of valid astc block dimensions
- set(ASTC_WIDTHS 4 5 5 6 6 6 8 8 8 10 10 10 10 12 12)
- set(ASTC_HEIGHTS 4 4 5 4 5 6 5 6 8 5 6 8 10 10 12)
- list(LENGTH ASTC_WIDTHS NUM_ASTC_FORMATS)
- math(EXPR NUM_ASTC_FORMATS "${NUM_ASTC_FORMATS}-1")
- foreach(i RANGE ${NUM_ASTC_FORMATS})
- list(GET ASTC_WIDTHS ${i} ASTC_WIDTH)
- list(GET ASTC_HEIGHTS ${i} ASTC_HEIGHT)
-
- # Vulkan SPIR-V Specialization
-
- string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_SPV SPIRV_VARIABLE_NAME)
- set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_spv.h)
- add_custom_command(
- OUTPUT
- ${SPIRV_HEADER_FILE}
- COMMAND
- ${GLSLANGVALIDATOR} -V -DBLOCK_WIDTH=${ASTC_WIDTH} -DBLOCK_HEIGHT=${ASTC_HEIGHT} ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
- MAIN_DEPENDENCY
- ${SOURCE_FILE}
- )
- set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE})
-
- # GLSL Specialization
- # Disabled as there was no noticeable performance uplift specializing the shaders for OGL
-
- # set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}.h)
- # set(SHADER_DEFINES "#define BLOCK_WIDTH ${ASTC_WIDTH}" "#define BLOCK_HEIGHT ${ASTC_HEIGHT}")
- # set(DEFINES_LINE_NUMBER 14)
- # string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT} GLSL_VARIABLE_NAME)
- # add_custom_command(
- # OUTPUT
- # ${SOURCE_HEADER_FILE}
- # COMMAND
- # ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} "${SHADER_DEFINES}" ${DEFINES_LINE_NUMBER} ${GLSL_VARIABLE_NAME}
- # MAIN_DEPENDENCY
- # ${SOURCE_FILE}
- # DEPENDS
- # ${INPUT_FILE}
- # ${SOURCE_FILE}
- # )
- # set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
- endforeach()
-endmacro()
-
foreach(FILENAME IN ITEMS ${SHADER_FILES})
string(REPLACE "." "_" SHADER_NAME ${FILENAME})
set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
-
- if (${FILENAME} MATCHES "astc_decoder.comp")
- ASTC_GEN()
- endif()
-
# Skip generating source headers on Vulkan exclusive files
if (NOT ${FILENAME} MATCHES "vulkan.*")
set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
@@ -207,7 +151,6 @@ endforeach()
set(SHADER_SOURCES ${SHADER_FILES})
list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
-list(APPEND SHADER_SOURCES ${ASTC_INCLUDES})
add_custom_target(host_shaders
DEPENDS
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index b4bb8299f..a33c916ac 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -24,9 +24,7 @@
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
BEGIN_PUSH_CONSTANTS
-#ifndef BLOCK_WIDTH
UNIFORM(1) uvec2 block_dims;
-#endif
UNIFORM(2) uint layer_stride;
UNIFORM(3) uint block_size;
UNIFORM(4) uint x_shift;
@@ -77,15 +75,7 @@ int color_bitsread = 0;
// At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode
// So the maximum would be 144 (12 x 12) elements, x 2 for two planes
#define DIVCEIL(number, divisor) (number + divisor - 1) / divisor
-
-#ifndef BLOCK_WIDTH
-#define BLOCK_WIDTH block_dims.x
-#define BLOCK_HEIGHT block_dims.y
#define ARRAY_NUM_ELEMENTS 144
-#else
-#define ARRAY_NUM_ELEMENTS BLOCK_WIDTH * BLOCK_HEIGHT
-#endif
-
#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4)
uvec4 result_vector[VECTOR_ARRAY_SIZE];
@@ -275,7 +265,7 @@ uint Hash52(uint p) {
}
uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) {
- if ((BLOCK_WIDTH * BLOCK_HEIGHT) < 32) {
+ if ((block_dims.y * block_dims.x) < 32) {
x <<= 1;
y <<= 1;
}
@@ -888,8 +878,8 @@ uint UnquantizeTexelWeight(EncodingData val) {
uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
- const uint Ds = uint((BLOCK_WIDTH * 0.5f + 1024) / (BLOCK_WIDTH - 1));
- const uint Dt = uint((BLOCK_HEIGHT * 0.5f + 1024) / (BLOCK_HEIGHT - 1));
+ const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
+ const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
const uint num_planes = is_dual_plane ? 2 : 1;
const uint area = size.x * size.y;
const uint loop_count = min(result_index, area * num_planes);
@@ -900,8 +890,8 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
UnquantizeTexelWeight(GetEncodingFromVector(itr));
}
for (uint plane = 0; plane < num_planes; ++plane) {
- for (uint t = 0; t < BLOCK_HEIGHT; t++) {
- for (uint s = 0; s < BLOCK_WIDTH; s++) {
+ for (uint t = 0; t < block_dims.y; t++) {
+ for (uint s = 0; s < block_dims.x; s++) {
const uint cs = Ds * s;
const uint ct = Dt * t;
const uint gs = (cs * (size.x - 1) + 32) >> 6;
@@ -944,7 +934,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
VectorIndicesFromBase(offset_base);
p.w = result_vector[array_index][vector_index];
}
- const uint offset = (t * BLOCK_WIDTH + s) + ARRAY_NUM_ELEMENTS * plane;
+ const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane;
const uint array_index = offset / 4;
const uint vector_index = offset % 4;
unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4;
@@ -986,8 +976,8 @@ int FindLayout(uint mode) {
void FillError(ivec3 coord) {
- for (uint j = 0; j < BLOCK_HEIGHT; j++) {
- for (uint i = 0; i < BLOCK_WIDTH; i++) {
+ for (uint j = 0; j < block_dims.y; j++) {
+ for (uint i = 0; i < block_dims.x; i++) {
imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0));
}
}
@@ -1003,8 +993,8 @@ void FillVoidExtentLDR(ivec3 coord) {
const float r = float(r_u) / 65535.0f;
const float g = float(g_u) / 65535.0f;
const float b = float(b_u) / 65535.0f;
- for (uint j = 0; j < BLOCK_HEIGHT; j++) {
- for (uint i = 0; i < BLOCK_WIDTH; i++) {
+ for (uint j = 0; j < block_dims.y; j++) {
+ for (uint i = 0; i < block_dims.x; i++) {
imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a));
}
}
@@ -1099,7 +1089,7 @@ void DecompressBlock(ivec3 coord) {
return;
}
const uvec2 size_params = DecodeBlockSize(mode);
- if ((size_params.x > BLOCK_WIDTH) || (size_params.y > BLOCK_HEIGHT)) {
+ if ((size_params.x > block_dims.x) || (size_params.y > block_dims.y)) {
FillError(coord);
return;
}
@@ -1228,21 +1218,21 @@ void DecompressBlock(ivec3 coord) {
DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane));
UnquantizeTexelWeights(size_params, dual_plane);
- for (uint j = 0; j < BLOCK_HEIGHT; j++) {
- for (uint i = 0; i < BLOCK_WIDTH; i++) {
+ for (uint j = 0; j < block_dims.y; j++) {
+ for (uint i = 0; i < block_dims.x; i++) {
uint local_partition = 0;
if (num_partitions > 1) {
local_partition = Select2DPartition(partition_index, i, j, num_partitions);
}
const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
- const uint weight_offset = (j * BLOCK_WIDTH + i);
+ const uint weight_offset = (j * block_dims.x + i);
const uint array_index = weight_offset / 4;
const uint vector_index = weight_offset % 4;
const uint primary_weight = unquantized_texel_weights[array_index][vector_index];
uvec4 weight_vec = uvec4(primary_weight);
if (dual_plane) {
- const uint secondary_weight_offset = (j * BLOCK_WIDTH + i) + ARRAY_NUM_ELEMENTS;
+ const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS;
const uint secondary_array_index = secondary_weight_offset / 4;
const uint secondary_vector_index = secondary_weight_offset % 4;
const uint secondary_weight =
@@ -1280,7 +1270,7 @@ void main() {
offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
offset += swizzle;
- const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(BLOCK_WIDTH, BLOCK_HEIGHT, 1));
+ const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
return;
}
diff --git a/src/video_core/host_shaders/astc_decoder_spv_includes.h b/src/video_core/host_shaders/astc_decoder_spv_includes.h
deleted file mode 100644
index 44ee50c5f..000000000
--- a/src/video_core/host_shaders/astc_decoder_spv_includes.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include "video_core/host_shaders/astc_decoder_comp_10x10_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_10x5_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_10x6_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_10x8_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_12x10_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_12x12_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_4x4_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_5x4_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_5x5_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_6x5_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_6x6_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_8x5_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_8x6_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_8x8_spv.h"
-#include "video_core/host_shaders/astc_decoder_comp_spv.h"