diff options
-rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 132 |
1 files changed, 63 insertions, 69 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 5e922d1fe..4014d4bfe 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -804,11 +804,7 @@ uint UnquantizeTexelWeight(EncodingData val) { return result; } -uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; - void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { - const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); - const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); const uint num_planes = is_dual_plane ? 2 : 1; const uint area = size.x * size.y; const uint loop_count = min(result_index, area * num_planes); @@ -818,58 +814,71 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { result_vector[array_index][vector_index] = UnquantizeTexelWeight(GetEncodingFromVector(itr)); } - for (uint plane = 0; plane < num_planes; ++plane) { - for (uint t = 0; t < block_dims.y; t++) { - for (uint s = 0; s < block_dims.x; s++) { - const uint cs = Ds * s; - const uint ct = Dt * t; - const uint gs = (cs * (size.x - 1) + 32) >> 6; - const uint gt = (ct * (size.y - 1) + 32) >> 6; - const uint js = gs >> 4; - const uint fs = gs & 0xF; - const uint jt = gt >> 4; - const uint ft = gt & 0x0F; - const uint w11 = (fs * ft + 8) >> 4; - const uint w10 = ft - w11; - const uint w01 = fs - w11; - const uint w00 = 16 - fs - ft + w11; - const uvec4 w = uvec4(w00, w01, w10, w11); - const uint v0 = jt * size.x + js; - - uvec4 p = uvec4(0); - -#define VectorIndicesFromBase(offset_base) \ - const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; \ - const uint array_index = offset / 4; \ +} + +uint GetUnquantizedTexelWieght(uint offset_base, uint plane, bool is_dual_plane) { + const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; + const uint array_index = offset / 4; const uint vector_index = offset % 4; + return result_vector[array_index][vector_index]; +} - if (v0 < area) { - const uint offset_base = v0; - VectorIndicesFromBase(offset_base); - p.x = result_vector[array_index][vector_index]; - } - if ((v0 + 1) < (area)) { - const uint offset_base = v0 + 1; - VectorIndicesFromBase(offset_base); - p.y = result_vector[array_index][vector_index]; - } - if ((v0 + size.x) < (area)) { - const uint offset_base = v0 + size.x; - VectorIndicesFromBase(offset_base); - p.z = result_vector[array_index][vector_index]; - } - if ((v0 + size.x + 1) < (area)) { - const uint offset_base = v0 + size.x + 1; - VectorIndicesFromBase(offset_base); - p.w = result_vector[array_index][vector_index]; - } - const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane; - const uint array_index = offset / 4; - const uint vector_index = offset % 4; - unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; - } +uvec4 GetUnquantizedWeightVector(uint t, uint s, uvec2 size, uint plane_index, bool is_dual_plane) { + const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); + const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); + const uint area = size.x * size.y; + + const uint cs = Ds * s; + const uint ct = Dt * t; + const uint gs = (cs * (size.x - 1) + 32) >> 6; + const uint gt = (ct * (size.y - 1) + 32) >> 6; + const uint js = gs >> 4; + const uint fs = gs & 0xF; + const uint jt = gt >> 4; + const uint ft = gt & 0x0F; + const uint w11 = (fs * ft + 8) >> 4; + const uint w10 = ft - w11; + const uint w01 = fs - w11; + const uint w00 = 16 - fs - ft + w11; + const uvec4 w = uvec4(w00, w01, w10, w11); + const uint v0 = jt * size.x + js; + + uvec4 p0 = uvec4(0); + uvec4 p1 = uvec4(0); + + if (v0 < area) { + const uint offset_base = v0; + p0.x = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane); + p1.x = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); + } + if ((v0 + 1) < (area)) { + const uint offset_base = v0 + 1; + p0.y = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane); + p1.y = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); + } + if ((v0 + size.x) < (area)) { + const uint offset_base = v0 + size.x; + p0.z = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane); + p1.z = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); + } + if ((v0 + size.x + 1) < (area)) { + const uint offset_base = v0 + size.x + 1; + p0.w = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane); + p1.w = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); + } + + const uint primary_weight = (uint(dot(p0, w)) + 8) >> 4; + + uvec4 weight_vec = uvec4(primary_weight); + + if (is_dual_plane) { + const uint secondary_weight = (uint(dot(p1, w)) + 8) >> 4; + for (uint c = 0; c < 4; c++) { + const bool is_secondary = ((plane_index + 1u) & 3u) == c; + weight_vec[c] = is_secondary ? secondary_weight : primary_weight; } } + return weight_vec; } int FindLayout(uint mode) { @@ -1155,25 +1164,10 @@ void DecompressBlock(ivec3 coord) { } const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); - const uint weight_offset = (j * block_dims.x + i); - const uint array_index = weight_offset / 4; - const uint vector_index = weight_offset % 4; - const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; - uvec4 weight_vec = uvec4(primary_weight); - if (dual_plane) { - const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS; - const uint secondary_array_index = secondary_weight_offset / 4; - const uint secondary_vector_index = secondary_weight_offset % 4; - const uint secondary_weight = - unquantized_texel_weights[secondary_array_index][secondary_vector_index]; - for (uint c = 0; c < 4; c++) { - const bool is_secondary = ((plane_index + 1u) & 3u) == c; - weight_vec[c] = is_secondary ? secondary_weight : primary_weight; - } - } + const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane); const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); - const vec4 p = (Cf / 65535.0); + const vec4 p = (Cf / 65535.0f); imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); } } |