summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/CMakeLists.txt8
-rw-r--r--src/audio_core/algorithm/filter.cpp79
-rw-r--r--src/audio_core/algorithm/filter.h62
-rw-r--r--src/audio_core/algorithm/interpolate.cpp71
-rw-r--r--src/audio_core/algorithm/interpolate.h43
-rw-r--r--src/audio_core/audio_renderer.cpp5
-rw-r--r--src/audio_core/audio_renderer.h2
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp4
-rw-r--r--src/core/file_sys/card_image.cpp8
-rw-r--r--src/core/file_sys/card_image.h6
-rw-r--r--src/core/file_sys/vfs.cpp20
-rw-r--r--src/core/hle/kernel/kernel.cpp2
-rw-r--r--src/core/hle/kernel/object.h15
-rw-r--r--src/video_core/gpu.cpp4
-rw-r--r--src/video_core/gpu.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp76
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h110
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h1
18 files changed, 434 insertions, 86 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index ec71524a3..82e4850f7 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -1,4 +1,8 @@
add_library(audio_core STATIC
+ algorithm/filter.cpp
+ algorithm/filter.h
+ algorithm/interpolate.cpp
+ algorithm/interpolate.h
audio_out.cpp
audio_out.h
audio_renderer.cpp
@@ -7,12 +11,12 @@ add_library(audio_core STATIC
codec.cpp
codec.h
null_sink.h
- stream.cpp
- stream.h
sink.h
sink_details.cpp
sink_details.h
sink_stream.h
+ stream.cpp
+ stream.h
$<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h>
)
diff --git a/src/audio_core/algorithm/filter.cpp b/src/audio_core/algorithm/filter.cpp
new file mode 100644
index 000000000..403b8503f
--- /dev/null
+++ b/src/audio_core/algorithm/filter.cpp
@@ -0,0 +1,79 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#define _USE_MATH_DEFINES
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <vector>
+#include "audio_core/algorithm/filter.h"
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+/// Builds a second-order low-pass biquad.
+/// @param cutoff Cutoff frequency; multiplied by 2*pi to obtain the angular frequency.
+/// @param Q Quality factor; sin(w0) / (2 * Q) forms the alpha term.
+/// @returns A Filter constructed from the unnormalized {a0, a1, a2, b0, b1, b2}.
+Filter Filter::LowPass(double cutoff, double Q) {
+    const double omega = 2.0 * M_PI * cutoff;
+    const double cos_omega = std::cos(omega);
+    const double alpha = std::sin(omega) / (2 * Q);
+
+    // b0 and b2 are the same expression; b1 uses a factor of 1.0 instead of 0.5.
+    const double outer_gain = 0.5 * (1 - cos_omega);
+    const double middle_gain = 1.0 * (1 - cos_omega);
+
+    return {1 + alpha, -2.0 * cos_omega, 1 - alpha, outer_gain, middle_gain, outer_gain};
+}
+
+/// Default filter: a0 = b0 = 1 and every other coefficient 0, i.e. output equals input.
+Filter::Filter() : Filter(1.0, 0.0, 0.0, 1.0, 0.0, 0.0) {}
+
+/// Stores every coefficient divided by a0; a0 itself is not kept.
+Filter::Filter(double a0, double a1, double a2, double b0, double b1, double b2)
+    : a1{a1 / a0},
+      a2{a2 / a0},
+      b0{b0 / a0},
+      b1{b1 / a0},
+      b2{b2 / a0} {}
+
+/// Filters interleaved PCM samples in place.
+/// @param signal Interleaved samples, channel_count per frame. Samples of a trailing
+///               partial frame are left untouched.
+void Filter::Process(std::vector<s16>& signal) {
+    // Derive the frame count from channel_count so read and write indices always agree.
+    const size_t num_frames = signal.size() / channel_count;
+    for (size_t i = 0; i < num_frames; i++) {
+        // Age both histories by one frame: index 0 is rewritten below, while indices 1
+        // and 2 now hold the previous two frames.
+        std::rotate(in.begin(), in.end() - 1, in.end());
+        std::rotate(out.begin(), out.end() - 1, out.end());
+
+        for (size_t ch = 0; ch < channel_count; ch++) {
+            s16& sample = signal[i * channel_count + ch];
+            in[0][ch] = sample;
+
+            out[0][ch] = b0 * in[0][ch] + b1 * in[1][ch] + b2 * in[2][ch] - a1 * out[1][ch] -
+                         a2 * out[2][ch];
+
+            // Saturate to the s16 range before narrowing; the history keeps the
+            // unclamped value.
+            sample = static_cast<s16>(std::clamp(out[0][ch], -32768.0, 32767.0));
+        }
+    }
+}
+
+/// Computes the quality factor for one stage of a biquad cascade.
+/// @param total_count The total number of biquads to be cascaded.
+/// @param index 0-index of the biquad to calculate the Q value for.
+/// @returns 1 / (2 * cos(theta)) with theta = pi * (2 * index + 1) / (4 * total_count).
+static double CascadingBiquadQ(size_t total_count, size_t index) {
+    const double numerator = M_PI * (2 * index + 1);
+    const double theta = numerator / (4.0 * total_count);
+    return 1.0 / (2.0 * std::cos(theta));
+}
+
+/// Builds a cascade of cascade_size low-pass biquads that share one cutoff; each stage
+/// gets its own Q from CascadingBiquadQ.
+CascadingFilter CascadingFilter::LowPass(double cutoff, size_t cascade_size) {
+    std::vector<Filter> stages;
+    stages.reserve(cascade_size);
+    for (size_t stage = 0; stage < cascade_size; ++stage) {
+        stages.push_back(Filter::LowPass(cutoff, CascadingBiquadQ(cascade_size, stage)));
+    }
+    return CascadingFilter{std::move(stages)};
+}
+
+/// Creates a cascade with no stages; Process() then leaves the signal unchanged.
+CascadingFilter::CascadingFilter() = default;
+/// Takes ownership of the given stages, which Process() applies in order.
+CascadingFilter::CascadingFilter(std::vector<Filter> filters) : filters(std::move(filters)) {}
+
+/// Runs every stage over the signal in order, so each stage filters the previous
+/// stage's output. The signal is modified in place.
+void CascadingFilter::Process(std::vector<s16>& signal) {
+    std::for_each(filters.begin(), filters.end(),
+                  [&signal](Filter& stage) { stage.Process(signal); });
+}
+
+} // namespace AudioCore
diff --git a/src/audio_core/algorithm/filter.h b/src/audio_core/algorithm/filter.h
new file mode 100644
index 000000000..a41beef98
--- /dev/null
+++ b/src/audio_core/algorithm/filter.h
@@ -0,0 +1,62 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+/// Digital biquad filter:
+///
+///          b0 + b1 z^-1 + b2 z^-2
+///  H(z) = ------------------------
+///          a0 + a1 z^-1 + a2 z^-2
+class Filter {
+public:
+    /// Creates a low-pass filter.
+    /// @param cutoff Determines the cutoff frequency. A value from 0.0 to 1.0.
+    /// @param Q Determines the quality factor of this filter.
+    static Filter LowPass(double cutoff, double Q = 0.7071);
+
+    /// Passthrough filter.
+    Filter();
+
+    /// Builds a filter from unnormalized coefficients; they are divided by a0 on storage.
+    Filter(double a0, double a1, double a2, double b0, double b1, double b2);
+
+    /// Filters the interleaved samples of `signal` in place, updating the histories.
+    void Process(std::vector<s16>& signal);
+
+private:
+    static constexpr size_t channel_count = 2;
+
+    /// Coefficients are in normalized form (a0 = 1.0).
+    double a1, a2, b0, b1, b2;
+    /// Input History. Zero-initialized: no constructor sets it, and Process() reads it
+    /// before the first full write.
+    std::array<std::array<double, channel_count>, 3> in{};
+    /// Output History. Zero-initialized for the same reason as the input history.
+    std::array<std::array<double, channel_count>, 3> out{};
+};
+
+/// Cascade filters to build up higher-order filters from lower-order ones.
+/// Holds an ordered list of Filter stages and applies them one after another.
+class CascadingFilter {
+public:
+ /// Creates a cascading low-pass filter.
+ /// @param cutoff Determines the cutoff frequency. A value from 0.0 to 1.0.
+ /// @param cascade_size Number of biquads in cascade.
+ static CascadingFilter LowPass(double cutoff, size_t cascade_size);
+
+ /// Passthrough.
+ CascadingFilter();
+
+ /// Creates a cascade from an existing list of stages.
+ explicit CascadingFilter(std::vector<Filter> filters);
+
+ /// Applies every stage to `signal` in place.
+ void Process(std::vector<s16>& signal);
+
+private:
+ /// Stages in the order they are applied.
+ std::vector<Filter> filters;
+};
+
+} // namespace AudioCore
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp
new file mode 100644
index 000000000..11459821f
--- /dev/null
+++ b/src/audio_core/algorithm/interpolate.cpp
@@ -0,0 +1,71 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#define _USE_MATH_DEFINES
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+#include "audio_core/algorithm/interpolate.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+
+namespace AudioCore {
+
+/// The Lanczos kernel, evaluated as a * sin(pi x) * sin(pi x / a) / (pi x)^2.
+/// @param a Kernel size parameter.
+/// @param x Position to evaluate at; x == 0 returns 1.0 directly, which avoids
+///          dividing by zero.
+static double Lanczos(size_t a, double x) {
+    if (x != 0.0) {
+        const double scaled = M_PI * x;
+        return a * std::sin(scaled) * std::sin(scaled / a) / (scaled * scaled);
+    }
+    return 1.0;
+}
+
+/// Resamples interleaved stereo samples using a low-pass cascade followed by a Lanczos sum.
+/// @param state Filter, frame history and fractional position carried between calls;
+///              updated in place.
+/// @param input Interleaved stereo samples. Fewer than two samples yields an empty result
+///              and leaves `state` untouched.
+/// @param ratio Input frames consumed per output frame. Non-positive or non-finite values
+///              are logged and replaced by 1.0.
+/// @returns Interleaved stereo output samples.
+std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio) {
+    if (input.size() < 2)
+        return {};
+
+    // NaN or infinity would be added into state.position and never leave it again, so
+    // they are rejected together with non-positive ratios.
+    if (!std::isfinite(ratio) || ratio <= 0) {
+        LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio);
+        ratio = 1.0;
+    }
+
+    // The low-pass filter depends only on the ratio; rebuild it when the ratio changes.
+    if (ratio != state.current_ratio) {
+        const double cutoff_frequency = std::min(0.5 / ratio, 0.5 * ratio);
+        state.nyquist = CascadingFilter::LowPass(std::clamp(cutoff_frequency, 0.0, 0.4), 3);
+        state.current_ratio = ratio;
+    }
+    state.nyquist.Process(input);
+
+    constexpr size_t taps = InterpolationState::lanczos_taps;
+    const size_t num_frames = input.size() / 2;
+
+    std::vector<s16> output;
+    output.reserve(static_cast<size_t>(input.size() / ratio + 4));
+
+    double& pos = state.position;
+    auto& h = state.history;
+    for (size_t i = 0; i < num_frames; ++i) {
+        // Shift the history by one frame and store the new frame at index 0.
+        std::rotate(h.begin(), h.end() - 1, h.end());
+        h[0][0] = input[i * 2 + 0];
+        h[0][1] = input[i * 2 + 1];
+
+        // Emit every output frame whose position falls within this input frame.
+        while (pos <= 1.0) {
+            double l = 0.0;
+            double r = 0.0;
+            for (size_t j = 0; j < h.size(); j++) {
+                // The kernel weight is the same for both channels; evaluate it once.
+                const double weight = Lanczos(taps, pos + j - taps + 1);
+                l += weight * h[j][0];
+                r += weight * h[j][1];
+            }
+            output.emplace_back(static_cast<s16>(std::clamp(l, -32768.0, 32767.0)));
+            output.emplace_back(static_cast<s16>(std::clamp(r, -32768.0, 32767.0)));
+
+            pos += ratio;
+        }
+        pos -= 1.0;
+    }
+
+    return output;
+}
+
+} // namespace AudioCore
diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h
new file mode 100644
index 000000000..c79c2eef4
--- /dev/null
+++ b/src/audio_core/algorithm/interpolate.h
@@ -0,0 +1,43 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+#include "audio_core/algorithm/filter.h"
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+/// Persistent resampler state that Interpolate() reads and updates on every call.
+struct InterpolationState {
+ /// Size parameter passed to the Lanczos kernel.
+ static constexpr size_t lanczos_taps = 4;
+ /// Number of stereo frames kept for the kernel sum.
+ static constexpr size_t history_size = lanczos_taps * 2 - 1;
+
+ /// Ratio the low-pass filter was last built for; a different ratio triggers a rebuild.
+ double current_ratio = 0.0;
+ /// Low-pass cascade applied to the input before resampling.
+ CascadingFilter nyquist;
+ /// Most recent input frames as (left, right) pairs, newest at index 0.
+ std::array<std::array<s16, 2>, history_size> history = {};
+ /// Fractional read position carried over between calls.
+ double position = 0;
+};
+
+/// Interpolates input signal to produce output signal.
+/// @param state Resampler state carried between calls (filter, frame history, fractional
+///              position); updated in place.
+/// @param input The signal to interpolate.
+/// @param ratio Interpolation ratio.
+/// ratio > 1.0 results in fewer output samples.
+/// ratio < 1.0 results in more output samples.
+/// @returns Output signal.
+std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio);
+
+/// Sample-rate based overload: converts the two rates into a ratio and forwards to the
+/// ratio-based Interpolate.
+/// @param state Resampler state, forwarded unchanged.
+/// @param input The signal to interpolate.
+/// @param input_rate The sample rate of input.
+/// @param output_rate The desired sample rate of the output.
+/// @returns Output signal.
+inline std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,
+                                    u32 input_rate, u32 output_rate) {
+    const auto rate_in = static_cast<double>(input_rate);
+    const auto rate_out = static_cast<double>(output_rate);
+    return Interpolate(state, std::move(input), rate_in / rate_out);
+}
+
+} // namespace AudioCore
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 6ebed3fb0..397b107f5 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "audio_core/algorithm/interpolate.h"
#include "audio_core/audio_renderer.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -199,6 +200,8 @@ void AudioRenderer::VoiceState::RefreshBuffer() {
break;
}
+ samples = Interpolate(interp_state, std::move(samples), Info().sample_rate, STREAM_SAMPLE_RATE);
+
is_refresh_pending = false;
}
@@ -224,7 +227,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
break;
}
- samples_remaining -= samples.size();
+ samples_remaining -= samples.size() / stream->GetNumChannels();
for (const auto& sample : samples) {
const s32 buffer_sample{buffer[offset]};
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 13c5d0adc..eba67f28e 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -8,6 +8,7 @@
#include <memory>
#include <vector>
+#include "audio_core/algorithm/interpolate.h"
#include "audio_core/audio_out.h"
#include "audio_core/codec.h"
#include "audio_core/stream.h"
@@ -194,6 +195,7 @@ private:
size_t wave_index{};
size_t offset{};
Codec::ADPCMState adpcm_state{};
+ InterpolationState interp_state{};
std::vector<s16> samples;
VoiceOutStatus out_status{};
VoiceInfo info{};
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 0996f129c..20e5200a8 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -243,9 +243,7 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
}
void ARM_Dynarmic::PrepareReschedule() {
- if (jit->IsExecuting()) {
- jit->HaltExecution();
- }
+ jit->HaltExecution();
}
void ARM_Dynarmic::ClearInstructionCache() {
diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp
index a4823353e..8e05b9d0e 100644
--- a/src/core/file_sys/card_image.cpp
+++ b/src/core/file_sys/card_image.cpp
@@ -107,19 +107,19 @@ VirtualFile XCI::GetNCAFileByType(NCAContentType type) const {
return nullptr;
}
-std::vector<std::shared_ptr<VfsFile>> XCI::GetFiles() const {
+std::vector<VirtualFile> XCI::GetFiles() const {
return {};
}
-std::vector<std::shared_ptr<VfsDirectory>> XCI::GetSubdirectories() const {
- return std::vector<std::shared_ptr<VfsDirectory>>();
+std::vector<VirtualDir> XCI::GetSubdirectories() const {
+ return {};
}
std::string XCI::GetName() const {
return file->GetName();
}
-std::shared_ptr<VfsDirectory> XCI::GetParentDirectory() const {
+VirtualDir XCI::GetParentDirectory() const {
return file->GetContainingDirectory();
}
diff --git a/src/core/file_sys/card_image.h b/src/core/file_sys/card_image.h
index e089d737c..4618d9c00 100644
--- a/src/core/file_sys/card_image.h
+++ b/src/core/file_sys/card_image.h
@@ -71,13 +71,13 @@ public:
std::shared_ptr<NCA> GetNCAByType(NCAContentType type) const;
VirtualFile GetNCAFileByType(NCAContentType type) const;
- std::vector<std::shared_ptr<VfsFile>> GetFiles() const override;
+ std::vector<VirtualFile> GetFiles() const override;
- std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override;
+ std::vector<VirtualDir> GetSubdirectories() const override;
std::string GetName() const override;
- std::shared_ptr<VfsDirectory> GetParentDirectory() const override;
+ VirtualDir GetParentDirectory() const override;
protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
diff --git a/src/core/file_sys/vfs.cpp b/src/core/file_sys/vfs.cpp
index 24e158962..a5ec50b1a 100644
--- a/src/core/file_sys/vfs.cpp
+++ b/src/core/file_sys/vfs.cpp
@@ -74,15 +74,15 @@ VirtualFile VfsFilesystem::CopyFile(std::string_view old_path_, std::string_view
return new_file;
}
-VirtualFile VfsFilesystem::MoveFile(std::string_view old_path_, std::string_view new_path_) {
- const auto old_path = FileUtil::SanitizePath(old_path_);
- const auto new_path = FileUtil::SanitizePath(new_path_);
+VirtualFile VfsFilesystem::MoveFile(std::string_view old_path, std::string_view new_path) {
+ const auto sanitized_old_path = FileUtil::SanitizePath(old_path);
+ const auto sanitized_new_path = FileUtil::SanitizePath(new_path);
// Again, non-default impls are highly encouraged to provide a more optimized version of this.
- auto out = CopyFile(old_path_, new_path_);
+ auto out = CopyFile(sanitized_old_path, sanitized_new_path);
if (out == nullptr)
return nullptr;
- if (DeleteFile(old_path))
+ if (DeleteFile(sanitized_old_path))
return out;
return nullptr;
}
@@ -137,15 +137,15 @@ VirtualDir VfsFilesystem::CopyDirectory(std::string_view old_path_, std::string_
return new_dir;
}
-VirtualDir VfsFilesystem::MoveDirectory(std::string_view old_path_, std::string_view new_path_) {
- const auto old_path = FileUtil::SanitizePath(old_path_);
- const auto new_path = FileUtil::SanitizePath(new_path_);
+VirtualDir VfsFilesystem::MoveDirectory(std::string_view old_path, std::string_view new_path) {
+ const auto sanitized_old_path = FileUtil::SanitizePath(old_path);
+ const auto sanitized_new_path = FileUtil::SanitizePath(new_path);
// Non-default impls are highly encouraged to provide a more optimized version of this.
- auto out = CopyDirectory(old_path_, new_path_);
+ auto out = CopyDirectory(sanitized_old_path, sanitized_new_path);
if (out == nullptr)
return nullptr;
- if (DeleteDirectory(old_path))
+ if (DeleteDirectory(sanitized_old_path))
return out;
return nullptr;
}
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 1b0cd0abf..8c19e86d3 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -11,7 +11,7 @@
namespace Kernel {
-unsigned int Object::next_object_id;
+std::atomic<u32> Object::next_object_id{0};
/// Initialize the kernel
void Init() {
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h
index 83df68dfd..526ac9cc3 100644
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -4,6 +4,7 @@
#pragma once
+#include <atomic>
#include <string>
#include <utility>
@@ -42,8 +43,8 @@ public:
virtual ~Object();
/// Returns a unique identifier for the object. For debugging purposes only.
- unsigned int GetObjectId() const {
- return object_id;
+ u32 GetObjectId() const {
+ return object_id.load(std::memory_order_relaxed);
}
virtual std::string GetTypeName() const {
@@ -61,23 +62,23 @@ public:
bool IsWaitable() const;
public:
- static unsigned int next_object_id;
+ static std::atomic<u32> next_object_id;
private:
friend void intrusive_ptr_add_ref(Object*);
friend void intrusive_ptr_release(Object*);
- unsigned int ref_count = 0;
- unsigned int object_id = next_object_id++;
+ std::atomic<u32> ref_count{0};
+ std::atomic<u32> object_id{next_object_id++};
};
// Special functions used by boost::instrusive_ptr to do automatic ref-counting
inline void intrusive_ptr_add_ref(Object* object) {
- ++object->ref_count;
+ object->ref_count.fetch_add(1, std::memory_order_relaxed);
}
inline void intrusive_ptr_release(Object* object) {
- if (--object->ref_count == 0) {
+ if (object->ref_count.fetch_sub(1, std::memory_order_acq_rel) == 1) {
delete object;
}
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 19e7f1161..c9f6b82b7 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -46,8 +46,10 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
case RenderTargetFormat::RGBA32_FLOAT:
case RenderTargetFormat::RGBA32_UINT:
return 16;
+ case RenderTargetFormat::RGBA16_UINT:
case RenderTargetFormat::RGBA16_FLOAT:
case RenderTargetFormat::RG32_FLOAT:
+ case RenderTargetFormat::RG32_UINT:
return 8;
case RenderTargetFormat::RGBA8_UNORM:
case RenderTargetFormat::RGBA8_SNORM:
@@ -61,12 +63,14 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
case RenderTargetFormat::RG16_FLOAT:
case RenderTargetFormat::R32_FLOAT:
case RenderTargetFormat::R11G11B10_FLOAT:
+ case RenderTargetFormat::R32_UINT:
return 4;
case RenderTargetFormat::R16_UNORM:
case RenderTargetFormat::R16_SNORM:
case RenderTargetFormat::R16_UINT:
case RenderTargetFormat::R16_SINT:
case RenderTargetFormat::R16_FLOAT:
+ case RenderTargetFormat::RG8_UNORM:
case RenderTargetFormat::RG8_SNORM:
return 2;
case RenderTargetFormat::R8_UNORM:
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index e008d8f26..8a90a3a66 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -20,8 +20,10 @@ enum class RenderTargetFormat : u32 {
NONE = 0x0,
RGBA32_FLOAT = 0xC0,
RGBA32_UINT = 0xC2,
+ RGBA16_UINT = 0xC9,
RGBA16_FLOAT = 0xCA,
RG32_FLOAT = 0xCB,
+ RG32_UINT = 0xCD,
BGRA8_UNORM = 0xCF,
RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
@@ -33,8 +35,10 @@ enum class RenderTargetFormat : u32 {
RG16_UINT = 0xDD,
RG16_FLOAT = 0xDE,
R11G11B10_FLOAT = 0xE0,
+ R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
B5G6R5_UNORM = 0xE8,
+ RG8_UNORM = 0xEA,
RG8_SNORM = 0xEB,
R16_UNORM = 0xEE,
R16_SNORM = 0xEF,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 84c250c63..4b48ab8e2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -101,6 +101,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
+ // Integer internal formats must be paired with an *_INTEGER pixel transfer format.
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
@@ -134,7 +135,10 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
@@ -234,32 +238,59 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
- MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>,
- MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>,
- MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>,
- MortonCopy<true, PixelFormat::R8UI>, MortonCopy<true, PixelFormat::RGBA16F>,
- MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
- MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
- MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
- MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>,
- MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
- MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>,
- MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>,
- MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>,
- MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::R16S>,
- MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>,
- MortonCopy<true, PixelFormat::RG16>, MortonCopy<true, PixelFormat::RG16F>,
- MortonCopy<true, PixelFormat::RG16UI>, MortonCopy<true, PixelFormat::RG16I>,
- MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>,
- MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::RG8S>,
- MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>,
- MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>,
+ // clang-format off
+ MortonCopy<true, PixelFormat::ABGR8U>,
+ MortonCopy<true, PixelFormat::ABGR8S>,
+ MortonCopy<true, PixelFormat::B5G6R5>,
+ MortonCopy<true, PixelFormat::A2B10G10R10>,
+ MortonCopy<true, PixelFormat::A1B5G5R5>,
+ MortonCopy<true, PixelFormat::R8>,
+ MortonCopy<true, PixelFormat::R8UI>,
+ MortonCopy<true, PixelFormat::RGBA16F>,
+ MortonCopy<true, PixelFormat::RGBA16UI>,
+ MortonCopy<true, PixelFormat::R11FG11FB10F>,
+ MortonCopy<true, PixelFormat::RGBA32UI>,
+ MortonCopy<true, PixelFormat::DXT1>,
+ MortonCopy<true, PixelFormat::DXT23>,
+ MortonCopy<true, PixelFormat::DXT45>,
+ MortonCopy<true, PixelFormat::DXN1>,
+ MortonCopy<true, PixelFormat::DXN2UNORM>,
+ MortonCopy<true, PixelFormat::DXN2SNORM>,
+ MortonCopy<true, PixelFormat::BC7U>,
+ MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
+ MortonCopy<true, PixelFormat::G8R8>,
+ MortonCopy<true, PixelFormat::BGRA8>,
+ MortonCopy<true, PixelFormat::RGBA32F>,
+ MortonCopy<true, PixelFormat::RG32F>,
+ MortonCopy<true, PixelFormat::R32F>,
+ MortonCopy<true, PixelFormat::R16F>,
+ MortonCopy<true, PixelFormat::R16UNORM>,
+ MortonCopy<true, PixelFormat::R16S>,
+ MortonCopy<true, PixelFormat::R16UI>,
+ MortonCopy<true, PixelFormat::R16I>,
+ MortonCopy<true, PixelFormat::RG16>,
+ MortonCopy<true, PixelFormat::RG16F>,
+ MortonCopy<true, PixelFormat::RG16UI>,
+ MortonCopy<true, PixelFormat::RG16I>,
+ MortonCopy<true, PixelFormat::RG16S>,
+ MortonCopy<true, PixelFormat::RGB32F>,
+ MortonCopy<true, PixelFormat::SRGBA8>,
+ MortonCopy<true, PixelFormat::RG8U>,
+ MortonCopy<true, PixelFormat::RG8S>,
+ MortonCopy<true, PixelFormat::RG32UI>,
+ MortonCopy<true, PixelFormat::R32UI>,
+ MortonCopy<true, PixelFormat::Z24S8>,
+ MortonCopy<true, PixelFormat::S8Z24>,
+ MortonCopy<true, PixelFormat::Z32F>,
+ MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z32FS8>,
+ // clang-format on
};
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
+ // clang-format off
MortonCopy<false, PixelFormat::ABGR8U>,
MortonCopy<false, PixelFormat::ABGR8S>,
MortonCopy<false, PixelFormat::B5G6R5>,
@@ -268,6 +299,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
MortonCopy<false, PixelFormat::R8>,
MortonCopy<false, PixelFormat::R8UI>,
MortonCopy<false, PixelFormat::RGBA16F>,
+ MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
// TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not
@@ -297,12 +329,16 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
MortonCopy<false, PixelFormat::RG16S>,
MortonCopy<false, PixelFormat::RGB32F>,
MortonCopy<false, PixelFormat::SRGBA8>,
+ MortonCopy<false, PixelFormat::RG8U>,
MortonCopy<false, PixelFormat::RG8S>,
+ MortonCopy<false, PixelFormat::RG32UI>,
+ MortonCopy<false, PixelFormat::R32UI>,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z32FS8>,
+ // clang-format on
};
// Allocate an uninitialized texture of appropriate size and format for the surface
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 202257b58..630b40e77 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -31,43 +31,47 @@ struct SurfaceParams {
R8 = 5,
R8UI = 6,
RGBA16F = 7,
- R11FG11FB10F = 8,
- RGBA32UI = 9,
- DXT1 = 10,
- DXT23 = 11,
- DXT45 = 12,
- DXN1 = 13, // This is also known as BC4
- DXN2UNORM = 14,
- DXN2SNORM = 15,
- BC7U = 16,
- ASTC_2D_4X4 = 17,
- G8R8 = 18,
- BGRA8 = 19,
- RGBA32F = 20,
- RG32F = 21,
- R32F = 22,
- R16F = 23,
- R16UNORM = 24,
- R16S = 25,
- R16UI = 26,
- R16I = 27,
- RG16 = 28,
- RG16F = 29,
- RG16UI = 30,
- RG16I = 31,
- RG16S = 32,
- RGB32F = 33,
- SRGBA8 = 34,
- RG8S = 35,
+ RGBA16UI = 8,
+ R11FG11FB10F = 9,
+ RGBA32UI = 10,
+ DXT1 = 11,
+ DXT23 = 12,
+ DXT45 = 13,
+ DXN1 = 14, // This is also known as BC4
+ DXN2UNORM = 15,
+ DXN2SNORM = 16,
+ BC7U = 17,
+ ASTC_2D_4X4 = 18,
+ G8R8 = 19,
+ BGRA8 = 20,
+ RGBA32F = 21,
+ RG32F = 22,
+ R32F = 23,
+ R16F = 24,
+ R16UNORM = 25,
+ R16S = 26,
+ R16UI = 27,
+ R16I = 28,
+ RG16 = 29,
+ RG16F = 30,
+ RG16UI = 31,
+ RG16I = 32,
+ RG16S = 33,
+ RGB32F = 34,
+ SRGBA8 = 35,
+ RG8U = 36,
+ RG8S = 37,
+ RG32UI = 38,
+ R32UI = 39,
MaxColorFormat,
// DepthStencil formats
- Z24S8 = 36,
- S8Z24 = 37,
- Z32F = 38,
- Z16 = 39,
- Z32FS8 = 40,
+ Z24S8 = 40,
+ S8Z24 = 41,
+ Z32F = 42,
+ Z16 = 43,
+ Z32FS8 = 44,
MaxDepthStencilFormat,
@@ -113,6 +117,7 @@ struct SurfaceParams {
1, // R8
1, // R8UI
1, // RGBA16F
+ 1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
4, // DXT1
@@ -140,7 +145,10 @@ struct SurfaceParams {
1, // RG16S
1, // RGB32F
1, // SRGBA8
+ 1, // RG8U
1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
1, // Z24S8
1, // S8Z24
1, // Z32F
@@ -165,6 +173,7 @@ struct SurfaceParams {
8, // R8
8, // R8UI
64, // RGBA16F
+ 64, // RGBA16UI
32, // R11FG11FB10F
128, // RGBA32UI
64, // DXT1
@@ -192,7 +201,10 @@ struct SurfaceParams {
32, // RG16S
96, // RGB32F
32, // SRGBA8
+ 16, // RG8U
16, // RG8S
+ 64, // RG32UI
+ 32, // R32UI
32, // Z24S8
32, // S8Z24
32, // Z32F
@@ -241,6 +253,8 @@ struct SurfaceParams {
return PixelFormat::A2B10G10R10;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
return PixelFormat::RGBA16F;
+ case Tegra::RenderTargetFormat::RGBA16_UINT:
+ return PixelFormat::RGBA16UI;
case Tegra::RenderTargetFormat::RGBA32_FLOAT:
return PixelFormat::RGBA32F;
case Tegra::RenderTargetFormat::RG32_FLOAT:
@@ -265,6 +279,8 @@ struct SurfaceParams {
return PixelFormat::RG16;
case Tegra::RenderTargetFormat::RG16_SNORM:
return PixelFormat::RG16S;
+ case Tegra::RenderTargetFormat::RG8_UNORM:
+ return PixelFormat::RG8U;
case Tegra::RenderTargetFormat::RG8_SNORM:
return PixelFormat::RG8S;
case Tegra::RenderTargetFormat::R16_FLOAT:
@@ -279,6 +295,10 @@ struct SurfaceParams {
return PixelFormat::R16I;
case Tegra::RenderTargetFormat::R32_FLOAT:
return PixelFormat::R32F;
+ case Tegra::RenderTargetFormat::R32_UINT:
+ return PixelFormat::R32UI;
+ case Tegra::RenderTargetFormat::RG32_UINT:
+ return PixelFormat::RG32UI;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -332,7 +352,15 @@ struct SurfaceParams {
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32:
- return PixelFormat::RG32F;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RG32F;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::RG32UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32_B32:
return PixelFormat::RGB32F;
case Tegra::Texture::TextureFormat::R16:
@@ -352,7 +380,15 @@ struct SurfaceParams {
static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32:
- return PixelFormat::R32F;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::R32F;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::R32UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::ZF32:
return PixelFormat::Z32F;
case Tegra::Texture::TextureFormat::Z24S8:
@@ -432,6 +468,7 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RG16_UNORM:
case Tegra::RenderTargetFormat::R16_UNORM:
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+ case Tegra::RenderTargetFormat::RG8_UNORM:
return ComponentType::UNorm;
case Tegra::RenderTargetFormat::RGBA8_SNORM:
case Tegra::RenderTargetFormat::RG16_SNORM:
@@ -447,9 +484,12 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::R32_FLOAT:
return ComponentType::Float;
case Tegra::RenderTargetFormat::RGBA32_UINT:
+ case Tegra::RenderTargetFormat::RGBA16_UINT:
case Tegra::RenderTargetFormat::RG16_UINT:
case Tegra::RenderTargetFormat::R8_UINT:
case Tegra::RenderTargetFormat::R16_UINT:
+ case Tegra::RenderTargetFormat::RG32_UINT:
+ case Tegra::RenderTargetFormat::R32_UINT:
return ComponentType::UInt;
case Tegra::RenderTargetFormat::RG16_SINT:
case Tegra::RenderTargetFormat::R16_SINT:
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 679e5ceb2..83ea0cfc0 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -27,6 +27,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Type::UnsignedNorm: {
switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;