1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <atomic>
#include <bit>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>
#include "core/device_memory_manager.h"
namespace Core {
class GPUDirtyMemoryManager {
public:
GPUDirtyMemoryManager() : current{default_transform} {
back_buffer.reserve(256);
front_buffer.reserve(256);
}
~GPUDirtyMemoryManager() = default;
void Collect(PAddr address, size_t size) {
TransformAddress t = BuildTransform(address, size);
TransformAddress tmp, original;
do {
tmp = current.load(std::memory_order_acquire);
original = tmp;
if (tmp.address != t.address) {
if (IsValid(tmp.address)) {
std::scoped_lock lk(guard);
back_buffer.emplace_back(tmp);
current.exchange(t, std::memory_order_relaxed);
return;
}
tmp.address = t.address;
tmp.mask = 0;
}
if ((tmp.mask | t.mask) == tmp.mask) {
return;
}
tmp.mask |= t.mask;
} while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
std::memory_order_relaxed));
}
void Gather(std::function<void(PAddr, size_t)>& callback) {
{
std::scoped_lock lk(guard);
TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
front_buffer.swap(back_buffer);
if (IsValid(t.address)) {
front_buffer.emplace_back(t);
}
}
for (auto& transform : front_buffer) {
size_t offset = 0;
u64 mask = transform.mask;
while (mask != 0) {
const size_t empty_bits = std::countr_zero(mask);
offset += empty_bits << align_bits;
mask = mask >> empty_bits;
const size_t continuous_bits = std::countr_one(mask);
callback((static_cast<PAddr>(transform.address) << page_bits) + offset,
continuous_bits << align_bits);
mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
offset += continuous_bits << align_bits;
}
}
front_buffer.clear();
}
private:
struct alignas(8) TransformAddress {
u32 address;
u32 mask;
};
constexpr static size_t page_bits = DEVICE_PAGEBITS - 1;
constexpr static size_t page_size = 1ULL << page_bits;
constexpr static size_t page_mask = page_size - 1;
constexpr static size_t align_bits = 6U;
constexpr static size_t align_size = 1U << align_bits;
constexpr static size_t align_mask = align_size - 1;
constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
bool IsValid(PAddr address) {
return address < (1ULL << 39);
}
template <typename T>
T CreateMask(size_t top_bit, size_t minor_bit) {
T mask = ~T(0);
mask <<= (sizeof(T) * 8 - top_bit);
mask >>= (sizeof(T) * 8 - top_bit);
mask >>= minor_bit;
mask <<= minor_bit;
return mask;
}
TransformAddress BuildTransform(PAddr address, size_t size) {
const size_t minor_address = address & page_mask;
const size_t minor_bit = minor_address >> align_bits;
const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
TransformAddress result{};
result.address = static_cast<u32>(address >> page_bits);
result.mask = CreateMask<u32>(top_bit, minor_bit);
return result;
}
std::atomic<TransformAddress> current{};
std::mutex guard;
std::vector<TransformAddress> back_buffer;
std::vector<TransformAddress> front_buffer;
};
} // namespace Core
|