Use consistent names for source and destination.

weakboson 2025-07-26 23:20:28 +08:00
parent ebefd13e5c
commit 05e31d1e76

@@ -187,34 +187,34 @@ void BufferCache<P>::ClearDownload(DAddr device_addr, u64 size) {
 }

 template <class P>
-bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dst_address, u64 amount) {
     const std::optional<DAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
-    const std::optional<DAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
-    if (!cpu_src_address || !cpu_dest_address) {
+    const std::optional<DAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
+    if (!cpu_src_address || !cpu_dst_address) {
         return false;
     }
-    const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount);
-    const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount);
-    if (!source_dirty && !dest_dirty) {
+    const bool src_dirty = IsRegionRegistered(*cpu_src_address, amount);
+    const bool dst_dirty = IsRegionRegistered(*cpu_dst_address, amount);
+    if (!src_dirty && !dst_dirty) {
         return false;
     }

-    ClearDownload(*cpu_dest_address, amount);
+    ClearDownload(*cpu_dst_address, amount);

     BufferId buffer_a;
     BufferId buffer_b;
     do {
         channel_state->has_deleted_buffers = false;
         buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
-        buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
+        buffer_b = FindBuffer(*cpu_dst_address, static_cast<u32>(amount));
     } while (channel_state->has_deleted_buffers);
     auto& src_buffer = slot_buffers[buffer_a];
-    auto& dest_buffer = slot_buffers[buffer_b];
+    auto& dst_buffer = slot_buffers[buffer_b];
     SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
-    SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount));
+    SynchronizeBuffer(dst_buffer, *cpu_dst_address, static_cast<u32>(amount));

     std::array copies{BufferCopy{
         .src_offset = src_buffer.Offset(*cpu_src_address),
-        .dst_offset = dest_buffer.Offset(*cpu_dest_address),
+        .dst_offset = dst_buffer.Offset(*cpu_dst_address),
         .size = amount,
     }};
@@ -222,28 +222,28 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     auto mirror = [&](DAddr base_address, DAddr base_address_end) {
         const u64 size = base_address_end - base_address;
         const DAddr diff = base_address - *cpu_src_address;
-        const DAddr new_base_address = *cpu_dest_address + diff;
+        const DAddr new_base_address = *cpu_dst_address + diff;
         tmp_intervals.push_back({new_base_address, size});
         uncommitted_gpu_modified_ranges.Add(new_base_address, size);
     };
     gpu_modified_ranges.ForEachInRange(*cpu_src_address, amount, mirror);
     // This subtraction in this order is important for overlapping copies.
-    gpu_modified_ranges.Subtract(*cpu_dest_address, amount);
+    gpu_modified_ranges.Subtract(*cpu_dst_address, amount);
     const bool has_new_downloads = tmp_intervals.size() != 0;
     for (const auto& pair : tmp_intervals) {
         gpu_modified_ranges.Add(pair.first, pair.second);
     }
     const auto& copy = copies[0];
     src_buffer.MarkUsage(copy.src_offset, copy.size);
-    dest_buffer.MarkUsage(copy.dst_offset, copy.size);
-    runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
+    dst_buffer.MarkUsage(copy.dst_offset, copy.size);
+    runtime.CopyBuffer(dst_buffer, src_buffer, copies, true);
     if (has_new_downloads) {
-        memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
+        memory_tracker.MarkRegionAsGpuModified(*cpu_dst_address, amount);
     }

     Tegra::Memory::DeviceGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite>
         tmp(device_memory, *cpu_src_address, amount, &tmp_buffer);
-    tmp.SetAddressAndSize(*cpu_dest_address, amount);
+    tmp.SetAddressAndSize(*cpu_dst_address, amount);
     return true;
 }
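A note on the hunk above: the mirror lambda remaps GPU-modified intervals from the source window into the destination window, the destination range is then subtracted, and the remapped intervals are re-added; doing the subtraction between those two steps is what keeps overlapping copies correct. Below is a minimal standalone sketch of just the remapping step, using a plain vector of (address, size) pairs instead of the cache's actual gpu_modified_ranges container; MirrorModifiedRanges and Range are illustrative names, not part of the codebase.

// Sketch only: mirrors modified ranges from [src, src + amount) into the
// destination window, the same translation the mirror lambda performs.
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

using DAddr = std::uint64_t;
using Range = std::pair<DAddr, std::uint64_t>; // {base address, size in bytes}

std::vector<Range> MirrorModifiedRanges(const std::vector<Range>& modified,
                                        DAddr src, DAddr dst, std::uint64_t amount) {
    std::vector<Range> mirrored;
    for (const auto& [base, size] : modified) {
        // Clamp each modified range to the copied source window.
        const DAddr begin = std::max(base, src);
        const DAddr end = std::min(base + size, src + amount);
        if (begin >= end) {
            continue; // no overlap with the copy
        }
        const DAddr diff = begin - src; // offset of the range inside the copy
        mirrored.push_back({dst + diff, end - begin});
    }
    return mirrored;
}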
@@ -253,8 +253,8 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
     if (!cpu_dst_address) {
         return false;
     }
-    const bool dest_dirty = IsRegionRegistered(*cpu_dst_address, amount);
-    if (!dest_dirty) {
+    const bool dst_dirty = IsRegionRegistered(*cpu_dst_address, amount);
+    if (!dst_dirty) {
         return false;
     }
@@ -263,10 +263,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
     gpu_modified_ranges.Subtract(*cpu_dst_address, size);

     const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
-    Buffer& dest_buffer = slot_buffers[buffer];
-    const u32 offset = dest_buffer.Offset(*cpu_dst_address);
-    runtime.ClearBuffer(dest_buffer, offset, size, value);
-    dest_buffer.MarkUsage(offset, size);
+    Buffer& dst_buffer = slot_buffers[buffer];
+    const u32 offset = dst_buffer.Offset(*cpu_dst_address);
+    runtime.ClearBuffer(dst_buffer, offset, size, value);
+    dst_buffer.MarkUsage(offset, size);
     return true;
 }
@@ -1074,8 +1074,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
         u32 buffer_size = Common::AlignUp(inline_index_size, CACHING_PAGESIZE);
         if (inline_buffer_id == NULL_BUFFER_ID) [[unlikely]] {
             inline_buffer_id = CreateBuffer(0, buffer_size);
-        }
-        if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] {
+        } else if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] {
             slot_buffers.erase(inline_buffer_id);
             inline_buffer_id = CreateBuffer(0, buffer_size);
         }
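Besides the renames, this hunk is the one behavioural tweak in the commit: a buffer created in the first branch already has buffer_size bytes, so re-checking its size immediately afterwards is redundant, and folding the second check into an else if skips that re-check. A minimal sketch of the same create-or-grow pattern follows; the Buffer stand-in and EnsureCapacity are illustrative only, not the cache's real types.

#include <cstddef>
#include <memory>
#include <vector>

// Stand-in for a GPU buffer, for illustration only.
struct Buffer {
    std::vector<std::byte> storage;
    std::size_t SizeBytes() const { return storage.size(); }
};

// A buffer created here is already `required` bytes, so the grow branch only
// needs to run for a buffer that existed before the call (hence `else if`).
std::unique_ptr<Buffer> EnsureCapacity(std::unique_ptr<Buffer> buffer, std::size_t required) {
    if (buffer == nullptr) [[unlikely]] {
        buffer = std::make_unique<Buffer>(Buffer{std::vector<std::byte>(required)});
    } else if (buffer->SizeBytes() < required) [[unlikely]] {
        // Too small: drop the old buffer and recreate it at the new size.
        buffer = std::make_unique<Buffer>(Buffer{std::vector<std::byte>(required)});
    }
    return buffer;
}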
@@ -1529,38 +1528,38 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
 }

 template <class P>
-bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size,
+bool BufferCache<P>::InlineMemory(DAddr dst_address, size_t copy_size,
                                   std::span<const u8> inlined_buffer) {
-    const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
+    const bool is_dirty = IsRegionRegistered(dst_address, copy_size);
     if (!is_dirty) {
         return false;
     }
-    DAddr aligned_start = Common::AlignDown(dest_address, DEVICE_PAGESIZE);
-    DAddr aligned_end = Common::AlignUp(dest_address + copy_size, DEVICE_PAGESIZE);
+    DAddr aligned_start = Common::AlignDown(dst_address, DEVICE_PAGESIZE);
+    DAddr aligned_end = Common::AlignUp(dst_address + copy_size, DEVICE_PAGESIZE);
     if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
         return false;
     }

-    InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
+    InlineMemoryImplementation(dst_address, copy_size, inlined_buffer);

     return true;
 }

 template <class P>
-void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
+void BufferCache<P>::InlineMemoryImplementation(DAddr dst_address, size_t copy_size,
                                                 std::span<const u8> inlined_buffer) {
-    ClearDownload(dest_address, copy_size);
-    gpu_modified_ranges.Subtract(dest_address, copy_size);
+    ClearDownload(dst_address, copy_size);
+    gpu_modified_ranges.Subtract(dst_address, copy_size);

-    BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
+    BufferId buffer_id = FindBuffer(dst_address, static_cast<u32>(copy_size));
     auto& buffer = slot_buffers[buffer_id];
-    SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
+    SynchronizeBuffer(buffer, dst_address, static_cast<u32>(copy_size));

     if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
         auto upload_staging = runtime.UploadStagingBuffer(copy_size);
         std::array copies{BufferCopy{
             .src_offset = upload_staging.offset,
-            .dst_offset = buffer.Offset(dest_address),
+            .dst_offset = buffer.Offset(dst_address),
             .size = copy_size,
         }};
         u8* const src_pointer = upload_staging.mapped_span.data();
@@ -1568,7 +1567,7 @@ void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_
         const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
         runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     } else {
-        buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
+        buffer.ImmediateUpload(buffer.Offset(dst_address), inlined_buffer.first(copy_size));
     }
 }
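For context on InlineMemory above: the written region is rounded out to whole device pages before the GPU-modified query, so the check covers [AlignDown(dst_address, DEVICE_PAGESIZE), AlignUp(dst_address + copy_size, DEVICE_PAGESIZE)). A small sketch of that rounding follows, assuming a 4 KiB page size; the AlignDown/AlignUp helpers here are stand-ins for the project's Common:: helpers and only handle power-of-two alignments.

#include <cstdint>

constexpr std::uint64_t kDevicePageSize = 4096; // assumed page size, for illustration

constexpr std::uint64_t AlignDown(std::uint64_t value, std::uint64_t align) {
    return value & ~(align - 1); // valid for power-of-two alignments
}

constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) & ~(align - 1);
}

// A 16-byte inline write at 0x1234 touches only the page at 0x1000, so the
// GPU-modified query spans [0x1000, 0x2000).
static_assert(AlignDown(0x1234, kDevicePageSize) == 0x1000);
static_assert(AlignUp(0x1234 + 0x10, kDevicePageSize) == 0x2000);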