Compare commits

...

3 commits

Author SHA1 Message Date
e81a358d83 [gpu/nvdrv] Remove redundant whitespace
All checks were successful
eden-license / license-header (pull_request) Successful in 36s
2025-09-18 16:16:15 +02:00
7faa63ea4e [gpu/nvdrv] Unstub SetErrorNotifier, add PostErrorNotification function 2025-09-18 16:16:15 +02:00
cf634d4d6f
[gpu/nvdrv] Rewrite ZBC functions (#2501)
This rewrite attempts to implement a fully correct ZBC (Zero Bandwidth Clear) mechanism.
The zbc_mutex attempts to mitigate contention by ensuring that only threads which hold the mutex can modify the table.
Notify drivers about the index size; I believe some drivers even need the notification.
Only add new entries if an entry was not previously available.

Reviewed-on: #2501
Reviewed-by: Shinmegumi <shinmegumi@eden-emu.dev>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: crueter <crueter@eden-emu.dev>
Co-authored-by: SDK-Chan <sdkchan@eden-emu.dev>
Co-committed-by: SDK-Chan <sdkchan@eden-emu.dev>
2025-09-18 14:46:53 +02:00
4 changed files with 245 additions and 65 deletions

View file

@ -256,61 +256,103 @@ NvResult nvhost_ctrl_gpu::ZCullGetInfo(IoctlNvgpuGpuZcullGetInfoArgs& params) {
}
// Registers a ZBC (zero-bandwidth clear) color or depth entry. Identical
// entries are deduplicated by bumping their reference count; new values are
// appended to the corresponding table.
NvResult nvhost_ctrl_gpu::ZBCSetTable(IoctlZbcSetTable& params) {
    LOG_DEBUG(Service_NVDRV, "called");

    // Reject anything above the known ZBC types (1 = color, 2 = depth).
    // NOTE(review): type 0 passes this guard but matches no switch case and is
    // silently accepted as a no-op — confirm that is the intended behaviour.
    if (params.type > supported_types) {
        LOG_ERROR(Service_NVDRV, "ZBCSetTable: invalid type {:#X}", params.type);
        return NvResult::BadParameter;
    }

    // Only one thread at a time may mutate the ZBC tables.
    std::scoped_lock lk(zbc_mutex);

    switch (static_cast<ZBCTypes>(params.type)) {
    case ZBCTypes::color: {
        ZbcColorEntry color_entry{};
        std::copy_n(std::begin(params.color_ds), color_entry.color_ds.size(),
                    color_entry.color_ds.begin());
        std::copy_n(std::begin(params.color_l2), color_entry.color_l2.size(),
                    color_entry.color_l2.begin());
        color_entry.format = params.format;
        color_entry.ref_cnt = 1u;

        // Deduplicate: bump the refcount when an identical (format, value)
        // entry already exists, otherwise append a new one.
        auto color_it = std::ranges::find_if(zbc_colors, [&](const ZbcColorEntry& existing) {
            return color_entry.format == existing.format &&
                   color_entry.color_ds == existing.color_ds &&
                   color_entry.color_l2 == existing.color_l2;
        });
        if (color_it != zbc_colors.end()) {
            ++color_it->ref_cnt;
            LOG_DEBUG(Service_NVDRV, "ZBCSetTable: reused color entry fmt={:#X}, ref_cnt={:#X}",
                      params.format, color_it->ref_cnt);
        } else {
            zbc_colors.push_back(color_entry);
            LOG_DEBUG(Service_NVDRV, "ZBCSetTable: added color entry fmt={:#X}, index={:#X}",
                      params.format, zbc_colors.size() - 1);
        }
        break;
    }
    case ZBCTypes::depth: {
        ZbcDepthEntry depth_entry{params.depth, params.format, 1u};
        auto depth_it = std::ranges::find_if(zbc_depths, [&](const ZbcDepthEntry& existing) {
            return depth_entry.format == existing.format && depth_entry.depth == existing.depth;
        });
        if (depth_it != zbc_depths.end()) {
            ++depth_it->ref_cnt;
            LOG_DEBUG(Service_NVDRV, "ZBCSetTable: reused depth entry fmt={:#X}, ref_cnt={:#X}",
                      depth_entry.format, depth_it->ref_cnt);
        } else {
            zbc_depths.push_back(depth_entry);
            LOG_DEBUG(Service_NVDRV, "ZBCSetTable: added depth entry fmt={:#X}, index={:#X}",
                      depth_entry.format, zbc_depths.size() - 1);
        }
        // Was missing: falling out of the last case was benign but fragile if
        // another case is ever appended.
        break;
    }
    }
    return NvResult::Success;
}
// Looks up a ZBC color or depth entry. On input params.index_size is the
// lookup index; on output it reports the current table size so drivers can
// learn how many entries exist.
NvResult nvhost_ctrl_gpu::ZBCQueryTable(IoctlZbcQueryTable& params) {
    LOG_DEBUG(Service_NVDRV, "called");

    // NOTE(review): the previous implementation answered type 0 with
    // index_size = 15; type 0 now passes this guard, matches no case and the
    // params are returned unchanged — confirm this is intended.
    if (params.type > supported_types) {
        LOG_ERROR(Service_NVDRV, "ZBCQueryTable: invalid type {:#X}", params.type);
        return NvResult::BadParameter;
    }

    // Hold the table lock for the whole read so a concurrent ZBCSetTable
    // cannot reallocate the vectors underneath us.
    std::scoped_lock lk(zbc_mutex);

    switch (static_cast<ZBCTypes>(params.type)) {
    case ZBCTypes::color: {
        if (params.index_size >= zbc_colors.size()) {
            LOG_ERROR(Service_NVDRV, "ZBCQueryTable: invalid color index {:#X}", params.index_size);
            return NvResult::BadParameter;
        }
        const auto& colors = zbc_colors[params.index_size];
        std::copy_n(colors.color_ds.begin(), colors.color_ds.size(), std::begin(params.color_ds));
        std::copy_n(colors.color_l2.begin(), colors.color_l2.size(), std::begin(params.color_l2));
        // Zero the field that does not apply to this entry type.
        params.depth = 0;
        params.ref_cnt = colors.ref_cnt;
        params.format = colors.format;
        params.index_size = static_cast<u32>(zbc_colors.size());
        break;
    }
    case ZBCTypes::depth: {
        if (params.index_size >= zbc_depths.size()) {
            LOG_ERROR(Service_NVDRV, "ZBCQueryTable: invalid depth index {:#X}", params.index_size);
            return NvResult::BadParameter;
        }
        const auto& depth_entry = zbc_depths[params.index_size];
        // Zero the color fields so stale caller data is not returned.
        std::fill(std::begin(params.color_ds), std::end(params.color_ds), 0);
        std::fill(std::begin(params.color_l2), std::end(params.color_l2), 0);
        params.depth = depth_entry.depth;
        params.ref_cnt = depth_entry.ref_cnt;
        params.format = depth_entry.format;
        params.index_size = static_cast<u32>(zbc_depths.size());
        // Was missing: keep every case explicitly terminated.
        break;
    }
    }
    return NvResult::Success;
}

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -34,6 +37,11 @@ public:
Kernel::KEvent* QueryEvent(u32 event_id) override;
private:
// ZBC table categories accepted by the ZBC ioctls; matches the `type` field
// of IoctlZbcSetTable / IoctlZbcQueryTable (upper bound: supported_types).
enum class ZBCTypes {
color = 1,
depth = 2,
};
struct IoctlGpuCharacteristics {
u32_le arch; // 0x120 (NVGPU_GPU_ARCH_GM200)
u32_le impl; // 0xB (NVGPU_GPU_IMPL_GM20B)
@ -139,6 +147,21 @@ private:
};
static_assert(sizeof(IoctlZbcQueryTable) == 52, "IoctlZbcQueryTable is incorrect size");
// One ZBC color-clear table entry. color_ds and color_l2 carry the clear
// value for the DS unit and the L2 cache respectively; ref_cnt counts how
// many times the same (format, color_ds, color_l2) tuple was registered.
struct ZbcColorEntry {
std::array<u32, 4> color_ds{};
std::array<u32, 4> color_l2{};
u32 format{};
u32 ref_cnt{};
};
static_assert(sizeof(ZbcColorEntry) == 40, "ZbcColorEntry is incorrect size");
// One ZBC depth-clear table entry; ref_cnt counts registrations of the same
// (format, depth) pair.
struct ZbcDepthEntry {
u32 depth{};
u32 format{};
u32 ref_cnt{};
};
static_assert(sizeof(ZbcDepthEntry) == 12, "ZbcDepthEntry is incorrect size");
struct IoctlFlushL2 {
u32_le flush; // l2_flush | l2_invalidate << 1 | fb_flush << 2
u32_le reserved;
@ -182,17 +205,11 @@ private:
Kernel::KEvent* error_notifier_event;
Kernel::KEvent* unknown_event;
struct ZbcEntry {
u32_le color_ds[4];
u32_le color_l2[4];
u32_le depth;
u32_le type;
u32_le format;
};
std::array<ZbcEntry, 16> color_entries;
std::array<ZbcEntry, 16> depth_entries;
u8 max_color_entries;
u8 max_depth_entries;
// ZBC Tables
std::mutex zbc_mutex{};
std::vector<ZbcColorEntry> zbc_colors{};
std::vector<ZbcDepthEntry> zbc_depths{};
const u32 supported_types = 2u;
};
} // namespace Service::Nvidia::Devices

View file

@ -8,6 +8,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/service/nvdrv/core/container.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
@ -161,11 +162,114 @@ NvResult nvhost_gpu::ZCullBind(IoctlZCullBind& params) {
}
// Registers (or, with mem == 0 / size == 0, disables) the guest buffer that
// receives error-notification records, and seeds it with a NoError record.
NvResult nvhost_gpu::SetErrorNotifier(IoctlSetErrorNotifier& params) {
    LOG_DEBUG(Service_NVDRV, "called, offset={:#X}, size={:#X}, mem={:#X}",
              params.offset, params.size, params.mem);

    // A zero handle or zero size disables the notifier.
    if (!params.mem || !params.size) {
        std::scoped_lock lk(channel_mutex);
        if (!channel_state->initialized) {
            LOG_CRITICAL(Service_NVDRV, "No address space bound for setting up error notifier!");
            return NvResult::NotInitialized;
        }
        error_notifier_params = {};
        LOG_DEBUG(Service_NVDRV, "Error notifier disabled!");
        return NvResult::Success;
    }

    // The target buffer must be able to hold at least one full record.
    constexpr u64 error_notification_size = sizeof(IoctlGetErrorNotification);
    if (params.size < error_notification_size) {
        LOG_ERROR(Service_NVDRV, "Error notification size: {:#X} is too small. Need at least {:#X}",
                  params.size, error_notification_size);
        return NvResult::InvalidSize;
    }

    auto handle = nvmap.GetHandle(static_cast<NvCore::NvMap::Handle::Id>(params.mem));
    if (!handle) {
        LOG_ERROR(Service_NVDRV, "Unknown nvmap handle id {:#X}", params.mem);
        return NvResult::BadParameter;
    }

    // Bounds-check the (offset, size) window against the nvmap allocation;
    // the subtraction form avoids unsigned overflow in offset + size.
    if (params.offset > handle->size || params.size > (handle->size - params.offset)) {
        LOG_ERROR(Service_NVDRV,
                  "Error notifier out of bounds: offset={:#X} size={:#X} handle size={:#X}",
                  params.offset, params.size, handle->size);
        return NvResult::InvalidSize;
    }

    // Snapshot what is needed for the initialization write under the lock,
    // then perform the guest-memory write outside of it. Brace-initialized so
    // no path can observe indeterminate values.
    u64 write_address{};
    u64 write_offset{};
    u64 handle_id{};
    {
        std::scoped_lock lk(channel_mutex);
        if (!channel_state->initialized) {
            LOG_CRITICAL(Service_NVDRV, "No address space bound for setting up error notifier!");
            return NvResult::NotInitialized;
        }
        error_notifier_params = params;
        write_address = handle->address;
        write_offset = params.offset;
        handle_id = handle->id;
    }

    if (write_address) {
        // Seed the buffer with the idle "no error" record.
        IoctlGetErrorNotification error_notification{};
        error_notification.status = static_cast<u16>(NotifierStatus::NoError);
        system.ApplicationMemory().WriteBlock(write_address + write_offset, &error_notification,
                                              sizeof(error_notification));
    } else {
        LOG_WARNING(Service_NVDRV,
                    "nvmap handle id {:#X} has no virtual address assigned; "
                    "skipping initialization write for error notification!",
                    handle_id);
    }
    return NvResult::Success;
}
// Writes one IoctlGetErrorNotification record (info32/info16/status) into the
// guest buffer registered via SetErrorNotifier and signals the error notifier
// event, if one exists. Returns silently when no notifier is configured or
// the target buffer is invalid.
void nvhost_gpu::PostErrorNotification(u32 info32, u16 info16, NotifierStatus status) {
// Snapshot the notifier parameters and event under the channel lock so the
// guest-memory write below can run without holding the mutex.
IoctlSetErrorNotifier error_notifier_params_snapshot{};
Kernel::KEvent *error_notifier_event_snapshot{};
{
std::scoped_lock lk(channel_mutex);
error_notifier_params_snapshot = error_notifier_params;
error_notifier_event_snapshot = error_notifier_event;
}
// Not configured (mem == 0), or the registered buffer cannot hold one full
// record: nothing to post.
if (!error_notifier_params_snapshot.mem || error_notifier_params_snapshot.size < sizeof(IoctlGetErrorNotification)) {
LOG_DEBUG(Service_NVDRV, "PostErrorNotification: notifier not configured or too small!");
return;
}
auto handle = nvmap.GetHandle(static_cast<NvCore::NvMap::Handle::Id>(error_notifier_params_snapshot.mem));
if (!handle || !handle->address) {
LOG_ERROR(Service_NVDRV, "PostErrorNotification: invalid handle or virtual address!");
return;
}
// Assemble the record to deliver.
IoctlGetErrorNotification error_init{};
error_init.info32 = info32;
error_init.info16 = info16;
error_init.status = static_cast<u16>(status);
// Never write more than one record, even if the registered size is larger.
const u64 write_size = std::min<u64>(sizeof(IoctlGetErrorNotification),
error_notifier_params_snapshot.size);
// Re-validate against the current nvmap allocation; subtraction form avoids
// unsigned overflow of offset + write_size.
if (error_notifier_params_snapshot.offset >= handle->size ||
write_size > (handle->size - error_notifier_params_snapshot.offset)) {
LOG_ERROR(Service_NVDRV, "PostErrorNotification: bounds check failed!");
return;
}
const u64 virtual_address = handle->address + error_notifier_params_snapshot.offset;
// Defensive check against address arithmetic wrap-around.
if (virtual_address < handle->address) {
LOG_ERROR(Service_NVDRV, "PostErrorNotification: virtual address overflow!");
return;
}
auto &application_memory = system.ApplicationMemory();
application_memory.WriteBlock(virtual_address, &error_init, write_size);
// Wake any waiter on the error notifier event.
if (error_notifier_event_snapshot) {
error_notifier_event_snapshot->Signal();
}
}
NvResult nvhost_gpu::SetChannelPriority(IoctlChannelSetPriority& params) {
channel_priority = params.priority;
LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority);
@ -251,7 +355,7 @@ NvResult nvhost_gpu::AllocateObjectContext(IoctlAllocObjCtx& params) {
params.flags = allowed_mask;
}
s32_le ctx_class_number_index =
s32_le ctx_class_number_index =
GetObjectContextClassNumberIndex(static_cast<CtxClasses>(params.class_num));
if (ctx_class_number_index < 0) {
LOG_ERROR(Service_NVDRV, "Invalid class number for object context: {:#X}",
@ -324,6 +428,7 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandL
if (flags.fence_wait.Value()) {
if (flags.increment_value.Value()) {
PostErrorNotification(flags.raw, 0, NotifierStatus::GenericError);
return NvResult::BadParameter;
}
@ -357,7 +462,11 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandL
NvResult nvhost_gpu::SubmitGPFIFOBase1(IoctlSubmitGpfifo& params,
std::span<Tegra::CommandListHeader> commands, bool kickoff) {
if (params.num_entries > commands.size()) {
UNIMPLEMENTED();
LOG_ERROR(Service_NVDRV,
"SubmitGPFIFO: num_entries={:#X} > provided commands={:#X}",
params.num_entries, commands.size());
PostErrorNotification(params.num_entries, 0, NotifierStatus::BadGpfifo);
return NvResult::InvalidSize;
}
@ -376,7 +485,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase1(IoctlSubmitGpfifo& params,
NvResult nvhost_gpu::SubmitGPFIFOBase2(IoctlSubmitGpfifo& params,
std::span<const Tegra::CommandListHeader> commands) {
if (params.num_entries > commands.size()) {
UNIMPLEMENTED();
PostErrorNotification(params.num_entries, 0, NotifierStatus::BadGpfifo);
return NvResult::InvalidSize;
}

View file

@ -66,6 +66,16 @@ private:
CtxChannelGPFIFO = 0xB06F,
};
// Status codes written into the error-notification record delivered to the
// guest (see SetErrorNotifier / PostErrorNotification). NoError (0xFFFF) is
// the idle value seeded when the notifier is first registered.
enum class NotifierStatus : u16_le {
NoError = 0xFFFF,
GenericError = 0x0001,
MmuFault = 0x0002,
IllegalMethod= 0x0003,
InvalidObject= 0x0004,
BadGpfifo = 0x0005,
TimeoutHang = 0x0006,
};
struct IoctlSetNvmapFD {
s32_le nvmap_fd{};
};
@ -172,6 +182,8 @@ private:
s32_le nvmap_fd{};
u64_le user_data{};
IoctlZCullBind zcull_params{};
IoctlSetErrorNotifier error_notifier_params{};
void PostErrorNotification(u32 info32, u16 info16, NotifierStatus status);
std::array<std::optional<IoctlAllocObjCtx>, 6> ctxObjs{};
u32_le channel_priority{};
u32_le channel_timeslice{};